garage/src/api/s3_get.rs

363 lines
9.8 KiB
Rust
Raw Normal View History

2021-03-26 21:32:09 +00:00
//! Functions related to GET and HEAD requests
2020-04-24 18:47:11 +00:00
use std::sync::Arc;
use std::time::{Duration, UNIX_EPOCH};
use futures::stream::*;
use http::header::{
ACCEPT_RANGES, CONTENT_LENGTH, CONTENT_RANGE, CONTENT_TYPE, ETAG, IF_MODIFIED_SINCE,
IF_NONE_MATCH, LAST_MODIFIED,
};
2020-04-24 18:47:11 +00:00
use hyper::body::Bytes;
2020-05-04 13:09:23 +00:00
use hyper::{Body, Request, Response, StatusCode};
2020-04-24 18:47:11 +00:00
use garage_table::EmptyKey;
2021-12-14 12:55:11 +00:00
use garage_util::data::*;
2020-04-24 18:47:11 +00:00
2020-07-07 11:59:22 +00:00
use garage_model::garage::Garage;
use garage_model::object_table::*;
2020-04-24 18:47:11 +00:00
2020-11-08 14:04:30 +00:00
use crate::error::*;
2020-07-08 15:34:37 +00:00
fn object_headers(
version: &ObjectVersion,
version_meta: &ObjectVersionMeta,
) -> http::response::Builder {
debug!("Version meta: {:?}", version_meta);
2020-04-24 18:47:11 +00:00
let date = UNIX_EPOCH + Duration::from_millis(version.timestamp);
let date_str = httpdate::fmt_http_date(date);
2020-07-09 15:04:43 +00:00
let mut resp = Response::builder()
.header(CONTENT_TYPE, version_meta.headers.content_type.to_string())
.header(LAST_MODIFIED, date_str)
.header(ACCEPT_RANGES, "bytes".to_string());
2020-07-09 15:04:43 +00:00
2020-12-05 18:23:46 +00:00
if !version_meta.etag.is_empty() {
resp = resp.header(ETAG, format!("\"{}\"", version_meta.etag));
2020-12-05 18:23:46 +00:00
}
for (k, v) in version_meta.headers.other.iter() {
resp = resp.header(k, v.to_string());
}
2020-07-09 15:04:43 +00:00
resp
2020-04-24 18:47:11 +00:00
}
2021-03-18 14:46:33 +00:00
fn try_answer_cached(
version: &ObjectVersion,
version_meta: &ObjectVersionMeta,
req: &Request<Body>,
) -> Option<Response<Body>> {
2021-03-18 18:46:43 +00:00
// <trinity> It is possible, and is even usually the case, [that both If-None-Match and
// If-Modified-Since] are present in a request. In this situation If-None-Match takes
// precedence and If-Modified-Since is ignored (as per 6.Precedence from rfc7232). The rational
// being that etag based matching is more accurate, it has no issue with sub-second precision
// for instance (in case of very fast updates)
let cached = if let Some(none_match) = req.headers().get(IF_NONE_MATCH) {
2021-03-18 14:46:33 +00:00
let none_match = none_match.to_str().ok()?;
let expected = format!("\"{}\"", version_meta.etag);
let found = none_match
.split(',')
.map(str::trim)
.any(|etag| etag == expected || etag == "\"*\"");
found
} else if let Some(modified_since) = req.headers().get(IF_MODIFIED_SINCE) {
2021-03-18 14:46:33 +00:00
let modified_since = modified_since.to_str().ok()?;
let client_date = httpdate::parse_http_date(modified_since).ok()?;
let server_date = UNIX_EPOCH + Duration::from_millis(version.timestamp);
client_date >= server_date
} else {
false
};
if cached {
Some(
Response::builder()
.status(StatusCode::NOT_MODIFIED)
.body(Body::empty())
.unwrap(),
)
} else {
None
}
}
2021-03-26 21:32:09 +00:00
/// Handle HEAD request
2020-04-24 18:47:11 +00:00
pub async fn handle_head(
garage: Arc<Garage>,
2021-03-18 14:46:33 +00:00
req: &Request<Body>,
2021-12-14 12:55:11 +00:00
bucket_id: Uuid,
2020-04-24 18:47:11 +00:00
key: &str,
part_number: Option<u64>,
) -> Result<Response<Body>, Error> {
2020-11-11 15:12:42 +00:00
let object = garage
2020-04-24 18:47:11 +00:00
.object_table
2021-12-14 12:55:11 +00:00
.get(&bucket_id, &key.to_string())
2020-04-24 18:47:11 +00:00
.await?
2022-01-05 16:07:36 +00:00
.ok_or(Error::NoSuchKey)?;
2020-04-24 18:47:11 +00:00
let object_version = object
2020-04-24 18:47:11 +00:00
.versions()
.iter()
.rev()
2021-04-23 20:18:00 +00:00
.find(|v| v.is_data())
2022-01-05 16:07:36 +00:00
.ok_or(Error::NoSuchKey)?;
2020-11-11 15:12:42 +00:00
let version_data = match &object_version.state {
ObjectVersionState::Complete(c) => c,
_ => unreachable!(),
};
let version_meta = match version_data {
ObjectVersionData::Inline(meta, _) => meta,
ObjectVersionData::FirstBlock(meta, _) => meta,
2020-07-08 15:34:37 +00:00
_ => unreachable!(),
};
2020-04-24 18:47:11 +00:00
if let Some(cached) = try_answer_cached(object_version, version_meta, req) {
2021-03-18 14:46:33 +00:00
return Ok(cached);
}
if let Some(pn) = part_number {
if let ObjectVersionData::Inline(_, _) = version_data {
// Not a multipart upload
return Err(Error::BadRequest(
"Cannot process part_number argument: not a multipart upload".into(),
));
}
let version = garage
.version_table
.get(&object_version.uuid, &EmptyKey)
.await?
.ok_or(Error::NoSuchKey)?;
if !version.has_part_number(pn) {
return Err(Error::BadRequest(format!(
"Part number {} does not exist",
pn
)));
}
let part_size: u64 = version
.blocks
.items()
.iter()
.filter(|(k, _)| k.part_number == pn)
.map(|(_, b)| b.size)
.sum();
let n_parts = version.parts_etags.items().len();
Ok(object_headers(object_version, version_meta)
.header(CONTENT_LENGTH, format!("{}", part_size))
.header("x-amz-mp-parts-count", format!("{}", n_parts))
.status(StatusCode::OK)
.body(Body::empty())?)
} else {
Ok(object_headers(object_version, version_meta)
.header(CONTENT_LENGTH, format!("{}", version_meta.size))
.status(StatusCode::OK)
.body(Body::empty())?)
}
2020-04-24 18:47:11 +00:00
}
2021-03-26 21:32:09 +00:00
/// Handle GET request
2020-04-24 18:47:11 +00:00
pub async fn handle_get(
garage: Arc<Garage>,
2020-05-04 13:09:23 +00:00
req: &Request<Body>,
2021-12-14 12:55:11 +00:00
bucket_id: Uuid,
2020-04-24 18:47:11 +00:00
key: &str,
part_number: Option<u64>,
) -> Result<Response<Body>, Error> {
if part_number.is_some() {
return Err(Error::NotImplemented(
"part_number not supported for GetObject".into(),
));
}
2020-11-11 15:12:42 +00:00
let object = garage
2020-04-24 18:47:11 +00:00
.object_table
2021-12-14 12:55:11 +00:00
.get(&bucket_id, &key.to_string())
2020-04-24 18:47:11 +00:00
.await?
2022-01-05 16:07:36 +00:00
.ok_or(Error::NoSuchKey)?;
2020-04-24 18:47:11 +00:00
2020-11-11 15:12:42 +00:00
let last_v = object
2020-04-24 18:47:11 +00:00
.versions()
.iter()
.rev()
2021-04-23 20:18:00 +00:00
.find(|v| v.is_complete())
2022-01-05 16:07:36 +00:00
.ok_or(Error::NoSuchKey)?;
2020-11-11 15:12:42 +00:00
2020-07-08 15:34:37 +00:00
let last_v_data = match &last_v.state {
ObjectVersionState::Complete(x) => x,
_ => unreachable!(),
};
let last_v_meta = match last_v_data {
2022-01-05 16:07:36 +00:00
ObjectVersionData::DeleteMarker => return Err(Error::NoSuchKey),
2020-07-08 15:34:37 +00:00
ObjectVersionData::Inline(meta, _) => meta,
ObjectVersionData::FirstBlock(meta, _) => meta,
};
2020-04-24 18:47:11 +00:00
2021-10-26 08:20:05 +00:00
if let Some(cached) = try_answer_cached(last_v, last_v_meta, req) {
2021-03-18 14:46:33 +00:00
return Ok(cached);
}
2020-05-04 13:09:23 +00:00
let range = match req.headers().get("range") {
Some(range) => {
2020-11-08 14:04:30 +00:00
let range_str = range.to_str()?;
let mut ranges = http_range::HttpRange::parse(range_str, last_v_meta.size)
.map_err(|e| (e, last_v_meta.size))?;
2020-05-04 13:09:23 +00:00
if ranges.len() > 1 {
// garage does not support multi-range requests yet, so we respond with the entire
// object when multiple ranges are requested
None
2020-05-04 13:09:23 +00:00
} else {
ranges.pop()
}
}
None => None,
};
if let Some(range) = range {
2020-07-08 15:34:37 +00:00
return handle_get_range(
garage,
last_v,
last_v_data,
last_v_meta,
range.start,
range.start + range.length,
)
.await;
2020-05-04 13:09:23 +00:00
}
2021-10-26 08:20:05 +00:00
let resp_builder = object_headers(last_v, last_v_meta)
.header(CONTENT_LENGTH, format!("{}", last_v_meta.size))
2020-12-05 18:20:07 +00:00
.status(StatusCode::OK);
2020-04-24 18:47:11 +00:00
2020-07-08 15:33:24 +00:00
match &last_v_data {
ObjectVersionData::DeleteMarker => unreachable!(),
ObjectVersionData::Inline(_, bytes) => {
let body: Body = Body::from(bytes.to_vec());
2020-04-24 18:47:11 +00:00
Ok(resp_builder.body(body)?)
}
2020-07-08 15:33:24 +00:00
ObjectVersionData::FirstBlock(_, first_block_hash) => {
2021-10-26 08:20:05 +00:00
let read_first_block = garage.block_manager.rpc_get_block(first_block_hash);
2020-04-24 18:47:11 +00:00
let get_next_blocks = garage.version_table.get(&last_v.uuid, &EmptyKey);
let (first_block, version) = futures::try_join!(read_first_block, get_next_blocks)?;
2022-01-05 16:07:36 +00:00
let version = version.ok_or(Error::NoSuchKey)?;
2020-04-24 18:47:11 +00:00
let mut blocks = version
.blocks
.items()
2020-04-24 18:47:11 +00:00
.iter()
.map(|(_, vb)| (vb.hash, None))
2020-04-24 18:47:11 +00:00
.collect::<Vec<_>>();
blocks[0].1 = Some(first_block);
let body_stream = futures::stream::iter(blocks)
.map(move |(hash, data_opt)| {
let garage = garage.clone();
async move {
if let Some(data) = data_opt {
Ok(Bytes::from(data))
} else {
garage
.block_manager
.rpc_get_block(&hash)
.await
.map(Bytes::from)
}
}
})
.buffered(2);
2020-12-05 15:37:59 +00:00
let body = hyper::body::Body::wrap_stream(body_stream);
2020-04-24 18:47:11 +00:00
Ok(resp_builder.body(body)?)
}
}
}
2020-05-04 13:09:23 +00:00
2021-03-26 21:32:09 +00:00
async fn handle_get_range(
2020-05-04 13:09:23 +00:00
garage: Arc<Garage>,
version: &ObjectVersion,
2020-07-08 15:34:37 +00:00
version_data: &ObjectVersionData,
version_meta: &ObjectVersionMeta,
2020-05-04 13:09:23 +00:00
begin: u64,
end: u64,
) -> Result<Response<Body>, Error> {
2020-12-05 18:20:07 +00:00
let resp_builder = object_headers(version, version_meta)
.header(CONTENT_LENGTH, format!("{}", end - begin))
2020-05-04 13:09:23 +00:00
.header(
CONTENT_RANGE,
2020-12-05 15:37:59 +00:00
format!("bytes {}-{}/{}", begin, end - 1, version_meta.size),
2020-05-04 13:09:23 +00:00
)
.status(StatusCode::PARTIAL_CONTENT);
2020-07-08 15:33:24 +00:00
match &version_data {
ObjectVersionData::DeleteMarker => unreachable!(),
ObjectVersionData::Inline(_meta, bytes) => {
2020-05-04 13:09:23 +00:00
if end as usize <= bytes.len() {
let body: Body = Body::from(bytes[begin as usize..end as usize].to_vec());
2020-05-04 13:09:23 +00:00
Ok(resp_builder.body(body)?)
} else {
2020-11-08 14:04:30 +00:00
None.ok_or_internal_error(
"Requested range not present in inline bytes when it should have been",
)
2020-05-04 13:09:23 +00:00
}
}
2020-07-08 15:33:24 +00:00
ObjectVersionData::FirstBlock(_meta, _first_block_hash) => {
2020-05-04 13:09:23 +00:00
let version = garage.version_table.get(&version.uuid, &EmptyKey).await?;
let version = match version {
Some(v) => v,
2022-01-05 16:07:36 +00:00
None => return Err(Error::NoSuchKey),
2020-05-04 13:09:23 +00:00
};
2020-12-05 15:37:59 +00:00
// We will store here the list of blocks that have an intersection with the requested
// range, as well as their "true offset", which is their actual offset in the complete
// file (whereas block.offset designates the offset of the block WITHIN THE PART
// block.part_number, which is not the same in the case of a multipart upload)
let mut blocks = Vec::with_capacity(std::cmp::min(
version.blocks.len(),
4 + ((end - begin) / std::cmp::max(version.blocks.items()[0].1.size as u64, 1024))
as usize,
2020-12-05 15:37:59 +00:00
));
let mut true_offset = 0;
for (_, b) in version.blocks.items().iter() {
2020-12-05 15:37:59 +00:00
if true_offset >= end {
break;
}
// Keep only blocks that have an intersection with the requested range
if true_offset < end && true_offset + b.size > begin {
2021-04-23 20:18:00 +00:00
blocks.push((*b, true_offset));
2020-12-05 15:37:59 +00:00
}
true_offset += b.size;
}
2020-05-04 13:09:23 +00:00
let body_stream = futures::stream::iter(blocks)
2020-12-05 15:37:59 +00:00
.map(move |(block, true_offset)| {
2020-05-04 13:09:23 +00:00
let garage = garage.clone();
async move {
let data = garage.block_manager.rpc_get_block(&block.hash).await?;
2020-12-05 15:37:59 +00:00
let data = Bytes::from(data);
let start_in_block = if true_offset > begin {
2020-05-04 13:09:23 +00:00
0
} else {
2020-12-05 15:37:59 +00:00
begin - true_offset
2020-05-04 13:09:23 +00:00
};
2020-12-05 15:37:59 +00:00
let end_in_block = if true_offset + block.size < end {
2020-05-04 13:09:23 +00:00
block.size
} else {
2020-12-05 15:37:59 +00:00
end - true_offset
2020-05-04 13:09:23 +00:00
};
2021-04-23 20:18:00 +00:00
Result::<Bytes, Error>::Ok(
2020-12-05 15:37:59 +00:00
data.slice(start_in_block as usize..end_in_block as usize),
2021-04-23 20:18:00 +00:00
)
2020-05-04 13:09:23 +00:00
}
})
.buffered(2);
2020-12-05 15:37:59 +00:00
let body = hyper::body::Body::wrap_stream(body_stream);
2020-05-04 13:09:23 +00:00
Ok(resp_builder.body(body)?)
}
}
}