garage/src/api/api_server.rs

433 lines
11 KiB
Rust
Raw Normal View History

Implement ListMultipartUploads (#171) Implement ListMultipartUploads, also refactor ListObjects and ListObjectsV2. It took me some times as I wanted to propose the following things: - Using an iterator instead of the loop+goto pattern. I find it easier to read and it should enable some optimizations. For example, when consuming keys of a common prefix, we do many [redundant checks](https://git.deuxfleurs.fr/Deuxfleurs/garage/src/branch/main/src/api/s3_list.rs#L125-L156) while the only thing to do is to [check if the following key is still part of the common prefix](https://git.deuxfleurs.fr/Deuxfleurs/garage/src/branch/feature/s3-multipart-compat/src/api/s3_list.rs#L476). - Try to name things (see ExtractionResult and RangeBegin enums) and to separate concerns (see ListQuery and Accumulator) - An IO closure to make unit tests possibles. - Unit tests, to track regressions and document how to interact with the code - Integration tests with `s3api`. In the future, I would like to move them in Rust with the aws rust SDK. Merging of the logic of ListMultipartUploads and ListObjects was not a goal but a consequence of the previous modifications. Some points that we might want to discuss: - ListObjectsV1, when using pagination and delimiters, has a weird behavior (it lists multiple times the same prefix) with `aws s3api` due to the fact that it can not use our optimization to skip the whole prefix. It is independant from my refactor and can be tested with the commented `s3api` tests in `test-smoke.sh`. It probably has the same weird behavior on the official AWS S3 implementation. - Considering ListMultipartUploads, I had to "abuse" upload id marker to support prefix skipping. I send an `upload-id-marker` with the hardcoded value `include` to emulate your "including" token. - Some ways to test ListMultipartUploads with existing software (my tests are limited to s3api for now). 
Co-authored-by: Quentin Dufour <quentin@deuxfleurs.fr> Reviewed-on: https://git.deuxfleurs.fr/Deuxfleurs/garage/pulls/171 Co-authored-by: Quentin <quentin@dufour.io> Co-committed-by: Quentin <quentin@dufour.io>
2022-01-12 18:04:55 +00:00
use std::cmp::{max, min};
use std::net::SocketAddr;
use std::sync::Arc;
2020-04-06 17:55:39 +00:00
use futures::future::Future;
2021-11-11 10:26:02 +00:00
use hyper::header;
use hyper::server::conn::AddrStream;
use hyper::service::{make_service_fn, service_fn};
use hyper::{Body, Request, Response, Server};
2021-12-14 12:55:11 +00:00
use garage_util::data::*;
2020-11-08 14:04:30 +00:00
use garage_util::error::Error as GarageError;
2020-04-23 17:05:46 +00:00
2020-07-07 11:59:22 +00:00
use garage_model::garage::Garage;
2021-12-14 12:55:11 +00:00
use garage_model::key_table::Key;
2020-04-23 17:05:46 +00:00
2020-11-08 14:04:30 +00:00
use crate::error::*;
2020-04-24 17:46:52 +00:00
use crate::signature::check_signature;
2021-11-11 10:26:02 +00:00
use crate::helpers::*;
use crate::s3_bucket::*;
2020-04-28 10:18:14 +00:00
use crate::s3_copy::*;
use crate::s3_delete::*;
2020-04-26 20:39:32 +00:00
use crate::s3_get::*;
use crate::s3_list::*;
use crate::s3_put::*;
use crate::s3_router::{Authorization, Endpoint};
use crate::s3_website::*;
2021-03-26 21:32:09 +00:00
/// Run the S3 API server
pub async fn run_api_server(
garage: Arc<Garage>,
shutdown_signal: impl Future<Output = ()>,
2020-11-08 14:04:30 +00:00
) -> Result<(), GarageError> {
2020-04-24 17:46:52 +00:00
let addr = &garage.config.s3_api.api_bind_addr;
let service = make_service_fn(|conn: &AddrStream| {
2020-04-08 20:00:41 +00:00
let garage = garage.clone();
let client_addr = conn.remote_addr();
2020-04-06 17:55:39 +00:00
async move {
2020-11-08 14:04:30 +00:00
Ok::<_, GarageError>(service_fn(move |req: Request<Body>| {
2020-04-08 20:00:41 +00:00
let garage = garage.clone();
handler(garage, req, client_addr)
2020-04-06 17:55:39 +00:00
}))
}
});
2021-10-26 08:20:05 +00:00
let server = Server::bind(addr).serve(service);
let graceful = server.with_graceful_shutdown(shutdown_signal);
2020-04-21 12:54:55 +00:00
info!("API server listening on http://{}", addr);
graceful.await?;
Ok(())
}
async fn handler(
garage: Arc<Garage>,
req: Request<Body>,
addr: SocketAddr,
2020-11-08 14:04:30 +00:00
) -> Result<Response<Body>, GarageError> {
let uri = req.uri().clone();
info!("{} {} {}", addr, req.method(), uri);
2020-04-24 17:46:52 +00:00
debug!("{:?}", req);
match handler_inner(garage.clone(), req).await {
2020-04-24 17:46:52 +00:00
Ok(x) => {
debug!("{} {:?}", x.status(), x.headers());
Ok(x)
}
Err(e) => {
let body: Body = Body::from(e.aws_xml(&garage.config.s3_api.s3_region, uri.path()));
let mut http_error_builder = Response::builder()
.status(e.http_status_code())
.header("Content-Type", "application/xml");
if let Some(header_map) = http_error_builder.headers_mut() {
e.add_headers(header_map)
}
let http_error = http_error_builder.body(body)?;
if e.http_status_code().is_server_error() {
warn!("Response: error {}, {}", e.http_status_code(), e);
} else {
info!("Response: error {}, {}", e.http_status_code(), e);
}
2020-04-09 15:32:28 +00:00
Ok(http_error)
}
}
}
async fn handler_inner(garage: Arc<Garage>, req: Request<Body>) -> Result<Response<Body>, Error> {
2021-05-02 20:30:56 +00:00
let (api_key, content_sha256) = check_signature(&garage, &req).await?;
2021-11-11 10:26:02 +00:00
let authority = req
.headers()
.get(header::HOST)
.ok_or_else(|| Error::BadRequest("HOST header required".to_owned()))?
.to_str()?;
let host = authority_to_host(authority)?;
let bucket = garage
.config
.s3_api
.root_domain
.as_ref()
2021-11-11 14:37:48 +00:00
.and_then(|root_domain| host_to_bucket(&host, root_domain));
let endpoint = Endpoint::from_request(&req, bucket.map(ToOwned::to_owned))?;
2022-01-05 16:07:36 +00:00
debug!("Endpoint: {:?}", endpoint);
2021-12-14 12:55:11 +00:00
2022-01-05 14:56:48 +00:00
// Special code path for CreateBucket API endpoint
if let Endpoint::CreateBucket { bucket } = endpoint {
return handle_create_bucket(&garage, req, content_sha256, api_key, bucket).await;
}
let bucket_name = match endpoint.get_bucket() {
None => return handle_request_without_bucket(garage, req, api_key, endpoint).await,
Some(bucket) => bucket.to_string(),
2021-12-14 12:55:11 +00:00
};
let bucket_id = resolve_bucket(&garage, &bucket_name, &api_key).await?;
let allowed = match endpoint.authorization_type() {
2021-12-14 12:55:11 +00:00
Authorization::Read(_) => api_key.allow_read(&bucket_id),
Authorization::Write(_) => api_key.allow_write(&bucket_id),
2022-01-05 15:23:09 +00:00
Authorization::Owner(_) => api_key.allow_owner(&bucket_id),
2021-12-14 12:55:11 +00:00
_ => unreachable!(),
2020-04-24 17:46:52 +00:00
};
2020-04-24 17:46:52 +00:00
if !allowed {
2021-04-23 20:18:00 +00:00
return Err(Error::Forbidden(
"Operation is not allowed for this key.".to_string(),
));
2020-04-24 17:46:52 +00:00
}
match endpoint {
2021-12-14 12:55:11 +00:00
Endpoint::HeadObject { key, .. } => handle_head(garage, &req, bucket_id, &key).await,
Endpoint::GetObject { key, .. } => handle_get(garage, &req, bucket_id, &key).await,
Endpoint::UploadPart {
key,
part_number,
upload_id,
2021-12-14 12:55:11 +00:00
..
} => {
handle_put_part(
garage,
req,
2021-12-14 12:55:11 +00:00
bucket_id,
&key,
part_number,
&upload_id,
content_sha256,
)
.await
2020-04-26 20:39:32 +00:00
}
2021-12-14 12:55:11 +00:00
Endpoint::CopyObject { key, .. } => {
let copy_source = req.headers().get("x-amz-copy-source").unwrap().to_str()?;
let copy_source = percent_encoding::percent_decode_str(copy_source).decode_utf8()?;
let (source_bucket, source_key) = parse_bucket_key(&copy_source, None)?;
2021-12-14 12:55:11 +00:00
let source_bucket_id =
resolve_bucket(&garage, &source_bucket.to_string(), &api_key).await?;
if !api_key.allow_read(&source_bucket_id) {
return Err(Error::Forbidden(format!(
"Reading from bucket {} not allowed for this key",
source_bucket
)));
2020-04-26 20:39:32 +00:00
}
let source_key = source_key.ok_or_bad_request("No source key specified")?;
2021-12-14 12:55:11 +00:00
handle_copy(garage, &req, bucket_id, &key, source_bucket_id, source_key).await
}
2021-12-14 12:55:11 +00:00
Endpoint::PutObject { key, .. } => {
handle_put(garage, req, bucket_id, &key, content_sha256).await
}
2021-12-14 12:55:11 +00:00
Endpoint::AbortMultipartUpload { key, upload_id, .. } => {
handle_abort_multipart_upload(garage, bucket_id, &key, &upload_id).await
}
Endpoint::DeleteObject { key, .. } => handle_delete(garage, bucket_id, &key).await,
Endpoint::CreateMultipartUpload { bucket, key } => {
2021-12-14 12:55:11 +00:00
handle_create_multipart_upload(garage, &req, &bucket, bucket_id, &key).await
}
Endpoint::CompleteMultipartUpload {
bucket,
key,
upload_id,
} => {
2021-12-14 12:55:11 +00:00
handle_complete_multipart_upload(
garage,
req,
&bucket,
bucket_id,
&key,
&upload_id,
content_sha256,
)
.await
}
2022-01-05 14:56:48 +00:00
Endpoint::CreateBucket { .. } => unreachable!(),
Endpoint::HeadBucket { .. } => {
let empty_body: Body = Body::from(vec![]);
let response = Response::builder().body(empty_body).unwrap();
Ok(response)
}
2022-01-05 15:23:09 +00:00
Endpoint::DeleteBucket { .. } => {
handle_delete_bucket(&garage, bucket_id, bucket_name, api_key).await
}
Endpoint::GetBucketLocation { .. } => handle_get_bucket_location(garage),
Endpoint::GetBucketVersioning { .. } => handle_get_bucket_versioning(),
Endpoint::ListObjects {
bucket,
delimiter,
encoding_type,
marker,
max_keys,
prefix,
} => {
handle_list(
garage,
&ListObjectsQuery {
Implement ListMultipartUploads (#171) Implement ListMultipartUploads, also refactor ListObjects and ListObjectsV2. It took me some times as I wanted to propose the following things: - Using an iterator instead of the loop+goto pattern. I find it easier to read and it should enable some optimizations. For example, when consuming keys of a common prefix, we do many [redundant checks](https://git.deuxfleurs.fr/Deuxfleurs/garage/src/branch/main/src/api/s3_list.rs#L125-L156) while the only thing to do is to [check if the following key is still part of the common prefix](https://git.deuxfleurs.fr/Deuxfleurs/garage/src/branch/feature/s3-multipart-compat/src/api/s3_list.rs#L476). - Try to name things (see ExtractionResult and RangeBegin enums) and to separate concerns (see ListQuery and Accumulator) - An IO closure to make unit tests possibles. - Unit tests, to track regressions and document how to interact with the code - Integration tests with `s3api`. In the future, I would like to move them in Rust with the aws rust SDK. Merging of the logic of ListMultipartUploads and ListObjects was not a goal but a consequence of the previous modifications. Some points that we might want to discuss: - ListObjectsV1, when using pagination and delimiters, has a weird behavior (it lists multiple times the same prefix) with `aws s3api` due to the fact that it can not use our optimization to skip the whole prefix. It is independant from my refactor and can be tested with the commented `s3api` tests in `test-smoke.sh`. It probably has the same weird behavior on the official AWS S3 implementation. - Considering ListMultipartUploads, I had to "abuse" upload id marker to support prefix skipping. I send an `upload-id-marker` with the hardcoded value `include` to emulate your "including" token. - Some ways to test ListMultipartUploads with existing software (my tests are limited to s3api for now). 
Co-authored-by: Quentin Dufour <quentin@deuxfleurs.fr> Reviewed-on: https://git.deuxfleurs.fr/Deuxfleurs/garage/pulls/171 Co-authored-by: Quentin <quentin@dufour.io> Co-committed-by: Quentin <quentin@dufour.io>
2022-01-12 18:04:55 +00:00
common: ListQueryCommon {
bucket_name: bucket,
bucket_id,
delimiter: delimiter.map(|d| d.to_string()),
page_size: max_keys.map(|p| min(1000, max(1, p))).unwrap_or(1000),
prefix: prefix.unwrap_or_default(),
urlencode_resp: encoding_type.map(|e| e == "url").unwrap_or(false),
},
is_v2: false,
marker,
continuation_token: None,
start_after: None,
},
)
.await
}
Endpoint::ListObjectsV2 {
bucket,
delimiter,
encoding_type,
max_keys,
prefix,
continuation_token,
start_after,
list_type,
..
} => {
if list_type == "2" {
handle_list(
garage,
&ListObjectsQuery {
Implement ListMultipartUploads (#171) Implement ListMultipartUploads, also refactor ListObjects and ListObjectsV2. It took me some times as I wanted to propose the following things: - Using an iterator instead of the loop+goto pattern. I find it easier to read and it should enable some optimizations. For example, when consuming keys of a common prefix, we do many [redundant checks](https://git.deuxfleurs.fr/Deuxfleurs/garage/src/branch/main/src/api/s3_list.rs#L125-L156) while the only thing to do is to [check if the following key is still part of the common prefix](https://git.deuxfleurs.fr/Deuxfleurs/garage/src/branch/feature/s3-multipart-compat/src/api/s3_list.rs#L476). - Try to name things (see ExtractionResult and RangeBegin enums) and to separate concerns (see ListQuery and Accumulator) - An IO closure to make unit tests possibles. - Unit tests, to track regressions and document how to interact with the code - Integration tests with `s3api`. In the future, I would like to move them in Rust with the aws rust SDK. Merging of the logic of ListMultipartUploads and ListObjects was not a goal but a consequence of the previous modifications. Some points that we might want to discuss: - ListObjectsV1, when using pagination and delimiters, has a weird behavior (it lists multiple times the same prefix) with `aws s3api` due to the fact that it can not use our optimization to skip the whole prefix. It is independant from my refactor and can be tested with the commented `s3api` tests in `test-smoke.sh`. It probably has the same weird behavior on the official AWS S3 implementation. - Considering ListMultipartUploads, I had to "abuse" upload id marker to support prefix skipping. I send an `upload-id-marker` with the hardcoded value `include` to emulate your "including" token. - Some ways to test ListMultipartUploads with existing software (my tests are limited to s3api for now). 
Co-authored-by: Quentin Dufour <quentin@deuxfleurs.fr> Reviewed-on: https://git.deuxfleurs.fr/Deuxfleurs/garage/pulls/171 Co-authored-by: Quentin <quentin@dufour.io> Co-committed-by: Quentin <quentin@dufour.io>
2022-01-12 18:04:55 +00:00
common: ListQueryCommon {
bucket_name: bucket,
bucket_id,
delimiter: delimiter.map(|d| d.to_string()),
page_size: max_keys.map(|p| min(1000, max(1, p))).unwrap_or(1000),
urlencode_resp: encoding_type.map(|e| e == "url").unwrap_or(false),
prefix: prefix.unwrap_or_default(),
},
is_v2: true,
marker: None,
continuation_token,
start_after,
},
)
.await
} else {
Err(Error::BadRequest(format!(
"Invalid endpoint: list-type={}",
list_type
)))
2020-04-26 20:39:32 +00:00
}
}
Implement ListMultipartUploads (#171) Implement ListMultipartUploads, also refactor ListObjects and ListObjectsV2. It took me some times as I wanted to propose the following things: - Using an iterator instead of the loop+goto pattern. I find it easier to read and it should enable some optimizations. For example, when consuming keys of a common prefix, we do many [redundant checks](https://git.deuxfleurs.fr/Deuxfleurs/garage/src/branch/main/src/api/s3_list.rs#L125-L156) while the only thing to do is to [check if the following key is still part of the common prefix](https://git.deuxfleurs.fr/Deuxfleurs/garage/src/branch/feature/s3-multipart-compat/src/api/s3_list.rs#L476). - Try to name things (see ExtractionResult and RangeBegin enums) and to separate concerns (see ListQuery and Accumulator) - An IO closure to make unit tests possibles. - Unit tests, to track regressions and document how to interact with the code - Integration tests with `s3api`. In the future, I would like to move them in Rust with the aws rust SDK. Merging of the logic of ListMultipartUploads and ListObjects was not a goal but a consequence of the previous modifications. Some points that we might want to discuss: - ListObjectsV1, when using pagination and delimiters, has a weird behavior (it lists multiple times the same prefix) with `aws s3api` due to the fact that it can not use our optimization to skip the whole prefix. It is independant from my refactor and can be tested with the commented `s3api` tests in `test-smoke.sh`. It probably has the same weird behavior on the official AWS S3 implementation. - Considering ListMultipartUploads, I had to "abuse" upload id marker to support prefix skipping. I send an `upload-id-marker` with the hardcoded value `include` to emulate your "including" token. - Some ways to test ListMultipartUploads with existing software (my tests are limited to s3api for now). 
Co-authored-by: Quentin Dufour <quentin@deuxfleurs.fr> Reviewed-on: https://git.deuxfleurs.fr/Deuxfleurs/garage/pulls/171 Co-authored-by: Quentin <quentin@dufour.io> Co-committed-by: Quentin <quentin@dufour.io>
2022-01-12 18:04:55 +00:00
Endpoint::ListMultipartUploads {
bucket,
delimiter,
encoding_type,
key_marker,
max_uploads,
prefix,
upload_id_marker,
} => {
handle_list_multipart_upload(
garage,
&ListMultipartUploadsQuery {
common: ListQueryCommon {
bucket_name: bucket,
bucket_id,
delimiter: delimiter.map(|d| d.to_string()),
page_size: max_uploads.map(|p| min(1000, max(1, p))).unwrap_or(1000),
prefix: prefix.unwrap_or_default(),
urlencode_resp: encoding_type.map(|e| e == "url").unwrap_or(false),
},
key_marker,
upload_id_marker,
},
)
.await
}
2021-12-14 12:55:11 +00:00
Endpoint::DeleteObjects { .. } => {
handle_delete_objects(garage, bucket_id, req, content_sha256).await
2020-04-09 21:45:07 +00:00
}
2021-12-16 10:47:58 +00:00
Endpoint::PutBucketWebsite { .. } => {
handle_put_website(garage, bucket_id, req, content_sha256).await
}
2021-12-16 10:47:58 +00:00
Endpoint::DeleteBucketWebsite { .. } => handle_delete_website(garage, bucket_id).await,
endpoint => Err(Error::NotImplemented(endpoint.name().to_owned())),
2020-04-09 21:45:07 +00:00
}
}
2020-04-28 10:18:14 +00:00
2021-12-14 12:55:11 +00:00
async fn handle_request_without_bucket(
garage: Arc<Garage>,
_req: Request<Body>,
api_key: Key,
endpoint: Endpoint,
) -> Result<Response<Body>, Error> {
match endpoint {
Endpoint::ListBuckets => handle_list_buckets(&garage, &api_key).await,
endpoint => Err(Error::NotImplemented(endpoint.name().to_owned())),
}
}
#[allow(clippy::ptr_arg)]
async fn resolve_bucket(
garage: &Garage,
bucket_name: &String,
api_key: &Key,
) -> Result<Uuid, Error> {
let api_key_params = api_key
.state
.as_option()
2022-01-05 14:56:48 +00:00
.ok_or_internal_error("Key should not be deleted at this point")?;
2021-12-14 12:55:11 +00:00
if let Some(Some(bucket_id)) = api_key_params.local_aliases.get(bucket_name) {
2021-12-14 12:55:11 +00:00
Ok(*bucket_id)
} else {
Ok(garage
.bucket_helper()
.resolve_global_bucket_name(bucket_name)
.await?
2022-01-05 16:07:36 +00:00
.ok_or(Error::NoSuchBucket)?)
2021-12-14 12:55:11 +00:00
}
}
2021-11-15 16:39:36 +00:00
/// Extract the bucket name and the key name from an HTTP path and possibly a bucket provided in
/// the host header of the request
2020-11-07 12:53:32 +00:00
///
/// S3 internally manages only buckets and keys. This function splits
/// an HTTP path to get the corresponding bucket name and key.
2021-11-11 10:26:02 +00:00
fn parse_bucket_key<'a>(
path: &'a str,
host_bucket: Option<&'a str>,
2021-11-11 10:26:02 +00:00
) -> Result<(&'a str, Option<&'a str>), Error> {
2020-04-28 10:35:04 +00:00
let path = path.trim_start_matches('/');
2020-04-28 10:18:14 +00:00
if let Some(bucket) = host_bucket {
if !path.is_empty() {
return Ok((bucket, Some(path)));
} else {
return Ok((bucket, None));
2021-11-11 10:26:02 +00:00
}
}
let (bucket, key) = match path.find('/') {
Some(i) => {
let key = &path[i + 1..];
2021-04-23 20:18:00 +00:00
if !key.is_empty() {
(&path[..i], Some(key))
} else {
(&path[..i], None)
}
}
None => (path, None),
};
2021-04-23 20:18:00 +00:00
if bucket.is_empty() {
return Err(Error::BadRequest("No bucket specified".to_string()));
2020-04-28 10:18:14 +00:00
}
Ok((bucket, key))
2020-04-28 10:18:14 +00:00
}
2020-11-07 12:53:32 +00:00
#[cfg(test)]
mod tests {
    use super::*;

    /// Path-style addressing: the first path segment is the bucket, the rest is the key.
    #[test]
    fn parse_bucket_containing_a_key() -> Result<(), Error> {
        let (bucket, key) = parse_bucket_key("/my_bucket/a/super/file.jpg", None)?;
        assert_eq!(bucket, "my_bucket");
        assert_eq!(key, Some("a/super/file.jpg"));
        Ok(())
    }

    /// A bucket with or without a trailing slash carries no key.
    #[test]
    fn parse_bucket_containing_no_key() -> Result<(), Error> {
        for path in ["/my_bucket/", "/my_bucket"].iter() {
            let (bucket, key) = parse_bucket_key(path, None)?;
            assert_eq!(bucket, "my_bucket");
            assert!(key.is_none());
        }
        Ok(())
    }

    /// Paths made only of slashes (or nothing at all) name no bucket.
    #[test]
    fn parse_bucket_containing_no_bucket() {
        for path in ["", "/", "////"].iter() {
            assert!(parse_bucket_key(path, None).is_err());
        }
    }

    /// Vhost-style addressing: the bucket comes from the host, the whole path is the key.
    #[test]
    fn parse_bucket_with_vhost_and_key() -> Result<(), Error> {
        let (bucket, key) = parse_bucket_key("/a/super/file.jpg", Some("my-bucket"))?;
        assert_eq!(bucket, "my-bucket");
        assert_eq!(key, Some("a/super/file.jpg"));
        Ok(())
    }

    /// Vhost-style addressing with an empty path yields no key.
    #[test]
    fn parse_bucket_with_vhost_no_key() -> Result<(), Error> {
        for path in ["", "/"].iter() {
            let (bucket, key) = parse_bucket_key(path, Some("my-bucket"))?;
            assert_eq!(bucket, "my-bucket");
            assert!(key.is_none());
        }
        Ok(())
    }
}