From 3770a34e3d3e861c91f60ec6e997f47a746c0040 Mon Sep 17 00:00:00 2001 From: Alex Auvolat Date: Tue, 11 Jan 2022 12:43:46 +0100 Subject: [PATCH 1/4] Implement x-amz-copy-if-xxx copy preconditions and return more headers on copy (fix #187) --- src/api/error.rs | 6 +++ src/api/s3_copy.rs | 101 +++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 107 insertions(+) diff --git a/src/api/error.rs b/src/api/error.rs index c19c4f3b..955faed1 100644 --- a/src/api/error.rs +++ b/src/api/error.rs @@ -54,6 +54,10 @@ pub enum Error { #[error(display = "Tried to delete a non-empty bucket")] BucketNotEmpty, + /// Precondition failed (e.g. x-amz-copy-source-if-match) + #[error(display = "At least one of the preconditions you specified did not hold")] + PreconditionFailed, + // Category: bad request /// The request contained an invalid UTF-8 sequence in its path or in other parameters #[error(display = "Invalid UTF-8: {}", _0)] @@ -115,6 +119,7 @@ impl Error { match self { Error::NoSuchKey | Error::NoSuchBucket | Error::NoSuchUpload => StatusCode::NOT_FOUND, Error::BucketNotEmpty | Error::BucketAlreadyExists => StatusCode::CONFLICT, + Error::PreconditionFailed => StatusCode::PRECONDITION_FAILED, Error::Forbidden(_) => StatusCode::FORBIDDEN, Error::InternalError( GarageError::Timeout @@ -137,6 +142,7 @@ impl Error { Error::NoSuchUpload => "NoSuchUpload", Error::BucketAlreadyExists => "BucketAlreadyExists", Error::BucketNotEmpty => "BucketNotEmpty", + Error::PreconditionFailed => "PreconditionFailed", Error::Forbidden(_) => "AccessDenied", Error::AuthorizationHeaderMalformed(_) => "AuthorizationHeaderMalformed", Error::NotImplemented(_) => "NotImplemented", diff --git a/src/api/s3_copy.rs b/src/api/s3_copy.rs index 7952dae8..cab173b1 100644 --- a/src/api/s3_copy.rs +++ b/src/api/s3_copy.rs @@ -1,4 +1,5 @@ use std::sync::Arc; +use std::time::{Duration, SystemTime, UNIX_EPOCH}; use hyper::{Body, Request, Response}; @@ -23,6 +24,8 @@ pub async fn handle_copy( source_bucket_id: Uuid, source_key: &str, ) -> Result, Error> { + let copy_precondition = CopyPreconditionHeaders::parse(req)?; + let source_object = garage .object_table .get(&source_bucket_id, &source_key.to_string()) @@ -63,6 +66,9 @@ pub async fn handle_copy( let etag = new_meta.etag.to_string(); + // Check precondition, e.g. x-amz-copy-source-if-match + copy_precondition.check(source_last_v, etag.as_str())?; + // Save object copy match source_last_state { ObjectVersionData::DeleteMarker => unreachable!(), @@ -164,5 +170,100 @@ pub async fn handle_copy( Ok(Response::builder() .header("Content-Type", "application/xml") + .header("x-amz-version-id", hex::encode(new_uuid)) + .header( + "x-amz-copy-source-version-id", + hex::encode(source_last_v.uuid), + ) .body(Body::from(xml))?) } + +struct CopyPreconditionHeaders { + copy_source_if_match: Option>, + copy_source_if_modified_since: Option, + copy_source_if_none_match: Option>, + copy_source_if_unmodified_since: Option, +} + +impl CopyPreconditionHeaders { + fn parse(req: &Request) -> Result { + Ok(Self { + copy_source_if_match: req + .headers() + .get("x-amz-copy-source-if-match") + .map(|x| x.to_str()) + .transpose()? + .map(|x| { + x.split(',') + .map(|m| m.trim().trim_matches('"').to_string()) + .collect::>() + }), + copy_source_if_modified_since: req + .headers() + .get("x-amz-copy-source-if-modified-since") + .map(|x| x.to_str()) + .transpose()? + .map(|x| httpdate::parse_http_date(x)) + .transpose() + .ok_or_bad_request("Invalid date in x-amz-copy-source-if-modified-since")?, + copy_source_if_none_match: req + .headers() + .get("x-amz-copy-source-if-none-match") + .map(|x| x.to_str()) + .transpose()? + .map(|x| { + x.split(',') + .map(|m| m.trim().trim_matches('"').to_string()) + .collect::>() + }), + copy_source_if_unmodified_since: req + .headers() + .get("x-amz-copy-source-if-unmodified-since") + .map(|x| x.to_str()) + .transpose()? + .map(|x| httpdate::parse_http_date(x)) + .transpose() + .ok_or_bad_request("Invalid date in x-amz-copy-source-if-unmodified-since")?, + }) + } + + fn check(&self, v: &ObjectVersion, etag: &str) -> Result<(), Error> { + let v_date = UNIX_EPOCH + Duration::from_millis(v.timestamp); + + let ok = match ( + &self.copy_source_if_match, + &self.copy_source_if_unmodified_since, + &self.copy_source_if_none_match, + &self.copy_source_if_modified_since, + ) { + // TODO I'm not sure all of the conditions are evaluated correctly here + + // If we have both if-match and if-unmodified-since, + // basically we don't care about if-unmodified-since, + // because in the spec it says that if if-match evaluates to + // true but if-unmodified-since evaluates to false, + // the copy is still done. + (Some(im), _, None, None) => im.iter().any(|x| x == etag || x == "*"), + (None, Some(ius), None, None) => v_date <= *ius, + + // If we have both if-none-match and if-modified-since, + // then both of the two conditions must evaluate to true + (None, None, Some(inm), Some(ims)) => { + !inm.iter().any(|x| x == etag || x == "*") && v_date > *ims + } + (None, None, Some(inm), None) => !inm.iter().any(|x| x == etag || x == "*"), + (None, None, None, Some(ims)) => v_date > *ims, + _ => { + return Err(Error::BadRequest( + "Invalid combination of x-amz-copy-source-if-xxxxx headers".into(), + )) + } + }; + + if ok { + Ok(()) + } else { + Err(Error::PreconditionFailed) + } + } +} -- 2.45.2 From 6617a72220f2890fba0c0b7c099baf56142c494c Mon Sep 17 00:00:00 2001 From: Alex Auvolat Date: Tue, 11 Jan 2022 17:31:09 +0100 Subject: [PATCH 2/4] Implement UploadPartCopy --- .../src/reference_manual/s3_compatibility.md | 3 +- .../working_documents/compatibility_target.md | 2 +- src/api/api_server.rs | 35 +- src/api/s3_copy.rs | 387 ++++++++++++++++-- src/api/s3_put.rs | 21 +- src/api/s3_xml.rs | 10 + src/model/version_table.rs | 14 + 7 files changed, 416 insertions(+), 56 deletions(-) diff --git a/doc/book/src/reference_manual/s3_compatibility.md b/doc/book/src/reference_manual/s3_compatibility.md index 7d517ac4..96bccde0 100644 --- a/doc/book/src/reference_manual/s3_compatibility.md +++ b/doc/book/src/reference_manual/s3_compatibility.md @@ -21,7 +21,7 @@ Not implemented: ## Endpoint implementation -All APIs that are not mentionned are not implemented and will return a 400 bad request. +All APIs that are not mentionned are not implemented and will return a 501 Not Implemented. | Endpoint | Status | |------------------------------|----------------------------------| @@ -48,6 +48,7 @@ All APIs that are not mentionned are not implemented and will return a 400 bad r | PutObject | Implemented | | PutBucketWebsite | Partially implemented (see below)| | UploadPart | Implemented | +| UploadPartCopy | Implemented | - **GetBucketVersioning:** Stub implementation (Garage does not yet support versionning so this always returns diff --git a/doc/book/src/working_documents/compatibility_target.md b/doc/book/src/working_documents/compatibility_target.md index d9027ccb..e6ff9ca8 100644 --- a/doc/book/src/working_documents/compatibility_target.md +++ b/doc/book/src/working_documents/compatibility_target.md @@ -30,7 +30,7 @@ your motivations for doing so in the PR message. | | [*GetBucketCors*](https://git.deuxfleurs.fr/Deuxfleurs/garage/issues/138) | | | [*PutBucketCors*](https://git.deuxfleurs.fr/Deuxfleurs/garage/issues/138) | | | [*DeleteBucketCors*](https://git.deuxfleurs.fr/Deuxfleurs/garage/issues/138) | -| | [*UploadPartCopy*](https://git.deuxfleurs.fr/Deuxfleurs/garage/issues/160) | +| | UploadPartCopy | | | [*GetBucketWebsite*](https://git.deuxfleurs.fr/Deuxfleurs/garage/issues/77) | | | [*PutBucketWebsite*](https://git.deuxfleurs.fr/Deuxfleurs/garage/issues/77) | | | DeleteBucketWebsite | diff --git a/src/api/api_server.rs b/src/api/api_server.rs index 16156e74..a38a3c5b 100644 --- a/src/api/api_server.rs +++ b/src/api/api_server.rs @@ -156,19 +156,24 @@ async fn handler_inner(garage: Arc, req: Request) -> Result { - let copy_source = req.headers().get("x-amz-copy-source").unwrap().to_str()?; - let copy_source = percent_encoding::percent_decode_str(copy_source).decode_utf8()?; - let (source_bucket, source_key) = parse_bucket_key(©_source, None)?; - let source_bucket_id = - resolve_bucket(&garage, &source_bucket.to_string(), &api_key).await?; - if !api_key.allow_read(&source_bucket_id) { - return Err(Error::Forbidden(format!( - "Reading from bucket {} not allowed for this key", - source_bucket - ))); - } - let source_key = source_key.ok_or_bad_request("No source key specified")?; - handle_copy(garage, &req, bucket_id, &key, source_bucket_id, source_key).await + handle_copy(garage, &api_key, &req, bucket_id, &key).await + } + Endpoint::UploadPartCopy { + key, + part_number, + upload_id, + .. + } => { + handle_upload_part_copy( + garage, + &api_key, + &req, + bucket_id, + &key, + part_number, + &upload_id, + ) + .await } Endpoint::PutObject { key, .. } => { handle_put(garage, req, bucket_id, &key, content_sha256).await @@ -321,7 +326,7 @@ async fn handle_request_without_bucket( } #[allow(clippy::ptr_arg)] -async fn resolve_bucket( +pub async fn resolve_bucket( garage: &Garage, bucket_name: &String, api_key: &Key, @@ -347,7 +352,7 @@ async fn resolve_bucket( /// /// S3 internally manages only buckets and keys. This function splits /// an HTTP path to get the corresponding bucket name and key. -fn parse_bucket_key<'a>( +pub fn parse_bucket_key<'a>( path: &'a str, host_bucket: Option<&'a str>, ) -> Result<(&'a str, Option<&'a str>), Error> { diff --git a/src/api/s3_copy.rs b/src/api/s3_copy.rs index cab173b1..c37bb138 100644 --- a/src/api/s3_copy.rs +++ b/src/api/s3_copy.rs @@ -1,7 +1,11 @@ use std::sync::Arc; use std::time::{Duration, SystemTime, UNIX_EPOCH}; +use futures::TryFutureExt; +use md5::{Digest as Md5Digest, Md5}; + use hyper::{Body, Request, Response}; +use serde::Serialize; use garage_table::*; use garage_util::data::*; @@ -9,68 +13,50 @@ use garage_util::time::*; use garage_model::block_ref_table::*; use garage_model::garage::Garage; +use garage_model::key_table::Key; use garage_model::object_table::*; use garage_model::version_table::*; +use crate::api_server::{parse_bucket_key, resolve_bucket}; use crate::error::*; -use crate::s3_put::get_headers; -use crate::s3_xml; +use crate::s3_put::{decode_upload_id, get_headers}; +use crate::s3_xml::{self, xmlns_tag}; pub async fn handle_copy( garage: Arc, + api_key: &Key, req: &Request, dest_bucket_id: Uuid, dest_key: &str, - source_bucket_id: Uuid, - source_key: &str, ) -> Result, Error> { let copy_precondition = CopyPreconditionHeaders::parse(req)?; - let source_object = garage - .object_table - .get(&source_bucket_id, &source_key.to_string()) - .await? - .ok_or(Error::NoSuchKey)?; + let source_object = get_copy_source(&garage, api_key, req).await?; - let source_last_v = source_object - .versions() - .iter() - .rev() - .find(|v| v.is_complete()) - .ok_or(Error::NoSuchKey)?; + let (source_version, source_version_data, source_version_meta) = + extract_source_info(&source_object)?; - let source_last_state = match &source_last_v.state { - ObjectVersionState::Complete(x) => x, - _ => unreachable!(), - }; + // Check precondition, e.g. x-amz-copy-source-if-match + copy_precondition.check(source_version, &source_version_meta.etag)?; + // Generate parameters for copied object let new_uuid = gen_uuid(); let new_timestamp = now_msec(); // Implement x-amz-metadata-directive: REPLACE - let old_meta = match source_last_state { - ObjectVersionData::DeleteMarker => { - return Err(Error::NoSuchKey); - } - ObjectVersionData::Inline(meta, _bytes) => meta, - ObjectVersionData::FirstBlock(meta, _fbh) => meta, - }; let new_meta = match req.headers().get("x-amz-metadata-directive") { Some(v) if v == hyper::header::HeaderValue::from_static("REPLACE") => ObjectVersionMeta { headers: get_headers(req)?, - size: old_meta.size, - etag: old_meta.etag.clone(), + size: source_version_meta.size, + etag: source_version_meta.etag.clone(), }, - _ => old_meta.clone(), + _ => source_version_meta.clone(), }; let etag = new_meta.etag.to_string(); - // Check precondition, e.g. x-amz-copy-source-if-match - copy_precondition.check(source_last_v, etag.as_str())?; - // Save object copy - match source_last_state { + match source_version_data { ObjectVersionData::DeleteMarker => unreachable!(), ObjectVersionData::Inline(_meta, bytes) => { let dest_object_version = ObjectVersion { @@ -92,7 +78,7 @@ pub async fn handle_copy( // Get block list from source version let source_version = garage .version_table - .get(&source_last_v.uuid, &EmptyKey) + .get(&source_version.uuid, &EmptyKey) .await?; let source_version = source_version.ok_or(Error::NoSuchKey)?; @@ -173,11 +159,309 @@ pub async fn handle_copy( .header("x-amz-version-id", hex::encode(new_uuid)) .header( "x-amz-copy-source-version-id", - hex::encode(source_last_v.uuid), + hex::encode(source_version.uuid), ) .body(Body::from(xml))?) } +pub async fn handle_upload_part_copy( + garage: Arc, + api_key: &Key, + req: &Request, + dest_bucket_id: Uuid, + dest_key: &str, + part_number: u64, + upload_id: &str, +) -> Result, Error> { + let copy_precondition = CopyPreconditionHeaders::parse(req)?; + + let dest_version_uuid = decode_upload_id(upload_id)?; + + let dest_key = dest_key.to_string(); + let (source_object, dest_object) = futures::try_join!( + get_copy_source(&garage, api_key, req), + garage + .object_table + .get(&dest_bucket_id, &dest_key) + .map_err(Error::from), + )?; + let dest_object = dest_object.ok_or(Error::NoSuchKey)?; + + let (source_object_version, source_version_data, source_version_meta) = + extract_source_info(&source_object)?; + + // Check precondition on source, e.g. x-amz-copy-source-if-match + copy_precondition.check(source_object_version, &source_version_meta.etag)?; + + // Check source range is valid + let source_range = match req.headers().get("x-amz-copy-source-range") { + Some(range) => { + let range_str = range.to_str()?; + let mut ranges = http_range::HttpRange::parse(range_str, source_version_meta.size) + .map_err(|e| (e, source_version_meta.size))?; + if ranges.len() != 1 { + return Err(Error::BadRequest( + "Invalid x-amz-copy-source-range header: exactly 1 range must be given".into(), + )); + } else { + ranges.pop().unwrap() + } + } + None => http_range::HttpRange { + start: 0, + length: source_version_meta.size, + }, + }; + + // Check destination version is indeed in uploading state + if !dest_object + .versions() + .iter() + .any(|v| v.uuid == dest_version_uuid && v.is_uploading()) + { + return Err(Error::NoSuchUpload); + } + + // Check source version is not inlined + match source_version_data { + ObjectVersionData::DeleteMarker => unreachable!(), + ObjectVersionData::Inline(_meta, _bytes) => { + // This is only for small files, we don't bother handling this. + // (in AWS UploadPartCopy works for parts at least 5MB which + // is never the case of an inline object) + return Err(Error::BadRequest( + "Source object is too small (minimum part size is 5Mb)".into(), + )); + } + ObjectVersionData::FirstBlock(_meta, _first_block_hash) => (), + }; + + // Fetch source versin with its block list, + // and destination version to check part hasn't yet been uploaded + let (source_version, dest_version) = futures::try_join!( + garage + .version_table + .get(&source_object_version.uuid, &EmptyKey), + garage.version_table.get(&dest_version_uuid, &EmptyKey), + )?; + let source_version = source_version.ok_or(Error::NoSuchKey)?; + + // Check this part number hasn't yet been uploaded + if let Some(dv) = dest_version { + if dv.has_part_number(part_number) { + return Err(Error::BadRequest(format!( + "Part number {} has already been uploaded", + part_number + ))); + } + } + + // We want to reuse blocks from the source version as much as possible. + // However, we still need to get the data from these blocks + // because we need to know it to calculate the MD5sum of the part + // which is used as its ETag. + + // First, calculate what blocks we want to keep, + // and the subrange of the block to take, if the bounds of the + // requested range are in the middle. + let (range_begin, range_end) = (source_range.start, source_range.start + source_range.length); + + let mut blocks_to_copy = vec![]; + let mut current_offset = 0; + let mut size_to_copy = 0; + for (_bk, block) in source_version.blocks.items().iter() { + let (block_begin, block_end) = (current_offset, current_offset + block.size); + + if block_begin < range_end && block_end > range_begin { + let subrange_begin = if block_begin < range_begin { + Some(range_begin - block_begin) + } else { + None + }; + let subrange_end = if block_end > range_end { + Some(range_end - block_begin) + } else { + None + }; + let range_to_copy = match (subrange_begin, subrange_end) { + (Some(b), Some(e)) => Some(b as usize..e as usize), + (None, Some(e)) => Some(0..e as usize), + (Some(b), None) => Some(b as usize..block.size as usize), + (None, None) => None, + }; + size_to_copy += range_to_copy + .as_ref() + .map(|x| x.len() as u64) + .unwrap_or(block.size); + + blocks_to_copy.push((block.hash, range_to_copy)); + } + + current_offset = block_end; + } + + if size_to_copy < 1024 * 1024 { + return Err(Error::BadRequest(format!( + "Not enough data to copy: {} bytes (minimum: 1MB)", + size_to_copy + ))); + } + + // Now, actually copy the blocks + let mut md5hasher = Md5::new(); + + let mut block = Some( + garage + .block_manager + .rpc_get_block(&blocks_to_copy[0].0) + .await?, + ); + + let mut current_offset = 0; + for (i, (block_hash, range_to_copy)) in blocks_to_copy.iter().enumerate() { + let (current_block, subrange_hash) = match range_to_copy.clone() { + Some(r) => { + let subrange = block.take().unwrap()[r].to_vec(); + let hash = blake2sum(&subrange); + (subrange, hash) + } + None => (block.take().unwrap(), *block_hash), + }; + md5hasher.update(¤t_block[..]); + + let mut version = Version::new(dest_version_uuid, dest_bucket_id, dest_key.clone(), false); + version.blocks.put( + VersionBlockKey { + part_number, + offset: current_offset, + }, + VersionBlock { + hash: subrange_hash, + size: current_block.len() as u64, + }, + ); + current_offset += current_block.len() as u64; + + let block_ref = BlockRef { + block: subrange_hash, + version: dest_version_uuid, + deleted: false.into(), + }; + + let next_block_hash = blocks_to_copy.get(i + 1).map(|(h, _)| *h); + + let garage2 = garage.clone(); + let garage3 = garage.clone(); + let is_subrange = range_to_copy.is_some(); + + let (_, _, _, next_block) = futures::try_join!( + // Thing 1: if we are taking a subrange of the source block, + // we need to insert that subrange as a new block. + async move { + if is_subrange { + garage2 + .block_manager + .rpc_put_block(subrange_hash, current_block) + .await + } else { + Ok(()) + } + }, + // Thing 2: we need to insert the block in the version + garage.version_table.insert(&version), + // Thing 3: we need to add a block reference + garage.block_ref_table.insert(&block_ref), + // Thing 4: we need to prefetch the next block + async move { + match next_block_hash { + Some(h) => Ok(Some(garage3.block_manager.rpc_get_block(&h).await?)), + None => Ok(None), + } + }, + )?; + + block = next_block; + } + + let data_md5sum = md5hasher.finalize(); + let etag = hex::encode(data_md5sum); + + // Put the part's ETag in the Versiontable + let mut version = Version::new(dest_version_uuid, dest_bucket_id, dest_key.clone(), false); + version.parts_etags.put(part_number, etag.clone()); + garage.version_table.insert(&version).await?; + + // LGTM + let resp_xml = s3_xml::to_xml_with_header(&CopyPartResult { + xmlns: (), + etag: s3_xml::Value(etag), + last_modified: s3_xml::Value(msec_to_rfc3339(source_object_version.timestamp)), + })?; + + Ok(Response::builder() + .header("Content-Type", "application/xml") + .header( + "x-amz-copy-source-version-id", + hex::encode(source_object_version.uuid), + ) + .body(Body::from(resp_xml))?) +} + +async fn get_copy_source( + garage: &Garage, + api_key: &Key, + req: &Request, +) -> Result { + let copy_source = req.headers().get("x-amz-copy-source").unwrap().to_str()?; + let copy_source = percent_encoding::percent_decode_str(copy_source).decode_utf8()?; + + let (source_bucket, source_key) = parse_bucket_key(©_source, None)?; + let source_bucket_id = resolve_bucket(garage, &source_bucket.to_string(), api_key).await?; + + if !api_key.allow_read(&source_bucket_id) { + return Err(Error::Forbidden(format!( + "Reading from bucket {} not allowed for this key", + source_bucket + ))); + } + + let source_key = source_key.ok_or_bad_request("No source key specified")?; + + let source_object = garage + .object_table + .get(&source_bucket_id, &source_key.to_string()) + .await? + .ok_or(Error::NoSuchKey)?; + + Ok(source_object) +} + +fn extract_source_info( + source_object: &Object, +) -> Result<(&ObjectVersion, &ObjectVersionData, &ObjectVersionMeta), Error> { + let source_version = source_object + .versions() + .iter() + .rev() + .find(|v| v.is_complete()) + .ok_or(Error::NoSuchKey)?; + + let source_version_data = match &source_version.state { + ObjectVersionState::Complete(x) => x, + _ => unreachable!(), + }; + + let source_version_meta = match source_version_data { + ObjectVersionData::DeleteMarker => { + return Err(Error::NoSuchKey); + } + ObjectVersionData::Inline(meta, _bytes) => meta, + ObjectVersionData::FirstBlock(meta, _fbh) => meta, + }; + + Ok((source_version, source_version_data, source_version_meta)) +} + struct CopyPreconditionHeaders { copy_source_if_match: Option>, copy_source_if_modified_since: Option, @@ -267,3 +551,36 @@ impl CopyPreconditionHeaders { } } } + +#[derive(Debug, Serialize, PartialEq)] +pub struct CopyPartResult { + #[serde(serialize_with = "xmlns_tag")] + pub xmlns: (), + #[serde(rename = "LastModified")] + pub last_modified: s3_xml::Value, + #[serde(rename = "ETag")] + pub etag: s3_xml::Value, +} + +#[cfg(test)] +mod tests { + use super::*; + use crate::s3_xml::to_xml_with_header; + + #[test] + fn serialize_copy_part_result() -> Result<(), Error> { + // @FIXME: ETag should be quoted, but we can't add quotes + // because XML serializer replaces them by `"` + let expected_retval = r#"2011-04-11T20:34:56.000Z9b2cf535f27731c974343645a3985328"#; + let v = CopyPartResult { + xmlns: (), + last_modified: s3_xml::Value("2011-04-11T20:34:56.000Z".into()), + etag: s3_xml::Value("9b2cf535f27731c974343645a3985328".into()), + }; + println!("{}", to_xml_with_header(&v)?); + + assert_eq!(to_xml_with_header(&v)?, expected_retval); + + Ok(()) + } +} diff --git a/src/api/s3_put.rs b/src/api/s3_put.rs index d7ee5893..421b94a1 100644 --- a/src/api/s3_put.rs +++ b/src/api/s3_put.rs @@ -370,12 +370,15 @@ pub async fn handle_put_part( let key = key.to_string(); let mut chunker = BodyChunker::new(req.into_body(), garage.config.block_size); - let (object, first_block) = - futures::try_join!(garage.object_table.get(&bucket_id, &key), chunker.next(),)?; + let (object, version, first_block) = futures::try_join!( + garage.object_table.get(&bucket_id, &key), + garage.version_table.get(&version_uuid, &EmptyKey), + chunker.next() + )?; // Check object is valid and multipart block can be accepted - let first_block = first_block.ok_or_else(|| Error::BadRequest("Empty body".to_string()))?; - let object = object.ok_or_else(|| Error::BadRequest("Object not found".to_string()))?; + let first_block = first_block.ok_or_bad_request("Empty body")?; + let object = object.ok_or_bad_request("Object not found")?; if !object .versions() @@ -385,6 +388,16 @@ pub async fn handle_put_part( return Err(Error::NoSuchUpload); } + // Check part hasn't already been uploaded + if let Some(v) = version { + if v.has_part_number(part_number) { + return Err(Error::BadRequest(format!( + "Part number {} has already been uploaded", + part_number + ))); + } + } + // Copy block to store let version = Version::new(version_uuid, bucket_id, key, false); let first_block_hash = blake2sum(&first_block[..]); diff --git a/src/api/s3_xml.rs b/src/api/s3_xml.rs index 98c63d57..962b4780 100644 --- a/src/api/s3_xml.rs +++ b/src/api/s3_xml.rs @@ -428,6 +428,8 @@ mod tests { #[test] fn copy_object_result() -> Result<(), ApiError> { + // @FIXME: ETag should be quoted, but we can't add quotes + // because XML serializer replaces them by `"` let copy_result = CopyObjectResult { last_modified: Value(msec_to_rfc3339(0)), etag: Value("9b2cf535f27731c974343645a3985328".to_string()), @@ -466,6 +468,8 @@ mod tests { #[test] fn complete_multipart_upload_result() -> Result<(), ApiError> { + // @FIXME: ETag should be quoted, but we can't add quotes + // because XML serializer replaces them by `"` let result = CompleteMultipartUploadResult { xmlns: (), location: Some(Value("https://garage.tld/mybucket/a/plop".to_string())), @@ -540,6 +544,8 @@ mod tests { #[test] fn list_objects_v1_1() -> Result<(), ApiError> { + // @FIXME: ETag should be quoted, but we can't add quotes + // because XML serializer replaces them by `"` let result = ListBucketResult { xmlns: (), name: Value("example-bucket".to_string()), @@ -639,6 +645,8 @@ mod tests { #[test] fn list_objects_v2_1() -> Result<(), ApiError> { + // @FIXME: ETag should be quoted, but we can't add quotes + // because XML serializer replaces them by `"` let result = ListBucketResult { xmlns: (), name: Value("quotes".to_string()), @@ -685,6 +693,8 @@ mod tests { #[test] fn list_objects_v2_2() -> Result<(), ApiError> { + // @FIXME: ETag should be quoted, but we can't add quotes + // because XML serializer replaces them by `"` let result = ListBucketResult { xmlns: (), name: Value("bucket".to_string()), diff --git a/src/model/version_table.rs b/src/model/version_table.rs index e0b99770..839b1f4f 100644 --- a/src/model/version_table.rs +++ b/src/model/version_table.rs @@ -47,6 +47,20 @@ impl Version { key, } } + + pub fn has_part_number(&self, part_number: u64) -> bool { + let case1 = self + .parts_etags + .items() + .binary_search_by(|(k, _)| k.cmp(&part_number)) + .is_ok(); + let case2 = self + .blocks + .items() + .binary_search_by(|(k, _)| k.part_number.cmp(&part_number)) + .is_ok(); + case1 || case2 + } } #[derive(PartialEq, Eq, Clone, Copy, Debug, Serialize, Deserialize)] -- 2.45.2 From 1ee8f596ee6792b987b07dc08617feb4a3480c1a Mon Sep 17 00:00:00 2001 From: Alex Auvolat Date: Wed, 12 Jan 2022 10:17:15 +0100 Subject: [PATCH 3/4] Testing for UploadPartCopies and bugfixes in AWS signatures --- script/test-smoke.sh | 82 ++++++++++++++++++++++++++++++++++++-------- src/api/s3_copy.rs | 1 + src/api/signature.rs | 9 +++-- 3 files changed, 74 insertions(+), 18 deletions(-) diff --git a/script/test-smoke.sh b/script/test-smoke.sh index 2505ae38..b85d9ed5 100755 --- a/script/test-smoke.sh +++ b/script/test-smoke.sh @@ -116,21 +116,7 @@ if [ -z "$SKIP_DUCK" ]; then done fi -rm /tmp/garage.{1..3}.{rnd,b64} - -if [ -z "$SKIP_AWS" ]; then - echo "๐Ÿงช Website Testing" - echo "

hello world

" > /tmp/garage-index.html - aws s3 cp /tmp/garage-index.html s3://eprouvette/index.html - [ `curl -s -o /dev/null -w "%{http_code}" --header "Host: eprouvette.garage.tld" http://127.0.0.1:3921/ ` == 404 ] - garage -c /tmp/config.1.toml bucket website --allow eprouvette - [ `curl -s -o /dev/null -w "%{http_code}" --header "Host: eprouvette.garage.tld" http://127.0.0.1:3921/ ` == 200 ] - garage -c /tmp/config.1.toml bucket website --deny eprouvette - [ `curl -s -o /dev/null -w "%{http_code}" --header "Host: eprouvette.garage.tld" http://127.0.0.1:3921/ ` == 404 ] - aws s3 rm s3://eprouvette/index.html - rm /tmp/garage-index.html -fi - +# Advanced testing via S3API if [ -z "$SKIP_AWS" ]; then echo "๐Ÿ”Œ Test S3API" @@ -265,8 +251,61 @@ if [ -z "$SKIP_AWS" ]; then aws s3api abort-multipart-upload --bucket eprouvette --key $key --upload-id $uid; echo "Deleted ${key}:${uid}" done + + # Test for UploadPartCopy + aws s3 cp "/tmp/garage.3.rnd" "s3://eprouvette/copy_part_source" + UPLOAD_ID=$(aws s3api create-multipart-upload --bucket eprouvette --key test_multipart | jq -r .UploadId) + PART1=$(aws s3api upload-part \ + --bucket eprouvette --key test_multipart \ + --upload-id $UPLOAD_ID --part-number 1 \ + --body /tmp/garage.2.rnd | jq .ETag) + PART2=$(aws s3api upload-part-copy \ + --bucket eprouvette --key test_multipart \ + --upload-id $UPLOAD_ID --part-number 2 \ + --copy-source "/eprouvette/copy_part_source" \ + --copy-source-range "bytes=500-5000500" \ + | jq .CopyPartResult.ETag) + PART3=$(aws s3api upload-part \ + --bucket eprouvette --key test_multipart \ + --upload-id $UPLOAD_ID --part-number 3 \ + --body /tmp/garage.3.rnd | jq .ETag) + cat >/tmp/garage.multipart_struct < /tmp/garage.test_multipart_reference + diff /tmp/garage.test_multipart /tmp/garage.test_multipart_reference >/tmp/garage.test_multipart_diff 2>&1 + + aws s3 rm "s3://eprouvette/copy_part_source" + aws s3 rm "s3://eprouvette/test_multipart" + + rm /tmp/garage.multipart_struct + rm /tmp/garage.test_multipart + rm /tmp/garage.test_multipart_reference + rm /tmp/garage.test_multipart_diff fi +rm /tmp/garage.{1..3}.{rnd,b64} + if [ -z "$SKIP_AWS" ]; then echo "๐Ÿชฃ Test bucket logic " AWS_ACCESS_KEY_ID=`cat /tmp/garage.s3 |cut -d' ' -f1` @@ -282,6 +321,19 @@ if [ -z "$SKIP_AWS" ]; then [ $(aws s3 ls | wc -l) == 1 ] fi +if [ -z "$SKIP_AWS" ]; then + echo "๐Ÿงช Website Testing" + echo "

hello world

" > /tmp/garage-index.html + aws s3 cp /tmp/garage-index.html s3://eprouvette/index.html + [ `curl -s -o /dev/null -w "%{http_code}" --header "Host: eprouvette.garage.tld" http://127.0.0.1:3921/ ` == 404 ] + garage -c /tmp/config.1.toml bucket website --allow eprouvette + [ `curl -s -o /dev/null -w "%{http_code}" --header "Host: eprouvette.garage.tld" http://127.0.0.1:3921/ ` == 200 ] + garage -c /tmp/config.1.toml bucket website --deny eprouvette + [ `curl -s -o /dev/null -w "%{http_code}" --header "Host: eprouvette.garage.tld" http://127.0.0.1:3921/ ` == 404 ] + aws s3 rm s3://eprouvette/index.html + rm /tmp/garage-index.html +fi + echo "๐Ÿ Teardown" AWS_ACCESS_KEY_ID=`cat /tmp/garage.s3 |cut -d' ' -f1` AWS_SECRET_ACCESS_KEY=`cat /tmp/garage.s3 |cut -d' ' -f2` diff --git a/src/api/s3_copy.rs b/src/api/s3_copy.rs index c37bb138..7e91ecd8 100644 --- a/src/api/s3_copy.rs +++ b/src/api/s3_copy.rs @@ -537,6 +537,7 @@ impl CopyPreconditionHeaders { } (None, None, Some(inm), None) => !inm.iter().any(|x| x == etag || x == "*"), (None, None, None, Some(ims)) => v_date > *ims, + (None, None, None, None) => true, _ => { return Err(Error::BadRequest( "Invalid combination of x-amz-copy-source-if-xxxxx headers".into(), diff --git a/src/api/signature.rs b/src/api/signature.rs index c580cb3a..311e6a9a 100644 --- a/src/api/signature.rs +++ b/src/api/signature.rs @@ -266,10 +266,13 @@ fn canonical_header_string(headers: &HashMap, signed_headers: &s let mut items = headers .iter() .filter(|(key, _)| signed_headers_vec.contains(&key.as_str())) - .map(|(key, value)| key.to_lowercase() + ":" + value.trim()) .collect::>(); - items.sort(); - items.join("\n") + items.sort_by(|(k1, _), (k2, _)| k1.cmp(k2)); + items + .iter() + .map(|(key, value)| key.to_lowercase() + ":" + value.trim()) + .collect::>() + .join("\n") } fn canonical_query_string(uri: &hyper::Uri) -> String { -- 2.45.2 From f7349f40050dfb5dc4f4f49df5732686c8d846ba Mon Sep 17 00:00:00 2001 From: Alex Auvolat Date: Wed, 12 Jan 2022 11:41:20 +0100 Subject: [PATCH 4/4] Add quotes in returned etags --- src/api/s3_copy.rs | 42 ++++++++++++++++++++++++++++++------- src/api/s3_list.rs | 2 +- src/api/s3_put.rs | 2 +- src/api/s3_xml.rs | 52 +++++++--------------------------------------- 4 files changed, 45 insertions(+), 53 deletions(-) diff --git a/src/api/s3_copy.rs b/src/api/s3_copy.rs index 7e91ecd8..f8b3550e 100644 --- a/src/api/s3_copy.rs +++ b/src/api/s3_copy.rs @@ -148,9 +148,9 @@ pub async fn handle_copy( } let last_modified = msec_to_rfc3339(new_timestamp); - let result = s3_xml::CopyObjectResult { + let result = CopyObjectResult { last_modified: s3_xml::Value(last_modified), - etag: s3_xml::Value(etag), + etag: s3_xml::Value(format!("\"{}\"", etag)), }; let xml = s3_xml::to_xml_with_header(&result)?; @@ -394,7 +394,7 @@ pub async fn handle_upload_part_copy( // LGTM let resp_xml = s3_xml::to_xml_with_header(&CopyPartResult { xmlns: (), - etag: s3_xml::Value(etag), + etag: s3_xml::Value(format!("\"{}\"", etag)), last_modified: s3_xml::Value(msec_to_rfc3339(source_object_version.timestamp)), })?; @@ -553,6 +553,14 @@ impl CopyPreconditionHeaders { } } +#[derive(Debug, Serialize, PartialEq)] +pub struct CopyObjectResult { + #[serde(rename = "LastModified")] + pub last_modified: s3_xml::Value, + #[serde(rename = "ETag")] + pub etag: s3_xml::Value, +} + #[derive(Debug, Serialize, PartialEq)] pub struct CopyPartResult { #[serde(serialize_with = "xmlns_tag")] @@ -568,15 +576,35 @@ mod tests { use super::*; use crate::s3_xml::to_xml_with_header; + #[test] + fn copy_object_result() -> Result<(), Error> { + let copy_result = CopyObjectResult { + last_modified: s3_xml::Value(msec_to_rfc3339(0)), + etag: s3_xml::Value("\"9b2cf535f27731c974343645a3985328\"".to_string()), + }; + assert_eq!( + to_xml_with_header(©_result)?, + "\ +\ + 1970-01-01T00:00:00.000Z\ + "9b2cf535f27731c974343645a3985328"\ +\ + " + ); + Ok(()) + } + #[test] fn serialize_copy_part_result() -> Result<(), Error> { - // @FIXME: ETag should be quoted, but we can't add quotes - // because XML serializer replaces them by `"` - let expected_retval = r#"2011-04-11T20:34:56.000Z9b2cf535f27731c974343645a3985328"#; + let expected_retval = "\ +\ + 2011-04-11T20:34:56.000Z\ + "9b2cf535f27731c974343645a3985328"\ +"; let v = CopyPartResult { xmlns: (), last_modified: s3_xml::Value("2011-04-11T20:34:56.000Z".into()), - etag: s3_xml::Value("9b2cf535f27731c974343645a3985328".into()), + etag: s3_xml::Value("\"9b2cf535f27731c974343645a3985328\"".into()), }; println!("{}", to_xml_with_header(&v)?); diff --git a/src/api/s3_list.rs b/src/api/s3_list.rs index ddc03375..f991b07d 100644 --- a/src/api/s3_list.rs +++ b/src/api/s3_list.rs @@ -121,7 +121,7 @@ pub async fn handle_list( key: uriencode_maybe(key, query.common.urlencode_resp), last_modified: s3_xml::Value(msec_to_rfc3339(info.last_modified)), size: s3_xml::IntValue(info.size as i64), - etag: s3_xml::Value(info.etag.to_string()), + etag: s3_xml::Value(format!("\"{}\"", info.etag.to_string())), storage_class: s3_xml::Value("STANDARD".to_string()), }) .collect(), diff --git a/src/api/s3_put.rs b/src/api/s3_put.rs index 421b94a1..37658172 100644 --- a/src/api/s3_put.rs +++ b/src/api/s3_put.rs @@ -532,7 +532,7 @@ pub async fn handle_complete_multipart_upload( location: None, bucket: s3_xml::Value(bucket_name.to_string()), key: s3_xml::Value(key), - etag: s3_xml::Value(etag), + etag: s3_xml::Value(format!("\"{}\"", etag)), }; let xml = s3_xml::to_xml_with_header(&result)?; diff --git a/src/api/s3_xml.rs b/src/api/s3_xml.rs index 962b4780..1df4ed60 100644 --- a/src/api/s3_xml.rs +++ b/src/api/s3_xml.rs @@ -107,14 +107,6 @@ pub struct DeleteResult { pub errors: Vec, } -#[derive(Debug, Serialize, PartialEq)] -pub struct CopyObjectResult { - #[serde(rename = "LastModified")] - pub last_modified: Value, - #[serde(rename = "ETag")] - pub etag: Value, -} - #[derive(Debug, Serialize, PartialEq)] pub struct InitiateMultipartUploadResult { #[serde(serialize_with = "xmlns_tag")] @@ -426,26 +418,6 @@ mod tests { Ok(()) } - #[test] - fn copy_object_result() -> Result<(), ApiError> { - // @FIXME: ETag should be quoted, but we can't add quotes - // because XML serializer replaces them by `"` - let copy_result = CopyObjectResult { - last_modified: Value(msec_to_rfc3339(0)), - etag: Value("9b2cf535f27731c974343645a3985328".to_string()), - }; - assert_eq!( - to_xml_with_header(©_result)?, - "\ -\ - 1970-01-01T00:00:00.000Z\ - 9b2cf535f27731c974343645a3985328\ -\ - " - ); - Ok(()) - } - #[test] fn initiate_multipart_upload_result() -> Result<(), ApiError> { let result = InitiateMultipartUploadResult { @@ -468,14 +440,12 @@ mod tests { #[test] fn complete_multipart_upload_result() -> Result<(), ApiError> { - // @FIXME: ETag should be quoted, but we can't add quotes - // because XML serializer replaces them by `"` let result = CompleteMultipartUploadResult { xmlns: (), location: Some(Value("https://garage.tld/mybucket/a/plop".to_string())), bucket: Value("mybucket".to_string()), key: Value("a/plop".to_string()), - etag: Value("3858f62230ac3c915f300c664312c11f-9".to_string()), + etag: Value("\"3858f62230ac3c915f300c664312c11f-9\"".to_string()), }; assert_eq!( to_xml_with_header(&result)?, @@ -484,7 +454,7 @@ mod tests { https://garage.tld/mybucket/a/plop\ mybucket\ a/plop\ - 3858f62230ac3c915f300c664312c11f-9\ + "3858f62230ac3c915f300c664312c11f-9"\ " ); Ok(()) @@ -544,8 +514,6 @@ mod tests { #[test] fn list_objects_v1_1() -> Result<(), ApiError> { - // @FIXME: ETag should be quoted, but we can't add quotes - // because XML serializer replaces them by `"` let result = ListBucketResult { xmlns: (), name: Value("example-bucket".to_string()), @@ -563,7 +531,7 @@ mod tests { contents: vec![ListBucketItem { key: Value("sample.jpg".to_string()), last_modified: Value(msec_to_rfc3339(0)), - etag: Value("bf1d737a4d46a19f3bced6905cc8b902".to_string()), + etag: Value("\"bf1d737a4d46a19f3bced6905cc8b902\"".to_string()), size: IntValue(142863), storage_class: Value("STANDARD".to_string()), }], @@ -584,7 +552,7 @@ mod tests { \ sample.jpg\ 1970-01-01T00:00:00.000Z\ - bf1d737a4d46a19f3bced6905cc8b902\ + "bf1d737a4d46a19f3bced6905cc8b902"\ 142863\ STANDARD\ \ @@ -645,8 +613,6 @@ mod tests { #[test] fn list_objects_v2_1() -> Result<(), ApiError> { - // @FIXME: ETag should be quoted, but we can't add quotes - // because XML serializer replaces them by `"` let result = ListBucketResult { xmlns: (), name: Value("quotes".to_string()), @@ -664,7 +630,7 @@ mod tests { contents: vec![ListBucketItem { key: Value("ExampleObject.txt".to_string()), last_modified: Value(msec_to_rfc3339(0)), - etag: Value("599bab3ed2c697f1d26842727561fd94".to_string()), + etag: Value("\"599bab3ed2c697f1d26842727561fd94\"".to_string()), size: IntValue(857), storage_class: Value("REDUCED_REDUNDANCY".to_string()), }], @@ -682,7 +648,7 @@ mod tests { \ ExampleObject.txt\ 1970-01-01T00:00:00.000Z\ - 599bab3ed2c697f1d26842727561fd94\ + "599bab3ed2c697f1d26842727561fd94"\ 857\ REDUCED_REDUNDANCY\ \ @@ -693,8 +659,6 @@ mod tests { #[test] fn list_objects_v2_2() -> Result<(), ApiError> { - // @FIXME: ETag should be quoted, but we can't add quotes - // because XML serializer replaces them by `"` let result = ListBucketResult { xmlns: (), name: Value("bucket".to_string()), @@ -714,7 +678,7 @@ mod tests { contents: vec![ListBucketItem { key: Value("happyfacex.jpg".to_string()), last_modified: Value(msec_to_rfc3339(0)), - etag: Value("70ee1738b6b21e2c8a43f3a5ab0eee71".to_string()), + etag: Value("\"70ee1738b6b21e2c8a43f3a5ab0eee71\"".to_string()), size: IntValue(1111), storage_class: Value("STANDARD".to_string()), }], @@ -734,7 +698,7 @@ mod tests { \ happyfacex.jpg\ 1970-01-01T00:00:00.000Z\ - 70ee1738b6b21e2c8a43f3a5ab0eee71\ + "70ee1738b6b21e2c8a43f3a5ab0eee71"\ 1111\ STANDARD\ \ -- 2.45.2