From 1a8f74fc94495e1a6b60138f634bb8018ed2078b Mon Sep 17 00:00:00 2001 From: Alex Auvolat Date: Wed, 19 Feb 2025 17:26:29 +0100 Subject: [PATCH 1/3] api: GetObject: implement if-match and if-unmodified-since --- src/api/s3/get.rs | 57 ++++++++++++++++++++++++++++++++++------------- 1 file changed, 41 insertions(+), 16 deletions(-) diff --git a/src/api/s3/get.rs b/src/api/s3/get.rs index bcb72cc3..65be220f 100644 --- a/src/api/s3/get.rs +++ b/src/api/s3/get.rs @@ -9,8 +9,8 @@ use futures::future; use futures::stream::{self, Stream, StreamExt}; use http::header::{ ACCEPT_RANGES, CACHE_CONTROL, CONTENT_DISPOSITION, CONTENT_ENCODING, CONTENT_LANGUAGE, - CONTENT_LENGTH, CONTENT_RANGE, CONTENT_TYPE, ETAG, EXPIRES, IF_MODIFIED_SINCE, IF_NONE_MATCH, - LAST_MODIFIED, RANGE, + CONTENT_LENGTH, CONTENT_RANGE, CONTENT_TYPE, ETAG, EXPIRES, IF_MATCH, IF_MODIFIED_SINCE, + IF_NONE_MATCH, IF_UNMODIFIED_SINCE, LAST_MODIFIED, RANGE, }; use hyper::{Request, Response, StatusCode}; use tokio::sync::mpsc; @@ -115,42 +115,67 @@ fn getobject_override_headers( Ok(()) } -fn try_answer_cached( +fn handle_http_precondition( version: &ObjectVersion, version_meta: &ObjectVersionMeta, req: &Request<()>, -) -> Option> { +) -> Result>, Error> { + if let Some(if_match) = req.headers().get(IF_MATCH) { + let if_match = if_match.to_str()?; + let expected = format!("\"{}\"", version_meta.etag); + let found = if_match + .split(',') + .map(str::trim) + .any(|etag| etag == expected || etag == "\"*\""); + if !found { + return Ok(Some( + Response::builder() + .status(StatusCode::PRECONDITION_FAILED) + .body(empty_body()) + .unwrap(), + )); + } + } + // It is possible, and is even usually the case, [that both If-None-Match and // If-Modified-Since] are present in a request. In this situation If-None-Match takes // precedence and If-Modified-Since is ignored (as per 6.Precedence from rfc7232). The rational // being that etag based matching is more accurate, it has no issue with sub-second precision // for instance (in case of very fast updates) + let object_date = UNIX_EPOCH + Duration::from_millis(version.timestamp); + let cached = if let Some(none_match) = req.headers().get(IF_NONE_MATCH) { - let none_match = none_match.to_str().ok()?; + let none_match = none_match.to_str()?; let expected = format!("\"{}\"", version_meta.etag); let found = none_match .split(',') .map(str::trim) .any(|etag| etag == expected || etag == "\"*\""); found + } else if let Some(unmodified_since) = req.headers().get(IF_UNMODIFIED_SINCE) { + let unmodified_since = unmodified_since.to_str()?; + let unmodified_since = + httpdate::parse_http_date(unmodified_since).ok_or_bad_request("invalid http date")?; + object_date <= unmodified_since } else if let Some(modified_since) = req.headers().get(IF_MODIFIED_SINCE) { - let modified_since = modified_since.to_str().ok()?; - let client_date = httpdate::parse_http_date(modified_since).ok()?; - let server_date = UNIX_EPOCH + Duration::from_millis(version.timestamp); - client_date >= server_date + let modified_since = modified_since.to_str()?; + let modified_since = + httpdate::parse_http_date(modified_since).ok_or_bad_request("invalid http date")?; + let object_date = UNIX_EPOCH + Duration::from_millis(version.timestamp); + object_date > modified_since } else { false }; if cached { - Some( + Ok(Some( Response::builder() .status(StatusCode::NOT_MODIFIED) .body(empty_body()) .unwrap(), - ) + )) } else { - None + Ok(None) } } @@ -196,8 +221,8 @@ pub async fn handle_head_without_ctx( _ => unreachable!(), }; - if let Some(cached) = try_answer_cached(object_version, version_meta, req) { - return Ok(cached); + if let Some(res) = handle_http_precondition(object_version, version_meta, req)? { + return Ok(res); } let (encryption, headers) = @@ -318,8 +343,8 @@ pub async fn handle_get_without_ctx( ObjectVersionData::FirstBlock(meta, _) => meta, }; - if let Some(cached) = try_answer_cached(last_v, last_v_meta, req) { - return Ok(cached); + if let Some(res) = handle_http_precondition(last_v, last_v_meta, req)? { + return Ok(res); } let (enc, headers) = From c0846c56fe360c859e573e645fead4a0978b9b9a Mon Sep 17 00:00:00 2001 From: Alex Auvolat Date: Wed, 19 Feb 2025 17:54:58 +0100 Subject: [PATCH 2/3] api: unify http precondition handling --- src/api/s3/copy.rs | 112 ++++------------------------- src/api/s3/get.rs | 175 +++++++++++++++++++++++++++++++-------------- 2 files changed, 137 insertions(+), 150 deletions(-) diff --git a/src/api/s3/copy.rs b/src/api/s3/copy.rs index ff8019e6..a5b2d706 100644 --- a/src/api/s3/copy.rs +++ b/src/api/s3/copy.rs @@ -1,9 +1,9 @@ use std::pin::Pin; -use std::time::{Duration, SystemTime, UNIX_EPOCH}; use futures::{stream, stream::Stream, StreamExt, TryStreamExt}; use bytes::Bytes; +use http::header::HeaderName; use hyper::{Request, Response}; use serde::Serialize; @@ -26,11 +26,20 @@ use garage_api_common::signature::checksum::*; use crate::api_server::{ReqBody, ResBody}; use crate::encryption::EncryptionParams; use crate::error::*; -use crate::get::full_object_byte_stream; +use crate::get::{full_object_byte_stream, PreconditionHeaders}; use crate::multipart; use crate::put::{extract_metadata_headers, save_stream, ChecksumMode, SaveStreamResult}; use crate::xml::{self as s3_xml, xmlns_tag}; +pub const X_AMZ_COPY_SOURCE_IF_MATCH: HeaderName = + HeaderName::from_static("x-amz-copy-source-if-match"); +pub const X_AMZ_COPY_SOURCE_IF_NONE_MATCH: HeaderName = + HeaderName::from_static("x-amz-copy-source-if-none-match"); +pub const X_AMZ_COPY_SOURCE_IF_MODIFIED_SINCE: HeaderName = + HeaderName::from_static("x-amz-copy-source-if-modified-since"); +pub const X_AMZ_COPY_SOURCE_IF_UNMODIFIED_SINCE: HeaderName = + HeaderName::from_static("x-amz-copy-source-if-unmodified-since"); + // -------- CopyObject --------- pub async fn handle_copy( @@ -38,7 +47,7 @@ pub async fn handle_copy( req: &Request, dest_key: &str, ) -> Result, Error> { - let copy_precondition = CopyPreconditionHeaders::parse(req)?; + let copy_precondition = PreconditionHeaders::parse_copy_source(req)?; let checksum_algorithm = request_checksum_algorithm(req.headers())?; @@ -48,7 +57,7 @@ pub async fn handle_copy( extract_source_info(&source_object)?; // Check precondition, e.g. x-amz-copy-source-if-match - copy_precondition.check(source_version, &source_version_meta.etag)?; + copy_precondition.check_copy_source(source_version, &source_version_meta.etag)?; // Determine encryption parameters let (source_encryption, source_object_meta_inner) = @@ -335,7 +344,7 @@ pub async fn handle_upload_part_copy( part_number: u64, upload_id: &str, ) -> Result, Error> { - let copy_precondition = CopyPreconditionHeaders::parse(req)?; + let copy_precondition = PreconditionHeaders::parse_copy_source(req)?; let dest_upload_id = multipart::decode_upload_id(upload_id)?; @@ -351,7 +360,7 @@ pub async fn handle_upload_part_copy( extract_source_info(&source_object)?; // Check precondition on source, e.g. x-amz-copy-source-if-match - copy_precondition.check(source_object_version, &source_version_meta.etag)?; + copy_precondition.check_copy_source(source_object_version, &source_version_meta.etag)?; // Determine encryption parameters let (source_encryption, _) = EncryptionParams::check_decrypt_for_copy_source( @@ -703,97 +712,6 @@ fn extract_source_info( Ok((source_version, source_version_data, source_version_meta)) } -struct CopyPreconditionHeaders { - copy_source_if_match: Option>, - copy_source_if_modified_since: Option, - copy_source_if_none_match: Option>, - copy_source_if_unmodified_since: Option, -} - -impl CopyPreconditionHeaders { - fn parse(req: &Request) -> Result { - Ok(Self { - copy_source_if_match: req - .headers() - .get("x-amz-copy-source-if-match") - .map(|x| x.to_str()) - .transpose()? - .map(|x| { - x.split(',') - .map(|m| m.trim().trim_matches('"').to_string()) - .collect::>() - }), - copy_source_if_modified_since: req - .headers() - .get("x-amz-copy-source-if-modified-since") - .map(|x| x.to_str()) - .transpose()? - .map(httpdate::parse_http_date) - .transpose() - .ok_or_bad_request("Invalid date in x-amz-copy-source-if-modified-since")?, - copy_source_if_none_match: req - .headers() - .get("x-amz-copy-source-if-none-match") - .map(|x| x.to_str()) - .transpose()? - .map(|x| { - x.split(',') - .map(|m| m.trim().trim_matches('"').to_string()) - .collect::>() - }), - copy_source_if_unmodified_since: req - .headers() - .get("x-amz-copy-source-if-unmodified-since") - .map(|x| x.to_str()) - .transpose()? - .map(httpdate::parse_http_date) - .transpose() - .ok_or_bad_request("Invalid date in x-amz-copy-source-if-unmodified-since")?, - }) - } - - fn check(&self, v: &ObjectVersion, etag: &str) -> Result<(), Error> { - let v_date = UNIX_EPOCH + Duration::from_millis(v.timestamp); - - let ok = match ( - &self.copy_source_if_match, - &self.copy_source_if_unmodified_since, - &self.copy_source_if_none_match, - &self.copy_source_if_modified_since, - ) { - // TODO I'm not sure all of the conditions are evaluated correctly here - - // If we have both if-match and if-unmodified-since, - // basically we don't care about if-unmodified-since, - // because in the spec it says that if if-match evaluates to - // true but if-unmodified-since evaluates to false, - // the copy is still done. - (Some(im), _, None, None) => im.iter().any(|x| x == etag || x == "*"), - (None, Some(ius), None, None) => v_date <= *ius, - - // If we have both if-none-match and if-modified-since, - // then both of the two conditions must evaluate to true - (None, None, Some(inm), Some(ims)) => { - !inm.iter().any(|x| x == etag || x == "*") && v_date > *ims - } - (None, None, Some(inm), None) => !inm.iter().any(|x| x == etag || x == "*"), - (None, None, None, Some(ims)) => v_date > *ims, - (None, None, None, None) => true, - _ => { - return Err(Error::bad_request( - "Invalid combination of x-amz-copy-source-if-xxxxx headers", - )) - } - }; - - if ok { - Ok(()) - } else { - Err(Error::PreconditionFailed) - } - } -} - type BlockStreamItemOk = (Bytes, Option); type BlockStreamItem = Result; diff --git a/src/api/s3/get.rs b/src/api/s3/get.rs index 65be220f..22076603 100644 --- a/src/api/s3/get.rs +++ b/src/api/s3/get.rs @@ -2,15 +2,15 @@ use std::collections::BTreeMap; use std::convert::TryInto; use std::sync::Arc; -use std::time::{Duration, UNIX_EPOCH}; +use std::time::{Duration, SystemTime, UNIX_EPOCH}; use bytes::Bytes; use futures::future; use futures::stream::{self, Stream, StreamExt}; use http::header::{ - ACCEPT_RANGES, CACHE_CONTROL, CONTENT_DISPOSITION, CONTENT_ENCODING, CONTENT_LANGUAGE, - CONTENT_LENGTH, CONTENT_RANGE, CONTENT_TYPE, ETAG, EXPIRES, IF_MATCH, IF_MODIFIED_SINCE, - IF_NONE_MATCH, IF_UNMODIFIED_SINCE, LAST_MODIFIED, RANGE, + HeaderMap, HeaderName, ACCEPT_RANGES, CACHE_CONTROL, CONTENT_DISPOSITION, CONTENT_ENCODING, + CONTENT_LANGUAGE, CONTENT_LENGTH, CONTENT_RANGE, CONTENT_TYPE, ETAG, EXPIRES, IF_MATCH, + IF_MODIFIED_SINCE, IF_NONE_MATCH, IF_UNMODIFIED_SINCE, LAST_MODIFIED, RANGE, }; use hyper::{Request, Response, StatusCode}; use tokio::sync::mpsc; @@ -29,10 +29,11 @@ use garage_api_common::helpers::*; use garage_api_common::signature::checksum::{add_checksum_response_headers, X_AMZ_CHECKSUM_MODE}; use crate::api_server::ResBody; +use crate::copy::*; use crate::encryption::EncryptionParams; use crate::error::*; -const X_AMZ_MP_PARTS_COUNT: &str = "x-amz-mp-parts-count"; +const X_AMZ_MP_PARTS_COUNT: HeaderName = HeaderName::from_static("x-amz-mp-parts-count"); #[derive(Default)] pub struct GetObjectOverrides { @@ -120,57 +121,12 @@ fn handle_http_precondition( version_meta: &ObjectVersionMeta, req: &Request<()>, ) -> Result>, Error> { - if let Some(if_match) = req.headers().get(IF_MATCH) { - let if_match = if_match.to_str()?; - let expected = format!("\"{}\"", version_meta.etag); - let found = if_match - .split(',') - .map(str::trim) - .any(|etag| etag == expected || etag == "\"*\""); - if !found { - return Ok(Some( - Response::builder() - .status(StatusCode::PRECONDITION_FAILED) - .body(empty_body()) - .unwrap(), - )); - } - } + let precondition_headers = PreconditionHeaders::parse(req)?; - // It is possible, and is even usually the case, [that both If-None-Match and - // If-Modified-Since] are present in a request. In this situation If-None-Match takes - // precedence and If-Modified-Since is ignored (as per 6.Precedence from rfc7232). The rational - // being that etag based matching is more accurate, it has no issue with sub-second precision - // for instance (in case of very fast updates) - let object_date = UNIX_EPOCH + Duration::from_millis(version.timestamp); - - let cached = if let Some(none_match) = req.headers().get(IF_NONE_MATCH) { - let none_match = none_match.to_str()?; - let expected = format!("\"{}\"", version_meta.etag); - let found = none_match - .split(',') - .map(str::trim) - .any(|etag| etag == expected || etag == "\"*\""); - found - } else if let Some(unmodified_since) = req.headers().get(IF_UNMODIFIED_SINCE) { - let unmodified_since = unmodified_since.to_str()?; - let unmodified_since = - httpdate::parse_http_date(unmodified_since).ok_or_bad_request("invalid http date")?; - object_date <= unmodified_since - } else if let Some(modified_since) = req.headers().get(IF_MODIFIED_SINCE) { - let modified_since = modified_since.to_str()?; - let modified_since = - httpdate::parse_http_date(modified_since).ok_or_bad_request("invalid http date")?; - let object_date = UNIX_EPOCH + Duration::from_millis(version.timestamp); - object_date > modified_since - } else { - false - }; - - if cached { + if let Some(status_code) = precondition_headers.check(&version, &version_meta.etag)? { Ok(Some( Response::builder() - .status(StatusCode::NOT_MODIFIED) + .status(status_code) .body(empty_body()) .unwrap(), )) @@ -786,3 +742,116 @@ fn std_error_from_read_error(e: E) -> std::io::Error { format!("Error while reading object data: {}", e), ) } + +// ---- + +pub struct PreconditionHeaders { + if_match: Option>, + if_modified_since: Option, + if_none_match: Option>, + if_unmodified_since: Option, +} + +impl PreconditionHeaders { + fn parse(req: &Request) -> Result { + Self::parse_with( + req.headers(), + &IF_MATCH, + &IF_NONE_MATCH, + &IF_MODIFIED_SINCE, + &IF_UNMODIFIED_SINCE, + ) + } + + pub(crate) fn parse_copy_source(req: &Request) -> Result { + Self::parse_with( + req.headers(), + &X_AMZ_COPY_SOURCE_IF_MATCH, + &X_AMZ_COPY_SOURCE_IF_NONE_MATCH, + &X_AMZ_COPY_SOURCE_IF_MODIFIED_SINCE, + &X_AMZ_COPY_SOURCE_IF_UNMODIFIED_SINCE, + ) + } + + fn parse_with( + headers: &HeaderMap, + hdr_if_match: &HeaderName, + hdr_if_none_match: &HeaderName, + hdr_if_modified_since: &HeaderName, + hdr_if_unmodified_since: &HeaderName, + ) -> Result { + Ok(Self { + if_match: headers + .get(hdr_if_match) + .map(|x| x.to_str()) + .transpose()? + .map(|x| { + x.split(',') + .map(|m| m.trim().trim_matches('"').to_string()) + .collect::>() + }), + if_none_match: headers + .get(hdr_if_none_match) + .map(|x| x.to_str()) + .transpose()? + .map(|x| { + x.split(',') + .map(|m| m.trim().trim_matches('"').to_string()) + .collect::>() + }), + if_modified_since: headers + .get(hdr_if_modified_since) + .map(|x| x.to_str()) + .transpose()? + .map(httpdate::parse_http_date) + .transpose() + .ok_or_bad_request("Invalid date in if-modified-since")?, + if_unmodified_since: headers + .get(hdr_if_unmodified_since) + .map(|x| x.to_str()) + .transpose()? + .map(httpdate::parse_http_date) + .transpose() + .ok_or_bad_request("Invalid date in if-unmodified-since")?, + }) + } + + fn check(&self, v: &ObjectVersion, etag: &str) -> Result, Error> { + let v_date = UNIX_EPOCH + Duration::from_millis(v.timestamp); + + // Implemented from https://datatracker.ietf.org/doc/html/rfc7232#section-6 + + if let Some(im) = &self.if_match { + // Step 1: if-match is present + if !im.iter().any(|x| x == etag || x == "*") { + return Ok(Some(StatusCode::PRECONDITION_FAILED)); + } + } else if let Some(ius) = &self.if_unmodified_since { + // Step 2: if-unmodified-since is present, and if-match is absent + if v_date > *ius { + return Ok(Some(StatusCode::PRECONDITION_FAILED)); + } + } + + if let Some(inm) = &self.if_none_match { + // Step 3: if-none-match is present + if inm.iter().any(|x| x == etag || x == "*") { + return Ok(Some(StatusCode::NOT_MODIFIED)); + } + } else if let Some(ims) = &self.if_modified_since { + // Step 4: if-modified-since is present, and if-none-match is absent + if v_date <= *ims { + return Ok(Some(StatusCode::NOT_MODIFIED)); + } + } + + Ok(None) + } + + pub(crate) fn check_copy_source(&self, v: &ObjectVersion, etag: &str) -> Result<(), Error> { + match self.check(v, etag)? { + Some(_) => Err(Error::PreconditionFailed), + None => Ok(()), + } + } +} From f87943a39d81efcd250ff7ddfa3bf562a2e61ece Mon Sep 17 00:00:00 2001 From: Alex Auvolat Date: Wed, 19 Feb 2025 18:26:03 +0100 Subject: [PATCH 3/3] tests: add test for http preconditions --- src/garage/tests/s3/objects.rs | 126 ++++++++++++++++++++++++++++++++- 1 file changed, 125 insertions(+), 1 deletion(-) diff --git a/src/garage/tests/s3/objects.rs b/src/garage/tests/s3/objects.rs index dfc5253d..d63ac000 100644 --- a/src/garage/tests/s3/objects.rs +++ b/src/garage/tests/s3/objects.rs @@ -1,5 +1,6 @@ use crate::common; -use aws_sdk_s3::primitives::ByteStream; +use aws_sdk_s3::error::SdkError; +use aws_sdk_s3::primitives::{ByteStream, DateTime}; use aws_sdk_s3::types::{Delete, ObjectIdentifier}; const STD_KEY: &str = "hello world"; @@ -125,6 +126,129 @@ async fn test_putobject() { } } +#[tokio::test] +async fn test_precondition() { + let ctx = common::context(); + let bucket = ctx.create_bucket("precondition"); + + let etag = "\"46cf18a9b447991b450cad3facf5937e\""; + let etag2 = "\"ae4984b984cd984fe98d4efa954dce98\""; + let data = ByteStream::from_static(BODY); + + let r = ctx + .client + .put_object() + .bucket(&bucket) + .key(STD_KEY) + .body(data) + .send() + .await + .unwrap(); + + assert_eq!(r.e_tag.unwrap().as_str(), etag); + + let last_modified; + { + let o = ctx + .client + .get_object() + .bucket(&bucket) + .key(STD_KEY) + .if_match(etag) + .send() + .await + .unwrap(); + assert_eq!(o.e_tag.as_ref().unwrap().as_str(), etag); + last_modified = o.last_modified.unwrap(); + + let err = ctx + .client + .get_object() + .bucket(&bucket) + .key(STD_KEY) + .if_match(etag2) + .send() + .await; + assert!( + matches!(err, Err(SdkError::ServiceError(se)) if se.raw().status().as_u16() == 412) + ); + } + { + let o = ctx + .client + .get_object() + .bucket(&bucket) + .key(STD_KEY) + .if_none_match(etag2) + .send() + .await + .unwrap(); + assert_eq!(o.e_tag.as_ref().unwrap().as_str(), etag); + + let err = ctx + .client + .get_object() + .bucket(&bucket) + .key(STD_KEY) + .if_none_match(etag) + .send() + .await; + assert!( + matches!(err, Err(SdkError::ServiceError(se)) if se.raw().status().as_u16() == 304) + ); + } + let older_date = DateTime::from_secs_f64(last_modified.as_secs_f64() - 10.0); + let newer_date = DateTime::from_secs_f64(last_modified.as_secs_f64() + 10.0); + { + let err = ctx + .client + .get_object() + .bucket(&bucket) + .key(STD_KEY) + .if_modified_since(newer_date) + .send() + .await; + assert!( + matches!(err, Err(SdkError::ServiceError(se)) if se.raw().status().as_u16() == 304) + ); + + let o = ctx + .client + .get_object() + .bucket(&bucket) + .key(STD_KEY) + .if_modified_since(older_date) + .send() + .await + .unwrap(); + assert_eq!(o.e_tag.as_ref().unwrap().as_str(), etag); + } + { + let err = ctx + .client + .get_object() + .bucket(&bucket) + .key(STD_KEY) + .if_unmodified_since(older_date) + .send() + .await; + assert!( + matches!(err, Err(SdkError::ServiceError(se)) if se.raw().status().as_u16() == 412) + ); + + let o = ctx + .client + .get_object() + .bucket(&bucket) + .key(STD_KEY) + .if_unmodified_since(newer_date) + .send() + .await + .unwrap(); + assert_eq!(o.e_tag.as_ref().unwrap().as_str(), etag); + } +} + #[tokio::test] async fn test_getobject() { let ctx = common::context();