Merge pull request 'Implement all HTTP preconditions in GetObject/HeadObject' (#967) from fix-804 into main

Reviewed-on: Deuxfleurs/garage#967
This commit is contained in:
Alex 2025-02-19 17:31:26 +00:00
commit c5237c31e7
3 changed files with 271 additions and 135 deletions

View file

@ -1,9 +1,9 @@
use std::pin::Pin;
use std::time::{Duration, SystemTime, UNIX_EPOCH};
use futures::{stream, stream::Stream, StreamExt, TryStreamExt};
use bytes::Bytes;
use http::header::HeaderName;
use hyper::{Request, Response};
use serde::Serialize;
@ -26,11 +26,20 @@ use garage_api_common::signature::checksum::*;
use crate::api_server::{ReqBody, ResBody};
use crate::encryption::EncryptionParams;
use crate::error::*;
use crate::get::full_object_byte_stream;
use crate::get::{full_object_byte_stream, PreconditionHeaders};
use crate::multipart;
use crate::put::{extract_metadata_headers, save_stream, ChecksumMode, SaveStreamResult};
use crate::xml::{self as s3_xml, xmlns_tag};
pub const X_AMZ_COPY_SOURCE_IF_MATCH: HeaderName =
HeaderName::from_static("x-amz-copy-source-if-match");
pub const X_AMZ_COPY_SOURCE_IF_NONE_MATCH: HeaderName =
HeaderName::from_static("x-amz-copy-source-if-none-match");
pub const X_AMZ_COPY_SOURCE_IF_MODIFIED_SINCE: HeaderName =
HeaderName::from_static("x-amz-copy-source-if-modified-since");
pub const X_AMZ_COPY_SOURCE_IF_UNMODIFIED_SINCE: HeaderName =
HeaderName::from_static("x-amz-copy-source-if-unmodified-since");
// -------- CopyObject ---------
pub async fn handle_copy(
@ -38,7 +47,7 @@ pub async fn handle_copy(
req: &Request<ReqBody>,
dest_key: &str,
) -> Result<Response<ResBody>, Error> {
let copy_precondition = CopyPreconditionHeaders::parse(req)?;
let copy_precondition = PreconditionHeaders::parse_copy_source(req)?;
let checksum_algorithm = request_checksum_algorithm(req.headers())?;
@ -48,7 +57,7 @@ pub async fn handle_copy(
extract_source_info(&source_object)?;
// Check precondition, e.g. x-amz-copy-source-if-match
copy_precondition.check(source_version, &source_version_meta.etag)?;
copy_precondition.check_copy_source(source_version, &source_version_meta.etag)?;
// Determine encryption parameters
let (source_encryption, source_object_meta_inner) =
@ -335,7 +344,7 @@ pub async fn handle_upload_part_copy(
part_number: u64,
upload_id: &str,
) -> Result<Response<ResBody>, Error> {
let copy_precondition = CopyPreconditionHeaders::parse(req)?;
let copy_precondition = PreconditionHeaders::parse_copy_source(req)?;
let dest_upload_id = multipart::decode_upload_id(upload_id)?;
@ -351,7 +360,7 @@ pub async fn handle_upload_part_copy(
extract_source_info(&source_object)?;
// Check precondition on source, e.g. x-amz-copy-source-if-match
copy_precondition.check(source_object_version, &source_version_meta.etag)?;
copy_precondition.check_copy_source(source_object_version, &source_version_meta.etag)?;
// Determine encryption parameters
let (source_encryption, _) = EncryptionParams::check_decrypt_for_copy_source(
@ -703,97 +712,6 @@ fn extract_source_info(
Ok((source_version, source_version_data, source_version_meta))
}
struct CopyPreconditionHeaders {
copy_source_if_match: Option<Vec<String>>,
copy_source_if_modified_since: Option<SystemTime>,
copy_source_if_none_match: Option<Vec<String>>,
copy_source_if_unmodified_since: Option<SystemTime>,
}
impl CopyPreconditionHeaders {
fn parse(req: &Request<ReqBody>) -> Result<Self, Error> {
Ok(Self {
copy_source_if_match: req
.headers()
.get("x-amz-copy-source-if-match")
.map(|x| x.to_str())
.transpose()?
.map(|x| {
x.split(',')
.map(|m| m.trim().trim_matches('"').to_string())
.collect::<Vec<_>>()
}),
copy_source_if_modified_since: req
.headers()
.get("x-amz-copy-source-if-modified-since")
.map(|x| x.to_str())
.transpose()?
.map(httpdate::parse_http_date)
.transpose()
.ok_or_bad_request("Invalid date in x-amz-copy-source-if-modified-since")?,
copy_source_if_none_match: req
.headers()
.get("x-amz-copy-source-if-none-match")
.map(|x| x.to_str())
.transpose()?
.map(|x| {
x.split(',')
.map(|m| m.trim().trim_matches('"').to_string())
.collect::<Vec<_>>()
}),
copy_source_if_unmodified_since: req
.headers()
.get("x-amz-copy-source-if-unmodified-since")
.map(|x| x.to_str())
.transpose()?
.map(httpdate::parse_http_date)
.transpose()
.ok_or_bad_request("Invalid date in x-amz-copy-source-if-unmodified-since")?,
})
}
fn check(&self, v: &ObjectVersion, etag: &str) -> Result<(), Error> {
let v_date = UNIX_EPOCH + Duration::from_millis(v.timestamp);
let ok = match (
&self.copy_source_if_match,
&self.copy_source_if_unmodified_since,
&self.copy_source_if_none_match,
&self.copy_source_if_modified_since,
) {
// TODO I'm not sure all of the conditions are evaluated correctly here
// If we have both if-match and if-unmodified-since,
// basically we don't care about if-unmodified-since,
// because in the spec it says that if if-match evaluates to
// true but if-unmodified-since evaluates to false,
// the copy is still done.
(Some(im), _, None, None) => im.iter().any(|x| x == etag || x == "*"),
(None, Some(ius), None, None) => v_date <= *ius,
// If we have both if-none-match and if-modified-since,
// then both of the two conditions must evaluate to true
(None, None, Some(inm), Some(ims)) => {
!inm.iter().any(|x| x == etag || x == "*") && v_date > *ims
}
(None, None, Some(inm), None) => !inm.iter().any(|x| x == etag || x == "*"),
(None, None, None, Some(ims)) => v_date > *ims,
(None, None, None, None) => true,
_ => {
return Err(Error::bad_request(
"Invalid combination of x-amz-copy-source-if-xxxxx headers",
))
}
};
if ok {
Ok(())
} else {
Err(Error::PreconditionFailed)
}
}
}
type BlockStreamItemOk = (Bytes, Option<Hash>);
type BlockStreamItem = Result<BlockStreamItemOk, garage_util::error::Error>;

View file

@ -2,15 +2,15 @@
use std::collections::BTreeMap;
use std::convert::TryInto;
use std::sync::Arc;
use std::time::{Duration, UNIX_EPOCH};
use std::time::{Duration, SystemTime, UNIX_EPOCH};
use bytes::Bytes;
use futures::future;
use futures::stream::{self, Stream, StreamExt};
use http::header::{
ACCEPT_RANGES, CACHE_CONTROL, CONTENT_DISPOSITION, CONTENT_ENCODING, CONTENT_LANGUAGE,
CONTENT_LENGTH, CONTENT_RANGE, CONTENT_TYPE, ETAG, EXPIRES, IF_MODIFIED_SINCE, IF_NONE_MATCH,
LAST_MODIFIED, RANGE,
HeaderMap, HeaderName, ACCEPT_RANGES, CACHE_CONTROL, CONTENT_DISPOSITION, CONTENT_ENCODING,
CONTENT_LANGUAGE, CONTENT_LENGTH, CONTENT_RANGE, CONTENT_TYPE, ETAG, EXPIRES, IF_MATCH,
IF_MODIFIED_SINCE, IF_NONE_MATCH, IF_UNMODIFIED_SINCE, LAST_MODIFIED, RANGE,
};
use hyper::{Request, Response, StatusCode};
use tokio::sync::mpsc;
@ -29,10 +29,11 @@ use garage_api_common::helpers::*;
use garage_api_common::signature::checksum::{add_checksum_response_headers, X_AMZ_CHECKSUM_MODE};
use crate::api_server::ResBody;
use crate::copy::*;
use crate::encryption::EncryptionParams;
use crate::error::*;
const X_AMZ_MP_PARTS_COUNT: &str = "x-amz-mp-parts-count";
const X_AMZ_MP_PARTS_COUNT: HeaderName = HeaderName::from_static("x-amz-mp-parts-count");
#[derive(Default)]
pub struct GetObjectOverrides {
@ -115,42 +116,22 @@ fn getobject_override_headers(
Ok(())
}
fn try_answer_cached(
fn handle_http_precondition(
version: &ObjectVersion,
version_meta: &ObjectVersionMeta,
req: &Request<()>,
) -> Option<Response<ResBody>> {
// <trinity> It is possible, and is even usually the case, [that both If-None-Match and
// If-Modified-Since] are present in a request. In this situation If-None-Match takes
// precedence and If-Modified-Since is ignored (as per 6.Precedence from rfc7232). The rational
// being that etag based matching is more accurate, it has no issue with sub-second precision
// for instance (in case of very fast updates)
let cached = if let Some(none_match) = req.headers().get(IF_NONE_MATCH) {
let none_match = none_match.to_str().ok()?;
let expected = format!("\"{}\"", version_meta.etag);
let found = none_match
.split(',')
.map(str::trim)
.any(|etag| etag == expected || etag == "\"*\"");
found
} else if let Some(modified_since) = req.headers().get(IF_MODIFIED_SINCE) {
let modified_since = modified_since.to_str().ok()?;
let client_date = httpdate::parse_http_date(modified_since).ok()?;
let server_date = UNIX_EPOCH + Duration::from_millis(version.timestamp);
client_date >= server_date
} else {
false
};
) -> Result<Option<Response<ResBody>>, Error> {
let precondition_headers = PreconditionHeaders::parse(req)?;
if cached {
Some(
if let Some(status_code) = precondition_headers.check(&version, &version_meta.etag)? {
Ok(Some(
Response::builder()
.status(StatusCode::NOT_MODIFIED)
.status(status_code)
.body(empty_body())
.unwrap(),
)
))
} else {
None
Ok(None)
}
}
@ -196,8 +177,8 @@ pub async fn handle_head_without_ctx(
_ => unreachable!(),
};
if let Some(cached) = try_answer_cached(object_version, version_meta, req) {
return Ok(cached);
if let Some(res) = handle_http_precondition(object_version, version_meta, req)? {
return Ok(res);
}
let (encryption, headers) =
@ -318,8 +299,8 @@ pub async fn handle_get_without_ctx(
ObjectVersionData::FirstBlock(meta, _) => meta,
};
if let Some(cached) = try_answer_cached(last_v, last_v_meta, req) {
return Ok(cached);
if let Some(res) = handle_http_precondition(last_v, last_v_meta, req)? {
return Ok(res);
}
let (enc, headers) =
@ -761,3 +742,116 @@ fn std_error_from_read_error<E: std::fmt::Display>(e: E) -> std::io::Error {
format!("Error while reading object data: {}", e),
)
}
// ----
pub struct PreconditionHeaders {
if_match: Option<Vec<String>>,
if_modified_since: Option<SystemTime>,
if_none_match: Option<Vec<String>>,
if_unmodified_since: Option<SystemTime>,
}
impl PreconditionHeaders {
fn parse<B>(req: &Request<B>) -> Result<Self, Error> {
Self::parse_with(
req.headers(),
&IF_MATCH,
&IF_NONE_MATCH,
&IF_MODIFIED_SINCE,
&IF_UNMODIFIED_SINCE,
)
}
pub(crate) fn parse_copy_source<B>(req: &Request<B>) -> Result<Self, Error> {
Self::parse_with(
req.headers(),
&X_AMZ_COPY_SOURCE_IF_MATCH,
&X_AMZ_COPY_SOURCE_IF_NONE_MATCH,
&X_AMZ_COPY_SOURCE_IF_MODIFIED_SINCE,
&X_AMZ_COPY_SOURCE_IF_UNMODIFIED_SINCE,
)
}
fn parse_with(
headers: &HeaderMap,
hdr_if_match: &HeaderName,
hdr_if_none_match: &HeaderName,
hdr_if_modified_since: &HeaderName,
hdr_if_unmodified_since: &HeaderName,
) -> Result<Self, Error> {
Ok(Self {
if_match: headers
.get(hdr_if_match)
.map(|x| x.to_str())
.transpose()?
.map(|x| {
x.split(',')
.map(|m| m.trim().trim_matches('"').to_string())
.collect::<Vec<_>>()
}),
if_none_match: headers
.get(hdr_if_none_match)
.map(|x| x.to_str())
.transpose()?
.map(|x| {
x.split(',')
.map(|m| m.trim().trim_matches('"').to_string())
.collect::<Vec<_>>()
}),
if_modified_since: headers
.get(hdr_if_modified_since)
.map(|x| x.to_str())
.transpose()?
.map(httpdate::parse_http_date)
.transpose()
.ok_or_bad_request("Invalid date in if-modified-since")?,
if_unmodified_since: headers
.get(hdr_if_unmodified_since)
.map(|x| x.to_str())
.transpose()?
.map(httpdate::parse_http_date)
.transpose()
.ok_or_bad_request("Invalid date in if-unmodified-since")?,
})
}
fn check(&self, v: &ObjectVersion, etag: &str) -> Result<Option<StatusCode>, Error> {
let v_date = UNIX_EPOCH + Duration::from_millis(v.timestamp);
// Implemented from https://datatracker.ietf.org/doc/html/rfc7232#section-6
if let Some(im) = &self.if_match {
// Step 1: if-match is present
if !im.iter().any(|x| x == etag || x == "*") {
return Ok(Some(StatusCode::PRECONDITION_FAILED));
}
} else if let Some(ius) = &self.if_unmodified_since {
// Step 2: if-unmodified-since is present, and if-match is absent
if v_date > *ius {
return Ok(Some(StatusCode::PRECONDITION_FAILED));
}
}
if let Some(inm) = &self.if_none_match {
// Step 3: if-none-match is present
if inm.iter().any(|x| x == etag || x == "*") {
return Ok(Some(StatusCode::NOT_MODIFIED));
}
} else if let Some(ims) = &self.if_modified_since {
// Step 4: if-modified-since is present, and if-none-match is absent
if v_date <= *ims {
return Ok(Some(StatusCode::NOT_MODIFIED));
}
}
Ok(None)
}
pub(crate) fn check_copy_source(&self, v: &ObjectVersion, etag: &str) -> Result<(), Error> {
match self.check(v, etag)? {
Some(_) => Err(Error::PreconditionFailed),
None => Ok(()),
}
}
}

View file

@ -1,5 +1,6 @@
use crate::common;
use aws_sdk_s3::primitives::ByteStream;
use aws_sdk_s3::error::SdkError;
use aws_sdk_s3::primitives::{ByteStream, DateTime};
use aws_sdk_s3::types::{Delete, ObjectIdentifier};
const STD_KEY: &str = "hello world";
@ -125,6 +126,129 @@ async fn test_putobject() {
}
}
#[tokio::test]
async fn test_precondition() {
let ctx = common::context();
let bucket = ctx.create_bucket("precondition");
let etag = "\"46cf18a9b447991b450cad3facf5937e\"";
let etag2 = "\"ae4984b984cd984fe98d4efa954dce98\"";
let data = ByteStream::from_static(BODY);
let r = ctx
.client
.put_object()
.bucket(&bucket)
.key(STD_KEY)
.body(data)
.send()
.await
.unwrap();
assert_eq!(r.e_tag.unwrap().as_str(), etag);
let last_modified;
{
let o = ctx
.client
.get_object()
.bucket(&bucket)
.key(STD_KEY)
.if_match(etag)
.send()
.await
.unwrap();
assert_eq!(o.e_tag.as_ref().unwrap().as_str(), etag);
last_modified = o.last_modified.unwrap();
let err = ctx
.client
.get_object()
.bucket(&bucket)
.key(STD_KEY)
.if_match(etag2)
.send()
.await;
assert!(
matches!(err, Err(SdkError::ServiceError(se)) if se.raw().status().as_u16() == 412)
);
}
{
let o = ctx
.client
.get_object()
.bucket(&bucket)
.key(STD_KEY)
.if_none_match(etag2)
.send()
.await
.unwrap();
assert_eq!(o.e_tag.as_ref().unwrap().as_str(), etag);
let err = ctx
.client
.get_object()
.bucket(&bucket)
.key(STD_KEY)
.if_none_match(etag)
.send()
.await;
assert!(
matches!(err, Err(SdkError::ServiceError(se)) if se.raw().status().as_u16() == 304)
);
}
let older_date = DateTime::from_secs_f64(last_modified.as_secs_f64() - 10.0);
let newer_date = DateTime::from_secs_f64(last_modified.as_secs_f64() + 10.0);
{
let err = ctx
.client
.get_object()
.bucket(&bucket)
.key(STD_KEY)
.if_modified_since(newer_date)
.send()
.await;
assert!(
matches!(err, Err(SdkError::ServiceError(se)) if se.raw().status().as_u16() == 304)
);
let o = ctx
.client
.get_object()
.bucket(&bucket)
.key(STD_KEY)
.if_modified_since(older_date)
.send()
.await
.unwrap();
assert_eq!(o.e_tag.as_ref().unwrap().as_str(), etag);
}
{
let err = ctx
.client
.get_object()
.bucket(&bucket)
.key(STD_KEY)
.if_unmodified_since(older_date)
.send()
.await;
assert!(
matches!(err, Err(SdkError::ServiceError(se)) if se.raw().status().as_u16() == 412)
);
let o = ctx
.client
.get_object()
.bucket(&bucket)
.key(STD_KEY)
.if_unmodified_since(newer_date)
.send()
.await
.unwrap();
assert_eq!(o.e_tag.as_ref().unwrap().as_str(), etag);
}
}
#[tokio::test]
async fn test_getobject() {
let ctx = common::context();