From 45b0453d0f5b08f44dbd010c084daa87c2876945 Mon Sep 17 00:00:00 2001 From: Alex Auvolat Date: Tue, 18 Apr 2023 18:03:10 +0200 Subject: [PATCH 1/6] Ensure increasing version timestamps in PutObject --- src/api/s3/put.rs | 24 ++++++++++++++++++------ 1 file changed, 18 insertions(+), 6 deletions(-) diff --git a/src/api/s3/put.rs b/src/api/s3/put.rs index c7ac5030..bd032165 100644 --- a/src/api/s3/put.rs +++ b/src/api/s3/put.rs @@ -3,6 +3,7 @@ use std::sync::Arc; use base64::prelude::*; use futures::prelude::*; +use futures::try_join; use hyper::body::{Body, Bytes}; use hyper::header::{HeaderMap, HeaderValue}; use hyper::{Request, Response}; @@ -35,7 +36,7 @@ pub async fn handle_put( garage: Arc, req: Request, bucket: &Bucket, - key: &str, + key: &String, content_sha256: Option, ) -> Result, Error> { // Retrieve interesting headers from request @@ -68,16 +69,27 @@ pub(crate) async fn save_stream> + Unpin>( headers: ObjectVersionHeaders, body: S, bucket: &Bucket, - key: &str, + key: &String, content_md5: Option, content_sha256: Option, ) -> Result<(Uuid, String), Error> { + let mut chunker = StreamChunker::new(body, garage.config.block_size); + let (first_block_opt, existing_object) = try_join!( + chunker.next(), + garage + .object_table + .get(&bucket.id, key) + .map_err(Error::from), + )?; + + let first_block = first_block_opt.unwrap_or_default(); + // Generate identity of new version let version_uuid = gen_uuid(); - let version_timestamp = now_msec(); - - let mut chunker = StreamChunker::new(body, garage.config.block_size); - let first_block = chunker.next().await?.unwrap_or_default(); + let version_timestamp = existing_object + .and_then(|obj| obj.versions().iter().map(|v| v.timestamp).max()) + .map(|t| std::cmp::max(t + 1, now_msec())) + .unwrap_or_else(now_msec); // If body is small enough, store it directly in the object table // as "inline data". We can then return immediately. From 3d6ed63824ac2190ba0522d897ef9addb8823140 Mon Sep 17 00:00:00 2001 From: Alex Auvolat Date: Wed, 18 Oct 2023 16:36:48 +0200 Subject: [PATCH 2/6] check_quotas: avoid re-fetching object from object table --- src/api/s3/multipart.rs | 4 ++-- src/api/s3/put.rs | 18 ++++++++++-------- 2 files changed, 12 insertions(+), 10 deletions(-) diff --git a/src/api/s3/multipart.rs b/src/api/s3/multipart.rs index 52ea8e78..b6001142 100644 --- a/src/api/s3/multipart.rs +++ b/src/api/s3/multipart.rs @@ -233,7 +233,7 @@ pub async fn handle_complete_multipart_upload( // Get object and multipart upload let key = key.to_string(); - let (_, mut object_version, mpu) = get_upload(&garage, &bucket.id, &key, &upload_id).await?; + let (object, mut object_version, mpu) = get_upload(&garage, &bucket.id, &key, &upload_id).await?; if mpu.parts.is_empty() { return Err(Error::bad_request("No data was uploaded")); @@ -331,7 +331,7 @@ pub async fn handle_complete_multipart_upload( // Calculate total size of final object let total_size = parts.iter().map(|x| x.size.unwrap()).sum(); - if let Err(e) = check_quotas(&garage, bucket, &key, total_size).await { + if let Err(e) = check_quotas(&garage, bucket, &key, total_size, Some(&object)).await { object_version.state = ObjectVersionState::Aborted; let final_object = Object::new(bucket.id, key.clone(), vec![object_version]); garage.object_table.insert(&final_object).await?; diff --git a/src/api/s3/put.rs b/src/api/s3/put.rs index bd032165..a3fe9cca 100644 --- a/src/api/s3/put.rs +++ b/src/api/s3/put.rs @@ -87,6 +87,7 @@ pub(crate) async fn save_stream> + Unpin>( // Generate identity of new version let version_uuid = gen_uuid(); let version_timestamp = existing_object + .as_ref() .and_then(|obj| obj.versions().iter().map(|v| v.timestamp).max()) .map(|t| std::cmp::max(t + 1, now_msec())) .unwrap_or_else(now_msec); @@ -109,7 +110,7 @@ pub(crate) async fn save_stream> + Unpin>( content_sha256, )?; - check_quotas(&garage, bucket, key, size).await?; + check_quotas(&garage, bucket, key, size, existing_object.as_ref()).await?; let object_version = ObjectVersion { uuid: version_uuid, @@ -188,7 +189,7 @@ pub(crate) async fn save_stream> + Unpin>( content_sha256, )?; - check_quotas(&garage, bucket, key, total_size).await?; + check_quotas(&garage, bucket, key, total_size, existing_object.as_ref()).await?; // Save final object state, marked as Complete let md5sum_hex = hex::encode(data_md5sum); @@ -243,17 +244,18 @@ pub(crate) async fn check_quotas( bucket: &Bucket, key: &str, size: u64, + prev_object: Option<&Object>, ) -> Result<(), Error> { let quotas = bucket.state.as_option().unwrap().quotas.get(); if quotas.max_objects.is_none() && quotas.max_size.is_none() { return Ok(()); }; - let key = key.to_string(); - let (prev_object, counters) = futures::try_join!( - garage.object_table.get(&bucket.id, &key), - garage.object_counter_table.table.get(&bucket.id, &EmptyKey), - )?; + let counters = garage + .object_counter_table + .table + .get(&bucket.id, &EmptyKey) + .await?; let counters = counters .map(|x| x.filtered_values(&garage.system.ring.borrow())) @@ -287,7 +289,7 @@ pub(crate) async fn check_quotas( if cnt_size_diff > 0 && current_size + cnt_size_diff > ms as i64 { return Err(Error::forbidden(format!( "Bucket size quota is reached, maximum total size of objects for this bucket: {}. The bucket is already {} bytes, and this object would add {} bytes.", - ms, current_size, size + ms, current_size, cnt_size_diff ))); } } From d146cdd5b66ca1d3ed65ce93ca42c6db22defc09 Mon Sep 17 00:00:00 2001 From: Alex Auvolat Date: Wed, 18 Oct 2023 16:38:26 +0200 Subject: [PATCH 3/6] cargo fmt --- src/api/s3/multipart.rs | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/api/s3/multipart.rs b/src/api/s3/multipart.rs index b6001142..978ad9cf 100644 --- a/src/api/s3/multipart.rs +++ b/src/api/s3/multipart.rs @@ -233,7 +233,8 @@ pub async fn handle_complete_multipart_upload( // Get object and multipart upload let key = key.to_string(); - let (object, mut object_version, mpu) = get_upload(&garage, &bucket.id, &key, &upload_id).await?; + let (object, mut object_version, mpu) = + get_upload(&garage, &bucket.id, &key, &upload_id).await?; if mpu.parts.is_empty() { return Err(Error::bad_request("No data was uploaded")); From c6cde1f1437a6cab90b22df6fe0641e5ad34c287 Mon Sep 17 00:00:00 2001 From: Alex Auvolat Date: Fri, 20 Oct 2023 13:20:47 +0200 Subject: [PATCH 4/6] remove now-unused key parameter in check_quotas --- src/api/s3/multipart.rs | 2 +- src/api/s3/put.rs | 5 ++--- 2 files changed, 3 insertions(+), 4 deletions(-) diff --git a/src/api/s3/multipart.rs b/src/api/s3/multipart.rs index 978ad9cf..672ab198 100644 --- a/src/api/s3/multipart.rs +++ b/src/api/s3/multipart.rs @@ -332,7 +332,7 @@ pub async fn handle_complete_multipart_upload( // Calculate total size of final object let total_size = parts.iter().map(|x| x.size.unwrap()).sum(); - if let Err(e) = check_quotas(&garage, bucket, &key, total_size, Some(&object)).await { + if let Err(e) = check_quotas(&garage, bucket, total_size, Some(&object)).await { object_version.state = ObjectVersionState::Aborted; let final_object = Object::new(bucket.id, key.clone(), vec![object_version]); garage.object_table.insert(&final_object).await?; diff --git a/src/api/s3/put.rs b/src/api/s3/put.rs index a3fe9cca..62a1f76a 100644 --- a/src/api/s3/put.rs +++ b/src/api/s3/put.rs @@ -110,7 +110,7 @@ pub(crate) async fn save_stream> + Unpin>( content_sha256, )?; - check_quotas(&garage, bucket, key, size, existing_object.as_ref()).await?; + check_quotas(&garage, bucket, size, existing_object.as_ref()).await?; let object_version = ObjectVersion { uuid: version_uuid, @@ -189,7 +189,7 @@ pub(crate) async fn save_stream> + Unpin>( content_sha256, )?; - check_quotas(&garage, bucket, key, total_size, existing_object.as_ref()).await?; + check_quotas(&garage, bucket, total_size, existing_object.as_ref()).await?; // Save final object state, marked as Complete let md5sum_hex = hex::encode(data_md5sum); @@ -242,7 +242,6 @@ pub(crate) fn ensure_checksum_matches( pub(crate) async fn check_quotas( garage: &Arc, bucket: &Bucket, - key: &str, size: u64, prev_object: Option<&Object>, ) -> Result<(), Error> { From 8686cfd0b10a49048021102a08d637b0d4fe6a91 Mon Sep 17 00:00:00 2001 From: Alex Auvolat Date: Fri, 20 Oct 2023 13:37:37 +0200 Subject: [PATCH 5/6] s3 api: also ensure increasing timestamps for create_multipart_upload --- src/api/s3/multipart.rs | 7 +++++-- src/api/s3/put.rs | 14 +++++++++----- 2 files changed, 14 insertions(+), 7 deletions(-) diff --git a/src/api/s3/multipart.rs b/src/api/s3/multipart.rs index 672ab198..aaf271ab 100644 --- a/src/api/s3/multipart.rs +++ b/src/api/s3/multipart.rs @@ -30,10 +30,13 @@ pub async fn handle_create_multipart_upload( req: &Request, bucket_name: &str, bucket_id: Uuid, - key: &str, + key: &String, ) -> Result, Error> { + let existing_object = garage.object_table.get(&bucket_id, &key).await?; + let upload_id = gen_uuid(); - let timestamp = now_msec(); + let timestamp = next_timestamp(&existing_object); + let headers = get_headers(req.headers())?; // Create object in object table diff --git a/src/api/s3/put.rs b/src/api/s3/put.rs index 62a1f76a..c4df7561 100644 --- a/src/api/s3/put.rs +++ b/src/api/s3/put.rs @@ -86,11 +86,7 @@ pub(crate) async fn save_stream> + Unpin>( // Generate identity of new version let version_uuid = gen_uuid(); - let version_timestamp = existing_object - .as_ref() - .and_then(|obj| obj.versions().iter().map(|v| v.timestamp).max()) - .map(|t| std::cmp::max(t + 1, now_msec())) - .unwrap_or_else(now_msec); + let version_timestamp = next_timestamp(&existing_object); // If body is small enough, store it directly in the object table // as "inline data". We can then return immediately. @@ -532,3 +528,11 @@ pub(crate) fn get_headers(headers: &HeaderMap) -> Result) -> u64 { + existing_object + .as_ref() + .and_then(|obj| obj.versions().iter().map(|v| v.timestamp).max()) + .map(|t| std::cmp::max(t + 1, now_msec())) + .unwrap_or_else(now_msec) +} From c82d91c6bccf307186332b6c5c6fc0b128b1b2b1 Mon Sep 17 00:00:00 2001 From: Alex Auvolat Date: Fri, 20 Oct 2023 13:55:34 +0200 Subject: [PATCH 6/6] DeleteObject: always insert a deletion marker with a bigger timestamp than everything before --- src/api/s3/delete.rs | 40 ++++++++++++++++++---------------------- src/api/s3/multipart.rs | 3 +-- src/api/s3/put.rs | 4 ++-- 3 files changed, 21 insertions(+), 26 deletions(-) diff --git a/src/api/s3/delete.rs b/src/api/s3/delete.rs index b337155f..1c491eac 100644 --- a/src/api/s3/delete.rs +++ b/src/api/s3/delete.rs @@ -3,12 +3,12 @@ use std::sync::Arc; use hyper::{Body, Request, Response, StatusCode}; use garage_util::data::*; -use garage_util::time::*; use garage_model::garage::Garage; use garage_model::s3::object_table::*; use crate::s3::error::*; +use crate::s3::put::next_timestamp; use crate::s3::xml as s3_xml; use crate::signature::verify_signed_content; @@ -23,40 +23,36 @@ async fn handle_delete_internal( .await? .ok_or(Error::NoSuchKey)?; // No need to delete - let interesting_versions = object.versions().iter().filter(|v| { - !matches!( - v.state, - ObjectVersionState::Aborted - | ObjectVersionState::Complete(ObjectVersionData::DeleteMarker) - ) - }); + let del_timestamp = next_timestamp(Some(&object)); + let del_uuid = gen_uuid(); - let mut version_to_delete = None; - let mut timestamp = now_msec(); - for v in interesting_versions { - if v.timestamp + 1 > timestamp || version_to_delete.is_none() { - version_to_delete = Some(v.uuid); + let deleted_version = object + .versions() + .iter() + .rev() + .find(|v| !matches!(&v.state, ObjectVersionState::Aborted)) + .or_else(|| object.versions().iter().rev().next()); + let deleted_version = match deleted_version { + Some(dv) => dv.uuid, + None => { + warn!("Object has no versions: {:?}", object); + Uuid::from([0u8; 32]) } - timestamp = std::cmp::max(timestamp, v.timestamp + 1); - } - - let deleted_version = version_to_delete.ok_or(Error::NoSuchKey)?; - - let version_uuid = gen_uuid(); + }; let object = Object::new( bucket_id, key.into(), vec![ObjectVersion { - uuid: version_uuid, - timestamp, + uuid: del_uuid, + timestamp: del_timestamp, state: ObjectVersionState::Complete(ObjectVersionData::DeleteMarker), }], ); garage.object_table.insert(&object).await?; - Ok((deleted_version, version_uuid)) + Ok((deleted_version, del_uuid)) } pub async fn handle_delete( diff --git a/src/api/s3/multipart.rs b/src/api/s3/multipart.rs index aaf271ab..6b786318 100644 --- a/src/api/s3/multipart.rs +++ b/src/api/s3/multipart.rs @@ -9,7 +9,6 @@ use md5::{Digest as Md5Digest, Md5}; use garage_table::*; use garage_util::async_hash::*; use garage_util::data::*; -use garage_util::time::*; use garage_model::bucket_table::Bucket; use garage_model::garage::Garage; @@ -35,7 +34,7 @@ pub async fn handle_create_multipart_upload( let existing_object = garage.object_table.get(&bucket_id, &key).await?; let upload_id = gen_uuid(); - let timestamp = next_timestamp(&existing_object); + let timestamp = next_timestamp(existing_object.as_ref()); let headers = get_headers(req.headers())?; diff --git a/src/api/s3/put.rs b/src/api/s3/put.rs index c4df7561..606facc4 100644 --- a/src/api/s3/put.rs +++ b/src/api/s3/put.rs @@ -86,7 +86,7 @@ pub(crate) async fn save_stream> + Unpin>( // Generate identity of new version let version_uuid = gen_uuid(); - let version_timestamp = next_timestamp(&existing_object); + let version_timestamp = next_timestamp(existing_object.as_ref()); // If body is small enough, store it directly in the object table // as "inline data". We can then return immediately. @@ -529,7 +529,7 @@ pub(crate) fn get_headers(headers: &HeaderMap) -> Result) -> u64 { +pub(crate) fn next_timestamp(existing_object: Option<&Object>) -> u64 { existing_object .as_ref() .and_then(|obj| obj.versions().iter().map(|v| v.timestamp).max())