From 0b83e0558e5e3fee5237edac0ae6d9ba304bb073 Mon Sep 17 00:00:00 2001 From: Alex Auvolat Date: Tue, 29 Aug 2023 16:44:27 +0200 Subject: [PATCH 01/25] bucket_table: data model for lifecycle configuration --- src/model/bucket_table.rs | 40 +++++++++++++++++++++++++++++++++++++++ src/model/migrate.rs | 1 + 2 files changed, 41 insertions(+) diff --git a/src/model/bucket_table.rs b/src/model/bucket_table.rs index ac163736..dc4e4509 100644 --- a/src/model/bucket_table.rs +++ b/src/model/bucket_table.rs @@ -48,6 +48,9 @@ mod v08 { pub website_config: crdt::Lww>, /// CORS rules pub cors_config: crdt::Lww>>, + /// Lifecycle configuration + #[serde(default)] + pub lifecycle_config: crdt::Lww>>, /// Bucket quotas #[serde(default)] pub quotas: crdt::Lww, @@ -69,6 +72,42 @@ mod v08 { pub expose_headers: Vec, } + /// Lifecycle configuration rule + #[derive(PartialEq, Eq, Clone, Debug, Serialize, Deserialize)] + pub struct LifecycleRule { + /// The ID of the rule + pub id: Option, + /// Whether the rule is active + pub enabled: bool, + /// The filter to check whether rule applies to a given object + pub filter: LifecycleFilter, + /// Number of days after which incomplete multipart uploads are aborted + pub abort_incomplete_mpu_days: Option, + /// Expiration policy for stored objects + pub expiration: Option, + } + + /// A lifecycle filter is a set of conditions that must all be true. 
+ /// For each condition, if it is None, it is not verified (always true), + /// and if it is Some(x), then it is verified for value x + #[derive(PartialEq, Eq, Clone, Debug, Serialize, Deserialize)] + pub struct LifecycleFilter { + /// If Some(x), object key has to start with prefix x + pub prefix: Option, + /// If Some(x), object size has to be more than x + pub size_gt: Option, + /// If Some(x), object size has to be less than x + pub size_lt: Option, + } + + #[derive(PartialEq, Eq, Clone, Debug, Serialize, Deserialize)] + pub enum LifecycleExpiration { + /// Objects expire x days after they were created + AfterDays(usize), + /// Objects expire at date x (must be in yyyy-mm-dd format) + AtDate(String), + } + #[derive(Default, PartialEq, Eq, PartialOrd, Ord, Clone, Debug, Serialize, Deserialize)] pub struct BucketQuotas { /// Maximum size in bytes (bucket size = sum of sizes of objects in the bucket) @@ -96,6 +135,7 @@ impl BucketParams { local_aliases: crdt::LwwMap::new(), website_config: crdt::Lww::new(None), cors_config: crdt::Lww::new(None), + lifecycle_config: crdt::Lww::new(None), quotas: crdt::Lww::new(BucketQuotas::default()), } } diff --git a/src/model/migrate.rs b/src/model/migrate.rs index 6b4c3eed..4c74b43b 100644 --- a/src/model/migrate.rs +++ b/src/model/migrate.rs @@ -78,6 +78,7 @@ impl Migrate { local_aliases: LwwMap::new(), website_config: Lww::new(website), cors_config: Lww::new(None), + lifecycle_config: Lww::new(None), quotas: Lww::new(Default::default()), }), }) From 8041d9a8274619b9a7cb66735ed560bcfba16078 Mon Sep 17 00:00:00 2001 From: Alex Auvolat Date: Tue, 29 Aug 2023 17:44:17 +0200 Subject: [PATCH 02/25] s3: add xml structures to serialize/deserialize lifecycle configs --- src/api/s3/api_server.rs | 6 + src/api/s3/lifecycle.rs | 256 +++++++++++++++++++++++++++++++++++++++ src/api/s3/mod.rs | 1 + 3 files changed, 263 insertions(+) create mode 100644 src/api/s3/lifecycle.rs diff --git a/src/api/s3/api_server.rs b/src/api/s3/api_server.rs 
index 5e793082..06fef6d5 100644 --- a/src/api/s3/api_server.rs +++ b/src/api/s3/api_server.rs @@ -26,6 +26,7 @@ use crate::s3::copy::*; use crate::s3::cors::*; use crate::s3::delete::*; use crate::s3::get::*; +use crate::s3::lifecycle::*; use crate::s3::list::*; use crate::s3::multipart::*; use crate::s3::post_object::handle_post_object; @@ -362,6 +363,11 @@ impl ApiHandler for S3ApiServer { handle_put_cors(garage, bucket_id, req, content_sha256).await } Endpoint::DeleteBucketCors {} => handle_delete_cors(garage, bucket_id).await, + Endpoint::GetBucketLifecycleConfiguration {} => handle_get_lifecycle(&bucket).await, + Endpoint::PutBucketLifecycleConfiguration {} => { + handle_put_lifecycle(garage, bucket_id, req, content_sha256).await + } + Endpoint::DeleteBucketLifecycle {} => handle_delete_lifecycle(garage, bucket_id).await, endpoint => Err(Error::NotImplemented(endpoint.name().to_owned())), }; diff --git a/src/api/s3/lifecycle.rs b/src/api/s3/lifecycle.rs new file mode 100644 index 00000000..cb0cc83a --- /dev/null +++ b/src/api/s3/lifecycle.rs @@ -0,0 +1,256 @@ +use quick_xml::de::from_reader; +use std::sync::Arc; + +use hyper::{Body, Request, Response, StatusCode}; + +use serde::{Deserialize, Serialize}; + +use crate::s3::error::*; +use crate::s3::xml::{to_xml_with_header, xmlns_tag, IntValue, Value}; +use crate::signature::verify_signed_content; + +use garage_model::bucket_table::{ + Bucket, LifecycleExpiration as GarageLifecycleExpiration, + LifecycleFilter as GarageLifecycleFilter, LifecycleRule as GarageLifecycleRule, +}; +use garage_model::garage::Garage; +use garage_util::data::*; + +pub async fn handle_get_lifecycle(bucket: &Bucket) -> Result, Error> { + let param = bucket + .params() + .ok_or_internal_error("Bucket should not be deleted at this point")?; + + if let Some(lifecycle) = param.lifecycle_config.get() { + let wc = LifecycleConfiguration { + xmlns: (), + lifecycle_rules: lifecycle + .iter() + .map(LifecycleRule::from_garage_lifecycle_rule) + 
.collect::>(), + }; + let xml = to_xml_with_header(&wc)?; + Ok(Response::builder() + .status(StatusCode::OK) + .header(http::header::CONTENT_TYPE, "application/xml") + .body(Body::from(xml))?) + } else { + Ok(Response::builder() + .status(StatusCode::NO_CONTENT) + .body(Body::empty())?) + } +} + +pub async fn handle_delete_lifecycle( + garage: Arc, + bucket_id: Uuid, +) -> Result, Error> { + let mut bucket = garage + .bucket_helper() + .get_existing_bucket(bucket_id) + .await?; + + let param = bucket.params_mut().unwrap(); + + param.lifecycle_config.update(None); + garage.bucket_table.insert(&bucket).await?; + + Ok(Response::builder() + .status(StatusCode::NO_CONTENT) + .body(Body::empty())?) +} + +pub async fn handle_put_lifecycle( + garage: Arc, + bucket_id: Uuid, + req: Request, + content_sha256: Option, +) -> Result, Error> { + let body = hyper::body::to_bytes(req.into_body()).await?; + + if let Some(content_sha256) = content_sha256 { + verify_signed_content(content_sha256, &body[..])?; + } + + let mut bucket = garage + .bucket_helper() + .get_existing_bucket(bucket_id) + .await?; + + let param = bucket.params_mut().unwrap(); + + let conf: LifecycleConfiguration = from_reader(&body as &[u8])?; + + param + .lifecycle_config + .update(Some(conf.validate_into_garage_lifecycle_config()?)); + garage.bucket_table.insert(&bucket).await?; + + Ok(Response::builder() + .status(StatusCode::OK) + .body(Body::empty())?) 
+} + +// ---- SERIALIZATION AND DESERIALIZATION TO/FROM S3 XML ---- + +#[derive(Debug, Serialize, Deserialize, PartialEq, Eq, PartialOrd, Ord)] +#[serde(rename = "LifecycleConfiguration")] +pub struct LifecycleConfiguration { + #[serde(serialize_with = "xmlns_tag", skip_deserializing)] + pub xmlns: (), + #[serde(rename = "Rule")] + pub lifecycle_rules: Vec, +} + +#[derive(Debug, Serialize, Deserialize, PartialEq, Eq, PartialOrd, Ord)] +pub struct LifecycleRule { + #[serde(rename = "ID")] + pub id: Option, + #[serde(rename = "Status")] + pub status: Value, + #[serde(rename = "Filter", default)] + pub filter: Filter, + #[serde(rename = "Expiration", default)] + pub expiration: Option, + #[serde(rename = "AbortIncompleteMultipartUpload", default)] + pub abort_incomplete_mpu: Option, +} + +#[derive(Debug, Serialize, Deserialize, PartialEq, Eq, PartialOrd, Ord, Default)] +pub struct Filter { + #[serde(rename = "And")] + pub and: Option>, + #[serde(rename = "Prefix")] + pub prefix: Option, + #[serde(rename = "ObjectSizeGreaterThan")] + pub size_gt: Option, + #[serde(rename = "ObjectSizeLessThan")] + pub size_lt: Option, +} + +#[derive(Debug, Serialize, Deserialize, PartialEq, Eq, PartialOrd, Ord)] +pub struct Expiration { + #[serde(rename = "Days")] + pub days: Option, + #[serde(rename = "Date")] + pub at_date: Option, +} + +#[derive(Debug, Serialize, Deserialize, PartialEq, Eq, PartialOrd, Ord)] +pub struct AbortIncompleteMpu { + #[serde(rename = "DaysAfterInitiation")] + pub days: Option, +} + +impl LifecycleConfiguration { + pub fn validate_into_garage_lifecycle_config(self) -> Result, Error> { + let mut ret = vec![]; + for rule in self.lifecycle_rules { + ret.push(rule.validate_into_garage_lifecycle_rule()?); + } + Ok(ret) + } + + pub fn from_garage_lifecycle_config(config: &[GarageLifecycleRule]) -> Self { + Self { + xmlns: (), + lifecycle_rules: config + .iter() + .map(LifecycleRule::from_garage_lifecycle_rule) + .collect(), + } + } +} + +impl LifecycleRule { + pub 
fn validate_into_garage_lifecycle_rule(self) -> Result { + todo!() + } + + pub fn from_garage_lifecycle_rule(rule: &GarageLifecycleRule) -> Self { + todo!() + } +} + +#[cfg(test)] +mod tests { + use super::*; + + use quick_xml::de::from_str; + + #[test] + fn test_deserialize_lifecycle_config() -> Result<(), Error> { + let message = r#" + + + id1 + Enabled + + documents/ + + + 7 + + + + id2 + Enabled + + + logs/ + 1000000 + + + + 365 + + +"#; + let conf: LifecycleConfiguration = from_str(message).unwrap(); + let ref_value = LifecycleConfiguration { + xmlns: (), + lifecycle_rules: vec![ + LifecycleRule { + id: Some("id1".into()), + status: "Enabled".into(), + filter: Filter { + prefix: Some("documents/".into()), + ..Default::default() + }, + expiration: None, + abort_incomplete_mpu: Some(AbortIncompleteMpu { + days: Some(IntValue(7)), + }), + }, + LifecycleRule { + id: Some("id2".into()), + status: "Enabled".into(), + filter: Filter { + and: Some(Box::new(Filter { + prefix: Some("logs/".into()), + size_gt: Some(IntValue(1000000)), + ..Default::default() + })), + ..Default::default() + }, + expiration: Some(Expiration { + days: Some(IntValue(365)), + at_date: None, + }), + abort_incomplete_mpu: None, + }, + ], + }; + assert_eq! 
{ + ref_value, + conf + }; + + let message2 = to_xml_with_header(&ref_value)?; + + let cleanup = |c: &str| c.replace(char::is_whitespace, ""); + assert_eq!(cleanup(message), cleanup(&message2)); + + Ok(()) + } +} diff --git a/src/api/s3/mod.rs b/src/api/s3/mod.rs index b5237bf7..cbdb94ab 100644 --- a/src/api/s3/mod.rs +++ b/src/api/s3/mod.rs @@ -6,6 +6,7 @@ mod copy; pub mod cors; mod delete; pub mod get; +mod lifecycle; mod list; mod multipart; mod post_object; From abf011c2906d04200bb39d7bc82f7ed973215500 Mon Sep 17 00:00:00 2001 From: Alex Auvolat Date: Tue, 29 Aug 2023 18:22:03 +0200 Subject: [PATCH 03/25] lifecycle: implement validation into garage's internal data structure --- src/api/s3/lifecycle.rs | 200 +++++++++++++++++++++++++++++++++----- src/model/bucket_table.rs | 2 +- 2 files changed, 178 insertions(+), 24 deletions(-) diff --git a/src/api/s3/lifecycle.rs b/src/api/s3/lifecycle.rs index cb0cc83a..48265870 100644 --- a/src/api/s3/lifecycle.rs +++ b/src/api/s3/lifecycle.rs @@ -22,13 +22,7 @@ pub async fn handle_get_lifecycle(bucket: &Bucket) -> Result, Err .ok_or_internal_error("Bucket should not be deleted at this point")?; if let Some(lifecycle) = param.lifecycle_config.get() { - let wc = LifecycleConfiguration { - xmlns: (), - lifecycle_rules: lifecycle - .iter() - .map(LifecycleRule::from_garage_lifecycle_rule) - .collect::>(), - }; + let wc = LifecycleConfiguration::from_garage_lifecycle_config(lifecycle); let xml = to_xml_with_header(&wc)?; Ok(Response::builder() .status(StatusCode::OK) @@ -81,9 +75,10 @@ pub async fn handle_put_lifecycle( let conf: LifecycleConfiguration = from_reader(&body as &[u8])?; - param - .lifecycle_config - .update(Some(conf.validate_into_garage_lifecycle_config()?)); + let config = conf + .validate_into_garage_lifecycle_config() + .ok_or_bad_request("Invalid lifecycle configuration")?; + param.lifecycle_config.update(Some(config)); garage.bucket_table.insert(&bucket).await?; Ok(Response::builder() @@ -109,7 +104,7 @@ 
pub struct LifecycleRule { #[serde(rename = "Status")] pub status: Value, #[serde(rename = "Filter", default)] - pub filter: Filter, + pub filter: Option, #[serde(rename = "Expiration", default)] pub expiration: Option, #[serde(rename = "AbortIncompleteMultipartUpload", default)] @@ -139,11 +134,13 @@ pub struct Expiration { #[derive(Debug, Serialize, Deserialize, PartialEq, Eq, PartialOrd, Ord)] pub struct AbortIncompleteMpu { #[serde(rename = "DaysAfterInitiation")] - pub days: Option, + pub days: IntValue, } impl LifecycleConfiguration { - pub fn validate_into_garage_lifecycle_config(self) -> Result, Error> { + pub fn validate_into_garage_lifecycle_config( + self, + ) -> Result, &'static str> { let mut ret = vec![]; for rule in self.lifecycle_rules { ret.push(rule.validate_into_garage_lifecycle_rule()?); @@ -163,12 +160,136 @@ impl LifecycleConfiguration { } impl LifecycleRule { - pub fn validate_into_garage_lifecycle_rule(self) -> Result { - todo!() + pub fn validate_into_garage_lifecycle_rule(self) -> Result { + let enabled = match self.status.0.as_str() { + "Enabled" => true, + "Disabled" => false, + _ => return Err("invalid value for "), + }; + + let filter = self + .filter + .map(Filter::validate_into_garage_lifecycle_filter) + .transpose()? 
+ .unwrap_or_default(); + + let abort_incomplete_mpu_days = self.abort_incomplete_mpu.map(|x| x.days.0 as usize); + + let expiration = self + .expiration + .map(Expiration::validate_into_garage_lifecycle_expiration) + .transpose()?; + + Ok(GarageLifecycleRule { + id: self.id.map(|x| x.0), + enabled, + filter, + abort_incomplete_mpu_days, + expiration, + }) } pub fn from_garage_lifecycle_rule(rule: &GarageLifecycleRule) -> Self { - todo!() + Self { + id: rule.id.as_deref().map(Value::from), + status: if rule.enabled { + Value::from("Enabled") + } else { + Value::from("Disabled") + }, + filter: Filter::from_garage_lifecycle_filter(&rule.filter), + abort_incomplete_mpu: rule + .abort_incomplete_mpu_days + .map(|days| AbortIncompleteMpu { + days: IntValue(days as i64), + }), + expiration: rule + .expiration + .as_ref() + .map(Expiration::from_garage_lifecycle_expiration), + } + } +} + +impl Filter { + pub fn count(&self) -> i32 { + fn count(x: &Option) -> i32 { + x.as_ref().map(|_| 1).unwrap_or(0) + } + count(&self.prefix) + count(&self.size_gt) + count(&self.size_lt) + } + + pub fn validate_into_garage_lifecycle_filter( + self, + ) -> Result { + if self.count() > 0 && self.and.is_some() { + Err("Filter tag cannot contain both and another condition") + } else if let Some(and) = self.and { + if and.and.is_some() { + return Err("Nested tags"); + } + Ok(and.internal_into_garage_lifecycle_filter()) + } else if self.count() > 1 { + Err("Multiple Filter conditions must be wrapped in an tag") + } else { + Ok(self.internal_into_garage_lifecycle_filter()) + } + } + + fn internal_into_garage_lifecycle_filter(self) -> GarageLifecycleFilter { + GarageLifecycleFilter { + prefix: self.prefix.map(|x| x.0), + size_gt: self.size_gt.map(|x| x.0 as usize), + size_lt: self.size_lt.map(|x| x.0 as usize), + } + } + + pub fn from_garage_lifecycle_filter(rule: &GarageLifecycleFilter) -> Option { + let filter = Filter { + and: None, + prefix: rule.prefix.as_deref().map(Value::from), + size_gt: 
rule.size_gt.map(|x| IntValue(x as i64)), + size_lt: rule.size_lt.map(|x| IntValue(x as i64)), + }; + match filter.count() { + 0 => None, + 1 => Some(filter), + _ => Some(Filter { + and: Some(Box::new(filter)), + ..Default::default() + }), + } + } +} + +impl Expiration { + pub fn validate_into_garage_lifecycle_expiration( + self, + ) -> Result { + match (self.days, self.at_date) { + (Some(_), Some(_)) => Err("cannot have both and in "), + (None, None) => Err(" must contain either or "), + (Some(days), None) => Ok(GarageLifecycleExpiration::AfterDays(days.0 as usize)), + (None, Some(date)) => { + if date.0.parse::().is_err() { + return Err("Invalid expiration "); + } + Ok(GarageLifecycleExpiration::AtDate(date.0)) + } + } + } + + pub fn from_garage_lifecycle_expiration(exp: &GarageLifecycleExpiration) -> Self { + match exp { + GarageLifecycleExpiration::AfterDays(days) => Expiration { + days: Some(IntValue(*days as i64)), + at_date: None, + }, + GarageLifecycleExpiration::AtDate(days) => Expiration { + days: None, + at_date: Some(Value::from(days.as_str())), + }, + } } } @@ -213,26 +334,24 @@ mod tests { LifecycleRule { id: Some("id1".into()), status: "Enabled".into(), - filter: Filter { + filter: Some(Filter { prefix: Some("documents/".into()), ..Default::default() - }, - expiration: None, - abort_incomplete_mpu: Some(AbortIncompleteMpu { - days: Some(IntValue(7)), }), + expiration: None, + abort_incomplete_mpu: Some(AbortIncompleteMpu { days: IntValue(7) }), }, LifecycleRule { id: Some("id2".into()), status: "Enabled".into(), - filter: Filter { + filter: Some(Filter { and: Some(Box::new(Filter { prefix: Some("logs/".into()), size_gt: Some(IntValue(1000000)), ..Default::default() })), ..Default::default() - }, + }), expiration: Some(Expiration { days: Some(IntValue(365)), at_date: None, @@ -251,6 +370,41 @@ mod tests { let cleanup = |c: &str| c.replace(char::is_whitespace, ""); assert_eq!(cleanup(message), cleanup(&message2)); + // Check validation + let validated 
= ref_value + .validate_into_garage_lifecycle_config() + .ok_or_bad_request("invalid xml config")?; + + let ref_config = vec![ + GarageLifecycleRule { + id: Some("id1".into()), + enabled: true, + filter: GarageLifecycleFilter { + prefix: Some("documents/".into()), + ..Default::default() + }, + expiration: None, + abort_incomplete_mpu_days: Some(7), + }, + GarageLifecycleRule { + id: Some("id2".into()), + enabled: true, + filter: GarageLifecycleFilter { + prefix: Some("logs/".into()), + size_gt: Some(1000000), + ..Default::default() + }, + expiration: Some(GarageLifecycleExpiration::AfterDays(365)), + abort_incomplete_mpu_days: None, + }, + ]; + assert_eq!(validated, ref_config); + + let message3 = to_xml_with_header(&LifecycleConfiguration::from_garage_lifecycle_config( + &validated, + ))?; + assert_eq!(cleanup(message), cleanup(&message3)); + Ok(()) } } diff --git a/src/model/bucket_table.rs b/src/model/bucket_table.rs index dc4e4509..fed20e05 100644 --- a/src/model/bucket_table.rs +++ b/src/model/bucket_table.rs @@ -90,7 +90,7 @@ mod v08 { /// A lifecycle filter is a set of conditions that must all be true. 
/// For each condition, if it is None, it is not verified (always true), /// and if it is Some(x), then it is verified for value x - #[derive(PartialEq, Eq, Clone, Debug, Serialize, Deserialize)] + #[derive(PartialEq, Eq, Clone, Debug, Serialize, Deserialize, Default)] pub struct LifecycleFilter { /// If Some(x), object key has to start with prefix x pub prefix: Option, From f7b409f1140addd508c626b1e80f0f8de52a5639 Mon Sep 17 00:00:00 2001 From: Alex Auvolat Date: Wed, 30 Aug 2023 11:24:01 +0200 Subject: [PATCH 04/25] use a NaiveDate in data model, it serializes to string (iso 8601 format) --- Cargo.lock | 1 + Cargo.nix | 7 ++++--- src/api/s3/lifecycle.rs | 15 +++++++-------- src/model/Cargo.toml | 1 + src/model/bucket_table.rs | 2 +- 5 files changed, 14 insertions(+), 12 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 3472190b..79b35191 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1340,6 +1340,7 @@ dependencies = [ "async-trait", "base64 0.21.3", "blake2", + "chrono", "err-derive", "futures", "futures-util", diff --git a/Cargo.nix b/Cargo.nix index d044c649..645985a8 100644 --- a/Cargo.nix +++ b/Cargo.nix @@ -33,7 +33,7 @@ args@{ ignoreLockHash, }: let - nixifiedLockHash = "d4392b23d407f7ebc20d7f5db7583847e362665c1abb09f1c1d3305205e5996d"; + nixifiedLockHash = "f5b86f9d75664ba528a26ae71f07a38e9c72c78fe331420b9b639e2a099d4dad"; workspaceSrc = if args.workspaceSrc == null then ./. else args.workspaceSrc; currentLockHash = builtins.hashFile "sha256" (workspaceSrc + /Cargo.lock); lockHashIgnored = if ignoreLockHash @@ -981,7 +981,7 @@ in [ "iana-time-zone" ] [ "js-sys" ] [ "oldtime" ] - (lib.optional (rootFeatures' ? "garage/kubernetes-discovery" || rootFeatures' ? "garage_rpc/k8s-openapi" || rootFeatures' ? "garage_rpc/kube" || rootFeatures' ? 
"garage_rpc/kubernetes-discovery") "serde") + [ "serde" ] [ "std" ] [ "time" ] [ "wasm-bindgen" ] @@ -993,7 +993,7 @@ in ${ if hostPlatform.isUnix then "iana_time_zone" else null } = (rustPackages."registry+https://github.com/rust-lang/crates.io-index".iana-time-zone."0.1.57" { inherit profileName; }).out; ${ if hostPlatform.parsed.cpu.name == "wasm32" && !(hostPlatform.parsed.kernel.name == "emscripten" || hostPlatform.parsed.kernel.name == "wasi") then "js_sys" else null } = (rustPackages."registry+https://github.com/rust-lang/crates.io-index".js-sys."0.3.64" { inherit profileName; }).out; num_traits = (rustPackages."registry+https://github.com/rust-lang/crates.io-index".num-traits."0.2.16" { inherit profileName; }).out; - ${ if rootFeatures' ? "garage/kubernetes-discovery" || rootFeatures' ? "garage_rpc/k8s-openapi" || rootFeatures' ? "garage_rpc/kube" || rootFeatures' ? "garage_rpc/kubernetes-discovery" then "serde" else null } = (rustPackages."registry+https://github.com/rust-lang/crates.io-index".serde."1.0.188" { inherit profileName; }).out; + serde = (rustPackages."registry+https://github.com/rust-lang/crates.io-index".serde."1.0.188" { inherit profileName; }).out; time = (rustPackages."registry+https://github.com/rust-lang/crates.io-index".time."0.1.45" { inherit profileName; }).out; ${ if hostPlatform.parsed.cpu.name == "wasm32" && !(hostPlatform.parsed.kernel.name == "emscripten" || hostPlatform.parsed.kernel.name == "wasi") then "wasm_bindgen" else null } = (rustPackages."registry+https://github.com/rust-lang/crates.io-index".wasm-bindgen."0.2.87" { inherit profileName; }).out; ${ if hostPlatform.isWindows then "winapi" else null } = (rustPackages."registry+https://github.com/rust-lang/crates.io-index".winapi."0.3.9" { inherit profileName; }).out; @@ -1911,6 +1911,7 @@ in async_trait = (buildRustPackages."registry+https://github.com/rust-lang/crates.io-index".async-trait."0.1.73" { profileName = "__noProfile"; }).out; base64 = 
(rustPackages."registry+https://github.com/rust-lang/crates.io-index".base64."0.21.3" { inherit profileName; }).out; blake2 = (rustPackages."registry+https://github.com/rust-lang/crates.io-index".blake2."0.10.6" { inherit profileName; }).out; + chrono = (rustPackages."registry+https://github.com/rust-lang/crates.io-index".chrono."0.4.26" { inherit profileName; }).out; err_derive = (buildRustPackages."registry+https://github.com/rust-lang/crates.io-index".err-derive."0.3.1" { profileName = "__noProfile"; }).out; futures = (rustPackages."registry+https://github.com/rust-lang/crates.io-index".futures."0.3.28" { inherit profileName; }).out; futures_util = (rustPackages."registry+https://github.com/rust-lang/crates.io-index".futures-util."0.3.28" { inherit profileName; }).out; diff --git a/src/api/s3/lifecycle.rs b/src/api/s3/lifecycle.rs index 48265870..278cf26d 100644 --- a/src/api/s3/lifecycle.rs +++ b/src/api/s3/lifecycle.rs @@ -270,12 +270,11 @@ impl Expiration { (Some(_), Some(_)) => Err("cannot have both and in "), (None, None) => Err(" must contain either or "), (Some(days), None) => Ok(GarageLifecycleExpiration::AfterDays(days.0 as usize)), - (None, Some(date)) => { - if date.0.parse::().is_err() { - return Err("Invalid expiration "); - } - Ok(GarageLifecycleExpiration::AtDate(date.0)) - } + (None, Some(date)) => date + .0 + .parse::() + .map(GarageLifecycleExpiration::AtDate) + .map_err(|_| "Invalid expiration "), } } @@ -285,9 +284,9 @@ impl Expiration { days: Some(IntValue(*days as i64)), at_date: None, }, - GarageLifecycleExpiration::AtDate(days) => Expiration { + GarageLifecycleExpiration::AtDate(date) => Expiration { days: None, - at_date: Some(Value::from(days.as_str())), + at_date: Some(Value(date.to_string())), }, } } diff --git a/src/model/Cargo.toml b/src/model/Cargo.toml index 69f7eea4..58d9fdb7 100644 --- a/src/model/Cargo.toml +++ b/src/model/Cargo.toml @@ -23,6 +23,7 @@ garage_util.workspace = true async-trait = "0.1.7" arc-swap = "1.0" blake2 = 
"0.10" +chrono = { version = "0.4", features = ["serde"] } err-derive = "0.3" hex = "0.4" base64 = "0.21" diff --git a/src/model/bucket_table.rs b/src/model/bucket_table.rs index fed20e05..306a58ab 100644 --- a/src/model/bucket_table.rs +++ b/src/model/bucket_table.rs @@ -105,7 +105,7 @@ mod v08 { /// Objects expire x days after they were created AfterDays(usize), /// Objects expire at date x (must be in yyyy-mm-dd format) - AtDate(String), + AtDate(chrono::naive::NaiveDate), } #[derive(Default, PartialEq, Eq, PartialOrd, Ord, Clone, Debug, Serialize, Deserialize)] From a2e0e34db57b326ad5c9e7c9218fb9e29900e705 Mon Sep 17 00:00:00 2001 From: Alex Auvolat Date: Wed, 30 Aug 2023 12:41:11 +0200 Subject: [PATCH 05/25] lifecycle: skeleton for lifecycle worker --- src/model/s3/lifecycle_worker.rs | 252 +++++++++++++++++++++++++++++++ src/model/s3/mod.rs | 2 + 2 files changed, 254 insertions(+) create mode 100644 src/model/s3/lifecycle_worker.rs diff --git a/src/model/s3/lifecycle_worker.rs b/src/model/s3/lifecycle_worker.rs new file mode 100644 index 00000000..049fa2a3 --- /dev/null +++ b/src/model/s3/lifecycle_worker.rs @@ -0,0 +1,252 @@ +use std::sync::Arc; + +use async_trait::async_trait; +use chrono::prelude::*; +use std::time::{Duration, Instant}; +use tokio::sync::watch; + +use garage_util::background::*; +use garage_util::error::{Error, OkOrMessage}; +use garage_util::persister::PersisterShared; +use garage_util::time::*; + +use garage_table::EmptyKey; + +use crate::bucket_table::*; +use crate::s3::object_table::*; + +use crate::garage::Garage; + +mod v090 { + use chrono::naive::NaiveDate; + use serde::{Deserialize, Serialize}; + + #[derive(Serialize, Deserialize, Default, Clone, Copy)] + pub struct LifecycleWorkerPersisted { + pub last_completed: Option, + } + + impl garage_util::migrate::InitialFormat for LifecycleWorkerPersisted { + const VERSION_MARKER: &'static [u8] = b"G09lwp"; + } +} + +pub use v090::*; + +pub struct LifecycleWorker { + garage: Arc, + + 
state: State, + + persister: PersisterShared, +} + +enum State { + Completed(NaiveDate), + Running { + date: NaiveDate, + pos: Vec, + counter: usize, + objects_expired: usize, + mpu_aborted: usize, + last_bucket: Option, + }, +} + +pub fn register_bg_vars( + persister: &PersisterShared, + vars: &mut vars::BgVars, +) { + vars.register_ro(persister, "lifecycle-last-completed", |p| { + p.get_with(|x| { + x.last_completed + .map(|date| date.to_string()) + .unwrap_or("never".to_string()) + }) + }); +} + +impl LifecycleWorker { + pub fn new(garage: Arc, persister: PersisterShared) -> Self { + let today = today(); + let state = match persister.get_with(|x| x.last_completed) { + Some(d) if d >= today => State::Completed(d), + _ => State::Running { + date: today, + pos: vec![], + counter: 0, + objects_expired: 0, + mpu_aborted: 0, + last_bucket: None, + }, + }; + Self { + garage, + state, + persister, + } + } +} + +#[async_trait] +impl Worker for LifecycleWorker { + fn name(&self) -> String { + "object lifecycle worker".to_string() + } + + fn status(&self) -> WorkerStatus { + match &self.state { + State::Completed(d) => WorkerStatus { + freeform: vec![format!("Last completed: {}", d)], + ..Default::default() + }, + State::Running { + date, + counter, + objects_expired, + mpu_aborted, + .. + } => { + let n_objects = self + .garage + .object_table + .data + .store + .fast_len() + .unwrap_or(None); + let progress = match n_objects { + None => "...".to_string(), + Some(total) => format!( + "~{:.2}%", + 100. 
* std::cmp::min(*counter, total) as f32 / total as f32 + ), + }; + WorkerStatus { + progress: Some(progress), + freeform: vec![ + format!("Started: {}", date), + format!("Objects expired: {}", objects_expired), + format!("Multipart uploads aborted: { }", mpu_aborted), + ], + ..Default::default() + } + } + } + } + + async fn work(&mut self, _must_exit: &mut watch::Receiver) -> Result { + match &mut self.state { + State::Completed(_) => Ok(WorkerState::Idle), + State::Running { + date, + counter, + objects_expired, + mpu_aborted, + pos, + last_bucket, + } => { + let (object_bytes, next_pos) = match self + .garage + .object_table + .data + .store + .get_gt(&pos)? + { + None => { + info!("Lifecycle worker finished for {}, objects expired: {}, mpu aborted: {}", date, *objects_expired, *mpu_aborted); + self.persister + .set_with(|x| x.last_completed = Some(*date))?; + self.state = State::Completed(*date); + return Ok(WorkerState::Idle); + } + Some((k, v)) => (v, k), + }; + + let object = self.garage.object_table.data.decode_entry(&object_bytes)?; + process_object( + &self.garage, + object, + objects_expired, + mpu_aborted, + last_bucket, + ) + .await?; + + *counter += 1; + *pos = next_pos; + + Ok(WorkerState::Busy) + } + } + } + + async fn wait_for_work(&mut self) -> WorkerState { + match &self.state { + State::Completed(d) => { + let now = now_msec(); + let next_start = midnight_ts(d.succ()); + if now < next_start { + tokio::time::sleep_until( + (Instant::now() + Duration::from_millis(next_start - now)).into(), + ) + .await; + } + self.state = State::Running { + date: today(), + pos: vec![], + counter: 0, + objects_expired: 0, + mpu_aborted: 0, + last_bucket: None, + }; + } + State::Running { .. 
} => (), + } + WorkerState::Busy + } +} + +async fn process_object( + garage: &Arc, + object: Object, + objects_expired: &mut usize, + mpu_aborted: &mut usize, + last_bucket: &mut Option, +) -> Result<(), Error> { + let bucket = match last_bucket.take() { + Some(b) if b.id == object.bucket_id => b, + _ => garage + .bucket_table + .get(&EmptyKey, &object.bucket_id) + .await? + .ok_or_message("object in non-existent bucket")?, + }; + + let lifecycle_policy: &[LifecycleRule] = bucket + .state + .as_option() + .and_then(|s| s.lifecycle_config.get().as_deref()) + .unwrap_or_default(); + + for rule in lifecycle_policy.iter() { + todo!() + } + + *last_bucket = Some(bucket); + Ok(()) +} + +fn midnight_ts(date: NaiveDate) -> u64 { + date.and_hms(0, 0, 0).timestamp_millis() as u64 +} + +fn next_date(ts: u64) -> NaiveDate { + NaiveDateTime::from_timestamp_millis(ts as i64) + .expect("bad timestamp") + .date() + .succ() +} + +fn today() -> NaiveDate { + Utc::today().naive_utc() +} diff --git a/src/model/s3/mod.rs b/src/model/s3/mod.rs index 36d67093..5c776fb0 100644 --- a/src/model/s3/mod.rs +++ b/src/model/s3/mod.rs @@ -2,3 +2,5 @@ pub mod block_ref_table; pub mod mpu_table; pub mod object_table; pub mod version_table; + +pub mod lifecycle_worker; From 2996dc875fc378ec3597bfa3bdb8ba8951e1865c Mon Sep 17 00:00:00 2001 From: Alex Auvolat Date: Wed, 30 Aug 2023 14:28:48 +0200 Subject: [PATCH 06/25] lifecycle worker: implement main functionality --- src/api/s3/lifecycle.rs | 4 +- src/model/bucket_table.rs | 4 +- src/model/s3/lifecycle_worker.rs | 102 +++++++++++++++++++++++++++++-- 3 files changed, 101 insertions(+), 9 deletions(-) diff --git a/src/api/s3/lifecycle.rs b/src/api/s3/lifecycle.rs index 278cf26d..2d621eac 100644 --- a/src/api/s3/lifecycle.rs +++ b/src/api/s3/lifecycle.rs @@ -239,8 +239,8 @@ impl Filter { fn internal_into_garage_lifecycle_filter(self) -> GarageLifecycleFilter { GarageLifecycleFilter { prefix: self.prefix.map(|x| x.0), - size_gt: self.size_gt.map(|x| 
x.0 as usize), - size_lt: self.size_lt.map(|x| x.0 as usize), + size_gt: self.size_gt.map(|x| x.0 as u64), + size_lt: self.size_lt.map(|x| x.0 as u64), } } diff --git a/src/model/bucket_table.rs b/src/model/bucket_table.rs index 306a58ab..e9d574c5 100644 --- a/src/model/bucket_table.rs +++ b/src/model/bucket_table.rs @@ -95,9 +95,9 @@ mod v08 { /// If Some(x), object key has to start with prefix x pub prefix: Option, /// If Some(x), object size has to be more than x - pub size_gt: Option, + pub size_gt: Option, /// If Some(x), object size has to be less than x - pub size_lt: Option, + pub size_lt: Option, } #[derive(PartialEq, Eq, Clone, Debug, Serialize, Deserialize)] diff --git a/src/model/s3/lifecycle_worker.rs b/src/model/s3/lifecycle_worker.rs index 049fa2a3..069f44a0 100644 --- a/src/model/s3/lifecycle_worker.rs +++ b/src/model/s3/lifecycle_worker.rs @@ -6,6 +6,7 @@ use std::time::{Duration, Instant}; use tokio::sync::watch; use garage_util::background::*; +use garage_util::data::*; use garage_util::error::{Error, OkOrMessage}; use garage_util::persister::PersisterShared; use garage_util::time::*; @@ -165,6 +166,7 @@ impl Worker for LifecycleWorker { let object = self.garage.object_table.data.decode_entry(&object_bytes)?; process_object( &self.garage, + *date, object, objects_expired, mpu_aborted, @@ -184,7 +186,7 @@ impl Worker for LifecycleWorker { match &self.state { State::Completed(d) => { let now = now_msec(); - let next_start = midnight_ts(d.succ()); + let next_start = midnight_ts(d.succ_opt().expect("no next day")); if now < next_start { tokio::time::sleep_until( (Instant::now() + Duration::from_millis(next_start - now)).into(), @@ -208,6 +210,7 @@ impl Worker for LifecycleWorker { async fn process_object( garage: &Arc, + now_date: NaiveDate, object: Object, objects_expired: &mut usize, mpu_aborted: &mut usize, @@ -229,24 +232,113 @@ async fn process_object( .unwrap_or_default(); for rule in lifecycle_policy.iter() { - todo!() + if let Some(pfx) = 
&rule.filter.prefix { + if !object.key.starts_with(pfx) { + continue; + } + } + + if let Some(expire) = &rule.expiration { + if let Some(current_version) = object.versions().iter().rev().find(|v| v.is_data()) { + let version_date = next_date(current_version.timestamp); + + let current_version_data = match ¤t_version.state { + ObjectVersionState::Complete(c) => c, + _ => unreachable!(), + }; + + let size_match = check_size_filter(current_version_data, &rule.filter); + let date_match = match expire { + LifecycleExpiration::AfterDays(n_days) => { + (now_date - version_date) >= chrono::Duration::days(*n_days as i64) + } + LifecycleExpiration::AtDate(exp_date) => now_date >= *exp_date, + }; + + if size_match && date_match { + // Delete expired version + let deleted_object = Object::new( + object.bucket_id, + object.key.clone(), + vec![ObjectVersion { + uuid: gen_uuid(), + timestamp: std::cmp::max(now_msec(), current_version.timestamp + 1), + state: ObjectVersionState::Complete(ObjectVersionData::DeleteMarker), + }], + ); + garage.object_table.insert(&deleted_object).await?; + *objects_expired += 1; + } + } + } + + if let Some(abort_mpu_days) = &rule.abort_incomplete_mpu_days { + let aborted_versions = object + .versions() + .iter() + .filter_map(|v| { + let version_date = next_date(v.timestamp); + match &v.state { + ObjectVersionState::Uploading { .. 
} + if (now_date - version_date) + >= chrono::Duration::days(*abort_mpu_days as i64) => + { + Some(ObjectVersion { + state: ObjectVersionState::Aborted, + ..*v + }) + } + _ => None, + } + }) + .collect::>(); + if !aborted_versions.is_empty() { + // Insert aborted mpu info + let n_aborted = aborted_versions.len(); + let aborted_object = + Object::new(object.bucket_id, object.key.clone(), aborted_versions); + garage.object_table.insert(&aborted_object).await?; + *mpu_aborted += n_aborted; + } + } } *last_bucket = Some(bucket); Ok(()) } +fn check_size_filter(version_data: &ObjectVersionData, filter: &LifecycleFilter) -> bool { + let size = match version_data { + ObjectVersionData::Inline(meta, _) | ObjectVersionData::FirstBlock(meta, _) => meta.size, + _ => unreachable!(), + }; + if let Some(size_gt) = filter.size_gt { + if !(size > size_gt) { + return false; + } + } + if let Some(size_lt) = filter.size_lt { + if !(size < size_lt) { + return false; + } + } + return true; +} + fn midnight_ts(date: NaiveDate) -> u64 { - date.and_hms(0, 0, 0).timestamp_millis() as u64 + date.and_hms_opt(0, 0, 0) + .expect("midnight does not exist") + .timestamp_millis() as u64 } fn next_date(ts: u64) -> NaiveDate { NaiveDateTime::from_timestamp_millis(ts as i64) .expect("bad timestamp") .date() - .succ() + .succ_opt() + .expect("no next day") } fn today() -> NaiveDate { - Utc::today().naive_utc() + Utc::now().naive_utc().date() } From da8b224e241edad8cfe25f0b0256ebb0d60fa8dd Mon Sep 17 00:00:00 2001 From: Alex Auvolat Date: Wed, 30 Aug 2023 14:38:19 +0200 Subject: [PATCH 07/25] lifecycle worker: skip entire bucket when no lifecycle config is set --- src/model/s3/lifecycle_worker.rs | 28 ++++++++++++++++++++++------ 1 file changed, 22 insertions(+), 6 deletions(-) diff --git a/src/model/s3/lifecycle_worker.rs b/src/model/s3/lifecycle_worker.rs index 069f44a0..1981e0fd 100644 --- a/src/model/s3/lifecycle_worker.rs +++ b/src/model/s3/lifecycle_worker.rs @@ -54,6 +54,12 @@ enum State { }, } 
+#[derive(Clone, Copy, Eq, PartialEq)] +enum Skip { + SkipBucket, + NextObject, +} + pub fn register_bg_vars( persister: &PersisterShared, vars: &mut vars::BgVars, @@ -164,10 +170,10 @@ impl Worker for LifecycleWorker { }; let object = self.garage.object_table.data.decode_entry(&object_bytes)?; - process_object( + let skip = process_object( &self.garage, *date, - object, + &object, objects_expired, mpu_aborted, last_bucket, @@ -175,7 +181,13 @@ impl Worker for LifecycleWorker { .await?; *counter += 1; - *pos = next_pos; + if skip == Skip::SkipBucket { + let bucket_id_len = object.bucket_id.as_slice().len(); + assert_eq!(pos.get(..bucket_id_len), Some(object.bucket_id.as_slice())); + *pos = [&pos[..bucket_id_len], &[0xFFu8][..]].concat(); + } else { + *pos = next_pos; + } Ok(WorkerState::Busy) } @@ -211,11 +223,11 @@ impl Worker for LifecycleWorker { async fn process_object( garage: &Arc, now_date: NaiveDate, - object: Object, + object: &Object, objects_expired: &mut usize, mpu_aborted: &mut usize, last_bucket: &mut Option, -) -> Result<(), Error> { +) -> Result { let bucket = match last_bucket.take() { Some(b) if b.id == object.bucket_id => b, _ => garage @@ -231,6 +243,10 @@ async fn process_object( .and_then(|s| s.lifecycle_config.get().as_deref()) .unwrap_or_default(); + if lifecycle_policy.is_empty() { + return Ok(Skip::SkipBucket); + } + for rule in lifecycle_policy.iter() { if let Some(pfx) = &rule.filter.prefix { if !object.key.starts_with(pfx) { @@ -304,7 +320,7 @@ async fn process_object( } *last_bucket = Some(bucket); - Ok(()) + Ok(Skip::NextObject) } fn check_size_filter(version_data: &ObjectVersionData, filter: &LifecycleFilter) -> bool { From 0f1849e1ac882f5f88fe341549f0e7f01a1a7b70 Mon Sep 17 00:00:00 2001 From: Alex Auvolat Date: Wed, 30 Aug 2023 14:51:08 +0200 Subject: [PATCH 08/25] lifecycle worker: launch with the rest of Garage --- src/model/garage.rs | 26 +++++++++++++++++++++----- 1 file changed, 21 insertions(+), 5 deletions(-) diff --git 
a/src/model/garage.rs b/src/model/garage.rs index db2475ed..981430fb 100644 --- a/src/model/garage.rs +++ b/src/model/garage.rs @@ -7,6 +7,7 @@ use garage_db as db; use garage_util::background::*; use garage_util::config::*; use garage_util::error::*; +use garage_util::persister::PersisterShared; use garage_rpc::replication_mode::ReplicationMode; use garage_rpc::system::System; @@ -17,6 +18,7 @@ use garage_table::replication::TableShardedReplication; use garage_table::*; use crate::s3::block_ref_table::*; +use crate::s3::lifecycle_worker; use crate::s3::mpu_table::*; use crate::s3::object_table::*; use crate::s3::version_table::*; @@ -67,6 +69,9 @@ pub struct Garage { /// Table containing S3 block references (not blocks themselves) pub block_ref_table: Arc>, + /// Persister for lifecycle worker info + pub lifecycle_persister: PersisterShared, + #[cfg(feature = "k2v")] pub k2v: GarageK2V, } @@ -199,6 +204,9 @@ impl Garage { let replication_mode = ReplicationMode::parse(&config.replication_mode) .ok_or_message("Invalid replication_mode in config file.")?; + info!("Initialize background variable system..."); + let mut bg_vars = vars::BgVars::new(); + info!("Initialize membership management system..."); let system = System::new(network_key, replication_mode, &config)?; @@ -230,6 +238,7 @@ impl Garage { data_rep_param, system.clone(), ); + block_manager.register_bg_vars(&mut bg_vars); // ---- admin tables ---- info!("Initialize bucket_table..."); @@ -296,14 +305,15 @@ impl Garage { &db, ); + info!("Load lifecycle worker state..."); + let lifecycle_persister = + PersisterShared::new(&system.metadata_dir, "lifecycle_worker_state"); + lifecycle_worker::register_bg_vars(&lifecycle_persister, &mut bg_vars); + // ---- K2V ---- #[cfg(feature = "k2v")] let k2v = GarageK2V::new(system.clone(), &db, meta_rep_param); - // Initialize bg vars - let mut bg_vars = vars::BgVars::new(); - block_manager.register_bg_vars(&mut bg_vars); - // -- done -- Ok(Arc::new(Self { config, @@ -321,12 
+331,13 @@ impl Garage { mpu_counter_table, version_table, block_ref_table, + lifecycle_persister, #[cfg(feature = "k2v")] k2v, })) } - pub fn spawn_workers(&self, bg: &BackgroundRunner) { + pub fn spawn_workers(self: &Arc, bg: &BackgroundRunner) { self.block_manager.spawn_workers(bg); self.bucket_table.spawn_workers(bg); @@ -340,6 +351,11 @@ impl Garage { self.version_table.spawn_workers(bg); self.block_ref_table.spawn_workers(bg); + bg.spawn_worker(lifecycle_worker::LifecycleWorker::new( + self.clone(), + self.lifecycle_persister.clone(), + )); + #[cfg(feature = "k2v")] self.k2v.spawn_workers(bg); } From 7200954318a1b248b4194ee9273bcd2502b50d58 Mon Sep 17 00:00:00 2001 From: Alex Auvolat Date: Wed, 30 Aug 2023 14:54:52 +0200 Subject: [PATCH 09/25] lifecycle worker: add logging --- src/model/s3/lifecycle_worker.rs | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/src/model/s3/lifecycle_worker.rs b/src/model/s3/lifecycle_worker.rs index 1981e0fd..02e296e7 100644 --- a/src/model/s3/lifecycle_worker.rs +++ b/src/model/s3/lifecycle_worker.rs @@ -282,6 +282,10 @@ async fn process_object( state: ObjectVersionState::Complete(ObjectVersionData::DeleteMarker), }], ); + info!( + "Lifecycle: expiring 1 object in bucket {:?}", + object.bucket_id + ); garage.object_table.insert(&deleted_object).await?; *objects_expired += 1; } @@ -311,6 +315,10 @@ async fn process_object( if !aborted_versions.is_empty() { // Insert aborted mpu info let n_aborted = aborted_versions.len(); + info!( + "Lifecycle: aborting {} incomplete upload(s) in bucket {:?}", + n_aborted, object.bucket_id + ); let aborted_object = Object::new(object.bucket_id, object.key.clone(), aborted_versions); garage.object_table.insert(&aborted_object).await?; From 75ccc5a95c76f31235fcaab8a2c1795693733a4b Mon Sep 17 00:00:00 2001 From: Alex Auvolat Date: Wed, 30 Aug 2023 20:02:07 +0200 Subject: [PATCH 10/25] lifecycle config: store date as given, try to debug --- src/api/s3/lifecycle.rs | 22 
++++++++++++++++------ src/model/bucket_table.rs | 16 +++++++++++++++- src/model/s3/lifecycle_worker.rs | 9 ++++++++- 3 files changed, 39 insertions(+), 8 deletions(-) diff --git a/src/api/s3/lifecycle.rs b/src/api/s3/lifecycle.rs index 2d621eac..f0fde083 100644 --- a/src/api/s3/lifecycle.rs +++ b/src/api/s3/lifecycle.rs @@ -10,7 +10,7 @@ use crate::s3::xml::{to_xml_with_header, xmlns_tag, IntValue, Value}; use crate::signature::verify_signed_content; use garage_model::bucket_table::{ - Bucket, LifecycleExpiration as GarageLifecycleExpiration, + parse_lifecycle_date, Bucket, LifecycleExpiration as GarageLifecycleExpiration, LifecycleFilter as GarageLifecycleFilter, LifecycleRule as GarageLifecycleRule, }; use garage_model::garage::Garage; @@ -21,6 +21,8 @@ pub async fn handle_get_lifecycle(bucket: &Bucket) -> Result, Err .params() .ok_or_internal_error("Bucket should not be deleted at this point")?; + trace!("bucket: {:#?}", bucket); + if let Some(lifecycle) = param.lifecycle_config.get() { let wc = LifecycleConfiguration::from_garage_lifecycle_config(lifecycle); let xml = to_xml_with_header(&wc)?; @@ -79,7 +81,15 @@ pub async fn handle_put_lifecycle( .validate_into_garage_lifecycle_config() .ok_or_bad_request("Invalid lifecycle configuration")?; param.lifecycle_config.update(Some(config)); + garage.bucket_table.insert(&bucket).await?; + trace!("new bucket: {:#?}", bucket); + + let bucket = garage + .bucket_helper() + .get_existing_bucket(bucket_id) + .await?; + trace!("new bucket again: {:#?}", bucket); Ok(Response::builder() .status(StatusCode::OK) @@ -270,11 +280,11 @@ impl Expiration { (Some(_), Some(_)) => Err("cannot have both and in "), (None, None) => Err(" must contain either or "), (Some(days), None) => Ok(GarageLifecycleExpiration::AfterDays(days.0 as usize)), - (None, Some(date)) => date - .0 - .parse::() - .map(GarageLifecycleExpiration::AtDate) - .map_err(|_| "Invalid expiration "), + (None, Some(date)) => { + trace!("date: {}", date.0); + 
parse_lifecycle_date(&date.0)?; + Ok(GarageLifecycleExpiration::AtDate(date.0)) + } } } diff --git a/src/model/bucket_table.rs b/src/model/bucket_table.rs index e9d574c5..df2e9b4a 100644 --- a/src/model/bucket_table.rs +++ b/src/model/bucket_table.rs @@ -105,7 +105,7 @@ mod v08 { /// Objects expire x days after they were created AfterDays(usize), /// Objects expire at date x (must be in yyyy-mm-dd format) - AtDate(chrono::naive::NaiveDate), + AtDate(String), } #[derive(Default, PartialEq, Eq, PartialOrd, Ord, Clone, Debug, Serialize, Deserialize)] @@ -155,6 +155,20 @@ impl Crdt for BucketParams { } } +pub fn parse_lifecycle_date(date: &str) -> Result { + use chrono::prelude::*; + + if let Ok(datetime) = NaiveDateTime::parse_from_str(date, "%Y-%m-%dT%H:%M:%SZ") { + if datetime.time() == NaiveTime::MIN { + Ok(datetime.date()) + } else { + Err("date must be at midnight") + } + } else { + NaiveDate::parse_from_str(date, "%Y-%m-%d").map_err(|_| "date has invalid format") + } +} + impl Default for Bucket { fn default() -> Self { Self::new() diff --git a/src/model/s3/lifecycle_worker.rs b/src/model/s3/lifecycle_worker.rs index 02e296e7..5641b093 100644 --- a/src/model/s3/lifecycle_worker.rs +++ b/src/model/s3/lifecycle_worker.rs @@ -268,7 +268,14 @@ async fn process_object( LifecycleExpiration::AfterDays(n_days) => { (now_date - version_date) >= chrono::Duration::days(*n_days as i64) } - LifecycleExpiration::AtDate(exp_date) => now_date >= *exp_date, + LifecycleExpiration::AtDate(exp_date) => { + if let Ok(exp_date) = parse_lifecycle_date(&exp_date) { + now_date >= exp_date + } else { + warn!("Invalid expiraiton date stored in bucket {:?} lifecycle config: {}", bucket.id, exp_date); + false + } + } }; if size_match && date_match { From d2e94e36d64d4062ebe1fabac65ac1a6f265de17 Mon Sep 17 00:00:00 2001 From: Alex Auvolat Date: Wed, 30 Aug 2023 20:05:53 +0200 Subject: [PATCH 11/25] lifecycle config: add missing line in merge() and remove tracing --- src/api/s3/lifecycle.rs | 
13 +------------ src/model/bucket_table.rs | 1 + 2 files changed, 2 insertions(+), 12 deletions(-) diff --git a/src/api/s3/lifecycle.rs b/src/api/s3/lifecycle.rs index f0fde083..9036f84c 100644 --- a/src/api/s3/lifecycle.rs +++ b/src/api/s3/lifecycle.rs @@ -21,8 +21,6 @@ pub async fn handle_get_lifecycle(bucket: &Bucket) -> Result, Err .params() .ok_or_internal_error("Bucket should not be deleted at this point")?; - trace!("bucket: {:#?}", bucket); - if let Some(lifecycle) = param.lifecycle_config.get() { let wc = LifecycleConfiguration::from_garage_lifecycle_config(lifecycle); let xml = to_xml_with_header(&wc)?; @@ -76,20 +74,12 @@ pub async fn handle_put_lifecycle( let param = bucket.params_mut().unwrap(); let conf: LifecycleConfiguration = from_reader(&body as &[u8])?; - let config = conf .validate_into_garage_lifecycle_config() .ok_or_bad_request("Invalid lifecycle configuration")?; + param.lifecycle_config.update(Some(config)); - garage.bucket_table.insert(&bucket).await?; - trace!("new bucket: {:#?}", bucket); - - let bucket = garage - .bucket_helper() - .get_existing_bucket(bucket_id) - .await?; - trace!("new bucket again: {:#?}", bucket); Ok(Response::builder() .status(StatusCode::OK) @@ -281,7 +271,6 @@ impl Expiration { (None, None) => Err(" must contain either or "), (Some(days), None) => Ok(GarageLifecycleExpiration::AfterDays(days.0 as usize)), (None, Some(date)) => { - trace!("date: {}", date.0); parse_lifecycle_date(&date.0)?; Ok(GarageLifecycleExpiration::AtDate(date.0)) } diff --git a/src/model/bucket_table.rs b/src/model/bucket_table.rs index df2e9b4a..0eefa0e5 100644 --- a/src/model/bucket_table.rs +++ b/src/model/bucket_table.rs @@ -151,6 +151,7 @@ impl Crdt for BucketParams { self.website_config.merge(&o.website_config); self.cors_config.merge(&o.cors_config); + self.lifecycle_config.merge(&o.lifecycle_config); self.quotas.merge(&o.quotas); } } From a1d57283c0b37baabfb624d3696cc6efbaa4a500 Mon Sep 17 00:00:00 2001 From: Alex Auvolat Date: Wed, 
30 Aug 2023 20:07:14 +0200 Subject: [PATCH 12/25] bucket_table: bucketparams::new doesn't need to be pub --- src/model/bucket_table.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/model/bucket_table.rs b/src/model/bucket_table.rs index 0eefa0e5..4c48a76f 100644 --- a/src/model/bucket_table.rs +++ b/src/model/bucket_table.rs @@ -127,7 +127,7 @@ impl AutoCrdt for BucketQuotas { impl BucketParams { /// Create an empty BucketParams with no authorized keys and no website accesss - pub fn new() -> Self { + fn new() -> Self { BucketParams { creation_date: now_msec(), authorized_keys: crdt::Map::new(), From 5c923d48d732649eef4f51fc9d5cb14fde3d4ca8 Mon Sep 17 00:00:00 2001 From: Alex Auvolat Date: Wed, 30 Aug 2023 23:24:28 +0200 Subject: [PATCH 13/25] reference manual: document support for lifecycle configuration --- doc/book/reference-manual/s3-compatibility.md | 15 +++++++++++---- 1 file changed, 11 insertions(+), 4 deletions(-) diff --git a/doc/book/reference-manual/s3-compatibility.md b/doc/book/reference-manual/s3-compatibility.md index 15b29bd1..ace4dc36 100644 --- a/doc/book/reference-manual/s3-compatibility.md +++ b/doc/book/reference-manual/s3-compatibility.md @@ -127,15 +127,22 @@ If you need this feature, please [share your use case in our dedicated issue](ht | Endpoint | Garage | [Openstack Swift](https://docs.openstack.org/swift/latest/s3_compat.html) | [Ceph Object Gateway](https://docs.ceph.com/en/latest/radosgw/s3/) | [Riak CS](https://docs.riak.com/riak/cs/2.1.1/references/apis/storage/s3/index.html) | [OpenIO](https://docs.openio.io/latest/source/arch-design/s3_compliancy.html) | |------------------------------|----------------------------------|-----------------|---------------|---------|-----| -| [DeleteBucketLifecycle](https://docs.aws.amazon.com/AmazonS3/latest/API/API_DeleteBucketLifecycle.html) | ❌ Missing | ❌| ✅| ❌| ✅| -| 
[GetBucketLifecycleConfiguration](https://docs.aws.amazon.com/AmazonS3/latest/API/API_GetBucketLifecycleConfiguration.html) | ❌ Missing | ❌| ✅ | ❌| ✅| -| [PutBucketLifecycleConfiguration](https://docs.aws.amazon.com/AmazonS3/latest/API/API_PutBucketLifecycleConfiguration.html) | ❌ Missing | ❌| ✅ | ❌| ✅| +| [DeleteBucketLifecycle](https://docs.aws.amazon.com/AmazonS3/latest/API/API_DeleteBucketLifecycle.html) | ✅ Implemented | ❌| ✅| ❌| ✅| +| [GetBucketLifecycleConfiguration](https://docs.aws.amazon.com/AmazonS3/latest/API/API_GetBucketLifecycleConfiguration.html) | ✅ Implemented | ❌| ✅ | ❌| ✅| +| [PutBucketLifecycleConfiguration](https://docs.aws.amazon.com/AmazonS3/latest/API/API_PutBucketLifecycleConfiguration.html) | ⚠ Partially implemented (see below) | ❌| ✅ | ❌| ✅| | [GetBucketVersioning](https://docs.aws.amazon.com/AmazonS3/latest/API/API_GetBucketVersioning.html) | ❌ Stub (see below) | ✅| ✅ | ❌| ✅| | [ListObjectVersions](https://docs.aws.amazon.com/AmazonS3/latest/API/API_ListObjectVersions.html) | ❌ Missing | ❌| ✅ | ❌| ✅| | [PutBucketVersioning](https://docs.aws.amazon.com/AmazonS3/latest/API/API_PutBucketVersioning.html) | ❌ Missing | ❌| ✅| ❌| ✅| +**PutBucketLifecycleConfiguration:** The only actions supported are +`AbortIncompleteMultipartUpload` and `Expiration` (without the +`ExpiredObjectDeleteMarker` field). All other operations are dependent on +either bucket versioning or storage classes which Garage currently does not +implement. The deprecated `Prefix` member directly in the `Rule` +structure/XML tag is not supported; specified prefixes must be inside the +`Filter` structure/XML tag. -**GetBucketVersioning:** Stub implementation (Garage does not yet support versionning so this always returns "versionning not enabled"). +**GetBucketVersioning:** Stub implementation which always returns "versioning not enabled", since Garage does not yet support bucket versioning.
### Replication endpoints From d94f1c9178da4c346f35c27e4451d1b115b9acfb Mon Sep 17 00:00:00 2001 From: Alex Auvolat Date: Wed, 30 Aug 2023 23:27:02 +0200 Subject: [PATCH 14/25] reference manual: remove obsolete caveat about multipart uploads --- doc/book/reference-manual/s3-compatibility.md | 17 +++++++---------- 1 file changed, 7 insertions(+), 10 deletions(-) diff --git a/doc/book/reference-manual/s3-compatibility.md b/doc/book/reference-manual/s3-compatibility.md index ace4dc36..1bcfd123 100644 --- a/doc/book/reference-manual/s3-compatibility.md +++ b/doc/book/reference-manual/s3-compatibility.md @@ -75,16 +75,13 @@ but these endpoints are documented in [Red Hat Ceph Storage - Chapter 2. Ceph Ob | Endpoint | Garage | [Openstack Swift](https://docs.openstack.org/swift/latest/s3_compat.html) | [Ceph Object Gateway](https://docs.ceph.com/en/latest/radosgw/s3/) | [Riak CS](https://docs.riak.com/riak/cs/2.1.1/references/apis/storage/s3/index.html) | [OpenIO](https://docs.openio.io/latest/source/arch-design/s3_compliancy.html) | |------------------------------|----------------------------------|-----------------|---------------|---------|-----| -| [AbortMultipartUpload](https://docs.aws.amazon.com/AmazonS3/latest/API/API_AbortMultipartUpload.html) | ✅ Implemented | ✅ | ✅ | ✅ | ✅ | -| [CompleteMultipartUpload](https://docs.aws.amazon.com/AmazonS3/latest/API/API_CompleteMultipartUpload.html) | ✅ Implemented (see details below) | ✅ | ✅ | ✅ | ✅ | -| [CreateMultipartUpload](https://docs.aws.amazon.com/AmazonS3/latest/API/API_CreateMultipartUpload.html) | ✅ Implemented | ✅| ✅ | ✅ | ✅ | -| [ListMultipartUpload](https://docs.aws.amazon.com/AmazonS3/latest/API/API_ListMultipartUpload.html) | ✅ Implemented | ✅ | ✅ | ✅ | ✅ | -| [ListParts](https://docs.aws.amazon.com/AmazonS3/latest/API/API_ListParts.html) | ✅ Implemented | ✅ | ✅ | ✅ | ✅ | -| [UploadPart](https://docs.aws.amazon.com/AmazonS3/latest/API/API_UploadPart.html) | ✅ Implemented (see details below) | ✅ | ✅| ✅ | ✅ | -| 
[UploadPartCopy](https://docs.aws.amazon.com/AmazonS3/latest/API/API_UploadPartCopy.html) | ✅ Implemented | ✅ | ✅ | ✅ | ✅ | - -Our implementation of Multipart Upload is currently a bit more restrictive than Amazon's one in some edge cases. -For more information, please refer to our [issue tracker](https://git.deuxfleurs.fr/Deuxfleurs/garage/issues/204). +| [AbortMultipartUpload](https://docs.aws.amazon.com/AmazonS3/latest/API/API_AbortMultipartUpload.html) | ✅ Implemented | ✅ | ✅ | ✅ | ✅ | +| [CompleteMultipartUpload](https://docs.aws.amazon.com/AmazonS3/latest/API/API_CompleteMultipartUpload.html) | ✅ Implemented | ✅ | ✅ | ✅ | ✅ | +| [CreateMultipartUpload](https://docs.aws.amazon.com/AmazonS3/latest/API/API_CreateMultipartUpload.html) | ✅ Implemented | ✅| ✅ | ✅ | ✅ | +| [ListMultipartUpload](https://docs.aws.amazon.com/AmazonS3/latest/API/API_ListMultipartUpload.html) | ✅ Implemented | ✅ | ✅ | ✅ | ✅ | +| [ListParts](https://docs.aws.amazon.com/AmazonS3/latest/API/API_ListParts.html) | ✅ Implemented | ✅ | ✅ | ✅ | ✅ | +| [UploadPart](https://docs.aws.amazon.com/AmazonS3/latest/API/API_UploadPart.html) | ✅ Implemented | ✅ | ✅| ✅ | ✅ | +| [UploadPartCopy](https://docs.aws.amazon.com/AmazonS3/latest/API/API_UploadPartCopy.html) | ✅ Implemented | ✅ | ✅ | ✅ | ✅ | ### Website endpoints From f0a395e2e5db977caff0ea46e17061e02929178a Mon Sep 17 00:00:00 2001 From: Alex Auvolat Date: Wed, 30 Aug 2023 23:39:28 +0200 Subject: [PATCH 15/25] s3 bucket apis: remove redundant call --- src/api/s3/api_server.rs | 14 ++++++++------ src/api/s3/cors.rs | 22 ++++++++-------------- src/api/s3/lifecycle.rs | 22 ++++++++-------------- src/api/s3/website.rs | 22 ++++++++-------------- 4 files changed, 32 insertions(+), 48 deletions(-) diff --git a/src/api/s3/api_server.rs b/src/api/s3/api_server.rs index 06fef6d5..3f995d34 100644 --- a/src/api/s3/api_server.rs +++ b/src/api/s3/api_server.rs @@ -355,19 +355,21 @@ impl ApiHandler for S3ApiServer { } Endpoint::GetBucketWebsite {} => 
handle_get_website(&bucket).await, Endpoint::PutBucketWebsite {} => { - handle_put_website(garage, bucket_id, req, content_sha256).await + handle_put_website(garage, bucket.clone(), req, content_sha256).await } - Endpoint::DeleteBucketWebsite {} => handle_delete_website(garage, bucket_id).await, + Endpoint::DeleteBucketWebsite {} => handle_delete_website(garage, bucket.clone()).await, Endpoint::GetBucketCors {} => handle_get_cors(&bucket).await, Endpoint::PutBucketCors {} => { - handle_put_cors(garage, bucket_id, req, content_sha256).await + handle_put_cors(garage, bucket.clone(), req, content_sha256).await } - Endpoint::DeleteBucketCors {} => handle_delete_cors(garage, bucket_id).await, + Endpoint::DeleteBucketCors {} => handle_delete_cors(garage, bucket.clone()).await, Endpoint::GetBucketLifecycleConfiguration {} => handle_get_lifecycle(&bucket).await, Endpoint::PutBucketLifecycleConfiguration {} => { - handle_put_lifecycle(garage, bucket_id, req, content_sha256).await + handle_put_lifecycle(garage, bucket.clone(), req, content_sha256).await + } + Endpoint::DeleteBucketLifecycle {} => { + handle_delete_lifecycle(garage, bucket.clone()).await } - Endpoint::DeleteBucketLifecycle {} => handle_delete_lifecycle(garage, bucket_id).await, endpoint => Err(Error::NotImplemented(endpoint.name().to_owned())), }; diff --git a/src/api/s3/cors.rs b/src/api/s3/cors.rs index c7273464..49097ad1 100644 --- a/src/api/s3/cors.rs +++ b/src/api/s3/cors.rs @@ -44,14 +44,11 @@ pub async fn handle_get_cors(bucket: &Bucket) -> Result, Error> { pub async fn handle_delete_cors( garage: Arc, - bucket_id: Uuid, + mut bucket: Bucket, ) -> Result, Error> { - let mut bucket = garage - .bucket_helper() - .get_existing_bucket(bucket_id) - .await?; - - let param = bucket.params_mut().unwrap(); + let param = bucket + .params_mut() + .ok_or_internal_error("Bucket should not be deleted at this point")?; param.cors_config.update(None); garage.bucket_table.insert(&bucket).await?; @@ -63,7 +60,7 @@ pub 
async fn handle_delete_cors( pub async fn handle_put_cors( garage: Arc, - bucket_id: Uuid, + mut bucket: Bucket, req: Request, content_sha256: Option, ) -> Result, Error> { @@ -73,12 +70,9 @@ pub async fn handle_put_cors( verify_signed_content(content_sha256, &body[..])?; } - let mut bucket = garage - .bucket_helper() - .get_existing_bucket(bucket_id) - .await?; - - let param = bucket.params_mut().unwrap(); + let param = bucket + .params_mut() + .ok_or_internal_error("Bucket should not be deleted at this point")?; let conf: CorsConfiguration = from_reader(&body as &[u8])?; conf.validate()?; diff --git a/src/api/s3/lifecycle.rs b/src/api/s3/lifecycle.rs index 9036f84c..11199190 100644 --- a/src/api/s3/lifecycle.rs +++ b/src/api/s3/lifecycle.rs @@ -37,14 +37,11 @@ pub async fn handle_get_lifecycle(bucket: &Bucket) -> Result, Err pub async fn handle_delete_lifecycle( garage: Arc, - bucket_id: Uuid, + mut bucket: Bucket, ) -> Result, Error> { - let mut bucket = garage - .bucket_helper() - .get_existing_bucket(bucket_id) - .await?; - - let param = bucket.params_mut().unwrap(); + let param = bucket + .params_mut() + .ok_or_internal_error("Bucket should not be deleted at this point")?; param.lifecycle_config.update(None); garage.bucket_table.insert(&bucket).await?; @@ -56,7 +53,7 @@ pub async fn handle_delete_lifecycle( pub async fn handle_put_lifecycle( garage: Arc, - bucket_id: Uuid, + mut bucket: Bucket, req: Request, content_sha256: Option, ) -> Result, Error> { @@ -66,12 +63,9 @@ pub async fn handle_put_lifecycle( verify_signed_content(content_sha256, &body[..])?; } - let mut bucket = garage - .bucket_helper() - .get_existing_bucket(bucket_id) - .await?; - - let param = bucket.params_mut().unwrap(); + let param = bucket + .params_mut() + .ok_or_internal_error("Bucket should not be deleted at this point")?; let conf: LifecycleConfiguration = from_reader(&body as &[u8])?; let config = conf diff --git a/src/api/s3/website.rs b/src/api/s3/website.rs index 
77738971..7f2ab925 100644 --- a/src/api/s3/website.rs +++ b/src/api/s3/website.rs @@ -43,14 +43,11 @@ pub async fn handle_get_website(bucket: &Bucket) -> Result, Error pub async fn handle_delete_website( garage: Arc, - bucket_id: Uuid, + mut bucket: Bucket, ) -> Result, Error> { - let mut bucket = garage - .bucket_helper() - .get_existing_bucket(bucket_id) - .await?; - - let param = bucket.params_mut().unwrap(); + let param = bucket + .params_mut() + .ok_or_internal_error("Bucket should not be deleted at this point")?; param.website_config.update(None); garage.bucket_table.insert(&bucket).await?; @@ -62,7 +59,7 @@ pub async fn handle_delete_website( pub async fn handle_put_website( garage: Arc, - bucket_id: Uuid, + mut bucket: Bucket, req: Request, content_sha256: Option, ) -> Result, Error> { @@ -72,12 +69,9 @@ pub async fn handle_put_website( verify_signed_content(content_sha256, &body[..])?; } - let mut bucket = garage - .bucket_helper() - .get_existing_bucket(bucket_id) - .await?; - - let param = bucket.params_mut().unwrap(); + let param = bucket + .params_mut() + .ok_or_internal_error("Bucket should not be deleted at this point")?; let conf: WebsiteConfiguration = from_reader(&body as &[u8])?; conf.validate()?; From 01c327a07a6045055fef6f923848fe6046e937c4 Mon Sep 17 00:00:00 2001 From: Alex Auvolat Date: Wed, 30 Aug 2023 23:46:15 +0200 Subject: [PATCH 16/25] lifecycle worker: avoid building chrono's serde feature --- src/model/Cargo.toml | 2 +- src/model/s3/lifecycle_worker.rs | 20 ++++++++++---------- 2 files changed, 11 insertions(+), 11 deletions(-) diff --git a/src/model/Cargo.toml b/src/model/Cargo.toml index 58d9fdb7..3794cc59 100644 --- a/src/model/Cargo.toml +++ b/src/model/Cargo.toml @@ -23,7 +23,7 @@ garage_util.workspace = true async-trait = "0.1.7" arc-swap = "1.0" blake2 = "0.10" -chrono = { version = "0.4", features = ["serde"] } +chrono = "0.4" err-derive = "0.3" hex = "0.4" base64 = "0.21" diff --git a/src/model/s3/lifecycle_worker.rs 
b/src/model/s3/lifecycle_worker.rs index 5641b093..02374bf0 100644 --- a/src/model/s3/lifecycle_worker.rs +++ b/src/model/s3/lifecycle_worker.rs @@ -19,12 +19,11 @@ use crate::s3::object_table::*; use crate::garage::Garage; mod v090 { - use chrono::naive::NaiveDate; use serde::{Deserialize, Serialize}; - #[derive(Serialize, Deserialize, Default, Clone, Copy)] + #[derive(Serialize, Deserialize, Default, Clone)] pub struct LifecycleWorkerPersisted { - pub last_completed: Option, + pub last_completed: Option, } impl garage_util::migrate::InitialFormat for LifecycleWorkerPersisted { @@ -65,18 +64,19 @@ pub fn register_bg_vars( vars: &mut vars::BgVars, ) { vars.register_ro(persister, "lifecycle-last-completed", |p| { - p.get_with(|x| { - x.last_completed - .map(|date| date.to_string()) - .unwrap_or("never".to_string()) - }) + p.get_with(|x| x.last_completed.clone().unwrap_or("never".to_string())) }); } impl LifecycleWorker { pub fn new(garage: Arc, persister: PersisterShared) -> Self { let today = today(); - let state = match persister.get_with(|x| x.last_completed) { + let last_completed = persister.get_with(|x| { + x.last_completed + .as_deref() + .and_then(|x| x.parse::().ok()) + }); + let state = match last_completed { Some(d) if d >= today => State::Completed(d), _ => State::Running { date: today, @@ -162,7 +162,7 @@ impl Worker for LifecycleWorker { None => { info!("Lifecycle worker finished for {}, objects expired: {}, mpu aborted: {}", date, *objects_expired, *mpu_aborted); self.persister - .set_with(|x| x.last_completed = Some(*date))?; + .set_with(|x| x.last_completed = Some(date.to_string()))?; self.state = State::Completed(*date); return Ok(WorkerState::Idle); } From 5fad4c4658676be898186c352f216ca72e0e8601 Mon Sep 17 00:00:00 2001 From: Alex Auvolat Date: Wed, 30 Aug 2023 23:47:42 +0200 Subject: [PATCH 17/25] update cargo.nix --- Cargo.nix | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Cargo.nix b/Cargo.nix index 645985a8..dc30c355 
100644 --- a/Cargo.nix +++ b/Cargo.nix @@ -981,7 +981,7 @@ in [ "iana-time-zone" ] [ "js-sys" ] [ "oldtime" ] - [ "serde" ] + (lib.optional (rootFeatures' ? "garage/kubernetes-discovery" || rootFeatures' ? "garage_rpc/k8s-openapi" || rootFeatures' ? "garage_rpc/kube" || rootFeatures' ? "garage_rpc/kubernetes-discovery") "serde") [ "std" ] [ "time" ] [ "wasm-bindgen" ] @@ -993,7 +993,7 @@ in ${ if hostPlatform.isUnix then "iana_time_zone" else null } = (rustPackages."registry+https://github.com/rust-lang/crates.io-index".iana-time-zone."0.1.57" { inherit profileName; }).out; ${ if hostPlatform.parsed.cpu.name == "wasm32" && !(hostPlatform.parsed.kernel.name == "emscripten" || hostPlatform.parsed.kernel.name == "wasi") then "js_sys" else null } = (rustPackages."registry+https://github.com/rust-lang/crates.io-index".js-sys."0.3.64" { inherit profileName; }).out; num_traits = (rustPackages."registry+https://github.com/rust-lang/crates.io-index".num-traits."0.2.16" { inherit profileName; }).out; - serde = (rustPackages."registry+https://github.com/rust-lang/crates.io-index".serde."1.0.188" { inherit profileName; }).out; + ${ if rootFeatures' ? "garage/kubernetes-discovery" || rootFeatures' ? "garage_rpc/k8s-openapi" || rootFeatures' ? "garage_rpc/kube" || rootFeatures' ? 
"garage_rpc/kubernetes-discovery" then "serde" else null } = (rustPackages."registry+https://github.com/rust-lang/crates.io-index".serde."1.0.188" { inherit profileName; }).out; time = (rustPackages."registry+https://github.com/rust-lang/crates.io-index".time."0.1.45" { inherit profileName; }).out; ${ if hostPlatform.parsed.cpu.name == "wasm32" && !(hostPlatform.parsed.kernel.name == "emscripten" || hostPlatform.parsed.kernel.name == "wasi") then "wasm_bindgen" else null } = (rustPackages."registry+https://github.com/rust-lang/crates.io-index".wasm-bindgen."0.2.87" { inherit profileName; }).out; ${ if hostPlatform.isWindows then "winapi" else null } = (rustPackages."registry+https://github.com/rust-lang/crates.io-index".winapi."0.3.9" { inherit profileName; }).out; From b2f679675e3390bea6c6b3b9fb3632d0ed414a75 Mon Sep 17 00:00:00 2001 From: Alex Auvolat Date: Wed, 30 Aug 2023 23:52:09 +0200 Subject: [PATCH 18/25] lifecycle worker: take into account disabled rules --- src/model/s3/lifecycle_worker.rs | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/src/model/s3/lifecycle_worker.rs b/src/model/s3/lifecycle_worker.rs index 02374bf0..d46d70f3 100644 --- a/src/model/s3/lifecycle_worker.rs +++ b/src/model/s3/lifecycle_worker.rs @@ -243,11 +243,15 @@ async fn process_object( .and_then(|s| s.lifecycle_config.get().as_deref()) .unwrap_or_default(); - if lifecycle_policy.is_empty() { + if lifecycle_policy.iter().all(|x| !x.enabled) { return Ok(Skip::SkipBucket); } for rule in lifecycle_policy.iter() { + if !rule.enabled { + continue; + } + if let Some(pfx) = &rule.filter.prefix { if !object.key.starts_with(pfx) { continue; From be03a4610f4a6e3863e6113491e308bbcea9ca94 Mon Sep 17 00:00:00 2001 From: Alex Auvolat Date: Thu, 31 Aug 2023 00:00:26 +0200 Subject: [PATCH 19/25] s3api: remove redundant serde rename attribute --- src/api/s3/lifecycle.rs | 1 - 1 file changed, 1 deletion(-) diff --git a/src/api/s3/lifecycle.rs b/src/api/s3/lifecycle.rs index 
11199190..1e7d6755 100644 --- a/src/api/s3/lifecycle.rs +++ b/src/api/s3/lifecycle.rs @@ -83,7 +83,6 @@ pub async fn handle_put_lifecycle( // ---- SERIALIZATION AND DESERIALIZATION TO/FROM S3 XML ---- #[derive(Debug, Serialize, Deserialize, PartialEq, Eq, PartialOrd, Ord)] -#[serde(rename = "LifecycleConfiguration")] pub struct LifecycleConfiguration { #[serde(serialize_with = "xmlns_tag", skip_deserializing)] pub xmlns: (), From 1cfcc61de83b832a78c8f93aaaf935a29845cd8b Mon Sep 17 00:00:00 2001 From: Alex Auvolat Date: Thu, 31 Aug 2023 00:28:37 +0200 Subject: [PATCH 20/25] lifecycle worker: mitigate potential bugs + refactoring --- src/model/s3/lifecycle_worker.rs | 51 +++++++++++++++++++------------- 1 file changed, 31 insertions(+), 20 deletions(-) diff --git a/src/model/s3/lifecycle_worker.rs b/src/model/s3/lifecycle_worker.rs index d46d70f3..670ed9fe 100644 --- a/src/model/s3/lifecycle_worker.rs +++ b/src/model/s3/lifecycle_worker.rs @@ -197,16 +197,21 @@ impl Worker for LifecycleWorker { async fn wait_for_work(&mut self) -> WorkerState { match &self.state { State::Completed(d) => { - let now = now_msec(); - let next_start = midnight_ts(d.succ_opt().expect("no next day")); - if now < next_start { - tokio::time::sleep_until( - (Instant::now() + Duration::from_millis(next_start - now)).into(), - ) - .await; + let next_day = d.succ_opt().expect("no next day"); + let next_start = midnight_ts(next_day); + loop { + let now = now_msec(); + if now < next_start { + tokio::time::sleep_until( + (Instant::now() + Duration::from_millis(next_start - now)).into(), + ) + .await; + } else { + break; + } } self.state = State::Running { - date: today(), + date: std::cmp::max(next_day, today()), pos: vec![], counter: 0, objects_expired: 0, @@ -228,6 +233,14 @@ async fn process_object( mpu_aborted: &mut usize, last_bucket: &mut Option, ) -> Result { + if !object + .versions() + .iter() + .any(|x| x.is_data() || x.is_uploading(None)) + { + return Ok(Skip::NextObject); + } + let 
bucket = match last_bucket.take() { Some(b) if b.id == object.bucket_id => b, _ => garage @@ -276,7 +289,7 @@ async fn process_object( if let Ok(exp_date) = parse_lifecycle_date(&exp_date) { now_date >= exp_date } else { - warn!("Invalid expiraiton date stored in bucket {:?} lifecycle config: {}", bucket.id, exp_date); + warn!("Invalid expiration date stored in bucket {:?} lifecycle config: {}", bucket.id, exp_date); false } } @@ -309,17 +322,15 @@ async fn process_object( .iter() .filter_map(|v| { let version_date = next_date(v.timestamp); - match &v.state { - ObjectVersionState::Uploading { .. } - if (now_date - version_date) - >= chrono::Duration::days(*abort_mpu_days as i64) => - { - Some(ObjectVersion { - state: ObjectVersionState::Aborted, - ..*v - }) - } - _ => None, + if (now_date - version_date) >= chrono::Duration::days(*abort_mpu_days as i64) + && matches!(&v.state, ObjectVersionState::Uploading { .. }) + { + Some(ObjectVersion { + state: ObjectVersionState::Aborted, + ..*v + }) + } else { + None } }) .collect::>(); From adbf5925de733484998c3a788c4ec7e8cda2cec4 Mon Sep 17 00:00:00 2001 From: Alex Auvolat Date: Thu, 31 Aug 2023 11:19:26 +0200 Subject: [PATCH 21/25] lifecycle worker: use queue_insert and process objects in batches --- src/model/s3/lifecycle_worker.rs | 81 ++++++++++++++++++-------------- 1 file changed, 45 insertions(+), 36 deletions(-) diff --git a/src/model/s3/lifecycle_worker.rs b/src/model/s3/lifecycle_worker.rs index 670ed9fe..f99cc935 100644 --- a/src/model/s3/lifecycle_worker.rs +++ b/src/model/s3/lifecycle_worker.rs @@ -152,41 +152,44 @@ impl Worker for LifecycleWorker { pos, last_bucket, } => { - let (object_bytes, next_pos) = match self - .garage - .object_table - .data - .store - .get_gt(&pos)? 
- { - None => { - info!("Lifecycle worker finished for {}, objects expired: {}, mpu aborted: {}", date, *objects_expired, *mpu_aborted); - self.persister - .set_with(|x| x.last_completed = Some(date.to_string()))?; - self.state = State::Completed(*date); - return Ok(WorkerState::Idle); + // Process a batch of 100 items before yielding to bg task scheduler + for _ in 0..100 { + let (object_bytes, next_pos) = match self + .garage + .object_table + .data + .store + .get_gt(&pos)? + { + None => { + info!("Lifecycle worker finished for {}, objects expired: {}, mpu aborted: {}", date, *objects_expired, *mpu_aborted); + self.persister + .set_with(|x| x.last_completed = Some(date.to_string()))?; + self.state = State::Completed(*date); + return Ok(WorkerState::Idle); + } + Some((k, v)) => (v, k), + }; + + let object = self.garage.object_table.data.decode_entry(&object_bytes)?; + let skip = process_object( + &self.garage, + *date, + &object, + objects_expired, + mpu_aborted, + last_bucket, + ) + .await?; + + *counter += 1; + if skip == Skip::SkipBucket { + let bucket_id_len = object.bucket_id.as_slice().len(); + assert_eq!(pos.get(..bucket_id_len), Some(object.bucket_id.as_slice())); + *pos = [&pos[..bucket_id_len], &[0xFFu8][..]].concat(); + } else { + *pos = next_pos; } - Some((k, v)) => (v, k), - }; - - let object = self.garage.object_table.data.decode_entry(&object_bytes)?; - let skip = process_object( - &self.garage, - *date, - &object, - objects_expired, - mpu_aborted, - last_bucket, - ) - .await?; - - *counter += 1; - if skip == Skip::SkipBucket { - let bucket_id_len = object.bucket_id.as_slice().len(); - assert_eq!(pos.get(..bucket_id_len), Some(object.bucket_id.as_slice())); - *pos = [&pos[..bucket_id_len], &[0xFFu8][..]].concat(); - } else { - *pos = next_pos; } Ok(WorkerState::Busy) @@ -260,6 +263,8 @@ async fn process_object( return Ok(Skip::SkipBucket); } + let db = garage.object_table.data.store.db(); + for rule in lifecycle_policy.iter() { if !rule.enabled { 
continue; @@ -310,7 +315,9 @@ async fn process_object( "Lifecycle: expiring 1 object in bucket {:?}", object.bucket_id ); - garage.object_table.insert(&deleted_object).await?; + db.transaction(|mut tx| { + garage.object_table.queue_insert(&mut tx, &deleted_object) + })?; *objects_expired += 1; } } @@ -343,7 +350,9 @@ async fn process_object( ); let aborted_object = Object::new(object.bucket_id, object.key.clone(), aborted_versions); - garage.object_table.insert(&aborted_object).await?; + db.transaction(|mut tx| { + garage.object_table.queue_insert(&mut tx, &aborted_object) + })?; *mpu_aborted += n_aborted; } } From a00a52633f7846c3683da65a07266a03f88b0f74 Mon Sep 17 00:00:00 2001 From: Alex Auvolat Date: Thu, 31 Aug 2023 11:25:14 +0200 Subject: [PATCH 22/25] lifecycle worker: add log message when starting --- src/model/s3/lifecycle_worker.rs | 32 ++++++++++++++++---------------- 1 file changed, 16 insertions(+), 16 deletions(-) diff --git a/src/model/s3/lifecycle_worker.rs b/src/model/s3/lifecycle_worker.rs index f99cc935..53c84a17 100644 --- a/src/model/s3/lifecycle_worker.rs +++ b/src/model/s3/lifecycle_worker.rs @@ -78,14 +78,7 @@ impl LifecycleWorker { }); let state = match last_completed { Some(d) if d >= today => State::Completed(d), - _ => State::Running { - date: today, - pos: vec![], - counter: 0, - objects_expired: 0, - mpu_aborted: 0, - last_bucket: None, - }, + _ => State::start(today), }; Self { garage, @@ -95,6 +88,20 @@ impl LifecycleWorker { } } +impl State { + fn start(date: NaiveDate) -> Self { + info!("Starting lifecycle worker for {}", date); + State::Running { + date, + pos: vec![], + counter: 0, + objects_expired: 0, + mpu_aborted: 0, + last_bucket: None, + } + } +} + #[async_trait] impl Worker for LifecycleWorker { fn name(&self) -> String { @@ -213,14 +220,7 @@ impl Worker for LifecycleWorker { break; } } - self.state = State::Running { - date: std::cmp::max(next_day, today()), - pos: vec![], - counter: 0, - objects_expired: 0, - 
mpu_aborted: 0, - last_bucket: None, - }; + self.state = State::start(std::cmp::max(next_day, today())); } State::Running { .. } => (), } From f579d6d9b42ef03d639cc7356b2fa15265074120 Mon Sep 17 00:00:00 2001 From: Alex Auvolat Date: Thu, 31 Aug 2023 11:29:54 +0200 Subject: [PATCH 23/25] lifecycle worker: fix potential infinite loop --- src/model/s3/lifecycle_worker.rs | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/src/model/s3/lifecycle_worker.rs b/src/model/s3/lifecycle_worker.rs index 53c84a17..0747ffb8 100644 --- a/src/model/s3/lifecycle_worker.rs +++ b/src/model/s3/lifecycle_worker.rs @@ -193,7 +193,10 @@ impl Worker for LifecycleWorker { if skip == Skip::SkipBucket { let bucket_id_len = object.bucket_id.as_slice().len(); assert_eq!(pos.get(..bucket_id_len), Some(object.bucket_id.as_slice())); - *pos = [&pos[..bucket_id_len], &[0xFFu8][..]].concat(); + *pos = std::cmp::max( + next_pos, + [&pos[..bucket_id_len], &[0xFFu8][..]].concat(), + ); } else { *pos = next_pos; } From 1cdc321e28ccfbbe425365f3a03a526c3f456e3f Mon Sep 17 00:00:00 2001 From: Alex Auvolat Date: Thu, 31 Aug 2023 11:36:30 +0200 Subject: [PATCH 24/25] lifecycle worker: don't get stuck on non-existent bucket --- src/model/s3/lifecycle_worker.rs | 21 ++++++++++++++++----- 1 file changed, 16 insertions(+), 5 deletions(-) diff --git a/src/model/s3/lifecycle_worker.rs b/src/model/s3/lifecycle_worker.rs index 0747ffb8..ed762413 100644 --- a/src/model/s3/lifecycle_worker.rs +++ b/src/model/s3/lifecycle_worker.rs @@ -249,11 +249,22 @@ async fn process_object( let bucket = match last_bucket.take() { Some(b) if b.id == object.bucket_id => b, - _ => garage - .bucket_table - .get(&EmptyKey, &object.bucket_id) - .await? - .ok_or_message("object in non-existent bucket")?, + _ => { + match garage + .bucket_table + .get(&EmptyKey, &object.bucket_id) + .await? 
+ { + Some(b) => b, + None => { + warn!( + "Lifecycle worker: object in non-existent bucket {:?}", + object.bucket_id + ); + return Ok(Skip::SkipBucket); + } + } + } }; let lifecycle_policy: &[LifecycleRule] = bucket From 8e0c020bb95a05ea657fa75cf19f8e125d9c602d Mon Sep 17 00:00:00 2001 From: Alex Auvolat Date: Thu, 31 Aug 2023 11:45:19 +0200 Subject: [PATCH 25/25] lifecycle worker: correct small clippy lints --- src/model/s3/lifecycle_worker.rs | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/model/s3/lifecycle_worker.rs b/src/model/s3/lifecycle_worker.rs index ed762413..4734742d 100644 --- a/src/model/s3/lifecycle_worker.rs +++ b/src/model/s3/lifecycle_worker.rs @@ -7,7 +7,7 @@ use tokio::sync::watch; use garage_util::background::*; use garage_util::data::*; -use garage_util::error::{Error, OkOrMessage}; +use garage_util::error::Error; use garage_util::persister::PersisterShared; use garage_util::time::*; @@ -305,7 +305,7 @@ async fn process_object( (now_date - version_date) >= chrono::Duration::days(*n_days as i64) } LifecycleExpiration::AtDate(exp_date) => { - if let Ok(exp_date) = parse_lifecycle_date(&exp_date) { + if let Ok(exp_date) = parse_lifecycle_date(exp_date) { now_date >= exp_date } else { warn!("Invalid expiration date stored in bucket {:?} lifecycle config: {}", bucket.id, exp_date); @@ -391,7 +391,7 @@ fn check_size_filter(version_data: &ObjectVersionData, filter: &LifecycleFilter) return false; } } - return true; + true } fn midnight_ts(date: NaiveDate) -> u64 {