From 940988e484ee8ecbc397654716ed9926bea0fc5d Mon Sep 17 00:00:00 2001
From: Alex Auvolat
Date: Tue, 18 Mar 2025 11:35:55 +0100
Subject: [PATCH 1/2] more resilience to inconsistent alias states

---
 src/api/admin/bucket.rs    |   2 +-
 src/api/s3/bucket.rs       |   4 +-
 src/model/helper/locked.rs | 118 +++++++++++++++++++++++++------------
 3 files changed, 84 insertions(+), 40 deletions(-)

diff --git a/src/api/admin/bucket.rs b/src/api/admin/bucket.rs
index 2537bfc9..c477b918 100644
--- a/src/api/admin/bucket.rs
+++ b/src/api/admin/bucket.rs
@@ -382,7 +382,7 @@ pub async fn handle_delete_bucket(
     for ((key_id, alias), _, active) in state.local_aliases.items().iter() {
         if *active {
             helper
-                .unset_local_bucket_alias(bucket.id, key_id, alias)
+                .purge_local_bucket_alias(bucket.id, key_id, alias)
                 .await?;
         }
     }
diff --git a/src/api/s3/bucket.rs b/src/api/s3/bucket.rs
index 3a09e769..cadbd6a2 100644
--- a/src/api/s3/bucket.rs
+++ b/src/api/s3/bucket.rs
@@ -241,11 +241,11 @@ pub async fn handle_delete_bucket(ctx: ReqCtx) -> Result<Response<ResBody>, Erro
     // 1. delete bucket alias
     if is_local_alias {
         helper
-            .unset_local_bucket_alias(*bucket_id, &api_key.key_id, bucket_name)
+            .purge_local_bucket_alias(*bucket_id, &api_key.key_id, bucket_name)
             .await?;
     } else {
         helper
-            .unset_global_bucket_alias(*bucket_id, bucket_name)
+            .purge_global_bucket_alias(*bucket_id, bucket_name)
             .await?;
     }
 
diff --git a/src/model/helper/locked.rs b/src/model/helper/locked.rs
index 482e91b0..a36a05e0 100644
--- a/src/model/helper/locked.rs
+++ b/src/model/helper/locked.rs
@@ -47,6 +47,10 @@ impl<'a> LockedHelper<'a> {
         KeyHelper(self.0)
     }
 
+    // ================================================
+    // global bucket aliases
+    // ================================================
+
     /// Sets a new alias for a bucket in global namespace.
     /// This function fails if:
     /// - alias name is not valid according to S3 spec
@@ -180,13 +184,14 @@ impl<'a> LockedHelper<'a> {
             .ok_or_else(|| Error::NoSuchBucket(alias_name.to_string()))?;
 
         // Checks ok, remove alias
-        let alias_ts = match bucket.state.as_option() {
-            Some(bucket_state) => increment_logical_clock_2(
-                alias.state.timestamp(),
-                bucket_state.aliases.get_timestamp(alias_name),
-            ),
-            None => increment_logical_clock(alias.state.timestamp()),
-        };
+        let alias_ts = increment_logical_clock_2(
+            alias.state.timestamp(),
+            bucket
+                .state
+                .as_option()
+                .map(|p| p.aliases.get_timestamp(alias_name))
+                .unwrap_or(0),
+        );
 
         // ---- timestamp-ensured causality barrier ----
         // writes are now done and all writes use timestamp alias_ts
@@ -204,6 +209,10 @@ impl<'a> LockedHelper<'a> {
         Ok(())
     }
 
+    // ================================================
+    // local bucket aliases
+    // ================================================
+
     /// Sets a new alias for a bucket in the local namespace of a key.
     /// This function fails if:
     /// - alias name is not valid according to S3 spec
@@ -216,14 +225,12 @@ impl<'a> LockedHelper<'a> {
         key_id: &String,
         alias_name: &String,
     ) -> Result<(), Error> {
-        let key_helper = KeyHelper(self.0);
-
         if !is_valid_bucket_name(alias_name) {
             return Err(Error::InvalidBucketName(alias_name.to_string()));
         }
 
         let mut bucket = self.bucket().get_existing_bucket(bucket_id).await?;
-        let mut key = key_helper.get_existing_key(key_id).await?;
+        let mut key = self.key().get_existing_key(key_id).await?;
 
         let key_param = key.state.as_option_mut().unwrap();
 
@@ -272,23 +279,13 @@ impl<'a> LockedHelper<'a> {
         key_id: &String,
         alias_name: &String,
     ) -> Result<(), Error> {
-        let key_helper = KeyHelper(self.0);
-
         let mut bucket = self.bucket().get_existing_bucket(bucket_id).await?;
-        let mut key = key_helper.get_existing_key(key_id).await?;
+        let mut key = self.key().get_existing_key(key_id).await?;
 
+        let key_p = key.state.as_option().unwrap();
         let bucket_p = bucket.state.as_option_mut().unwrap();
 
-        if key
-            .state
-            .as_option()
-            .unwrap()
-            .local_aliases
-            .get(alias_name)
-            .cloned()
-            .flatten()
-            != Some(bucket_id)
-        {
+        if key_p.local_aliases.get(alias_name).cloned().flatten() != Some(bucket_id) {
             return Err(GarageError::Message(format!(
                 "Bucket {:?} does not have alias {} in namespace of key {}",
                 bucket_id, alias_name, key_id
@@ -305,17 +302,17 @@ impl<'a> LockedHelper<'a> {
             .local_aliases
             .items()
             .iter()
-            .any(|((k, n), _, active)| *k == key.key_id && n == alias_name && *active);
+            .any(|((k, n), _, active)| (*k != key.key_id || n != alias_name) && *active);
+
         if !has_other_global_aliases && !has_other_local_aliases {
             return Err(Error::BadRequest(format!("Bucket {} doesn't have other aliases, please delete it instead of just unaliasing.", alias_name)));
         }
 
         // Checks ok, remove alias
-        let key_param = key.state.as_option_mut().unwrap();
         let bucket_p_local_alias_key = (key.key_id.clone(), alias_name.clone());
 
         let alias_ts = increment_logical_clock_2(
-            key_param.local_aliases.get_timestamp(alias_name),
+            key_p.local_aliases.get_timestamp(alias_name),
             bucket_p
                 .local_aliases
                 .get_timestamp(&bucket_p_local_alias_key),
@@ -324,7 +321,8 @@ impl<'a> LockedHelper<'a> {
         // ---- timestamp-ensured causality barrier ----
         // writes are now done and all writes use timestamp alias_ts
 
-        key_param.local_aliases = LwwMap::raw_item(alias_name.clone(), alias_ts, None);
+        key.state.as_option_mut().unwrap().local_aliases =
+            LwwMap::raw_item(alias_name.clone(), alias_ts, None);
         self.0.key_table.insert(&key).await?;
 
         bucket_p.local_aliases = LwwMap::raw_item(bucket_p_local_alias_key, alias_ts, false);
@@ -333,21 +331,68 @@ impl<'a> LockedHelper<'a> {
         Ok(())
     }
 
+    /// Ensures a bucket does not have a certain local alias.
+    /// Contrary to unset_local_bucket_alias, this does not
+    /// fail on any condition other than:
+    /// - bucket cannot be found (it's fine if it is in deleted state)
+    /// - key cannot be found (it's fine if the alias in the key points to nothing
+    ///   or to another bucket)
+    pub async fn purge_local_bucket_alias(
+        &self,
+        bucket_id: Uuid,
+        key_id: &String,
+        alias_name: &String,
+    ) -> Result<(), Error> {
+        let mut bucket = self.bucket().get_internal_bucket(bucket_id).await?;
+        let mut key = self.key().get_internal_key(key_id).await?;
+
+        let bucket_p_local_alias_key = (key.key_id.clone(), alias_name.clone());
+
+        let alias_ts = increment_logical_clock_2(
+            key.state
+                .as_option()
+                .map(|p| p.local_aliases.get_timestamp(alias_name))
+                .unwrap_or(0),
+            bucket
+                .state
+                .as_option()
+                .map(|p| p.local_aliases.get_timestamp(&bucket_p_local_alias_key))
+                .unwrap_or(0),
+        );
+
+        // ---- timestamp-ensured causality barrier ----
+        // writes are now done and all writes use timestamp alias_ts
+
+        if let Some(kp) = key.state.as_option_mut() {
+            kp.local_aliases = LwwMap::raw_item(alias_name.clone(), alias_ts, None);
+            self.0.key_table.insert(&key).await?;
+        }
+
+        if let Some(bp) = bucket.state.as_option_mut() {
+            bp.local_aliases = LwwMap::raw_item(bucket_p_local_alias_key, alias_ts, false);
+            self.0.bucket_table.insert(&bucket).await?;
+        }
+
+        Ok(())
+    }
+
+    // ================================================
+    // permissions
+    // ================================================
+
     /// Sets permissions for a key on a bucket.
     /// This function fails if:
     /// - bucket or key cannot be found at all (its ok if they are in deleted state)
-    /// - bucket or key is in deleted state and we are trying to set permissions other than "deny
-    /// all"
+    /// - bucket or key is in deleted state and we are trying to set
+    ///   permissions other than "deny all"
     pub async fn set_bucket_key_permissions(
         &self,
         bucket_id: Uuid,
         key_id: &String,
         mut perm: BucketKeyPerm,
     ) -> Result<(), Error> {
-        let key_helper = KeyHelper(self.0);
-
         let mut bucket = self.bucket().get_internal_bucket(bucket_id).await?;
-        let mut key = key_helper.get_internal_key(key_id).await?;
+        let mut key = self.key().get_internal_key(key_id).await?;
 
         if let Some(bstate) = bucket.state.as_option() {
             if let Some(kp) = bstate.authorized_keys.get(key_id) {
@@ -384,21 +429,20 @@ impl<'a> LockedHelper<'a> {
         Ok(())
     }
 
-    // ----
+    // ================================================
+    // keys
+    // ================================================
 
     /// Deletes an API access key
    pub async fn delete_key(&self, key: &mut Key) -> Result<(), Error> {
        let state = key.state.as_option_mut().unwrap();
 
        // --- done checking, now commit ---
-        // (the step at unset_local_bucket_alias will fail if a bucket
-        // does not have another alias, the deletion will be
-        // interrupted in the middle if that happens)
 
     // 1. Delete local aliases
     for (alias, _, to) in state.local_aliases.items().iter() {
         if let Some(bucket_id) = to {
-            self.unset_local_bucket_alias(*bucket_id, &key.key_id, alias)
+            self.purge_local_bucket_alias(*bucket_id, &key.key_id, alias)
                 .await?;
         }
     }
-- 
2.45.3

From 776294ebbe3b54314bbc3b6d18511a51ebe2a710 Mon Sep 17 00:00:00 2001
From: Alex Auvolat
Date: Wed, 19 Mar 2025 12:39:32 +0100
Subject: [PATCH 2/2] implement repair procedure to fix inconsistent bucket
 aliases

---
 src/garage/cli/structs.rs   |   3 +
 src/garage/repair/online.rs |   4 +
 src/model/helper/locked.rs  | 193 ++++++++++++++++++++++++++++++++++++
 3 files changed, 200 insertions(+)

diff --git a/src/garage/cli/structs.rs b/src/garage/cli/structs.rs
index 4ec35e68..3652ef6b 100644
--- a/src/garage/cli/structs.rs
+++ b/src/garage/cli/structs.rs
@@ -478,6 +478,9 @@ pub enum RepairWhat {
     /// Recalculate block reference counters
     #[structopt(name = "block-rc", version = garage_version())]
     BlockRc,
+    /// Fix inconsistency in bucket aliases (WARNING: EXPERIMENTAL)
+    #[structopt(name = "aliases", version = garage_version())]
+    Aliases,
     /// Verify integrity of all blocks on disc
     #[structopt(name = "scrub", version = garage_version())]
     Scrub {
diff --git a/src/garage/repair/online.rs b/src/garage/repair/online.rs
index 47883f97..950cd5f7 100644
--- a/src/garage/repair/online.rs
+++ b/src/garage/repair/online.rs
@@ -88,6 +88,10 @@ pub async fn launch_online_repair(
                 garage.block_manager.clone(),
             ));
         }
+        RepairWhat::Aliases => {
+            info!("Repairing bucket aliases (foreground)");
+            garage.locked_helper().await.repair_aliases().await?;
+        }
     }
     Ok(())
 }
diff --git a/src/model/helper/locked.rs b/src/model/helper/locked.rs
index a36a05e0..7088cdce 100644
--- a/src/model/helper/locked.rs
+++ b/src/model/helper/locked.rs
@@ -1,3 +1,7 @@
+use std::collections::{HashMap, HashSet};
+
+use garage_db as db;
+
 use garage_util::crdt::*;
 use garage_util::data::*;
 use garage_util::error::{Error as GarageError, OkOrMessage};
@@ -459,4 +463,193 @@ impl<'a> LockedHelper<'a> {
 
         Ok(())
     }
+
+    // ================================================
+    // repair procedure
+    // ================================================
+
+    pub async fn repair_aliases(&self) -> Result<(), GarageError> {
+        self.0.db.transaction(|tx| {
+            info!("--- begin repair_aliases transaction ----");
+
+            // 1. List all non-deleted buckets, so that we can fix bad aliases
+            let mut all_buckets: HashSet<Uuid> = HashSet::new();
+
+            for item in tx.range::<&[u8], _>(&self.0.bucket_table.data.store, ..)? {
+                let bucket = self
+                    .0
+                    .bucket_table
+                    .data
+                    .decode_entry(&(item?.1))
+                    .map_err(db::TxError::Abort)?;
+                if !bucket.is_deleted() {
+                    all_buckets.insert(bucket.id);
+                }
+            }
+
+            info!("number of buckets: {}", all_buckets.len());
+
+            // 2. List all aliases declared in bucket_alias_table and key_table
+            // Take note of aliases that point to non-existing buckets
+            let mut global_aliases: HashMap<String, Uuid> = HashMap::new();
+
+            {
+                let mut delete_global = vec![];
+                for item in tx.range::<&[u8], _>(&self.0.bucket_alias_table.data.store, ..)? {
+                    let mut alias = self
+                        .0
+                        .bucket_alias_table
+                        .data
+                        .decode_entry(&(item?.1))
+                        .map_err(db::TxError::Abort)?;
+                    if let Some(id) = alias.state.get() {
+                        if all_buckets.contains(id) {
+                            // keep aliases
+                            global_aliases.insert(alias.name().to_string(), *id);
+                        } else {
+                            // delete alias
+                            warn!(
+                                "global alias: remove {} -> {:?} (bucket is deleted)",
+                                alias.name(),
+                                id
+                            );
+                            alias.state.update(None);
+                            delete_global.push(alias);
+                        }
+                    }
+                }
+
+                info!("number of global aliases: {}", global_aliases.len());
+
+                info!("global alias table: {} entries fixed", delete_global.len());
+                for ga in delete_global {
+                    debug!("Enqueue update to global alias table: {:?}", ga);
+                    self.0.bucket_alias_table.queue_insert(tx, &ga)?;
+                }
+            }
+
+            let mut local_aliases: HashMap<(String, String), Uuid> = HashMap::new();
+
+            {
+                let mut delete_local = vec![];
+
+                for item in tx.range::<&[u8], _>(&self.0.key_table.data.store, ..)? {
+                    let mut key = self
+                        .0
+                        .key_table
+                        .data
+                        .decode_entry(&(item?.1))
+                        .map_err(db::TxError::Abort)?;
+                    let Some(p) = key.state.as_option_mut() else {
+                        continue;
+                    };
+                    let mut has_changes = false;
+                    for (name, _, to) in p.local_aliases.items().to_vec() {
+                        if let Some(id) = to {
+                            if all_buckets.contains(&id) {
+                                local_aliases.insert((key.key_id.clone(), name), id);
+                            } else {
+                                warn!(
+                                    "local alias: remove ({}, {}) -> {:?} (bucket is deleted)",
+                                    key.key_id, name, id
+                                );
+                                p.local_aliases.update_in_place(name, None);
+                                has_changes = true;
+                            }
+                        }
+                    }
+                    if has_changes {
+                        delete_local.push(key);
+                    }
+                }
+
+                info!("number of local aliases: {}", local_aliases.len());
+
+                info!("key table: {} entries fixed", delete_local.len());
+                for la in delete_local {
+                    debug!("Enqueue update to key table: {:?}", la);
+                    self.0.key_table.queue_insert(tx, &la)?;
+                }
+            }
+
+            // 4. Reverse the alias maps to determine the aliases per-bucket
+            let mut bucket_global: HashMap<Uuid, Vec<String>> = HashMap::new();
+            let mut bucket_local: HashMap<Uuid, Vec<(String, String)>> = HashMap::new();
+
+            for (name, bucket) in global_aliases {
+                bucket_global.entry(bucket).or_default().push(name);
+            }
+            for ((key, name), bucket) in local_aliases {
+                bucket_local.entry(bucket).or_default().push((key, name));
+            }
+
+            // 5. Fix the bucket table to ensure consistency
+            let mut bucket_updates = vec![];
+
+            for item in tx.range::<&[u8], _>(&self.0.bucket_table.data.store, ..)? {
+                let bucket = self
+                    .0
+                    .bucket_table
+                    .data
+                    .decode_entry(&(item?.1))
+                    .map_err(db::TxError::Abort)?;
+                let mut bucket2 = bucket.clone();
+                let Some(param) = bucket2.state.as_option_mut() else {
+                    continue;
+                };
+
+                // fix global aliases
+                {
+                    let ga = bucket_global.remove(&bucket.id).unwrap_or_default();
+                    for (name, _, active) in param.aliases.items().to_vec() {
+                        if active && !ga.contains(&name) {
+                            warn!("bucket {:?}: remove global alias {}", bucket.id, name);
+                            param.aliases.update_in_place(name, false);
+                        }
+                    }
+                    for name in ga {
+                        if param.aliases.get(&name).copied().unwrap_or(false) == false {
+                            warn!("bucket {:?}: add global alias {}", bucket.id, name);
+                            param.aliases.update_in_place(name, true);
+                        }
+                    }
+                }
+
+                // fix local aliases
+                {
+                    let la = bucket_local.remove(&bucket.id).unwrap_or_default();
+                    for (pair, _, active) in param.local_aliases.items().to_vec() {
+                        if active && !la.contains(&pair) {
+                            warn!("bucket {:?}: remove local alias {:?}", bucket.id, pair);
+                            param.local_aliases.update_in_place(pair, false);
+                        }
+                    }
+                    for pair in la {
+                        if param.local_aliases.get(&pair).copied().unwrap_or(false) == false {
+                            warn!("bucket {:?}: add local alias {:?}", bucket.id, pair);
+                            param.local_aliases.update_in_place(pair, true);
+                        }
+                    }
+                }
+
+                if bucket2 != bucket {
+                    bucket_updates.push(bucket2);
+                }
+            }
+
+            info!("bucket table: {} entries fixed", bucket_updates.len());
+            for b in bucket_updates {
+                debug!("Enqueue update to bucket table: {:?}", b);
+                self.0.bucket_table.queue_insert(tx, &b)?;
+            }
+
+            info!("--- end repair_aliases transaction ----");
+
+            Ok(())
+        })?;
+
+        info!("repair_aliases is done");
+
+        Ok(())
+    }
 }
-- 
2.45.3