WIP: more resilience to inconsistent alias states #989

Draft
lx wants to merge 2 commits from fix-bucket-aliases into main
5 changed files with 284 additions and 40 deletions

View file

@@ -382,7 +382,7 @@ pub async fn handle_delete_bucket(
for ((key_id, alias), _, active) in state.local_aliases.items().iter() {
if *active {
helper
.unset_local_bucket_alias(bucket.id, key_id, alias)
.purge_local_bucket_alias(bucket.id, key_id, alias)
.await?;
}
}

View file

@@ -241,11 +241,11 @@ pub async fn handle_delete_bucket(ctx: ReqCtx) -> Result<Response<ResBody>, Erro
// 1. delete bucket alias
if is_local_alias {
helper
.unset_local_bucket_alias(*bucket_id, &api_key.key_id, bucket_name)
.purge_local_bucket_alias(*bucket_id, &api_key.key_id, bucket_name)
.await?;
} else {
helper
.unset_global_bucket_alias(*bucket_id, bucket_name)
.purge_global_bucket_alias(*bucket_id, bucket_name)
.await?;
}

View file

@@ -478,6 +478,9 @@ pub enum RepairWhat {
/// Recalculate block reference counters
#[structopt(name = "block-rc", version = garage_version())]
BlockRc,
/// Fix inconsistency in bucket aliases (WARNING: EXPERIMENTAL)
#[structopt(name = "aliases", version = garage_version())]
Aliases,
/// Verify integrity of all blocks on disc
#[structopt(name = "scrub", version = garage_version())]
Scrub {

View file

@@ -88,6 +88,10 @@ pub async fn launch_online_repair(
garage.block_manager.clone(),
));
}
RepairWhat::Aliases => {
info!("Repairing bucket aliases (foreground)");
garage.locked_helper().await.repair_aliases().await?;
}
}
Ok(())
}

View file

@@ -1,3 +1,7 @@
use std::collections::{HashMap, HashSet};
use garage_db as db;
use garage_util::crdt::*;
use garage_util::data::*;
use garage_util::error::{Error as GarageError, OkOrMessage};
@@ -47,6 +51,10 @@ impl<'a> LockedHelper<'a> {
KeyHelper(self.0)
}
// ================================================
// global bucket aliases
// ================================================
/// Sets a new alias for a bucket in global namespace.
/// This function fails if:
/// - alias name is not valid according to S3 spec
@@ -180,13 +188,14 @@ impl<'a> LockedHelper<'a> {
.ok_or_else(|| Error::NoSuchBucket(alias_name.to_string()))?;
// Checks ok, remove alias
let alias_ts = match bucket.state.as_option() {
Some(bucket_state) => increment_logical_clock_2(
alias.state.timestamp(),
bucket_state.aliases.get_timestamp(alias_name),
),
None => increment_logical_clock(alias.state.timestamp()),
};
let alias_ts = increment_logical_clock_2(
alias.state.timestamp(),
bucket
.state
.as_option()
.map(|p| p.aliases.get_timestamp(alias_name))
.unwrap_or(0),
);
// ---- timestamp-ensured causality barrier ----
// writes are now done and all writes use timestamp alias_ts
@@ -204,6 +213,10 @@ impl<'a> LockedHelper<'a> {
Ok(())
}
// ================================================
// local bucket aliases
// ================================================
/// Sets a new alias for a bucket in the local namespace of a key.
/// This function fails if:
/// - alias name is not valid according to S3 spec
@@ -216,14 +229,12 @@ impl<'a> LockedHelper<'a> {
key_id: &String,
alias_name: &String,
) -> Result<(), Error> {
let key_helper = KeyHelper(self.0);
if !is_valid_bucket_name(alias_name) {
return Err(Error::InvalidBucketName(alias_name.to_string()));
}
let mut bucket = self.bucket().get_existing_bucket(bucket_id).await?;
let mut key = key_helper.get_existing_key(key_id).await?;
let mut key = self.key().get_existing_key(key_id).await?;
let key_param = key.state.as_option_mut().unwrap();
@@ -272,23 +283,13 @@ impl<'a> LockedHelper<'a> {
key_id: &String,
alias_name: &String,
) -> Result<(), Error> {
let key_helper = KeyHelper(self.0);
let mut bucket = self.bucket().get_existing_bucket(bucket_id).await?;
let mut key = key_helper.get_existing_key(key_id).await?;
let mut key = self.key().get_existing_key(key_id).await?;
let key_p = key.state.as_option().unwrap();
let bucket_p = bucket.state.as_option_mut().unwrap();
if key
.state
.as_option()
.unwrap()
.local_aliases
.get(alias_name)
.cloned()
.flatten()
!= Some(bucket_id)
{
if key_p.local_aliases.get(alias_name).cloned().flatten() != Some(bucket_id) {
return Err(GarageError::Message(format!(
"Bucket {:?} does not have alias {} in namespace of key {}",
bucket_id, alias_name, key_id
@@ -305,17 +306,17 @@ impl<'a> LockedHelper<'a> {
.local_aliases
.items()
.iter()
.any(|((k, n), _, active)| *k == key.key_id && n == alias_name && *active);
.any(|((k, n), _, active)| (*k != key.key_id || n != alias_name) && *active);
if !has_other_global_aliases && !has_other_local_aliases {
return Err(Error::BadRequest(format!("Bucket {} doesn't have other aliases, please delete it instead of just unaliasing.", alias_name)));
}
// Checks ok, remove alias
let key_param = key.state.as_option_mut().unwrap();
let bucket_p_local_alias_key = (key.key_id.clone(), alias_name.clone());
let alias_ts = increment_logical_clock_2(
key_param.local_aliases.get_timestamp(alias_name),
key_p.local_aliases.get_timestamp(alias_name),
bucket_p
.local_aliases
.get_timestamp(&bucket_p_local_alias_key),
@@ -324,7 +325,8 @@ impl<'a> LockedHelper<'a> {
// ---- timestamp-ensured causality barrier ----
// writes are now done and all writes use timestamp alias_ts
key_param.local_aliases = LwwMap::raw_item(alias_name.clone(), alias_ts, None);
key.state.as_option_mut().unwrap().local_aliases =
LwwMap::raw_item(alias_name.clone(), alias_ts, None);
self.0.key_table.insert(&key).await?;
bucket_p.local_aliases = LwwMap::raw_item(bucket_p_local_alias_key, alias_ts, false);
@@ -333,21 +335,68 @@ impl<'a> LockedHelper<'a> {
Ok(())
}
/// Ensures a bucket does not have a certain local alias.
/// Contrary to unset_local_bucket_alias, this does not
/// fail on any condition other than:
/// - bucket cannot be found (it's fine if it is in deleted state)
/// - key cannot be found (it's fine if the alias in the key points to nothing
///   or to another bucket)
///
/// Because it skips the safety checks of unset_local_bucket_alias,
/// this is intended for cleanup paths (bucket/key deletion) where the
/// alias state may already be inconsistent.
pub async fn purge_local_bucket_alias(
&self,
bucket_id: Uuid,
key_id: &String,
alias_name: &String,
) -> Result<(), Error> {
// Use the get_internal_* variants: they return entries even when
// the bucket/key is in deleted state, which purging must tolerate.
let mut bucket = self.bucket().get_internal_bucket(bucket_id).await?;
let mut key = self.key().get_internal_key(key_id).await?;
// Key under which the bucket side stores this local alias.
let bucket_p_local_alias_key = (key.key_id.clone(), alias_name.clone());
// Pick a logical timestamp that supersedes both sides' current
// timestamps (0 when a side is deleted), so the removal wins the
// LWW merge in both tables.
let alias_ts = increment_logical_clock_2(
key.state
.as_option()
.map(|p| p.local_aliases.get_timestamp(alias_name))
.unwrap_or(0),
bucket
.state
.as_option()
.map(|p| p.local_aliases.get_timestamp(&bucket_p_local_alias_key))
.unwrap_or(0),
);
// ---- timestamp-ensured causality barrier ----
// writes are now done and all writes use timestamp alias_ts
// Clear the alias on the key side, unless the key is deleted.
if let Some(kp) = key.state.as_option_mut() {
kp.local_aliases = LwwMap::raw_item(alias_name.clone(), alias_ts, None);
self.0.key_table.insert(&key).await?;
}
// Clear the alias on the bucket side, unless the bucket is deleted.
if let Some(bp) = bucket.state.as_option_mut() {
bp.local_aliases = LwwMap::raw_item(bucket_p_local_alias_key, alias_ts, false);
self.0.bucket_table.insert(&bucket).await?;
}
Ok(())
}
// ================================================
// permissions
// ================================================
/// Sets permissions for a key on a bucket.
/// This function fails if:
/// - bucket or key cannot be found at all (its ok if they are in deleted state)
/// - bucket or key is in deleted state and we are trying to set permissions other than "deny
/// all"
/// - bucket or key is in deleted state and we are trying to set
/// permissions other than "deny all"
pub async fn set_bucket_key_permissions(
&self,
bucket_id: Uuid,
key_id: &String,
mut perm: BucketKeyPerm,
) -> Result<(), Error> {
let key_helper = KeyHelper(self.0);
let mut bucket = self.bucket().get_internal_bucket(bucket_id).await?;
let mut key = key_helper.get_internal_key(key_id).await?;
let mut key = self.key().get_internal_key(key_id).await?;
if let Some(bstate) = bucket.state.as_option() {
if let Some(kp) = bstate.authorized_keys.get(key_id) {
@@ -384,21 +433,20 @@ impl<'a> LockedHelper<'a> {
Ok(())
}
// ----
// ================================================
// keys
// ================================================
/// Deletes an API access key
pub async fn delete_key(&self, key: &mut Key) -> Result<(), Error> {
let state = key.state.as_option_mut().unwrap();
// --- done checking, now commit ---
// (the step at unset_local_bucket_alias will fail if a bucket
// does not have another alias, the deletion will be
// interrupted in the middle if that happens)
// 1. Delete local aliases
for (alias, _, to) in state.local_aliases.items().iter() {
if let Some(bucket_id) = to {
self.unset_local_bucket_alias(*bucket_id, &key.key_id, alias)
self.purge_local_bucket_alias(*bucket_id, &key.key_id, alias)
.await?;
}
}
@@ -415,4 +463,193 @@ impl<'a> LockedHelper<'a> {
Ok(())
}
// ================================================
// repair procedure
// ================================================
/// Repair procedure for bucket aliases (invoked by `garage repair aliases`).
///
/// Runs as a single metadata DB transaction that cross-checks the
/// bucket table, the global alias table and the key table (local
/// aliases), removing aliases that point to deleted/missing buckets and
/// rewriting each bucket's own alias lists to match what the alias and
/// key tables actually declare. Updates are enqueued with queue_insert
/// inside the transaction.
pub async fn repair_aliases(&self) -> Result<(), GarageError> {
self.0.db.transaction(|tx| {
info!("--- begin repair_aliases transaction ----");
// 1. List all non-deleted buckets, so that we can fix bad aliases
let mut all_buckets: HashSet<Uuid> = HashSet::new();
for item in tx.range::<&[u8], _>(&self.0.bucket_table.data.store, ..)? {
let bucket = self
.0
.bucket_table
.data
.decode_entry(&(item?.1))
.map_err(db::TxError::Abort)?;
if !bucket.is_deleted() {
all_buckets.insert(bucket.id);
}
}
info!("number of buckets: {}", all_buckets.len());
// 2. List all global aliases declared in bucket_alias_table.
// Take note of aliases that point to non-existing buckets
let mut global_aliases: HashMap<String, Uuid> = HashMap::new();
{
let mut delete_global = vec![];
for item in tx.range::<&[u8], _>(&self.0.bucket_alias_table.data.store, ..)? {
let mut alias = self
.0
.bucket_alias_table
.data
.decode_entry(&(item?.1))
.map_err(db::TxError::Abort)?;
if let Some(id) = alias.state.get() {
if all_buckets.contains(id) {
// keep aliases
global_aliases.insert(alias.name().to_string(), *id);
} else {
// delete alias
warn!(
"global alias: remove {} -> {:?} (bucket is deleted)",
alias.name(),
id
);
alias.state.update(None);
delete_global.push(alias);
}
}
}
info!("number of global aliases: {}", global_aliases.len());
info!("global alias table: {} entries fixed", delete_global.len());
for ga in delete_global {
debug!("Enqueue update to global alias table: {:?}", ga);
self.0.bucket_alias_table.queue_insert(tx, &ga)?;
}
}
// 3. Same thing for the local aliases declared in key_table:
// collect live ones, drop those pointing to deleted buckets.
let mut local_aliases: HashMap<(String, String), Uuid> = HashMap::new();
{
let mut delete_local = vec![];
for item in tx.range::<&[u8], _>(&self.0.key_table.data.store, ..)? {
let mut key = self
.0
.key_table
.data
.decode_entry(&(item?.1))
.map_err(db::TxError::Abort)?;
// Deleted keys have no local aliases to check.
let Some(p) = key.state.as_option_mut() else {
continue;
};
// Only re-insert keys that were actually modified.
let mut has_changes = false;
for (name, _, to) in p.local_aliases.items().to_vec() {
if let Some(id) = to {
if all_buckets.contains(&id) {
local_aliases.insert((key.key_id.clone(), name), id);
} else {
warn!(
"local alias: remove ({}, {}) -> {:?} (bucket is deleted)",
key.key_id, name, id
);
p.local_aliases.update_in_place(name, None);
has_changes = true;
}
}
}
if has_changes {
delete_local.push(key);
}
}
info!("number of local aliases: {}", local_aliases.len());
info!("key table: {} entries fixed", delete_local.len());
for la in delete_local {
debug!("Enqueue update to key table: {:?}", la);
self.0.key_table.queue_insert(tx, &la)?;
}
}
// 4. Reverse the alias maps to determine the aliases per-bucket
let mut bucket_global: HashMap<Uuid, Vec<String>> = HashMap::new();
let mut bucket_local: HashMap<Uuid, Vec<(String, String)>> = HashMap::new();
for (name, bucket) in global_aliases {
bucket_global.entry(bucket).or_default().push(name);
}
for ((key, name), bucket) in local_aliases {
bucket_local.entry(bucket).or_default().push((key, name));
}
// 5. Fix the bucket table to ensure consistency
let mut bucket_updates = vec![];
for item in tx.range::<&[u8], _>(&self.0.bucket_table.data.store, ..)? {
let bucket = self
.0
.bucket_table
.data
.decode_entry(&(item?.1))
.map_err(db::TxError::Abort)?;
// Work on a copy so we can compare against the original below
// and only enqueue buckets that actually changed.
let mut bucket2 = bucket.clone();
let Some(param) = bucket2.state.as_option_mut() else {
continue;
};
// fix global aliases: deactivate aliases the alias table does not
// declare, and (re)activate the ones it does.
{
let ga = bucket_global.remove(&bucket.id).unwrap_or_default();
for (name, _, active) in param.aliases.items().to_vec() {
if active && !ga.contains(&name) {
warn!("bucket {:?}: remove global alias {}", bucket.id, name);
param.aliases.update_in_place(name, false);
}
}
for name in ga {
if param.aliases.get(&name).copied().unwrap_or(false) == false {
warn!("bucket {:?}: add global alias {}", bucket.id, name);
param.aliases.update_in_place(name, true);
}
}
}
// fix local aliases: same reconciliation against the (key, name)
// pairs collected from the key table.
{
let la = bucket_local.remove(&bucket.id).unwrap_or_default();
for (pair, _, active) in param.local_aliases.items().to_vec() {
if active && !la.contains(&pair) {
warn!("bucket {:?}: remove local alias {:?}", bucket.id, pair);
param.local_aliases.update_in_place(pair, false);
}
}
for pair in la {
if param.local_aliases.get(&pair).copied().unwrap_or(false) == false {
warn!("bucket {:?}: add local alias {:?}", bucket.id, pair);
param.local_aliases.update_in_place(pair, true);
}
}
}
if bucket2 != bucket {
bucket_updates.push(bucket2);
}
}
info!("bucket table: {} entries fixed", bucket_updates.len());
for b in bucket_updates {
debug!("Enqueue update to bucket table: {:?}", b);
self.0.bucket_table.queue_insert(tx, &b)?;
}
info!("--- end repair_aliases transaction ----");
Ok(())
})?;
info!("repair_aliases is done");
Ok(())
}
}