From 05c92204ecab87540806073ac4deedfd58519240 Mon Sep 17 00:00:00 2001 From: Alex Auvolat Date: Fri, 8 Mar 2024 14:59:56 +0100 Subject: [PATCH] [rm-sled] Remove counted_tree_hack --- src/block/manager.rs | 7 +- src/block/metrics.rs | 17 +++-- src/block/resync.rs | 15 +--- src/db/counted_tree_hack.rs | 127 ------------------------------- src/db/lib.rs | 9 --- src/db/lmdb_adapter.rs | 4 - src/db/sqlite_adapter.rs | 4 - src/garage/admin/mod.rs | 51 +++---------- src/garage/cli/structs.rs | 4 - src/model/s3/lifecycle_worker.rs | 8 +- src/table/data.rs | 6 +- src/table/gc.rs | 18 +++-- src/table/merkle.rs | 4 - src/table/metrics.rs | 21 ++--- 14 files changed, 48 insertions(+), 247 deletions(-) delete mode 100644 src/db/counted_tree_hack.rs diff --git a/src/block/manager.rs b/src/block/manager.rs index c7e4df17..18fadf85 100644 --- a/src/block/manager.rs +++ b/src/block/manager.rs @@ -378,11 +378,6 @@ impl BlockManager { Ok(self.rc.rc.len()?) } - /// Get number of items in the refcount table - pub fn rc_fast_len(&self) -> Result, Error> { - Ok(self.rc.rc.fast_len()?) - } - /// Send command to start/stop/manager scrub worker pub async fn send_scrub_command(&self, cmd: ScrubWorkerCommand) -> Result<(), Error> { let tx = self.tx_scrub_command.load(); @@ -398,7 +393,7 @@ impl BlockManager { /// List all resync errors pub fn list_resync_errors(&self) -> Result, Error> { - let mut blocks = Vec::with_capacity(self.resync.errors.len()); + let mut blocks = Vec::with_capacity(self.resync.errors.len()?); for ent in self.resync.errors.iter()? { let (hash, cnt) = ent?; let cnt = ErrorCounter::decode(&cnt); diff --git a/src/block/metrics.rs b/src/block/metrics.rs index 6659df32..8e10afdf 100644 --- a/src/block/metrics.rs +++ b/src/block/metrics.rs @@ -1,7 +1,6 @@ use opentelemetry::{global, metrics::*}; use garage_db as db; -use garage_db::counted_tree_hack::CountedTree; /// TableMetrics reference all counter used for metrics pub struct BlockManagerMetrics { @@ -29,8 +28,8 @@ impl BlockManagerMetrics { pub fn new( compression_level: Option, rc_tree: db::Tree, - resync_queue: CountedTree, - resync_errors: CountedTree, + resync_queue: db::Tree, + resync_errors: db::Tree, ) -> Self { let meter = global::meter("garage_model/block"); Self { @@ -45,15 +44,17 @@ impl BlockManagerMetrics { .init(), _rc_size: meter .u64_value_observer("block.rc_size", move |observer| { - if let Ok(Some(v)) = rc_tree.fast_len() { - observer.observe(v as u64, &[]) + if let Ok(value) = rc_tree.len() { + observer.observe(value as u64, &[]) } }) .with_description("Number of blocks known to the reference counter") .init(), _resync_queue_len: meter .u64_value_observer("block.resync_queue_length", move |observer| { - observer.observe(resync_queue.len() as u64, &[]) + if let Ok(value) = resync_queue.len() { + observer.observe(value as u64, &[]); + } }) .with_description( "Number of block hashes queued for local check and possible resync", @@ -61,7 +62,9 @@ impl BlockManagerMetrics { .init(), _resync_errored_blocks: meter .u64_value_observer("block.resync_errored_blocks", move |observer| { - observer.observe(resync_errors.len() as u64, &[]) + if let Ok(value) = resync_errors.len() { + observer.observe(value as u64, &[]); + } }) .with_description("Number of block hashes whose last resync resulted in an error") .init(), diff --git a/src/block/resync.rs b/src/block/resync.rs index 2516ba08..48c2cef1 100644 --- a/src/block/resync.rs +++ b/src/block/resync.rs @@ -15,7 +15,6 @@ use opentelemetry::{ }; use garage_db as db; -use garage_db::counted_tree_hack::CountedTree; use garage_util::background::*; use garage_util::data::*; @@ -47,9 +46,9 @@ pub(crate) const MAX_RESYNC_WORKERS: usize = 8; const INITIAL_RESYNC_TRANQUILITY: u32 = 2; pub struct BlockResyncManager { - pub(crate) queue: CountedTree, + pub(crate) queue: db::Tree, pub(crate) notify: Arc, - pub(crate) errors: CountedTree, + pub(crate) errors: db::Tree, busy_set: BusySet, @@ -90,12 +89,10 @@ impl BlockResyncManager { let queue = db .open_tree("block_local_resync_queue") .expect("Unable to open block_local_resync_queue tree"); - let queue = CountedTree::new(queue).expect("Could not count block_local_resync_queue"); let errors = db .open_tree("block_local_resync_errors") .expect("Unable to open block_local_resync_errors tree"); - let errors = CountedTree::new(errors).expect("Could not count block_local_resync_errors"); let persister = PersisterShared::new(&system.metadata_dir, "resync_cfg"); @@ -110,16 +107,12 @@ impl BlockResyncManager { /// Get lenght of resync queue pub fn queue_len(&self) -> Result { - // This currently can't return an error because the CountedTree hack - // doesn't error on .len(), but this will change when we remove the hack - // (hopefully someday!) - Ok(self.queue.len()) + Ok(self.queue.len()?) } /// Get number of blocks that have an error pub fn errors_len(&self) -> Result { - // (see queue_len comment) - Ok(self.errors.len()) + Ok(self.errors.len()?) } /// Clear the error counter for a block and put it in queue immediately diff --git a/src/db/counted_tree_hack.rs b/src/db/counted_tree_hack.rs deleted file mode 100644 index a4ce12e0..00000000 --- a/src/db/counted_tree_hack.rs +++ /dev/null @@ -1,127 +0,0 @@ -//! This hack allows a db tree to keep in RAM a counter of the number of entries -//! it contains, which is used to call .len() on it. This is usefull only for -//! the sled backend where .len() otherwise would have to traverse the whole -//! tree to count items. For sqlite and lmdb, this is mostly useless (but -//! hopefully not harmfull!). Note that a CountedTree cannot be part of a -//! transaction. - -use std::sync::{ - atomic::{AtomicUsize, Ordering}, - Arc, -}; - -use crate::{Result, Tree, TxError, Value, ValueIter}; - -#[derive(Clone)] -pub struct CountedTree(Arc); - -struct CountedTreeInternal { - tree: Tree, - len: AtomicUsize, -} - -impl CountedTree { - pub fn new(tree: Tree) -> Result { - let len = tree.len()?; - Ok(Self(Arc::new(CountedTreeInternal { - tree, - len: AtomicUsize::new(len), - }))) - } - - pub fn len(&self) -> usize { - self.0.len.load(Ordering::SeqCst) - } - - pub fn is_empty(&self) -> bool { - self.len() == 0 - } - - pub fn get>(&self, key: K) -> Result> { - self.0.tree.get(key) - } - - pub fn first(&self) -> Result> { - self.0.tree.first() - } - - pub fn iter(&self) -> Result> { - self.0.tree.iter() - } - - // ---- writing functions ---- - - pub fn insert(&self, key: K, value: V) -> Result> - where - K: AsRef<[u8]>, - V: AsRef<[u8]>, - { - let old_val = self.0.tree.insert(key, value)?; - if old_val.is_none() { - self.0.len.fetch_add(1, Ordering::SeqCst); - } - Ok(old_val) - } - - pub fn remove>(&self, key: K) -> Result> { - let old_val = self.0.tree.remove(key)?; - if old_val.is_some() { - self.0.len.fetch_sub(1, Ordering::SeqCst); - } - Ok(old_val) - } - - pub fn compare_and_swap( - &self, - key: K, - expected_old: Option, - new: Option, - ) -> Result - where - K: AsRef<[u8]>, - OV: AsRef<[u8]>, - NV: AsRef<[u8]>, - { - let old_some = expected_old.is_some(); - let new_some = new.is_some(); - - let tx_res = self.0.tree.db().transaction(|tx| { - let old_val = tx.get(&self.0.tree, &key)?; - let is_same = match (&old_val, &expected_old) { - (None, None) => true, - (Some(x), Some(y)) if x == y.as_ref() => true, - _ => false, - }; - if is_same { - match &new { - Some(v) => { - tx.insert(&self.0.tree, &key, v)?; - } - None => { - tx.remove(&self.0.tree, &key)?; - } - } - Ok(()) - } else { - Err(TxError::Abort(())) - } - }); - - match tx_res { - Ok(()) => { - match (old_some, new_some) { - (false, true) => { - self.0.len.fetch_add(1, Ordering::SeqCst); - } - (true, false) => { - self.0.len.fetch_sub(1, Ordering::SeqCst); - } - _ => (), - } - Ok(true) - } - Err(TxError::Abort(())) => Ok(false), - Err(TxError::Db(e)) => Err(e), - } - } -} diff --git a/src/db/lib.rs b/src/db/lib.rs index 8975f295..e81c712c 100644 --- a/src/db/lib.rs +++ b/src/db/lib.rs @@ -6,8 +6,6 @@ pub mod lmdb_adapter; #[cfg(feature = "sqlite")] pub mod sqlite_adapter; -pub mod counted_tree_hack; - pub mod open; #[cfg(test)] @@ -187,10 +185,6 @@ impl Tree { pub fn len(&self) -> Result { self.0.len(self.1) } - #[inline] - pub fn fast_len(&self) -> Result> { - self.0.fast_len(self.1) - } #[inline] pub fn first(&self) -> Result> { @@ -326,9 +320,6 @@ pub(crate) trait IDb: Send + Sync { fn get(&self, tree: usize, key: &[u8]) -> Result>; fn len(&self, tree: usize) -> Result; - fn fast_len(&self, _tree: usize) -> Result> { - Ok(None) - } fn insert(&self, tree: usize, key: &[u8], value: &[u8]) -> Result>; fn remove(&self, tree: usize, key: &[u8]) -> Result>; diff --git a/src/db/lmdb_adapter.rs b/src/db/lmdb_adapter.rs index 59fa132d..99b29a74 100644 --- a/src/db/lmdb_adapter.rs +++ b/src/db/lmdb_adapter.rs @@ -121,10 +121,6 @@ impl IDb for LmdbDb { Ok(tree.len(&tx)?.try_into().unwrap()) } - fn fast_len(&self, tree: usize) -> Result> { - Ok(Some(self.len(tree)?)) - } - fn insert(&self, tree: usize, key: &[u8], value: &[u8]) -> Result> { let tree = self.get_tree(tree)?; let mut tx = self.db.write_txn()?; diff --git a/src/db/sqlite_adapter.rs b/src/db/sqlite_adapter.rs index 9f967c66..1a7ae5f0 100644 --- a/src/db/sqlite_adapter.rs +++ b/src/db/sqlite_adapter.rs @@ -144,10 +144,6 @@ impl IDb for SqliteDb { } } - fn fast_len(&self, tree: usize) -> Result> { - Ok(Some(self.len(tree)?)) - } - fn insert(&self, tree: usize, key: &[u8], value: &[u8]) -> Result> { trace!("insert {}: lock db", tree); let this = self.0.lock().unwrap(); diff --git a/src/garage/admin/mod.rs b/src/garage/admin/mod.rs index de7851e1..896751cc 100644 --- a/src/garage/admin/mod.rs +++ b/src/garage/admin/mod.rs @@ -217,11 +217,11 @@ impl AdminRpcHandler { // Gather table statistics let mut table = vec![" Table\tItems\tMklItems\tMklTodo\tGcTodo".into()]; - table.push(self.gather_table_stats(&self.garage.bucket_table, opt.detailed)?); - table.push(self.gather_table_stats(&self.garage.key_table, opt.detailed)?); - table.push(self.gather_table_stats(&self.garage.object_table, opt.detailed)?); - table.push(self.gather_table_stats(&self.garage.version_table, opt.detailed)?); - table.push(self.gather_table_stats(&self.garage.block_ref_table, opt.detailed)?); + table.push(self.gather_table_stats(&self.garage.bucket_table)?); + table.push(self.gather_table_stats(&self.garage.key_table)?); + table.push(self.gather_table_stats(&self.garage.object_table)?); + table.push(self.gather_table_stats(&self.garage.version_table)?); + table.push(self.gather_table_stats(&self.garage.block_ref_table)?); write!( &mut ret, "\nTable stats:\n{}", @@ -231,15 +231,7 @@ impl AdminRpcHandler { // Gather block manager statistics writeln!(&mut ret, "\nBlock manager stats:").unwrap(); - let rc_len = if opt.detailed { - self.garage.block_manager.rc_len()?.to_string() - } else { - self.garage - .block_manager - .rc_fast_len()? - .map(|x| x.to_string()) - .unwrap_or_else(|| "NC".into()) - }; + let rc_len = self.garage.block_manager.rc_len()?.to_string(); writeln!( &mut ret, @@ -260,10 +252,6 @@ impl AdminRpcHandler { ) .unwrap(); - if !opt.detailed { - writeln!(&mut ret, "\nIf values are missing above (marked as NC), consider adding the --detailed flag (this will be slow).").unwrap(); - } - if !opt.skip_global { write!(&mut ret, "\n{}", self.gather_cluster_stats()).unwrap(); } @@ -365,34 +353,13 @@ impl AdminRpcHandler { ret } - fn gather_table_stats( - &self, - t: &Arc>, - detailed: bool, - ) -> Result + fn gather_table_stats(&self, t: &Arc>) -> Result where F: TableSchema + 'static, R: TableReplication + 'static, { - let (data_len, mkl_len) = if detailed { - ( - t.data.store.len().map_err(GarageError::from)?.to_string(), - t.merkle_updater.merkle_tree_len()?.to_string(), - ) - } else { - ( - t.data - .store - .fast_len() - .map_err(GarageError::from)? - .map(|x| x.to_string()) - .unwrap_or_else(|| "NC".into()), - t.merkle_updater - .merkle_tree_fast_len()? - .map(|x| x.to_string()) - .unwrap_or_else(|| "NC".into()), - ) - }; + let data_len = t.data.store.len().map_err(GarageError::from)?.to_string(); + let mkl_len = t.merkle_updater.merkle_tree_len()?.to_string(); Ok(format!( " {}\t{}\t{}\t{}\t{}", diff --git a/src/garage/cli/structs.rs b/src/garage/cli/structs.rs index 40e47ee1..7e7ab71b 100644 --- a/src/garage/cli/structs.rs +++ b/src/garage/cli/structs.rs @@ -553,10 +553,6 @@ pub struct StatsOpt { #[structopt(short = "a", long = "all-nodes")] pub all_nodes: bool, - /// Gather detailed statistics (this can be long) - #[structopt(short = "d", long = "detailed")] - pub detailed: bool, - /// Don't show global cluster stats (internal use in RPC) #[structopt(skip)] #[serde(default)] diff --git a/src/model/s3/lifecycle_worker.rs b/src/model/s3/lifecycle_worker.rs index 50d4283f..9ecf168c 100644 --- a/src/model/s3/lifecycle_worker.rs +++ b/src/model/s3/lifecycle_worker.rs @@ -121,13 +121,7 @@ impl Worker for LifecycleWorker { mpu_aborted, .. } => { - let n_objects = self - .garage - .object_table - .data - .store - .fast_len() - .unwrap_or(None); + let n_objects = self.garage.object_table.data.store.len().ok(); let progress = match n_objects { None => "...".to_string(), Some(total) => format!( diff --git a/src/table/data.rs b/src/table/data.rs index 7f6b7847..09f4e008 100644 --- a/src/table/data.rs +++ b/src/table/data.rs @@ -6,7 +6,6 @@ use serde_bytes::ByteBuf; use tokio::sync::Notify; use garage_db as db; -use garage_db::counted_tree_hack::CountedTree; use garage_util::data::*; use garage_util::error::*; @@ -36,7 +35,7 @@ pub struct TableData { pub(crate) insert_queue: db::Tree, pub(crate) insert_queue_notify: Arc, - pub(crate) gc_todo: CountedTree, + pub(crate) gc_todo: db::Tree, pub(crate) metrics: TableMetrics, } @@ -61,7 +60,6 @@ impl TableData { let gc_todo = db .open_tree(format!("{}:gc_todo_v2", F::TABLE_NAME)) .expect("Unable to open GC DB tree"); - let gc_todo = CountedTree::new(gc_todo).expect("Cannot count gc_todo_v2"); let metrics = TableMetrics::new( F::TABLE_NAME, @@ -370,6 +368,6 @@ impl TableData { } pub fn gc_todo_len(&self) -> Result { - Ok(self.gc_todo.len()) + Ok(self.gc_todo.len()?) } } diff --git a/src/table/gc.rs b/src/table/gc.rs index 65ad0c42..d30a1849 100644 --- a/src/table/gc.rs +++ b/src/table/gc.rs @@ -10,7 +10,7 @@ use serde_bytes::ByteBuf; use futures::future::join_all; use tokio::sync::watch; -use garage_db::counted_tree_hack::CountedTree; +use garage_db as db; use garage_util::background::*; use garage_util::data::*; @@ -376,7 +376,7 @@ impl GcTodoEntry { } /// Saves the GcTodoEntry in the gc_todo tree - pub(crate) fn save(&self, gc_todo_tree: &CountedTree) -> Result<(), Error> { + pub(crate) fn save(&self, gc_todo_tree: &db::Tree) -> Result<(), Error> { gc_todo_tree.insert(self.todo_table_key(), self.value_hash.as_slice())?; Ok(()) } @@ -386,12 +386,14 @@ impl GcTodoEntry { /// This is usefull to remove a todo entry only under the condition /// that it has not changed since the time it was read, i.e. /// what we have to do is still the same - pub(crate) fn remove_if_equal(&self, gc_todo_tree: &CountedTree) -> Result<(), Error> { - gc_todo_tree.compare_and_swap::<_, _, &[u8]>( - &self.todo_table_key(), - Some(self.value_hash), - None, - )?; + pub(crate) fn remove_if_equal(&self, gc_todo_tree: &db::Tree) -> Result<(), Error> { + gc_todo_tree.db().transaction(|txn| { + let key = self.todo_table_key(); + if txn.get(gc_todo_tree, &key)?.as_deref() == Some(self.value_hash.as_slice()) { + txn.remove(gc_todo_tree, &key)?; + } + Ok(()) + })?; Ok(()) } diff --git a/src/table/merkle.rs b/src/table/merkle.rs index be0ae243..596d5805 100644 --- a/src/table/merkle.rs +++ b/src/table/merkle.rs @@ -291,10 +291,6 @@ impl MerkleUpdater { Ok(self.data.merkle_tree.len()?) } - pub fn merkle_tree_fast_len(&self) -> Result, Error> { - Ok(self.data.merkle_tree.fast_len()?) - } - pub fn todo_len(&self) -> Result { Ok(self.data.merkle_todo.len()?) } diff --git a/src/table/metrics.rs b/src/table/metrics.rs index 8318a84f..7bb0959a 100644 --- a/src/table/metrics.rs +++ b/src/table/metrics.rs @@ -1,7 +1,6 @@ use opentelemetry::{global, metrics::*, KeyValue}; use garage_db as db; -use garage_db::counted_tree_hack::CountedTree; /// TableMetrics reference all counter used for metrics pub struct TableMetrics { @@ -27,7 +26,7 @@ impl TableMetrics { store: db::Tree, merkle_tree: db::Tree, merkle_todo: db::Tree, - gc_todo: CountedTree, + gc_todo: db::Tree, ) -> Self { let meter = global::meter(table_name); TableMetrics { @@ -35,9 +34,9 @@ impl TableMetrics { .u64_value_observer( "table.size", move |observer| { - if let Ok(Some(v)) = store.fast_len() { + if let Ok(value) = store.len() { observer.observe( - v as u64, + value as u64, &[KeyValue::new("table_name", table_name)], ); } @@ -49,9 +48,9 @@ impl TableMetrics { .u64_value_observer( "table.merkle_tree_size", move |observer| { - if let Ok(Some(v)) = merkle_tree.fast_len() { + if let Ok(value) = merkle_tree.len() { observer.observe( - v as u64, + value as u64, &[KeyValue::new("table_name", table_name)], ); } @@ -77,10 +76,12 @@ impl TableMetrics { .u64_value_observer( "table.gc_todo_queue_length", move |observer| { - observer.observe( - gc_todo.len() as u64, - &[KeyValue::new("table_name", table_name)], - ); + if let Ok(value) = gc_todo.len() { + observer.observe( + value as u64, + &[KeyValue::new("table_name", table_name)], + ); + } }, ) .with_description("Table garbage collector TODO queue length")