Garage v1.0 #683

Merged
lx merged 119 commits from next-0.10 into main 2024-04-10 15:23:13 +00:00
14 changed files with 48 additions and 247 deletions
Showing only changes of commit 05c92204ec

View file

@@ -378,11 +378,6 @@ impl BlockManager
         Ok(self.rc.rc.len()?)
     }
 
-    /// Get number of items in the refcount table
-    pub fn rc_fast_len(&self) -> Result<Option<usize>, Error> {
-        Ok(self.rc.rc.fast_len()?)
-    }
-
     /// Send command to start/stop/manager scrub worker
     pub async fn send_scrub_command(&self, cmd: ScrubWorkerCommand) -> Result<(), Error> {
         let tx = self.tx_scrub_command.load();
@@ -398,7 +393,7 @@ impl BlockManager
 
     /// List all resync errors
     pub fn list_resync_errors(&self) -> Result<Vec<BlockResyncErrorInfo>, Error> {
-        let mut blocks = Vec::with_capacity(self.resync.errors.len());
+        let mut blocks = Vec::with_capacity(self.resync.errors.len()?);
         for ent in self.resync.errors.iter()? {
             let (hash, cnt) = ent?;
             let cnt = ErrorCounter::decode(&cnt);
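Note (not part of the diff): with the fast/slow split gone, callers read the refcount table size through the remaining rc_len() accessor, whose body appears in context above. A minimal illustrative sketch of a caller, assuming only items referenced elsewhere in this PR (the function below is not in the codebase):

// Illustrative only: reading the refcount table size after rc_fast_len() is removed.
// BlockManager, Error and rc_len() are the items used elsewhere in this diff.
fn report_rc_size(mgr: &BlockManager) -> Result<(), Error> {
    let n = mgr.rc_len()?; // answered directly by the lmdb/sqlite backend, no cached counter
    println!("refcount table entries: {}", n);
    Ok(())
}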

View file

@@ -1,7 +1,6 @@
 use opentelemetry::{global, metrics::*};
 
 use garage_db as db;
-use garage_db::counted_tree_hack::CountedTree;
 
 /// TableMetrics reference all counter used for metrics
 pub struct BlockManagerMetrics {
@@ -29,8 +28,8 @@ impl BlockManagerMetrics
     pub fn new(
         compression_level: Option<i32>,
         rc_tree: db::Tree,
-        resync_queue: CountedTree,
-        resync_errors: CountedTree,
+        resync_queue: db::Tree,
+        resync_errors: db::Tree,
     ) -> Self {
         let meter = global::meter("garage_model/block");
         Self {
@@ -45,15 +44,17 @@ impl BlockManagerMetrics
                 .init(),
             _rc_size: meter
                 .u64_value_observer("block.rc_size", move |observer| {
-                    if let Ok(Some(v)) = rc_tree.fast_len() {
-                        observer.observe(v as u64, &[])
+                    if let Ok(value) = rc_tree.len() {
+                        observer.observe(value as u64, &[])
                     }
                 })
                 .with_description("Number of blocks known to the reference counter")
                 .init(),
             _resync_queue_len: meter
                 .u64_value_observer("block.resync_queue_length", move |observer| {
-                    observer.observe(resync_queue.len() as u64, &[])
+                    if let Ok(value) = resync_queue.len() {
+                        observer.observe(value as u64, &[]);
+                    }
                 })
                 .with_description(
                     "Number of block hashes queued for local check and possible resync",
@@ -61,7 +62,9 @@ impl BlockManagerMetrics
                 .init(),
             _resync_errored_blocks: meter
                 .u64_value_observer("block.resync_errored_blocks", move |observer| {
-                    observer.observe(resync_errors.len() as u64, &[])
+                    if let Ok(value) = resync_errors.len() {
+                        observer.observe(value as u64, &[]);
+                    }
                 })
                 .with_description("Number of block hashes whose last resync resulted in an error")
                 .init(),
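Note (not part of the diff): every gauge in this file now follows the same shape, because db::Tree::len() is fallible; on error the sample is skipped rather than reported as zero. A condensed sketch of that pattern, with an illustrative metric name, assuming meter and tree are in scope as in BlockManagerMetrics::new():

// Sketch of the fallible-gauge pattern adopted in this file; the metric name
// and description are illustrative, the API calls are the ones in the hunks above.
let _example_len = meter
    .u64_value_observer("block.example_tree_length", move |observer| {
        // len() returns Result<usize>; if it errors, no sample is recorded.
        if let Ok(value) = tree.len() {
            observer.observe(value as u64, &[]);
        }
    })
    .with_description("Illustrative gauge: number of entries in a metadata tree")
    .init();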

View file

@@ -15,7 +15,6 @@ use opentelemetry::{
 };
 
 use garage_db as db;
-use garage_db::counted_tree_hack::CountedTree;
 
 use garage_util::background::*;
 use garage_util::data::*;
@@ -47,9 +46,9 @@ pub(crate) const MAX_RESYNC_WORKERS: usize = 8;
 const INITIAL_RESYNC_TRANQUILITY: u32 = 2;
 
 pub struct BlockResyncManager {
-    pub(crate) queue: CountedTree,
+    pub(crate) queue: db::Tree,
     pub(crate) notify: Arc<Notify>,
-    pub(crate) errors: CountedTree,
+    pub(crate) errors: db::Tree,
 
     busy_set: BusySet,
 
@@ -90,12 +89,10 @@ impl BlockResyncManager
 
         let queue = db
             .open_tree("block_local_resync_queue")
            .expect("Unable to open block_local_resync_queue tree");
-        let queue = CountedTree::new(queue).expect("Could not count block_local_resync_queue");
 
         let errors = db
             .open_tree("block_local_resync_errors")
             .expect("Unable to open block_local_resync_errors tree");
-        let errors = CountedTree::new(errors).expect("Could not count block_local_resync_errors");
 
         let persister = PersisterShared::new(&system.metadata_dir, "resync_cfg");
@@ -110,16 +107,12 @@ impl BlockResyncManager
 
     /// Get lenght of resync queue
     pub fn queue_len(&self) -> Result<usize, Error> {
-        // This currently can't return an error because the CountedTree hack
-        // doesn't error on .len(), but this will change when we remove the hack
-        // (hopefully someday!)
-        Ok(self.queue.len())
+        Ok(self.queue.len()?)
     }
 
     /// Get number of blocks that have an error
     pub fn errors_len(&self) -> Result<usize, Error> {
-        // (see queue_len comment)
-        Ok(self.errors.len())
+        Ok(self.errors.len()?)
     }
 
     /// Clear the error counter for a block and put it in queue immediately

View file

@@ -1,127 +0,0 @@
-//! This hack allows a db tree to keep in RAM a counter of the number of entries
-//! it contains, which is used to call .len() on it. This is usefull only for
-//! the sled backend where .len() otherwise would have to traverse the whole
-//! tree to count items. For sqlite and lmdb, this is mostly useless (but
-//! hopefully not harmfull!). Note that a CountedTree cannot be part of a
-//! transaction.
-
-use std::sync::{
-    atomic::{AtomicUsize, Ordering},
-    Arc,
-};
-
-use crate::{Result, Tree, TxError, Value, ValueIter};
-
-#[derive(Clone)]
-pub struct CountedTree(Arc<CountedTreeInternal>);
-
-struct CountedTreeInternal {
-    tree: Tree,
-    len: AtomicUsize,
-}
-
-impl CountedTree {
-    pub fn new(tree: Tree) -> Result<Self> {
-        let len = tree.len()?;
-        Ok(Self(Arc::new(CountedTreeInternal {
-            tree,
-            len: AtomicUsize::new(len),
-        })))
-    }
-
-    pub fn len(&self) -> usize {
-        self.0.len.load(Ordering::SeqCst)
-    }
-
-    pub fn is_empty(&self) -> bool {
-        self.len() == 0
-    }
-
-    pub fn get<K: AsRef<[u8]>>(&self, key: K) -> Result<Option<Value>> {
-        self.0.tree.get(key)
-    }
-
-    pub fn first(&self) -> Result<Option<(Value, Value)>> {
-        self.0.tree.first()
-    }
-
-    pub fn iter(&self) -> Result<ValueIter<'_>> {
-        self.0.tree.iter()
-    }
-
-    // ---- writing functions ----
-
-    pub fn insert<K, V>(&self, key: K, value: V) -> Result<Option<Value>>
-    where
-        K: AsRef<[u8]>,
-        V: AsRef<[u8]>,
-    {
-        let old_val = self.0.tree.insert(key, value)?;
-        if old_val.is_none() {
-            self.0.len.fetch_add(1, Ordering::SeqCst);
-        }
-        Ok(old_val)
-    }
-
-    pub fn remove<K: AsRef<[u8]>>(&self, key: K) -> Result<Option<Value>> {
-        let old_val = self.0.tree.remove(key)?;
-        if old_val.is_some() {
-            self.0.len.fetch_sub(1, Ordering::SeqCst);
-        }
-        Ok(old_val)
-    }
-
-    pub fn compare_and_swap<K, OV, NV>(
-        &self,
-        key: K,
-        expected_old: Option<OV>,
-        new: Option<NV>,
-    ) -> Result<bool>
-    where
-        K: AsRef<[u8]>,
-        OV: AsRef<[u8]>,
-        NV: AsRef<[u8]>,
-    {
-        let old_some = expected_old.is_some();
-        let new_some = new.is_some();
-
-        let tx_res = self.0.tree.db().transaction(|tx| {
-            let old_val = tx.get(&self.0.tree, &key)?;
-            let is_same = match (&old_val, &expected_old) {
-                (None, None) => true,
-                (Some(x), Some(y)) if x == y.as_ref() => true,
-                _ => false,
-            };
-            if is_same {
-                match &new {
-                    Some(v) => {
-                        tx.insert(&self.0.tree, &key, v)?;
-                    }
-                    None => {
-                        tx.remove(&self.0.tree, &key)?;
-                    }
-                }
-                Ok(())
-            } else {
-                Err(TxError::Abort(()))
-            }
-        });
-
-        match tx_res {
-            Ok(()) => {
-                match (old_some, new_some) {
-                    (false, true) => {
-                        self.0.len.fetch_add(1, Ordering::SeqCst);
-                    }
-                    (true, false) => {
-                        self.0.len.fetch_sub(1, Ordering::SeqCst);
-                    }
-                    _ => (),
-                }
-                Ok(true)
-            }
-            Err(TxError::Abort(())) => Ok(false),
-            Err(TxError::Db(e)) => Err(e),
-        }
-    }
-}
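Note (not part of the diff): the deleted wrapper existed because sled answered len() by walking the whole tree, so the count was cached in an AtomicUsize and adjusted on every insert/remove. With sled gone, lmdb and sqlite answer len() directly, which is what the rest of this PR switches to. A hedged sketch of the resulting call site, reusing the tree name and APIs from the resync code above (the function wrapper and type paths are illustrative):

use garage_db as db;

// Illustrative: open the tree and ask the backend for its length directly.
// Before this PR, the tree was wrapped in CountedTree so len() could be served
// from an in-RAM counter; now the call is fallible and the error propagates.
fn resync_queue_len(database: &db::Db) -> db::Result<usize> {
    let queue = database
        .open_tree("block_local_resync_queue")
        .expect("Unable to open block_local_resync_queue tree");
    queue.len()
}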

View file

@@ -6,8 +6,6 @@ pub mod lmdb_adapter;
 #[cfg(feature = "sqlite")]
 pub mod sqlite_adapter;
 
-pub mod counted_tree_hack;
-
 pub mod open;
 
 #[cfg(test)]
@@ -187,10 +185,6 @@ impl Tree
     pub fn len(&self) -> Result<usize> {
         self.0.len(self.1)
     }
 
-    #[inline]
-    pub fn fast_len(&self) -> Result<Option<usize>> {
-        self.0.fast_len(self.1)
-    }
     #[inline]
     pub fn first(&self) -> Result<Option<(Value, Value)>> {
@@ -326,9 +320,6 @@ pub(crate) trait IDb: Send + Sync
     fn get(&self, tree: usize, key: &[u8]) -> Result<Option<Value>>;
     fn len(&self, tree: usize) -> Result<usize>;
 
-    fn fast_len(&self, _tree: usize) -> Result<Option<usize>> {
-        Ok(None)
-    }
-
     fn insert(&self, tree: usize, key: &[u8], value: &[u8]) -> Result<Option<Value>>;
     fn remove(&self, tree: usize, key: &[u8]) -> Result<Option<Value>>;
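Note (not part of the diff): the lines removed here are the opt-in fast path: the trait shipped a default fast_len() meaning "no cheap count available", only backends with an O(1) count overrode it, and callers had to handle the None case (the "NC" fallbacks removed later in this PR). A sketch of that two-tier design for context, with an illustrative trait name so it stands alone:

use garage_db::{Result, Value};

// Sketch of the pattern being removed; ExampleIDb is illustrative, the method
// signatures mirror the deleted lines above.
pub trait ExampleIDb: Send + Sync {
    fn get(&self, tree: usize, key: &[u8]) -> Result<Option<Value>>;
    fn len(&self, tree: usize) -> Result<usize>;

    // Default answer: no cheap count. lmdb and sqlite overrode this with
    // Ok(Some(self.len(tree)?)); with sled gone, plain len() suffices everywhere.
    fn fast_len(&self, _tree: usize) -> Result<Option<usize>> {
        Ok(None)
    }
}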

View file

@@ -121,10 +121,6 @@ impl IDb for LmdbDb
         Ok(tree.len(&tx)?.try_into().unwrap())
     }
 
-    fn fast_len(&self, tree: usize) -> Result<Option<usize>> {
-        Ok(Some(self.len(tree)?))
-    }
-
     fn insert(&self, tree: usize, key: &[u8], value: &[u8]) -> Result<Option<Value>> {
         let tree = self.get_tree(tree)?;
         let mut tx = self.db.write_txn()?;

View file

@@ -144,10 +144,6 @@ impl IDb for SqliteDb
         }
     }
 
-    fn fast_len(&self, tree: usize) -> Result<Option<usize>> {
-        Ok(Some(self.len(tree)?))
-    }
-
     fn insert(&self, tree: usize, key: &[u8], value: &[u8]) -> Result<Option<Value>> {
         trace!("insert {}: lock db", tree);
         let this = self.0.lock().unwrap();

View file

@@ -217,11 +217,11 @@ impl AdminRpcHandler
 
         // Gather table statistics
         let mut table = vec![" Table\tItems\tMklItems\tMklTodo\tGcTodo".into()];
-        table.push(self.gather_table_stats(&self.garage.bucket_table, opt.detailed)?);
-        table.push(self.gather_table_stats(&self.garage.key_table, opt.detailed)?);
-        table.push(self.gather_table_stats(&self.garage.object_table, opt.detailed)?);
-        table.push(self.gather_table_stats(&self.garage.version_table, opt.detailed)?);
-        table.push(self.gather_table_stats(&self.garage.block_ref_table, opt.detailed)?);
+        table.push(self.gather_table_stats(&self.garage.bucket_table)?);
+        table.push(self.gather_table_stats(&self.garage.key_table)?);
+        table.push(self.gather_table_stats(&self.garage.object_table)?);
+        table.push(self.gather_table_stats(&self.garage.version_table)?);
+        table.push(self.gather_table_stats(&self.garage.block_ref_table)?);
         write!(
             &mut ret,
             "\nTable stats:\n{}",
@@ -231,15 +231,7 @@ impl AdminRpcHandler
 
         // Gather block manager statistics
         writeln!(&mut ret, "\nBlock manager stats:").unwrap();
-        let rc_len = if opt.detailed {
-            self.garage.block_manager.rc_len()?.to_string()
-        } else {
-            self.garage
-                .block_manager
-                .rc_fast_len()?
-                .map(|x| x.to_string())
-                .unwrap_or_else(|| "NC".into())
-        };
+        let rc_len = self.garage.block_manager.rc_len()?.to_string();
 
         writeln!(
             &mut ret,
@@ -260,10 +252,6 @@
         )
         .unwrap();
 
-        if !opt.detailed {
-            writeln!(&mut ret, "\nIf values are missing above (marked as NC), consider adding the --detailed flag (this will be slow).").unwrap();
-        }
-
         if !opt.skip_global {
             write!(&mut ret, "\n{}", self.gather_cluster_stats()).unwrap();
         }
@@ -365,34 +353,13 @@ impl AdminRpcHandler
         ret
     }
 
-    fn gather_table_stats<F, R>(
-        &self,
-        t: &Arc<Table<F, R>>,
-        detailed: bool,
-    ) -> Result<String, Error>
+    fn gather_table_stats<F, R>(&self, t: &Arc<Table<F, R>>) -> Result<String, Error>
     where
         F: TableSchema + 'static,
        R: TableReplication + 'static,
     {
-        let (data_len, mkl_len) = if detailed {
-            (
-                t.data.store.len().map_err(GarageError::from)?.to_string(),
-                t.merkle_updater.merkle_tree_len()?.to_string(),
-            )
-        } else {
-            (
-                t.data
-                    .store
-                    .fast_len()
-                    .map_err(GarageError::from)?
-                    .map(|x| x.to_string())
-                    .unwrap_or_else(|| "NC".into()),
-                t.merkle_updater
-                    .merkle_tree_fast_len()?
-                    .map(|x| x.to_string())
-                    .unwrap_or_else(|| "NC".into()),
-            )
-        };
+        let data_len = t.data.store.len().map_err(GarageError::from)?.to_string();
+        let mkl_len = t.merkle_updater.merkle_tree_len()?.to_string();
 
         Ok(format!(
             " {}\t{}\t{}\t{}\t{}",

View file

@@ -553,10 +553,6 @@ pub struct StatsOpt
     #[structopt(short = "a", long = "all-nodes")]
     pub all_nodes: bool,
 
-    /// Gather detailed statistics (this can be long)
-    #[structopt(short = "d", long = "detailed")]
-    pub detailed: bool,
-
     /// Don't show global cluster stats (internal use in RPC)
     #[structopt(skip)]
     #[serde(default)]

View file

@@ -121,13 +121,7 @@ impl Worker for LifecycleWorker
             mpu_aborted,
             ..
         } => {
-            let n_objects = self
-                .garage
-                .object_table
-                .data
-                .store
-                .fast_len()
-                .unwrap_or(None);
+            let n_objects = self.garage.object_table.data.store.len().ok();
             let progress = match n_objects {
                 None => "...".to_string(),
                 Some(total) => format!(

View file

@@ -6,7 +6,6 @@ use serde_bytes::ByteBuf;
 use tokio::sync::Notify;
 
 use garage_db as db;
-use garage_db::counted_tree_hack::CountedTree;
 
 use garage_util::data::*;
 use garage_util::error::*;
@@ -36,7 +35,7 @@ pub struct TableData<F: TableSchema, R: TableReplication> {
     pub(crate) insert_queue: db::Tree,
     pub(crate) insert_queue_notify: Arc<Notify>,
 
-    pub(crate) gc_todo: CountedTree,
+    pub(crate) gc_todo: db::Tree,
 
     pub(crate) metrics: TableMetrics,
 }
@@ -61,7 +60,6 @@ impl<F: TableSchema, R: TableReplication> TableData<F, R> {
         let gc_todo = db
             .open_tree(format!("{}:gc_todo_v2", F::TABLE_NAME))
             .expect("Unable to open GC DB tree");
-        let gc_todo = CountedTree::new(gc_todo).expect("Cannot count gc_todo_v2");
 
         let metrics = TableMetrics::new(
             F::TABLE_NAME,
@@ -370,6 +368,6 @@ impl<F: TableSchema, R: TableReplication> TableData<F, R> {
     }
 
     pub fn gc_todo_len(&self) -> Result<usize, Error> {
-        Ok(self.gc_todo.len())
+        Ok(self.gc_todo.len()?)
     }
 }

View file

@@ -10,7 +10,7 @@ use serde_bytes::ByteBuf;
 use futures::future::join_all;
 use tokio::sync::watch;
 
-use garage_db::counted_tree_hack::CountedTree;
+use garage_db as db;
 
 use garage_util::background::*;
 use garage_util::data::*;
@@ -376,7 +376,7 @@ impl GcTodoEntry
     }
 
     /// Saves the GcTodoEntry in the gc_todo tree
-    pub(crate) fn save(&self, gc_todo_tree: &CountedTree) -> Result<(), Error> {
+    pub(crate) fn save(&self, gc_todo_tree: &db::Tree) -> Result<(), Error> {
         gc_todo_tree.insert(self.todo_table_key(), self.value_hash.as_slice())?;
         Ok(())
     }
@@ -386,12 +386,14 @@ impl GcTodoEntry
 
     /// This is usefull to remove a todo entry only under the condition
     /// that it has not changed since the time it was read, i.e.
     /// what we have to do is still the same
-    pub(crate) fn remove_if_equal(&self, gc_todo_tree: &CountedTree) -> Result<(), Error> {
-        gc_todo_tree.compare_and_swap::<_, _, &[u8]>(
-            &self.todo_table_key(),
-            Some(self.value_hash),
-            None,
-        )?;
+    pub(crate) fn remove_if_equal(&self, gc_todo_tree: &db::Tree) -> Result<(), Error> {
+        gc_todo_tree.db().transaction(|txn| {
+            let key = self.todo_table_key();
+            if txn.get(gc_todo_tree, &key)?.as_deref() == Some(self.value_hash.as_slice()) {
+                txn.remove(gc_todo_tree, &key)?;
+            }
+            Ok(())
+        })?;
         Ok(())
     }
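Note (not part of the diff): CountedTree::compare_and_swap is replaced by an explicit transaction. The same "remove only if unchanged" step, written as a free function against the transaction API used in the hunk above (db(), transaction(), txn.get(), txn.remove()); the function itself is illustrative, and Error is the same error type as in the hunk:

// Illustrative generic form of the compare-and-remove done in remove_if_equal().
fn remove_if_unchanged(tree: &db::Tree, key: &[u8], expected: &[u8]) -> Result<(), Error> {
    tree.db().transaction(|txn| {
        // Delete only if the stored value still matches what the caller read
        // earlier, so a concurrent update is not silently discarded.
        if txn.get(tree, key)?.as_deref() == Some(expected) {
            txn.remove(tree, key)?;
        }
        Ok(())
    })?;
    Ok(())
}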

View file

@@ -291,10 +291,6 @@ impl<F: TableSchema, R: TableReplication> MerkleUpdater<F, R> {
         Ok(self.data.merkle_tree.len()?)
     }
 
-    pub fn merkle_tree_fast_len(&self) -> Result<Option<usize>, Error> {
-        Ok(self.data.merkle_tree.fast_len()?)
-    }
-
     pub fn todo_len(&self) -> Result<usize, Error> {
         Ok(self.data.merkle_todo.len()?)
     }

View file

@@ -1,7 +1,6 @@
 use opentelemetry::{global, metrics::*, KeyValue};
 
 use garage_db as db;
-use garage_db::counted_tree_hack::CountedTree;
 
 /// TableMetrics reference all counter used for metrics
 pub struct TableMetrics {
@@ -27,7 +26,7 @@ impl TableMetrics
         store: db::Tree,
         merkle_tree: db::Tree,
         merkle_todo: db::Tree,
-        gc_todo: CountedTree,
+        gc_todo: db::Tree,
     ) -> Self {
         let meter = global::meter(table_name);
         TableMetrics {
@@ -35,9 +34,9 @@ impl TableMetrics
                 .u64_value_observer(
                     "table.size",
                     move |observer| {
-                        if let Ok(Some(v)) = store.fast_len() {
+                        if let Ok(value) = store.len() {
                             observer.observe(
-                                v as u64,
+                                value as u64,
                                 &[KeyValue::new("table_name", table_name)],
                             );
                         }
@@ -49,9 +48,9 @@ impl TableMetrics
                 .u64_value_observer(
                     "table.merkle_tree_size",
                     move |observer| {
-                        if let Ok(Some(v)) = merkle_tree.fast_len() {
+                        if let Ok(value) = merkle_tree.len() {
                             observer.observe(
-                                v as u64,
+                                value as u64,
                                 &[KeyValue::new("table_name", table_name)],
                             );
                         }
@@ -77,10 +76,12 @@ impl TableMetrics
                 .u64_value_observer(
                     "table.gc_todo_queue_length",
                     move |observer| {
+                        if let Ok(value) = gc_todo.len() {
                             observer.observe(
-                            gc_todo.len() as u64,
+                                value as u64,
                                 &[KeyValue::new("table_name", table_name)],
                             );
+                        }
                     },
                 )
                 .with_description("Table garbage collector TODO queue length")