From a1442f072ad9427851f49103083582637ddcdbd4 Mon Sep 17 00:00:00 2001 From: Alex Auvolat Date: Fri, 12 Mar 2021 15:40:54 +0100 Subject: [PATCH] Implement garage stats to get info on node contents --- Cargo.lock | 23 ++++++++++++ Makefile | 2 +- src/garage/Cargo.toml | 1 + src/garage/admin_rpc.rs | 80 +++++++++++++++++++++++++++++++++++++++++ src/garage/main.rs | 18 ++++++++++ src/table/data.rs | 2 +- src/table/merkle.rs | 4 +-- 7 files changed, 126 insertions(+), 4 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 45244b8b..b0633e21 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -489,6 +489,7 @@ dependencies = [ "garage_table", "garage_util", "garage_web", + "git-version", "hex", "log", "pretty_env_logger", @@ -705,6 +706,28 @@ dependencies = [ "wasi 0.9.0+wasi-snapshot-preview1", ] +[[package]] +name = "git-version" +version = "0.3.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "94918e83f1e01dedc2e361d00ce9487b14c58c7f40bab148026fa39d42cb41e2" +dependencies = [ + "git-version-macro", + "proc-macro-hack", +] + +[[package]] +name = "git-version-macro" +version = "0.3.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "34a97a52fdee1870a34fa6e4b77570cba531b27d1838874fef4429a791a3d657" +dependencies = [ + "proc-macro-hack", + "proc-macro2", + "quote", + "syn", +] + [[package]] name = "h2" version = "0.2.7" diff --git a/Makefile b/Makefile index 61105856..cbbcee29 100644 --- a/Makefile +++ b/Makefile @@ -4,7 +4,7 @@ DOCKER=lxpz/garage_amd64 all: #cargo fmt || true #RUSTFLAGS="-C link-arg=-fuse-ld=lld" cargo build - cargo build + clear; cargo build $(BIN): #RUSTFLAGS="-C link-arg=-fuse-ld=lld" cargo build --release diff --git a/src/garage/Cargo.toml b/src/garage/Cargo.toml index 115e2d0e..8c28394b 100644 --- a/src/garage/Cargo.toml +++ b/src/garage/Cargo.toml @@ -27,6 +27,7 @@ hex = "0.3" sha2 = "0.8" log = "0.4" pretty_env_logger = "0.4" +git-version = "0.3.4" sled = "0.34" diff --git a/src/garage/admin_rpc.rs b/src/garage/admin_rpc.rs index b4a65cad..42540d15 100644 --- a/src/garage/admin_rpc.rs +++ b/src/garage/admin_rpc.rs @@ -1,4 +1,6 @@ use std::sync::Arc; +use std::fmt::Write; +use std::collections::HashMap; use serde::{Deserialize, Serialize}; @@ -6,6 +8,7 @@ use garage_util::error::Error; use garage_table::crdt::CRDT; use garage_table::*; +use garage_table::replication::*; use garage_rpc::rpc_client::*; use garage_rpc::rpc_server::*; @@ -25,6 +28,7 @@ pub enum AdminRPC { BucketOperation(BucketOperation), KeyOperation(KeyOperation), LaunchRepair(RepairOpt), + Stats(StatsOpt), // Replies Ok(String), @@ -55,6 +59,7 @@ impl AdminRpcHandler { AdminRPC::BucketOperation(bo) => self2.handle_bucket_cmd(bo).await, AdminRPC::KeyOperation(ko) => self2.handle_key_cmd(ko).await, AdminRPC::LaunchRepair(opt) => self2.handle_launch_repair(opt).await, + AdminRPC::Stats(opt) => self2.handle_stats(opt).await, _ => Err(Error::BadRPC(format!("Invalid RPC"))), } } @@ -357,4 +362,79 @@ impl AdminRpcHandler { ))) } } + + async fn handle_stats(&self, opt: StatsOpt) -> Result { + if opt.all_nodes { + + let mut ret = String::new(); + let ring = self.garage.system.ring.borrow().clone(); + + for node in ring.config.members.keys() { + let mut opt = opt.clone(); + opt.all_nodes = false; + + writeln!(&mut ret, "\n======================").unwrap(); + writeln!(&mut ret, "Stats for node {:?}:", node).unwrap(); + match self + .rpc_client + .call( + *node, + AdminRPC::Stats(opt), + ADMIN_RPC_TIMEOUT, + ) + .await + { + Ok(AdminRPC::Ok(s)) => writeln!(&mut ret, "{}", s).unwrap(), + Ok(x) => writeln!(&mut ret, "Bad answer: {:?}", x).unwrap(), + Err(e) => writeln!(&mut ret, "Error: {}", e).unwrap(), + } + } + Ok(AdminRPC::Ok(ret)) + } else { + Ok(AdminRPC::Ok(self.gather_stats_local(opt)?)) + } + } + + fn gather_stats_local(&self, opt: StatsOpt) -> Result { + let mut ret = String::new(); + writeln!(&mut ret, "\nGarage version: {}", git_version::git_version!()).unwrap(); + + // Gather ring statistics + let ring = self.garage.system.ring.borrow().clone(); + let mut ring_nodes = HashMap::new(); + for r in ring.ring.iter() { + for n in r.nodes.iter() { + if !ring_nodes.contains_key(n) { + ring_nodes.insert(*n, 0usize); + } + *ring_nodes.get_mut(n).unwrap() += 1; + } + } + writeln!(&mut ret, "\nRing nodes & partition count:").unwrap(); + for (n, c) in ring_nodes.iter() { + writeln!(&mut ret, " {:?} {}", n, c).unwrap(); + } + + self.gather_table_stats(&mut ret, &self.garage.bucket_table, &opt)?; + self.gather_table_stats(&mut ret, &self.garage.key_table, &opt)?; + self.gather_table_stats(&mut ret, &self.garage.object_table, &opt)?; + self.gather_table_stats(&mut ret, &self.garage.version_table, &opt)?; + self.gather_table_stats(&mut ret, &self.garage.block_ref_table, &opt)?; + + writeln!(&mut ret, "\nBlock manager stats:").unwrap(); + writeln!(&mut ret, " resync queue length: {}", self.garage.block_manager.resync_queue.len()).unwrap(); + + if opt.detailed { + writeln!(&mut ret, "\nDetailed stats not implemented yet.").unwrap(); + } + + Ok(ret) + } + + fn gather_table_stats(&self, to: &mut String, t: &Arc>, _opt: &StatsOpt) -> Result<(), Error> { + writeln!(to, "\nTable stats for {}", t.data.name).unwrap(); + writeln!(to, " number of items: {}", t.data.store.len()).unwrap(); + writeln!(to, " Merkle updater todo queue length: {}", t.data.merkle_updater.todo.len()).unwrap(); + Ok(()) + } } diff --git a/src/garage/main.rs b/src/garage/main.rs index 8757a1bb..0bd70c79 100644 --- a/src/garage/main.rs +++ b/src/garage/main.rs @@ -69,6 +69,10 @@ pub enum Command { /// Start repair of node data #[structopt(name = "repair")] Repair(RepairOpt), + + /// Gather node statistics + #[structopt(name = "stats")] + Stats(StatsOpt), } #[derive(StructOpt, Debug)] @@ -281,6 +285,17 @@ pub enum RepairWhat { BlockRefs, } +#[derive(Serialize, Deserialize, StructOpt, Debug, Clone)] +pub struct StatsOpt { + /// Gather statistics from all nodes + #[structopt(short = "a", long = "all-nodes")] + pub all_nodes: bool, + + /// Gather detailed statistics (this can be long) + #[structopt(short = "d", long = "detailed")] + pub detailed: bool, +} + #[tokio::main] async fn main() { pretty_env_logger::init(); @@ -332,6 +347,9 @@ async fn main() { Command::Repair(ro) => { cmd_admin(admin_rpc_cli, opt.rpc_host, AdminRPC::LaunchRepair(ro)).await } + Command::Stats(so) => { + cmd_admin(admin_rpc_cli, opt.rpc_host, AdminRPC::Stats(so)).await + } }; if let Err(e) = resp { diff --git a/src/table/data.rs b/src/table/data.rs index 2817a849..5e7314d2 100644 --- a/src/table/data.rs +++ b/src/table/data.rs @@ -17,7 +17,7 @@ pub struct TableData { pub instance: F, pub store: sled::Tree, - pub(crate) merkle_updater: Arc, + pub merkle_updater: Arc, } impl TableData diff --git a/src/table/merkle.rs b/src/table/merkle.rs index a164df04..b04a2a88 100644 --- a/src/table/merkle.rs +++ b/src/table/merkle.rs @@ -32,7 +32,7 @@ pub fn hash_of_merkle_partition_opt(p: Option) -> Hash { // 16 bits (two bytes) of item's partition keys' hashes. // It builds one Merkle tree for each of these 2**16 partitions. -pub(crate) struct MerkleUpdater { +pub struct MerkleUpdater { table_name: String, background: Arc, @@ -40,7 +40,7 @@ pub(crate) struct MerkleUpdater { // - key = the key of an item in the main table, ie hash(partition_key)+sort_key // - value = the hash of the full serialized item, if present, // or an empty vec if item is absent (deleted) - pub(crate) todo: sled::Tree, + pub todo: sled::Tree, pub(crate) todo_notify: Notify, // Content of the merkle tree: items where