garage/src/garage/admin/mod.rs

539 lines
14 KiB
Rust
Raw Normal View History

mod block;
mod bucket;
mod key;
use std::collections::HashMap;
2021-03-12 17:16:03 +00:00
use std::fmt::Write;
use std::sync::Arc;
2021-10-14 09:50:12 +00:00
use async_trait::async_trait;
use serde::{Deserialize, Serialize};
use format_table::format_table_to_string;
use garage_util::background::BackgroundRunner;
2021-12-14 12:55:11 +00:00
use garage_util::data::*;
use garage_util::error::Error as GarageError;
2020-04-23 17:05:46 +00:00
use garage_table::replication::*;
2021-03-12 17:16:03 +00:00
use garage_table::*;
use garage_rpc::ring::PARTITION_BITS;
2021-10-14 09:50:12 +00:00
use garage_rpc::*;
2020-04-23 17:05:46 +00:00
use garage_block::manager::BlockResyncErrorInfo;
2020-07-07 11:59:22 +00:00
use garage_model::bucket_table::*;
use garage_model::garage::Garage;
use garage_model::helper::error::{Error, OkOrBadRequest};
2020-07-07 11:59:22 +00:00
use garage_model::key_table::*;
use garage_model::migrate::Migrate;
use garage_model::s3::version_table::Version;
2020-04-23 17:05:46 +00:00
2021-03-12 17:12:31 +00:00
use crate::cli::*;
use crate::repair::online::launch_online_repair;
2021-10-14 09:50:12 +00:00
pub const ADMIN_RPC_PATH: &str = "garage/admin_rpc.rs/Rpc";
#[derive(Debug, Serialize, Deserialize)]
#[allow(clippy::large_enum_variant)]
2021-04-23 20:41:24 +00:00
pub enum AdminRpc {
BucketOperation(BucketOperation),
2020-04-23 18:36:12 +00:00
KeyOperation(KeyOperation),
LaunchRepair(RepairOpt),
Migrate(MigrateOpt),
Stats(StatsOpt),
Worker(WorkerOperation),
BlockOperation(BlockOperation),
// Replies
Ok(String),
BucketList(Vec<Bucket>),
BucketInfo {
bucket: Bucket,
relevant_keys: HashMap<String, Key>,
counters: HashMap<String, i64>,
},
2020-04-23 20:25:45 +00:00
KeyList(Vec<(String, String)>),
KeyInfo(Key, HashMap<Uuid, Bucket>),
WorkerList(
HashMap<usize, garage_util::background::WorkerInfo>,
WorkerListOpt,
),
2023-01-04 12:25:57 +00:00
WorkerVars(Vec<(Uuid, String, String)>),
2022-12-13 11:24:30 +00:00
WorkerInfo(usize, garage_util::background::WorkerInfo),
BlockErrorList(Vec<BlockResyncErrorInfo>),
BlockInfo {
hash: Hash,
refcount: u64,
versions: Vec<Result<Version, Uuid>>,
},
}
impl Rpc for AdminRpc {
type Response = Result<AdminRpc, Error>;
2021-10-14 09:50:12 +00:00
}
pub struct AdminRpcHandler {
garage: Arc<Garage>,
background: Arc<BackgroundRunner>,
2021-10-14 09:50:12 +00:00
endpoint: Arc<Endpoint<AdminRpc, Self>>,
}
impl AdminRpcHandler {
pub fn new(garage: Arc<Garage>, background: Arc<BackgroundRunner>) -> Arc<Self> {
2021-10-14 09:50:12 +00:00
let endpoint = garage.system.netapp.endpoint(ADMIN_RPC_PATH.into());
let admin = Arc::new(Self {
garage,
background,
endpoint,
});
2021-10-14 09:50:12 +00:00
admin.endpoint.set_handler(admin.clone());
admin
}
// ================ MIGRATION COMMANDS ====================
async fn handle_migrate(self: &Arc<Self>, opt: MigrateOpt) -> Result<AdminRpc, Error> {
if !opt.yes {
return Err(Error::BadRequest(
"Please provide the --yes flag to initiate migration operation.".to_string(),
));
}
let m = Migrate {
garage: self.garage.clone(),
};
match opt.what {
MigrateWhat::Buckets050 => m.migrate_buckets050().await,
}?;
Ok(AdminRpc::Ok("Migration successfull.".into()))
}
// ================ REPAIR COMMANDS ====================
2021-04-23 20:41:24 +00:00
async fn handle_launch_repair(self: &Arc<Self>, opt: RepairOpt) -> Result<AdminRpc, Error> {
if !opt.yes {
return Err(Error::BadRequest(
2021-04-23 20:41:24 +00:00
"Please provide the --yes flag to initiate repair operations.".to_string(),
));
}
if opt.all_nodes {
let mut opt_to_send = opt.clone();
opt_to_send.all_nodes = false;
2020-04-19 20:36:36 +00:00
let mut failures = vec![];
let ring = self.garage.system.ring.borrow().clone();
for node in ring.layout.node_ids().iter() {
let node = (*node).into();
let resp = self
2021-10-14 09:50:12 +00:00
.endpoint
.call(
2021-10-14 09:50:12 +00:00
&node,
AdminRpc::LaunchRepair(opt_to_send.clone()),
2021-10-14 09:50:12 +00:00
PRIO_NORMAL,
)
.await;
if !matches!(resp, Ok(Ok(_))) {
2021-10-14 09:50:12 +00:00
failures.push(node);
2020-04-19 20:36:36 +00:00
}
}
if failures.is_empty() {
2021-04-23 20:41:24 +00:00
Ok(AdminRpc::Ok("Repair launched on all nodes".to_string()))
2020-04-19 20:36:36 +00:00
} else {
Err(Error::BadRequest(format!(
2020-04-19 20:36:36 +00:00
"Could not launch repair on nodes: {:?} (launched successfully on other nodes)",
failures
)))
}
} else {
launch_online_repair(&self.garage, &self.background, opt).await?;
2021-04-23 20:41:24 +00:00
Ok(AdminRpc::Ok(format!(
2020-04-19 20:36:36 +00:00
"Repair launched on {:?}",
self.garage.system.id
)))
}
}
// ================ STATS COMMANDS ====================
2021-04-23 20:41:24 +00:00
async fn handle_stats(&self, opt: StatsOpt) -> Result<AdminRpc, Error> {
if opt.all_nodes {
let mut ret = String::new();
let ring = self.garage.system.ring.borrow().clone();
for node in ring.layout.node_ids().iter() {
let mut opt = opt.clone();
opt.all_nodes = false;
opt.skip_global = true;
writeln!(&mut ret, "\n======================").unwrap();
writeln!(&mut ret, "Stats for node {:?}:", node).unwrap();
let node_id = (*node).into();
match self
2021-10-14 09:50:12 +00:00
.endpoint
.call(&node_id, AdminRpc::Stats(opt), PRIO_NORMAL)
.await
{
Ok(Ok(AdminRpc::Ok(s))) => writeln!(&mut ret, "{}", s).unwrap(),
Ok(Ok(x)) => writeln!(&mut ret, "Bad answer: {:?}", x).unwrap(),
Ok(Err(e)) => writeln!(&mut ret, "Remote error: {}", e).unwrap(),
Err(e) => writeln!(&mut ret, "Network error: {}", e).unwrap(),
}
}
writeln!(&mut ret, "\n======================").unwrap();
write!(
&mut ret,
"Cluster statistics:\n\n{}",
self.gather_cluster_stats()
)
.unwrap();
2021-04-23 20:41:24 +00:00
Ok(AdminRpc::Ok(ret))
} else {
Abstract database behind generic interface and implement alternative drivers (#322) - [x] Design interface - [x] Implement Sled backend - [x] Re-implement the SledCountedTree hack ~~on Sled backend~~ on all backends (i.e. over the abstraction) - [x] Convert Garage code to use generic interface - [x] Proof-read converted Garage code - [ ] Test everything well - [x] Implement sqlite backend - [x] Implement LMDB backend - [ ] (Implement Persy backend?) - [ ] (Implement other backends? (like RocksDB, ...)) - [x] Implement backend choice in config file and garage server module - [x] Add CLI for converting between DB formats - Exploit the new interface to put more things in transactions - [x] `.updated()` trigger on Garage tables Fix #284 **Bugs** - [x] When exporting sqlite, trees iterate empty?? - [x] LMDB doesn't work **Known issues for various back-ends** - Sled: - Eats all my RAM and also all my disk space - `.len()` has to traverse the whole table - Is actually quite slow on some operations - And is actually pretty bad code... - Sqlite: - Requires a lock to be taken on all operations. The lock is also taken when iterating on a table with `.iter()`, and the lock isn't released until the iterator is dropped. This means that we must be VERY carefull to not do anything else inside a `.iter()` loop or else we will have a deadlock! Most such cases have been eliminated from the Garage codebase, but there might still be some that remain. If your Garage-over-Sqlite seems to hang/freeze, this is the reason. - (adapter uses a bunch of unsafe code) - Heed (LMDB): - Not suited for 32-bit machines as it has to map the whole DB in memory. - (adpater uses a tiny bit of unsafe code) **My recommendation:** avoid 32-bit machines and use LMDB as much as possible. **Converting databases** is actually quite easy. For example from Sled to LMDB: ```bash cd src/db cargo run --features cli --bin convert -- -i path/to/garage/meta/db -a sled -o path/to/garage/meta/db.lmdb -b lmdb ``` Then, just add this to your `config.toml`: ```toml db_engine = "lmdb" ``` Co-authored-by: Alex Auvolat <alex@adnab.me> Reviewed-on: https://git.deuxfleurs.fr/Deuxfleurs/garage/pulls/322 Co-authored-by: Alex <alex@adnab.me> Co-committed-by: Alex <alex@adnab.me>
2022-06-08 08:01:44 +00:00
Ok(AdminRpc::Ok(self.gather_stats_local(opt)?))
}
}
Abstract database behind generic interface and implement alternative drivers (#322) - [x] Design interface - [x] Implement Sled backend - [x] Re-implement the SledCountedTree hack ~~on Sled backend~~ on all backends (i.e. over the abstraction) - [x] Convert Garage code to use generic interface - [x] Proof-read converted Garage code - [ ] Test everything well - [x] Implement sqlite backend - [x] Implement LMDB backend - [ ] (Implement Persy backend?) - [ ] (Implement other backends? (like RocksDB, ...)) - [x] Implement backend choice in config file and garage server module - [x] Add CLI for converting between DB formats - Exploit the new interface to put more things in transactions - [x] `.updated()` trigger on Garage tables Fix #284 **Bugs** - [x] When exporting sqlite, trees iterate empty?? - [x] LMDB doesn't work **Known issues for various back-ends** - Sled: - Eats all my RAM and also all my disk space - `.len()` has to traverse the whole table - Is actually quite slow on some operations - And is actually pretty bad code... - Sqlite: - Requires a lock to be taken on all operations. The lock is also taken when iterating on a table with `.iter()`, and the lock isn't released until the iterator is dropped. This means that we must be VERY carefull to not do anything else inside a `.iter()` loop or else we will have a deadlock! Most such cases have been eliminated from the Garage codebase, but there might still be some that remain. If your Garage-over-Sqlite seems to hang/freeze, this is the reason. - (adapter uses a bunch of unsafe code) - Heed (LMDB): - Not suited for 32-bit machines as it has to map the whole DB in memory. - (adpater uses a tiny bit of unsafe code) **My recommendation:** avoid 32-bit machines and use LMDB as much as possible. **Converting databases** is actually quite easy. For example from Sled to LMDB: ```bash cd src/db cargo run --features cli --bin convert -- -i path/to/garage/meta/db -a sled -o path/to/garage/meta/db.lmdb -b lmdb ``` Then, just add this to your `config.toml`: ```toml db_engine = "lmdb" ``` Co-authored-by: Alex Auvolat <alex@adnab.me> Reviewed-on: https://git.deuxfleurs.fr/Deuxfleurs/garage/pulls/322 Co-authored-by: Alex <alex@adnab.me> Co-committed-by: Alex <alex@adnab.me>
2022-06-08 08:01:44 +00:00
fn gather_stats_local(&self, opt: StatsOpt) -> Result<String, Error> {
let mut ret = String::new();
2021-03-12 17:16:03 +00:00
writeln!(
&mut ret,
"\nGarage version: {} [features: {}]\nRust compiler version: {}",
2022-09-07 16:36:46 +00:00
garage_util::version::garage_version(),
garage_util::version::garage_features()
2022-09-07 15:05:21 +00:00
.map(|list| list.join(", "))
.unwrap_or_else(|| "(unknown)".into()),
garage_util::version::rust_version(),
2021-03-12 17:16:03 +00:00
)
.unwrap();
2022-12-13 14:43:22 +00:00
Abstract database behind generic interface and implement alternative drivers (#322) - [x] Design interface - [x] Implement Sled backend - [x] Re-implement the SledCountedTree hack ~~on Sled backend~~ on all backends (i.e. over the abstraction) - [x] Convert Garage code to use generic interface - [x] Proof-read converted Garage code - [ ] Test everything well - [x] Implement sqlite backend - [x] Implement LMDB backend - [ ] (Implement Persy backend?) - [ ] (Implement other backends? (like RocksDB, ...)) - [x] Implement backend choice in config file and garage server module - [x] Add CLI for converting between DB formats - Exploit the new interface to put more things in transactions - [x] `.updated()` trigger on Garage tables Fix #284 **Bugs** - [x] When exporting sqlite, trees iterate empty?? - [x] LMDB doesn't work **Known issues for various back-ends** - Sled: - Eats all my RAM and also all my disk space - `.len()` has to traverse the whole table - Is actually quite slow on some operations - And is actually pretty bad code... - Sqlite: - Requires a lock to be taken on all operations. The lock is also taken when iterating on a table with `.iter()`, and the lock isn't released until the iterator is dropped. This means that we must be VERY carefull to not do anything else inside a `.iter()` loop or else we will have a deadlock! Most such cases have been eliminated from the Garage codebase, but there might still be some that remain. If your Garage-over-Sqlite seems to hang/freeze, this is the reason. - (adapter uses a bunch of unsafe code) - Heed (LMDB): - Not suited for 32-bit machines as it has to map the whole DB in memory. - (adpater uses a tiny bit of unsafe code) **My recommendation:** avoid 32-bit machines and use LMDB as much as possible. **Converting databases** is actually quite easy. For example from Sled to LMDB: ```bash cd src/db cargo run --features cli --bin convert -- -i path/to/garage/meta/db -a sled -o path/to/garage/meta/db.lmdb -b lmdb ``` Then, just add this to your `config.toml`: ```toml db_engine = "lmdb" ``` Co-authored-by: Alex Auvolat <alex@adnab.me> Reviewed-on: https://git.deuxfleurs.fr/Deuxfleurs/garage/pulls/322 Co-authored-by: Alex <alex@adnab.me> Co-committed-by: Alex <alex@adnab.me>
2022-06-08 08:01:44 +00:00
writeln!(&mut ret, "\nDatabase engine: {}", self.garage.db.engine()).unwrap();
2022-12-13 14:43:22 +00:00
// Gather table statistics
let mut table = vec![" Table\tItems\tMklItems\tMklTodo\tGcTodo".into()];
table.push(self.gather_table_stats(&self.garage.bucket_table, opt.detailed)?);
table.push(self.gather_table_stats(&self.garage.key_table, opt.detailed)?);
table.push(self.gather_table_stats(&self.garage.object_table, opt.detailed)?);
table.push(self.gather_table_stats(&self.garage.version_table, opt.detailed)?);
table.push(self.gather_table_stats(&self.garage.block_ref_table, opt.detailed)?);
write!(
&mut ret,
"\nTable stats:\n{}",
format_table_to_string(table)
)
.unwrap();
2022-12-13 14:43:22 +00:00
// Gather block manager statistics
writeln!(&mut ret, "\nBlock manager stats:").unwrap();
2022-12-13 14:43:22 +00:00
let rc_len = if opt.detailed {
self.garage.block_manager.rc_len()?.to_string()
} else {
self.garage
.block_manager
.rc_fast_len()?
.map(|x| x.to_string())
.unwrap_or_else(|| "NC".into())
};
writeln!(
&mut ret,
" number of RC entries (~= number of blocks): {}",
rc_len
)
.unwrap();
2021-03-12 17:16:03 +00:00
writeln!(
&mut ret,
" resync queue length: {}",
self.garage.block_manager.resync.queue_len()?
2021-03-12 17:16:03 +00:00
)
.unwrap();
writeln!(
&mut ret,
" blocks with resync errors: {}",
self.garage.block_manager.resync.errors_len()?
)
.unwrap();
2022-12-13 14:43:22 +00:00
if !opt.detailed {
writeln!(&mut ret, "\nIf values are missing above (marked as NC), consider adding the --detailed flag (this will be slow).").unwrap();
}
if !opt.skip_global {
write!(&mut ret, "\n{}", self.gather_cluster_stats()).unwrap();
2022-12-13 14:43:22 +00:00
}
Abstract database behind generic interface and implement alternative drivers (#322) - [x] Design interface - [x] Implement Sled backend - [x] Re-implement the SledCountedTree hack ~~on Sled backend~~ on all backends (i.e. over the abstraction) - [x] Convert Garage code to use generic interface - [x] Proof-read converted Garage code - [ ] Test everything well - [x] Implement sqlite backend - [x] Implement LMDB backend - [ ] (Implement Persy backend?) - [ ] (Implement other backends? (like RocksDB, ...)) - [x] Implement backend choice in config file and garage server module - [x] Add CLI for converting between DB formats - Exploit the new interface to put more things in transactions - [x] `.updated()` trigger on Garage tables Fix #284 **Bugs** - [x] When exporting sqlite, trees iterate empty?? - [x] LMDB doesn't work **Known issues for various back-ends** - Sled: - Eats all my RAM and also all my disk space - `.len()` has to traverse the whole table - Is actually quite slow on some operations - And is actually pretty bad code... - Sqlite: - Requires a lock to be taken on all operations. The lock is also taken when iterating on a table with `.iter()`, and the lock isn't released until the iterator is dropped. This means that we must be VERY carefull to not do anything else inside a `.iter()` loop or else we will have a deadlock! Most such cases have been eliminated from the Garage codebase, but there might still be some that remain. If your Garage-over-Sqlite seems to hang/freeze, this is the reason. - (adapter uses a bunch of unsafe code) - Heed (LMDB): - Not suited for 32-bit machines as it has to map the whole DB in memory. - (adpater uses a tiny bit of unsafe code) **My recommendation:** avoid 32-bit machines and use LMDB as much as possible. **Converting databases** is actually quite easy. For example from Sled to LMDB: ```bash cd src/db cargo run --features cli --bin convert -- -i path/to/garage/meta/db -a sled -o path/to/garage/meta/db.lmdb -b lmdb ``` Then, just add this to your `config.toml`: ```toml db_engine = "lmdb" ``` Co-authored-by: Alex Auvolat <alex@adnab.me> Reviewed-on: https://git.deuxfleurs.fr/Deuxfleurs/garage/pulls/322 Co-authored-by: Alex <alex@adnab.me> Co-committed-by: Alex <alex@adnab.me>
2022-06-08 08:01:44 +00:00
Ok(ret)
}
fn gather_cluster_stats(&self) -> String {
let mut ret = String::new();
// Gather storage node and free space statistics
let layout = &self.garage.system.ring.borrow().layout;
let mut node_partition_count = HashMap::<Uuid, u64>::new();
for short_id in layout.ring_assignation_data.iter() {
let id = layout.node_id_vec[*short_id as usize];
*node_partition_count.entry(id).or_default() += 1;
}
let node_info = self
.garage
.system
.get_known_nodes()
.into_iter()
.map(|n| (n.id, n))
.collect::<HashMap<_, _>>();
let mut table = vec![" ID\tHostname\tZone\tCapacity\tPart.\tDataAvail\tMetaAvail".into()];
for (id, parts) in node_partition_count.iter() {
let info = node_info.get(id);
let status = info.map(|x| &x.status);
let role = layout.roles.get(id).and_then(|x| x.0.as_ref());
let hostname = status.map(|x| x.hostname.as_str()).unwrap_or("?");
let zone = role.map(|x| x.zone.as_str()).unwrap_or("?");
2023-01-26 16:05:31 +00:00
let capacity = role
.map(|x| x.capacity_string())
.unwrap_or_else(|| "?".into());
let avail_str = |x| match x {
Some((avail, total)) => {
let pct = (avail as f64) / (total as f64) * 100.;
let avail = bytesize::ByteSize::b(avail);
let total = bytesize::ByteSize::b(total);
format!("{}/{} ({:.1}%)", avail, total, pct)
}
None => "?".into(),
};
let data_avail = avail_str(status.and_then(|x| x.data_disk_avail));
let meta_avail = avail_str(status.and_then(|x| x.meta_disk_avail));
table.push(format!(
" {:?}\t{}\t{}\t{}\t{}\t{}\t{}",
id, hostname, zone, capacity, parts, data_avail, meta_avail
));
}
write!(
&mut ret,
"Storage nodes:\n{}",
format_table_to_string(table)
)
.unwrap();
let meta_part_avail = node_partition_count
.iter()
.filter_map(|(id, parts)| {
node_info
.get(id)
.and_then(|x| x.status.meta_disk_avail)
.map(|c| c.0 / *parts)
})
.collect::<Vec<_>>();
let data_part_avail = node_partition_count
.iter()
.filter_map(|(id, parts)| {
node_info
.get(id)
.and_then(|x| x.status.data_disk_avail)
.map(|c| c.0 / *parts)
})
.collect::<Vec<_>>();
if !meta_part_avail.is_empty() && !data_part_avail.is_empty() {
let meta_avail =
bytesize::ByteSize(meta_part_avail.iter().min().unwrap() * (1 << PARTITION_BITS));
let data_avail =
bytesize::ByteSize(data_part_avail.iter().min().unwrap() * (1 << PARTITION_BITS));
writeln!(
&mut ret,
"\nEstimated available storage space cluster-wide (might be lower in practice):"
)
.unwrap();
if meta_part_avail.len() < node_partition_count.len()
|| data_part_avail.len() < node_partition_count.len()
{
writeln!(&mut ret, " data: < {}", data_avail).unwrap();
writeln!(&mut ret, " metadata: < {}", meta_avail).unwrap();
writeln!(&mut ret, "A precise estimate could not be given as information is missing for some storage nodes.").unwrap();
} else {
writeln!(&mut ret, " data: {}", data_avail).unwrap();
writeln!(&mut ret, " metadata: {}", meta_avail).unwrap();
}
}
ret
}
Abstract database behind generic interface and implement alternative drivers (#322) - [x] Design interface - [x] Implement Sled backend - [x] Re-implement the SledCountedTree hack ~~on Sled backend~~ on all backends (i.e. over the abstraction) - [x] Convert Garage code to use generic interface - [x] Proof-read converted Garage code - [ ] Test everything well - [x] Implement sqlite backend - [x] Implement LMDB backend - [ ] (Implement Persy backend?) - [ ] (Implement other backends? (like RocksDB, ...)) - [x] Implement backend choice in config file and garage server module - [x] Add CLI for converting between DB formats - Exploit the new interface to put more things in transactions - [x] `.updated()` trigger on Garage tables Fix #284 **Bugs** - [x] When exporting sqlite, trees iterate empty?? - [x] LMDB doesn't work **Known issues for various back-ends** - Sled: - Eats all my RAM and also all my disk space - `.len()` has to traverse the whole table - Is actually quite slow on some operations - And is actually pretty bad code... - Sqlite: - Requires a lock to be taken on all operations. The lock is also taken when iterating on a table with `.iter()`, and the lock isn't released until the iterator is dropped. This means that we must be VERY carefull to not do anything else inside a `.iter()` loop or else we will have a deadlock! Most such cases have been eliminated from the Garage codebase, but there might still be some that remain. If your Garage-over-Sqlite seems to hang/freeze, this is the reason. - (adapter uses a bunch of unsafe code) - Heed (LMDB): - Not suited for 32-bit machines as it has to map the whole DB in memory. - (adpater uses a tiny bit of unsafe code) **My recommendation:** avoid 32-bit machines and use LMDB as much as possible. **Converting databases** is actually quite easy. For example from Sled to LMDB: ```bash cd src/db cargo run --features cli --bin convert -- -i path/to/garage/meta/db -a sled -o path/to/garage/meta/db.lmdb -b lmdb ``` Then, just add this to your `config.toml`: ```toml db_engine = "lmdb" ``` Co-authored-by: Alex Auvolat <alex@adnab.me> Reviewed-on: https://git.deuxfleurs.fr/Deuxfleurs/garage/pulls/322 Co-authored-by: Alex <alex@adnab.me> Co-committed-by: Alex <alex@adnab.me>
2022-06-08 08:01:44 +00:00
fn gather_table_stats<F, R>(
&self,
t: &Arc<Table<F, R>>,
2022-12-13 14:43:22 +00:00
detailed: bool,
) -> Result<String, Error>
2021-03-16 10:43:58 +00:00
where
F: TableSchema + 'static,
R: TableReplication + 'static,
{
2022-12-13 14:43:22 +00:00
let (data_len, mkl_len) = if detailed {
(
t.data.store.len().map_err(GarageError::from)?.to_string(),
t.merkle_updater.merkle_tree_len()?.to_string(),
Abstract database behind generic interface and implement alternative drivers (#322) - [x] Design interface - [x] Implement Sled backend - [x] Re-implement the SledCountedTree hack ~~on Sled backend~~ on all backends (i.e. over the abstraction) - [x] Convert Garage code to use generic interface - [x] Proof-read converted Garage code - [ ] Test everything well - [x] Implement sqlite backend - [x] Implement LMDB backend - [ ] (Implement Persy backend?) - [ ] (Implement other backends? (like RocksDB, ...)) - [x] Implement backend choice in config file and garage server module - [x] Add CLI for converting between DB formats - Exploit the new interface to put more things in transactions - [x] `.updated()` trigger on Garage tables Fix #284 **Bugs** - [x] When exporting sqlite, trees iterate empty?? - [x] LMDB doesn't work **Known issues for various back-ends** - Sled: - Eats all my RAM and also all my disk space - `.len()` has to traverse the whole table - Is actually quite slow on some operations - And is actually pretty bad code... - Sqlite: - Requires a lock to be taken on all operations. The lock is also taken when iterating on a table with `.iter()`, and the lock isn't released until the iterator is dropped. This means that we must be VERY carefull to not do anything else inside a `.iter()` loop or else we will have a deadlock! Most such cases have been eliminated from the Garage codebase, but there might still be some that remain. If your Garage-over-Sqlite seems to hang/freeze, this is the reason. - (adapter uses a bunch of unsafe code) - Heed (LMDB): - Not suited for 32-bit machines as it has to map the whole DB in memory. - (adpater uses a tiny bit of unsafe code) **My recommendation:** avoid 32-bit machines and use LMDB as much as possible. **Converting databases** is actually quite easy. For example from Sled to LMDB: ```bash cd src/db cargo run --features cli --bin convert -- -i path/to/garage/meta/db -a sled -o path/to/garage/meta/db.lmdb -b lmdb ``` Then, just add this to your `config.toml`: ```toml db_engine = "lmdb" ``` Co-authored-by: Alex Auvolat <alex@adnab.me> Reviewed-on: https://git.deuxfleurs.fr/Deuxfleurs/garage/pulls/322 Co-authored-by: Alex <alex@adnab.me> Co-committed-by: Alex <alex@adnab.me>
2022-06-08 08:01:44 +00:00
)
2022-12-13 14:43:22 +00:00
} else {
(
t.data
.store
.fast_len()
.map_err(GarageError::from)?
.map(|x| x.to_string())
.unwrap_or_else(|| "NC".into()),
t.merkle_updater
.merkle_tree_fast_len()?
.map(|x| x.to_string())
.unwrap_or_else(|| "NC".into()),
2021-03-16 15:35:10 +00:00
)
2022-12-13 14:43:22 +00:00
};
Abstract database behind generic interface and implement alternative drivers (#322) - [x] Design interface - [x] Implement Sled backend - [x] Re-implement the SledCountedTree hack ~~on Sled backend~~ on all backends (i.e. over the abstraction) - [x] Convert Garage code to use generic interface - [x] Proof-read converted Garage code - [ ] Test everything well - [x] Implement sqlite backend - [x] Implement LMDB backend - [ ] (Implement Persy backend?) - [ ] (Implement other backends? (like RocksDB, ...)) - [x] Implement backend choice in config file and garage server module - [x] Add CLI for converting between DB formats - Exploit the new interface to put more things in transactions - [x] `.updated()` trigger on Garage tables Fix #284 **Bugs** - [x] When exporting sqlite, trees iterate empty?? - [x] LMDB doesn't work **Known issues for various back-ends** - Sled: - Eats all my RAM and also all my disk space - `.len()` has to traverse the whole table - Is actually quite slow on some operations - And is actually pretty bad code... - Sqlite: - Requires a lock to be taken on all operations. The lock is also taken when iterating on a table with `.iter()`, and the lock isn't released until the iterator is dropped. This means that we must be VERY carefull to not do anything else inside a `.iter()` loop or else we will have a deadlock! Most such cases have been eliminated from the Garage codebase, but there might still be some that remain. If your Garage-over-Sqlite seems to hang/freeze, this is the reason. - (adapter uses a bunch of unsafe code) - Heed (LMDB): - Not suited for 32-bit machines as it has to map the whole DB in memory. - (adpater uses a tiny bit of unsafe code) **My recommendation:** avoid 32-bit machines and use LMDB as much as possible. **Converting databases** is actually quite easy. For example from Sled to LMDB: ```bash cd src/db cargo run --features cli --bin convert -- -i path/to/garage/meta/db -a sled -o path/to/garage/meta/db.lmdb -b lmdb ``` Then, just add this to your `config.toml`: ```toml db_engine = "lmdb" ``` Co-authored-by: Alex Auvolat <alex@adnab.me> Reviewed-on: https://git.deuxfleurs.fr/Deuxfleurs/garage/pulls/322 Co-authored-by: Alex <alex@adnab.me> Co-committed-by: Alex <alex@adnab.me>
2022-06-08 08:01:44 +00:00
2022-12-13 14:43:22 +00:00
Ok(format!(
" {}\t{}\t{}\t{}\t{}",
F::TABLE_NAME,
data_len,
mkl_len,
t.merkle_updater.todo_len()?,
t.data.gc_todo_len()?
))
}
// ================ WORKER COMMANDS ====================
async fn handle_worker_cmd(&self, cmd: &WorkerOperation) -> Result<AdminRpc, Error> {
match cmd {
WorkerOperation::List { opt } => {
let workers = self.background.get_worker_info();
Ok(AdminRpc::WorkerList(workers, *opt))
}
WorkerOperation::Info { tid } => {
2022-12-13 11:24:30 +00:00
let info = self
.background
.get_worker_info()
.get(tid)
2022-12-13 11:24:30 +00:00
.ok_or_bad_request(format!("No worker with TID {}", tid))?
.clone();
Ok(AdminRpc::WorkerInfo(*tid, info))
2022-12-13 11:24:30 +00:00
}
2023-01-04 12:25:57 +00:00
WorkerOperation::Get {
all_nodes,
variable,
} => self.handle_get_var(*all_nodes, variable).await,
WorkerOperation::Set {
all_nodes,
variable,
value,
} => self.handle_set_var(*all_nodes, variable, value).await,
}
}
async fn handle_get_var(
&self,
all_nodes: bool,
variable: &Option<String>,
) -> Result<AdminRpc, Error> {
if all_nodes {
let mut ret = vec![];
let ring = self.garage.system.ring.borrow().clone();
for node in ring.layout.node_ids().iter() {
let node = (*node).into();
match self
.endpoint
.call(
&node,
AdminRpc::Worker(WorkerOperation::Get {
all_nodes: false,
variable: variable.clone(),
}),
PRIO_NORMAL,
)
.await??
{
AdminRpc::WorkerVars(v) => ret.extend(v),
m => return Err(GarageError::unexpected_rpc_message(m).into()),
}
}
2023-01-04 12:25:57 +00:00
Ok(AdminRpc::WorkerVars(ret))
} else {
#[allow(clippy::collapsible_else_if)]
if let Some(v) = variable {
Ok(AdminRpc::WorkerVars(vec![(
self.garage.system.id,
v.clone(),
self.garage.bg_vars.get(v)?,
)]))
} else {
let mut vars = self.garage.bg_vars.get_all();
vars.sort();
Ok(AdminRpc::WorkerVars(
vars.into_iter()
.map(|(k, v)| (self.garage.system.id, k.to_string(), v))
.collect(),
))
}
}
}
2023-01-04 12:25:57 +00:00
async fn handle_set_var(
&self,
all_nodes: bool,
variable: &str,
value: &str,
) -> Result<AdminRpc, Error> {
if all_nodes {
let mut ret = vec![];
let ring = self.garage.system.ring.borrow().clone();
for node in ring.layout.node_ids().iter() {
let node = (*node).into();
match self
.endpoint
.call(
&node,
AdminRpc::Worker(WorkerOperation::Set {
all_nodes: false,
variable: variable.to_string(),
value: value.to_string(),
}),
PRIO_NORMAL,
)
.await??
{
AdminRpc::WorkerVars(v) => ret.extend(v),
m => return Err(GarageError::unexpected_rpc_message(m).into()),
}
}
Ok(AdminRpc::WorkerVars(ret))
} else {
self.garage.bg_vars.set(variable, value)?;
Ok(AdminRpc::WorkerVars(vec![(
self.garage.system.id,
variable.to_string(),
value.to_string(),
)]))
}
}
}
2021-10-14 09:50:12 +00:00
#[async_trait]
impl EndpointHandler<AdminRpc> for AdminRpcHandler {
async fn handle(
self: &Arc<Self>,
message: &AdminRpc,
_from: NodeID,
) -> Result<AdminRpc, Error> {
match message {
2021-10-14 09:50:12 +00:00
AdminRpc::BucketOperation(bo) => self.handle_bucket_cmd(bo).await,
AdminRpc::KeyOperation(ko) => self.handle_key_cmd(ko).await,
AdminRpc::Migrate(opt) => self.handle_migrate(opt.clone()).await,
2021-10-14 09:50:12 +00:00
AdminRpc::LaunchRepair(opt) => self.handle_launch_repair(opt.clone()).await,
AdminRpc::Stats(opt) => self.handle_stats(opt.clone()).await,
AdminRpc::Worker(wo) => self.handle_worker_cmd(wo).await,
AdminRpc::BlockOperation(bo) => self.handle_block_cmd(bo).await,
m => Err(GarageError::unexpected_rpc_message(m).into()),
2021-10-14 09:50:12 +00:00
}
}
}