281 lines
7.4 KiB
Rust
281 lines
7.4 KiB
Rust
use std::collections::{HashMap, HashSet};
|
|
use std::time::Duration;
|
|
|
|
use format_table::format_table;
|
|
use garage_util::error::*;
|
|
|
|
use garage_rpc::layout::*;
|
|
use garage_rpc::system::*;
|
|
use garage_rpc::*;
|
|
|
|
use garage_model::helper::error::Error as HelperError;
|
|
|
|
use crate::admin::*;
|
|
use crate::cli::*;
|
|
|
|
pub async fn cli_command_dispatch(
|
|
cmd: Command,
|
|
system_rpc_endpoint: &Endpoint<SystemRpc, ()>,
|
|
admin_rpc_endpoint: &Endpoint<AdminRpc, ()>,
|
|
rpc_host: NodeID,
|
|
) -> Result<(), HelperError> {
|
|
match cmd {
|
|
Command::Status => Ok(cmd_status(system_rpc_endpoint, rpc_host).await?),
|
|
Command::Node(NodeOperation::Connect(connect_opt)) => {
|
|
Ok(cmd_connect(system_rpc_endpoint, rpc_host, connect_opt).await?)
|
|
}
|
|
Command::Layout(layout_opt) => {
|
|
Ok(cli_layout_command_dispatch(layout_opt, system_rpc_endpoint, rpc_host).await?)
|
|
}
|
|
Command::Bucket(bo) => {
|
|
cmd_admin(admin_rpc_endpoint, rpc_host, AdminRpc::BucketOperation(bo)).await
|
|
}
|
|
Command::Key(ko) => {
|
|
cmd_admin(admin_rpc_endpoint, rpc_host, AdminRpc::KeyOperation(ko)).await
|
|
}
|
|
Command::Repair(ro) => {
|
|
cmd_admin(admin_rpc_endpoint, rpc_host, AdminRpc::LaunchRepair(ro)).await
|
|
}
|
|
Command::Stats(so) => cmd_admin(admin_rpc_endpoint, rpc_host, AdminRpc::Stats(so)).await,
|
|
Command::Worker(wo) => cmd_admin(admin_rpc_endpoint, rpc_host, AdminRpc::Worker(wo)).await,
|
|
Command::Block(bo) => {
|
|
cmd_admin(admin_rpc_endpoint, rpc_host, AdminRpc::BlockOperation(bo)).await
|
|
}
|
|
Command::Meta(mo) => {
|
|
cmd_admin(admin_rpc_endpoint, rpc_host, AdminRpc::MetaOperation(mo)).await
|
|
}
|
|
_ => unreachable!(),
|
|
}
|
|
}
|
|
|
|
pub async fn cmd_status(rpc_cli: &Endpoint<SystemRpc, ()>, rpc_host: NodeID) -> Result<(), Error> {
|
|
let status = fetch_status(rpc_cli, rpc_host).await?;
|
|
let layout = fetch_layout(rpc_cli, rpc_host).await?;
|
|
|
|
println!("==== HEALTHY NODES ====");
|
|
let mut healthy_nodes =
|
|
vec!["ID\tHostname\tAddress\tTags\tZone\tCapacity\tDataAvail".to_string()];
|
|
for adv in status.iter().filter(|adv| adv.is_up) {
|
|
let host = adv.status.hostname.as_deref().unwrap_or("?");
|
|
let addr = match adv.addr {
|
|
Some(addr) => addr.to_string(),
|
|
None => "N/A".to_string(),
|
|
};
|
|
if let Some(NodeRoleV(Some(cfg))) = layout.current().roles.get(&adv.id) {
|
|
let data_avail = match &adv.status.data_disk_avail {
|
|
_ if cfg.capacity.is_none() => "N/A".into(),
|
|
Some((avail, total)) => {
|
|
let pct = (*avail as f64) / (*total as f64) * 100.;
|
|
let avail = bytesize::ByteSize::b(*avail);
|
|
format!("{} ({:.1}%)", avail, pct)
|
|
}
|
|
None => "?".into(),
|
|
};
|
|
healthy_nodes.push(format!(
|
|
"{id:?}\t{host}\t{addr}\t[{tags}]\t{zone}\t{capacity}\t{data_avail}",
|
|
id = adv.id,
|
|
host = host,
|
|
addr = addr,
|
|
tags = cfg.tags.join(","),
|
|
zone = cfg.zone,
|
|
capacity = cfg.capacity_string(),
|
|
data_avail = data_avail,
|
|
));
|
|
} else {
|
|
let prev_role = layout
|
|
.versions
|
|
.iter()
|
|
.rev()
|
|
.find_map(|x| match x.roles.get(&adv.id) {
|
|
Some(NodeRoleV(Some(cfg))) => Some(cfg),
|
|
_ => None,
|
|
});
|
|
if let Some(cfg) = prev_role {
|
|
healthy_nodes.push(format!(
|
|
"{id:?}\t{host}\t{addr}\t[{tags}]\t{zone}\tdraining metadata...",
|
|
id = adv.id,
|
|
host = host,
|
|
addr = addr,
|
|
tags = cfg.tags.join(","),
|
|
zone = cfg.zone,
|
|
));
|
|
} else {
|
|
let new_role = match layout.staging.get().roles.get(&adv.id) {
|
|
Some(NodeRoleV(Some(_))) => "pending...",
|
|
_ => "NO ROLE ASSIGNED",
|
|
};
|
|
healthy_nodes.push(format!(
|
|
"{id:?}\t{h}\t{addr}\t\t\t{new_role}",
|
|
id = adv.id,
|
|
h = host,
|
|
addr = addr,
|
|
new_role = new_role,
|
|
));
|
|
}
|
|
}
|
|
}
|
|
format_table(healthy_nodes);
|
|
|
|
// Determine which nodes are unhealthy and print that to stdout
|
|
let status_map = status
|
|
.iter()
|
|
.map(|adv| (adv.id, adv))
|
|
.collect::<HashMap<_, _>>();
|
|
|
|
let tf = timeago::Formatter::new();
|
|
let mut drain_msg = false;
|
|
let mut failed_nodes = vec!["ID\tHostname\tTags\tZone\tCapacity\tLast seen".to_string()];
|
|
let mut listed = HashSet::new();
|
|
for ver in layout.versions.iter().rev() {
|
|
for (node, _, role) in ver.roles.items().iter() {
|
|
let cfg = match role {
|
|
NodeRoleV(Some(role)) if role.capacity.is_some() => role,
|
|
_ => continue,
|
|
};
|
|
|
|
if listed.contains(node) {
|
|
continue;
|
|
}
|
|
listed.insert(*node);
|
|
|
|
let adv = status_map.get(node);
|
|
if adv.map(|x| x.is_up).unwrap_or(false) {
|
|
continue;
|
|
}
|
|
|
|
// Node is in a layout version, is not a gateway node, and is not up:
|
|
// it is in a failed state, add proper line to the output
|
|
let (host, last_seen) = match adv {
|
|
Some(adv) => (
|
|
adv.status.hostname.as_deref().unwrap_or("?"),
|
|
adv.last_seen_secs_ago
|
|
.map(|s| tf.convert(Duration::from_secs(s)))
|
|
.unwrap_or_else(|| "never seen".into()),
|
|
),
|
|
None => ("??", "never seen".into()),
|
|
};
|
|
let capacity = if ver.version == layout.current().version {
|
|
cfg.capacity_string()
|
|
} else {
|
|
drain_msg = true;
|
|
"draining metadata...".to_string()
|
|
};
|
|
failed_nodes.push(format!(
|
|
"{id:?}\t{host}\t[{tags}]\t{zone}\t{capacity}\t{last_seen}",
|
|
id = node,
|
|
host = host,
|
|
tags = cfg.tags.join(","),
|
|
zone = cfg.zone,
|
|
capacity = capacity,
|
|
last_seen = last_seen,
|
|
));
|
|
}
|
|
}
|
|
|
|
if failed_nodes.len() > 1 {
|
|
println!("\n==== FAILED NODES ====");
|
|
format_table(failed_nodes);
|
|
if drain_msg {
|
|
println!();
|
|
println!("Your cluster is expecting to drain data from nodes that are currently unavailable.");
|
|
println!("If these nodes are definitely dead, please review the layout history with");
|
|
println!(
|
|
"`garage layout history` and use `garage layout skip-dead-nodes` to force progress."
|
|
);
|
|
}
|
|
}
|
|
|
|
if print_staging_role_changes(&layout) {
|
|
println!();
|
|
println!("Please use `garage layout show` to check the proposed new layout and apply it.");
|
|
println!();
|
|
}
|
|
|
|
Ok(())
|
|
}
|
|
|
|
pub async fn cmd_connect(
|
|
rpc_cli: &Endpoint<SystemRpc, ()>,
|
|
rpc_host: NodeID,
|
|
args: ConnectNodeOpt,
|
|
) -> Result<(), Error> {
|
|
match rpc_cli
|
|
.call(&rpc_host, SystemRpc::Connect(args.node), PRIO_NORMAL)
|
|
.await??
|
|
{
|
|
SystemRpc::Ok => {
|
|
println!("Success.");
|
|
Ok(())
|
|
}
|
|
m => Err(Error::unexpected_rpc_message(m)),
|
|
}
|
|
}
|
|
|
|
pub async fn cmd_admin(
|
|
rpc_cli: &Endpoint<AdminRpc, ()>,
|
|
rpc_host: NodeID,
|
|
args: AdminRpc,
|
|
) -> Result<(), HelperError> {
|
|
match rpc_cli.call(&rpc_host, args, PRIO_NORMAL).await?? {
|
|
AdminRpc::Ok(msg) => {
|
|
println!("{}", msg);
|
|
}
|
|
AdminRpc::BucketList(bl) => {
|
|
print_bucket_list(bl);
|
|
}
|
|
AdminRpc::BucketInfo {
|
|
bucket,
|
|
relevant_keys,
|
|
counters,
|
|
mpu_counters,
|
|
} => {
|
|
print_bucket_info(&bucket, &relevant_keys, &counters, &mpu_counters);
|
|
}
|
|
AdminRpc::KeyList(kl) => {
|
|
print_key_list(kl);
|
|
}
|
|
AdminRpc::KeyInfo(key, rb) => {
|
|
print_key_info(&key, &rb);
|
|
}
|
|
AdminRpc::WorkerList(wi, wlo) => {
|
|
print_worker_list(wi, wlo);
|
|
}
|
|
AdminRpc::WorkerVars(wv) => {
|
|
print_worker_vars(wv);
|
|
}
|
|
AdminRpc::WorkerInfo(tid, wi) => {
|
|
print_worker_info(tid, wi);
|
|
}
|
|
AdminRpc::BlockErrorList(el) => {
|
|
print_block_error_list(el);
|
|
}
|
|
AdminRpc::BlockInfo {
|
|
hash,
|
|
refcount,
|
|
versions,
|
|
uploads,
|
|
} => {
|
|
print_block_info(hash, refcount, versions, uploads);
|
|
}
|
|
r => {
|
|
error!("Unexpected response: {:?}", r);
|
|
}
|
|
}
|
|
Ok(())
|
|
}
|
|
|
|
// ---- utility ----
|
|
|
|
pub async fn fetch_status(
|
|
rpc_cli: &Endpoint<SystemRpc, ()>,
|
|
rpc_host: NodeID,
|
|
) -> Result<Vec<KnownNodeInfo>, Error> {
|
|
match rpc_cli
|
|
.call(&rpc_host, SystemRpc::GetKnownNodes, PRIO_NORMAL)
|
|
.await??
|
|
{
|
|
SystemRpc::ReturnKnownNodes(nodes) => Ok(nodes),
|
|
resp => Err(Error::unexpected_rpc_message(resp)),
|
|
}
|
|
}
|