forked from Deuxfleurs/garage
cli: show when nodes are draining metadata
This commit is contained in:
parent
78362140f5
commit
539a920313
1 changed file with 108 additions and 64 deletions
|
@ -1,4 +1,4 @@
|
||||||
use std::collections::HashSet;
|
use std::collections::{HashMap, HashSet};
|
||||||
use std::time::Duration;
|
use std::time::Duration;
|
||||||
|
|
||||||
use format_table::format_table;
|
use format_table::format_table;
|
||||||
|
@ -62,35 +62,69 @@ pub async fn cmd_status(rpc_cli: &Endpoint<SystemRpc, ()>, rpc_host: NodeID) ->
|
||||||
let mut healthy_nodes =
|
let mut healthy_nodes =
|
||||||
vec!["ID\tHostname\tAddress\tTags\tZone\tCapacity\tDataAvail".to_string()];
|
vec!["ID\tHostname\tAddress\tTags\tZone\tCapacity\tDataAvail".to_string()];
|
||||||
for adv in status.iter().filter(|adv| adv.is_up) {
|
for adv in status.iter().filter(|adv| adv.is_up) {
|
||||||
match layout.current().roles.get(&adv.id) {
|
if let Some(NodeRoleV(Some(cfg))) = layout.current().roles.get(&adv.id) {
|
||||||
Some(NodeRoleV(Some(cfg))) => {
|
let data_avail = match &adv.status.data_disk_avail {
|
||||||
let data_avail = match &adv.status.data_disk_avail {
|
_ if cfg.capacity.is_none() => "N/A".into(),
|
||||||
_ if cfg.capacity.is_none() => "N/A".into(),
|
Some((avail, total)) => {
|
||||||
Some((avail, total)) => {
|
let pct = (*avail as f64) / (*total as f64) * 100.;
|
||||||
let pct = (*avail as f64) / (*total as f64) * 100.;
|
let avail = bytesize::ByteSize::b(*avail);
|
||||||
let avail = bytesize::ByteSize::b(*avail);
|
format!("{} ({:.1}%)", avail, pct)
|
||||||
format!("{} ({:.1}%)", avail, pct)
|
}
|
||||||
}
|
None => "?".into(),
|
||||||
None => "?".into(),
|
};
|
||||||
};
|
healthy_nodes.push(format!(
|
||||||
|
"{id:?}\t{host}\t{addr}\t[{tags}]\t{zone}\t{capacity}\t{data_avail}",
|
||||||
|
id = adv.id,
|
||||||
|
host = adv.status.hostname,
|
||||||
|
addr = adv.addr,
|
||||||
|
tags = cfg.tags.join(","),
|
||||||
|
zone = cfg.zone,
|
||||||
|
capacity = cfg.capacity_string(),
|
||||||
|
data_avail = data_avail,
|
||||||
|
));
|
||||||
|
} else {
|
||||||
|
let prev_role = layout
|
||||||
|
.versions
|
||||||
|
.iter()
|
||||||
|
.rev()
|
||||||
|
.find_map(|x| match x.roles.get(&adv.id) {
|
||||||
|
Some(NodeRoleV(Some(cfg))) => Some(cfg),
|
||||||
|
_ => None,
|
||||||
|
});
|
||||||
|
let historic_role =
|
||||||
|
layout
|
||||||
|
.old_versions
|
||||||
|
.iter()
|
||||||
|
.rev()
|
||||||
|
.find_map(|x| match x.roles.get(&adv.id) {
|
||||||
|
Some(NodeRoleV(Some(cfg))) => Some(cfg),
|
||||||
|
_ => None,
|
||||||
|
});
|
||||||
|
if let Some(cfg) = prev_role {
|
||||||
healthy_nodes.push(format!(
|
healthy_nodes.push(format!(
|
||||||
"{id:?}\t{host}\t{addr}\t[{tags}]\t{zone}\t{capacity}\t{data_avail}",
|
"{id:?}\t{host}\t{addr}\t[{tags}]\t{zone}\tdraining metadata...",
|
||||||
id = adv.id,
|
id = adv.id,
|
||||||
host = adv.status.hostname,
|
host = adv.status.hostname,
|
||||||
addr = adv.addr,
|
addr = adv.addr,
|
||||||
tags = cfg.tags.join(","),
|
tags = cfg.tags.join(","),
|
||||||
zone = cfg.zone,
|
zone = cfg.zone,
|
||||||
capacity = cfg.capacity_string(),
|
|
||||||
data_avail = data_avail,
|
|
||||||
));
|
));
|
||||||
}
|
} else if let Some(cfg) = historic_role {
|
||||||
_ => {
|
healthy_nodes.push(format!(
|
||||||
|
"{id:?}\t{host}\t{addr}\t[{tags}]\t{zone}\tremoved, metadata drained",
|
||||||
|
id = adv.id,
|
||||||
|
host = adv.status.hostname,
|
||||||
|
addr = adv.addr,
|
||||||
|
tags = cfg.tags.join(","),
|
||||||
|
zone = cfg.zone,
|
||||||
|
));
|
||||||
|
} else {
|
||||||
let new_role = match layout.staging.get().roles.get(&adv.id) {
|
let new_role = match layout.staging.get().roles.get(&adv.id) {
|
||||||
Some(NodeRoleV(Some(_))) => "(pending)",
|
Some(NodeRoleV(Some(_))) => "pending...",
|
||||||
_ => "NO ROLE ASSIGNED",
|
_ => "NO ROLE ASSIGNED",
|
||||||
};
|
};
|
||||||
healthy_nodes.push(format!(
|
healthy_nodes.push(format!(
|
||||||
"{id:?}\t{h}\t{addr}\t{new_role}",
|
"{id:?}\t{h}\t{addr}\t\t\t{new_role}",
|
||||||
id = adv.id,
|
id = adv.id,
|
||||||
h = adv.status.hostname,
|
h = adv.status.hostname,
|
||||||
addr = adv.addr,
|
addr = adv.addr,
|
||||||
|
@ -101,55 +135,65 @@ pub async fn cmd_status(rpc_cli: &Endpoint<SystemRpc, ()>, rpc_host: NodeID) ->
|
||||||
}
|
}
|
||||||
format_table(healthy_nodes);
|
format_table(healthy_nodes);
|
||||||
|
|
||||||
let status_keys = status.iter().map(|adv| adv.id).collect::<HashSet<_>>();
|
// Determine which nodes are unhealthy and print that to stdout
|
||||||
let failure_case_1 = status.iter().any(|adv| {
|
let status_map = status
|
||||||
!adv.is_up
|
|
||||||
&& matches!(
|
|
||||||
layout.current().roles.get(&adv.id),
|
|
||||||
Some(NodeRoleV(Some(_)))
|
|
||||||
)
|
|
||||||
});
|
|
||||||
let failure_case_2 = layout
|
|
||||||
.current()
|
|
||||||
.roles
|
|
||||||
.items()
|
|
||||||
.iter()
|
.iter()
|
||||||
.any(|(id, _, v)| !status_keys.contains(id) && v.0.is_some());
|
.map(|adv| (adv.id, adv))
|
||||||
if failure_case_1 || failure_case_2 {
|
.collect::<HashMap<_, _>>();
|
||||||
println!("\n==== FAILED NODES ====");
|
|
||||||
let mut failed_nodes =
|
let tf = timeago::Formatter::new();
|
||||||
vec!["ID\tHostname\tAddress\tTags\tZone\tCapacity\tLast seen".to_string()];
|
let mut failed_nodes =
|
||||||
for adv in status.iter().filter(|adv| !adv.is_up) {
|
vec!["ID\tHostname\tAddress\tTags\tZone\tCapacity\tLast seen".to_string()];
|
||||||
if let Some(NodeRoleV(Some(cfg))) = layout.current().roles.get(&adv.id) {
|
let mut listed = HashSet::new();
|
||||||
let tf = timeago::Formatter::new();
|
for ver in layout.versions.iter().rev() {
|
||||||
failed_nodes.push(format!(
|
for (node, _, role) in ver.roles.items().iter() {
|
||||||
"{id:?}\t{host}\t{addr}\t[{tags}]\t{zone}\t{capacity}\t{last_seen}",
|
let cfg = match role {
|
||||||
id = adv.id,
|
NodeRoleV(Some(role)) if role.capacity.is_some() => role,
|
||||||
host = adv.status.hostname,
|
_ => continue,
|
||||||
addr = adv.addr,
|
};
|
||||||
tags = cfg.tags.join(","),
|
|
||||||
zone = cfg.zone,
|
if listed.contains(node) {
|
||||||
capacity = cfg.capacity_string(),
|
continue;
|
||||||
last_seen = adv
|
}
|
||||||
.last_seen_secs_ago
|
listed.insert(*node);
|
||||||
|
|
||||||
|
let adv = status_map.get(node);
|
||||||
|
if adv.map(|x| x.is_up).unwrap_or(false) {
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Node is in a layout version, is not a gateway node, and is not up:
|
||||||
|
// it is in a failed state, add proper line to the output
|
||||||
|
let (host, addr, last_seen) = match adv {
|
||||||
|
Some(adv) => (
|
||||||
|
adv.status.hostname.as_str(),
|
||||||
|
adv.addr.to_string(),
|
||||||
|
adv.last_seen_secs_ago
|
||||||
.map(|s| tf.convert(Duration::from_secs(s)))
|
.map(|s| tf.convert(Duration::from_secs(s)))
|
||||||
.unwrap_or_else(|| "never seen".into()),
|
.unwrap_or_else(|| "never seen".into()),
|
||||||
));
|
),
|
||||||
}
|
None => ("??", "??".into(), "never seen".into()),
|
||||||
}
|
};
|
||||||
for (id, _, role_v) in layout.current().roles.items().iter() {
|
let capacity = if ver.version == layout.current().version {
|
||||||
if let NodeRoleV(Some(cfg)) = role_v {
|
cfg.capacity_string()
|
||||||
if !status_keys.contains(id) {
|
} else {
|
||||||
failed_nodes.push(format!(
|
"draining metadata...".to_string()
|
||||||
"{id:?}\t??\t??\t[{tags}]\t{zone}\t{capacity}\tnever seen",
|
};
|
||||||
id = id,
|
failed_nodes.push(format!(
|
||||||
tags = cfg.tags.join(","),
|
"{id:?}\t{host}\t{addr}\t[{tags}]\t{zone}\t{capacity}\t{last_seen}",
|
||||||
zone = cfg.zone,
|
id = node,
|
||||||
capacity = cfg.capacity_string(),
|
host = host,
|
||||||
));
|
addr = addr,
|
||||||
}
|
tags = cfg.tags.join(","),
|
||||||
}
|
zone = cfg.zone,
|
||||||
|
capacity = capacity,
|
||||||
|
last_seen = last_seen,
|
||||||
|
));
|
||||||
}
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if failed_nodes.len() > 1 {
|
||||||
|
println!("\n==== FAILED NODES ====");
|
||||||
format_table(failed_nodes);
|
format_table(failed_nodes);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
Loading…
Add table
Reference in a new issue