NLnet task 3 #667

Merged
lx merged 60 commits from nlnet-task3 into next-0.10 2024-01-11 10:58:08 +00:00
Showing only changes of commit 539a920313


@@ -1,4 +1,4 @@
-use std::collections::HashSet;
+use std::collections::{HashMap, HashSet};
 use std::time::Duration;
 use format_table::format_table;
@@ -62,8 +62,7 @@ pub async fn cmd_status(rpc_cli: &Endpoint<SystemRpc, ()>, rpc_host: NodeID) ->
     let mut healthy_nodes =
         vec!["ID\tHostname\tAddress\tTags\tZone\tCapacity\tDataAvail".to_string()];
     for adv in status.iter().filter(|adv| adv.is_up) {
-        match layout.current().roles.get(&adv.id) {
-            Some(NodeRoleV(Some(cfg))) => {
+        if let Some(NodeRoleV(Some(cfg))) = layout.current().roles.get(&adv.id) {
             let data_avail = match &adv.status.data_disk_avail {
                 _ if cfg.capacity.is_none() => "N/A".into(),
                 Some((avail, total)) => {
@@ -83,14 +82,49 @@ pub async fn cmd_status(rpc_cli: &Endpoint<SystemRpc, ()>, rpc_host: NodeID) ->
                 capacity = cfg.capacity_string(),
                 data_avail = data_avail,
             ));
-            }
-            _ => {
-                let new_role = match layout.staging.get().roles.get(&adv.id) {
-                    Some(NodeRoleV(Some(_))) => "(pending)",
-                    _ => "NO ROLE ASSIGNED",
-                };
-                healthy_nodes.push(format!(
-                    "{id:?}\t{h}\t{addr}\t{new_role}",
-                    id = adv.id,
-                    h = adv.status.hostname,
-                    addr = adv.addr,
+        } else {
+            let prev_role = layout
+                .versions
+                .iter()
+                .rev()
+                .find_map(|x| match x.roles.get(&adv.id) {
+                    Some(NodeRoleV(Some(cfg))) => Some(cfg),
+                    _ => None,
+                });
+            let historic_role =
+                layout
+                    .old_versions
+                    .iter()
+                    .rev()
+                    .find_map(|x| match x.roles.get(&adv.id) {
+                        Some(NodeRoleV(Some(cfg))) => Some(cfg),
+                        _ => None,
+                    });
+            if let Some(cfg) = prev_role {
+                healthy_nodes.push(format!(
+                    "{id:?}\t{host}\t{addr}\t[{tags}]\t{zone}\tdraining metadata...",
+                    id = adv.id,
+                    host = adv.status.hostname,
+                    addr = adv.addr,
+                    tags = cfg.tags.join(","),
+                    zone = cfg.zone,
+                ));
+            } else if let Some(cfg) = historic_role {
+                healthy_nodes.push(format!(
+                    "{id:?}\t{host}\t{addr}\t[{tags}]\t{zone}\tremoved, metadata drained",
+                    id = adv.id,
+                    host = adv.status.hostname,
+                    addr = adv.addr,
+                    tags = cfg.tags.join(","),
+                    zone = cfg.zone,
+                ));
+            } else {
+                let new_role = match layout.staging.get().roles.get(&adv.id) {
+                    Some(NodeRoleV(Some(_))) => "pending...",
+                    _ => "NO ROLE ASSIGNED",
+                };
+                healthy_nodes.push(format!(
+                    "{id:?}\t{h}\t{addr}\t\t\t{new_role}",
+                    id = adv.id,
+                    h = adv.status.hostname,
+                    addr = adv.addr,
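
The new branch above distinguishes three states for an up node with no current role by searching older layouts newest-first with find_map. A minimal self-contained sketch of that lookup pattern, with reduced stand-in types (hypothetical simplifications, not Garage's actual NodeRoleV / LayoutVersion definitions):

use std::collections::HashMap;

// Hypothetical, reduced stand-ins for Garage's layout types.
pub struct NodeRole {
    pub zone: String,
    pub tags: Vec<String>,
}
pub struct NodeRoleV(pub Option<NodeRole>);
pub struct LayoutVersion {
    pub roles: HashMap<u64, NodeRoleV>,
}

// Scan versions newest-first and return the first role recorded for `node`,
// mirroring the find_map over layout.versions.iter().rev() in the diff.
pub fn last_known_role(versions: &[LayoutVersion], node: u64) -> Option<&NodeRole> {
    versions.iter().rev().find_map(|v| match v.roles.get(&node) {
        Some(NodeRoleV(Some(cfg))) => Some(cfg),
        _ => None,
    })
}

Scanning in reverse means the newest layout that still names the node wins: a hit in layout.versions produces "draining metadata...", while a hit only in layout.old_versions produces "removed, metadata drained".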
@@ -101,55 +135,65 @@ pub async fn cmd_status(rpc_cli: &Endpoint<SystemRpc, ()>, rpc_host: NodeID) ->
         }
     }
     format_table(healthy_nodes);

-    let status_keys = status.iter().map(|adv| adv.id).collect::<HashSet<_>>();
-    let failure_case_1 = status.iter().any(|adv| {
-        !adv.is_up
-            && matches!(
-                layout.current().roles.get(&adv.id),
-                Some(NodeRoleV(Some(_)))
-            )
-    });
-    let failure_case_2 = layout
-        .current()
-        .roles
-        .items()
-        .iter()
-        .any(|(id, _, v)| !status_keys.contains(id) && v.0.is_some());
-    if failure_case_1 || failure_case_2 {
-        println!("\n==== FAILED NODES ====");
-        let mut failed_nodes =
-            vec!["ID\tHostname\tAddress\tTags\tZone\tCapacity\tLast seen".to_string()];
-        for adv in status.iter().filter(|adv| !adv.is_up) {
-            if let Some(NodeRoleV(Some(cfg))) = layout.current().roles.get(&adv.id) {
-                let tf = timeago::Formatter::new();
-                failed_nodes.push(format!(
-                    "{id:?}\t{host}\t{addr}\t[{tags}]\t{zone}\t{capacity}\t{last_seen}",
-                    id = adv.id,
-                    host = adv.status.hostname,
-                    addr = adv.addr,
-                    tags = cfg.tags.join(","),
-                    zone = cfg.zone,
-                    capacity = cfg.capacity_string(),
-                    last_seen = adv
-                        .last_seen_secs_ago
-                        .map(|s| tf.convert(Duration::from_secs(s)))
-                        .unwrap_or_else(|| "never seen".into()),
-                ));
-            }
-        }
-        for (id, _, role_v) in layout.current().roles.items().iter() {
-            if let NodeRoleV(Some(cfg)) = role_v {
-                if !status_keys.contains(id) {
-                    failed_nodes.push(format!(
-                        "{id:?}\t??\t??\t[{tags}]\t{zone}\t{capacity}\tnever seen",
-                        id = id,
-                        tags = cfg.tags.join(","),
-                        zone = cfg.zone,
-                        capacity = cfg.capacity_string(),
-                    ));
-                }
-            }
-        }
-        format_table(failed_nodes);
-    }
+    // Determine which nodes are unhealthy and print that to stdout
+    let status_map = status
+        .iter()
+        .map(|adv| (adv.id, adv))
+        .collect::<HashMap<_, _>>();
+
+    let tf = timeago::Formatter::new();
+    let mut failed_nodes =
+        vec!["ID\tHostname\tAddress\tTags\tZone\tCapacity\tLast seen".to_string()];
+    let mut listed = HashSet::new();
+    for ver in layout.versions.iter().rev() {
+        for (node, _, role) in ver.roles.items().iter() {
+            let cfg = match role {
+                NodeRoleV(Some(role)) if role.capacity.is_some() => role,
+                _ => continue,
+            };
+
+            if listed.contains(node) {
+                continue;
+            }
+            listed.insert(*node);
+
+            let adv = status_map.get(node);
+            if adv.map(|x| x.is_up).unwrap_or(false) {
+                continue;
+            }
+
+            // Node is in a layout version, is not a gateway node, and is not up:
+            // it is in a failed state, add proper line to the output
+            let (host, addr, last_seen) = match adv {
+                Some(adv) => (
+                    adv.status.hostname.as_str(),
+                    adv.addr.to_string(),
+                    adv.last_seen_secs_ago
+                        .map(|s| tf.convert(Duration::from_secs(s)))
+                        .unwrap_or_else(|| "never seen".into()),
+                ),
+                None => ("??", "??".into(), "never seen".into()),
+            };
+
+            let capacity = if ver.version == layout.current().version {
+                cfg.capacity_string()
+            } else {
+                "draining metadata...".to_string()
+            };
+            failed_nodes.push(format!(
+                "{id:?}\t{host}\t{addr}\t[{tags}]\t{zone}\t{capacity}\t{last_seen}",
+                id = node,
+                host = host,
+                addr = addr,
+                tags = cfg.tags.join(","),
+                zone = cfg.zone,
+                capacity = capacity,
+                last_seen = last_seen,
+            ));
+        }
+    }
+
+    if failed_nodes.len() > 1 {
+        println!("\n==== FAILED NODES ====");
+        format_table(failed_nodes);
+    }
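
The failed-nodes rewrite replaces the two ad-hoc checks (failure_case_1 / failure_case_2) with a single scan: index the gossip status by node ID, then walk layout versions newest-first so each node is reported at most once, against the newest version that still lists it. A condensed sketch of that pattern, again with hypothetical stand-in types (the real code reads roles from Garage's CRDT tables and formats table rows rather than returning pairs):

use std::collections::{HashMap, HashSet};

// Hypothetical, reduced stand-ins for the gossip status and layout tables.
pub struct NodeStatus {
    pub id: u64,
    pub is_up: bool,
}
pub struct LayoutVersion {
    pub version: u64,
    // (node id, storage capacity); None marks a gateway role
    pub roles: Vec<(u64, Option<u64>)>,
}

// Report each storage node at most once, from the newest layout version that
// still lists it, skipping gateway roles and nodes that gossip says are up.
pub fn failed_nodes(status: &[NodeStatus], versions: &[LayoutVersion]) -> Vec<(u64, u64)> {
    let status_map: HashMap<u64, &NodeStatus> =
        status.iter().map(|s| (s.id, s)).collect();
    let mut listed = HashSet::new();
    let mut failed = Vec::new();
    for ver in versions.iter().rev() {
        for &(node, capacity) in &ver.roles {
            if capacity.is_none() || !listed.insert(node) {
                continue; // gateway role, or already reported from a newer version
            }
            if status_map.get(&node).map(|s| s.is_up).unwrap_or(false) {
                continue; // node is up, so not failed
            }
            failed.push((node, ver.version));
        }
    }
    failed
}

The failed_nodes.len() > 1 guard at the end of the diff works because the vector is seeded with its header row, so a length of 1 means no failed node was found and nothing is printed.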