cli: improvements to the layout commands when multiple layouts are live
This commit is contained in:
parent
91b874c4ef
commit
7f2541101f
4 changed files with 49 additions and 29 deletions
|
@ -274,8 +274,7 @@ impl AdminRpcHandler {
|
|||
fn gather_cluster_stats(&self) -> String {
|
||||
let mut ret = String::new();
|
||||
|
||||
// Gather storage node and free space statistics
|
||||
// TODO: not only layout.current() ???
|
||||
// Gather storage node and free space statistics for current nodes
|
||||
let layout = &self.garage.system.cluster_layout();
|
||||
let mut node_partition_count = HashMap::<Uuid, u64>::new();
|
||||
for short_id in layout.current().ring_assignment_data.iter() {
|
||||
|
|
|
@ -179,7 +179,7 @@ pub async fn cmd_status(rpc_cli: &Endpoint<SystemRpc, ()>, rpc_host: NodeID) ->
|
|||
println!("Your cluster is expecting to drain data from nodes that are currently unavailable.");
|
||||
println!("If these nodes are definitely dead, please review the layout history with");
|
||||
println!(
|
||||
"`garage layout history` and use `garage layout assume-sync` to force progress."
|
||||
"`garage layout history` and use `garage layout skip-dead-nodes` to force progress."
|
||||
);
|
||||
}
|
||||
}
|
||||
|
@ -274,6 +274,6 @@ pub async fn fetch_status(
|
|||
.await??
|
||||
{
|
||||
SystemRpc::ReturnKnownNodes(nodes) => Ok(nodes),
|
||||
resp => Err(Error::Message(format!("Invalid RPC response: {:?}", resp))),
|
||||
resp => Err(Error::unexpected_rpc_message(resp)),
|
||||
}
|
||||
}
|
||||
|
|
|
@ -354,10 +354,14 @@ pub async fn cmd_layout_history(
|
|||
));
|
||||
}
|
||||
format_table(table);
|
||||
|
||||
println!();
|
||||
|
||||
if layout.versions.len() > 1 {
|
||||
println!("==== UPDATE TRACKERS ====");
|
||||
println!("This is the internal data that Garage stores to know which nodes have what data.");
|
||||
println!("Several layout versions are currently live in the version, and data is being migrated.");
|
||||
println!(
|
||||
"This is the internal data that Garage stores to know which nodes have what data."
|
||||
);
|
||||
println!();
|
||||
let mut table = vec!["Node\tAck\tSync\tSync_ack".to_string()];
|
||||
let all_nodes = layout.get_all_nodes();
|
||||
|
@ -373,16 +377,21 @@ pub async fn cmd_layout_history(
|
|||
table[1..].sort();
|
||||
format_table(table);
|
||||
|
||||
if layout.versions.len() > 1 {
|
||||
println!();
|
||||
println!(
|
||||
"If some nodes are not catching up to the latest layout version in the update tracker,"
|
||||
"If some nodes are not catching up to the latest layout version in the update trackers,"
|
||||
);
|
||||
println!("it might be because they are offline or unable to complete a sync successfully.");
|
||||
println!(
|
||||
"You may force progress using `garage layout assume-sync --version {}`",
|
||||
"You may force progress using `garage layout skip-dead-nodes --version {}`",
|
||||
layout.current().version
|
||||
);
|
||||
} else {
|
||||
println!("Your cluster is currently in a stable state with a single live layout version.");
|
||||
println!("No metadata migration is in progress. Note that the migration of data blocks is not tracked,");
|
||||
println!(
|
||||
"so you might want to keep old nodes online until their data directories become empty."
|
||||
);
|
||||
}
|
||||
|
||||
Ok(())
|
||||
|
@ -415,6 +424,7 @@ pub async fn cmd_layout_skip_dead_nodes(
|
|||
}
|
||||
|
||||
let all_nodes = layout.get_all_nodes();
|
||||
let mut did_something = false;
|
||||
for node in all_nodes.iter() {
|
||||
if status.iter().any(|x| x.id == *node && x.is_up) {
|
||||
continue;
|
||||
|
@ -422,19 +432,28 @@ pub async fn cmd_layout_skip_dead_nodes(
|
|||
|
||||
if layout.update_trackers.ack_map.set_max(*node, opt.version) {
|
||||
println!("Increased the ACK tracker for node {:?}", node);
|
||||
did_something = true;
|
||||
}
|
||||
|
||||
if opt.allow_missing_data {
|
||||
if layout.update_trackers.sync_map.set_max(*node, opt.version) {
|
||||
println!("Increased the SYNC tracker for node {:?}", node);
|
||||
did_something = true;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if did_something {
|
||||
send_layout(rpc_cli, rpc_host, layout).await?;
|
||||
println!("Success.");
|
||||
|
||||
Ok(())
|
||||
} else if !opt.allow_missing_data {
|
||||
Err(Error::Message("Nothing was done, try passing the `--allow-missing-data` flag to force progress even when not enough nodes can complete a metadata sync.".into()))
|
||||
} else {
|
||||
Err(Error::Message(
|
||||
"Sorry, there is nothing I can do for you. Please wait patiently. If you ask for help, please send the output of the `garage layout history` command.".into(),
|
||||
))
|
||||
}
|
||||
}
|
||||
|
||||
// --- utility ---
|
||||
|
@ -448,7 +467,7 @@ pub async fn fetch_layout(
|
|||
.await??
|
||||
{
|
||||
SystemRpc::AdvertiseClusterLayout(t) => Ok(t),
|
||||
resp => Err(Error::Message(format!("Invalid RPC response: {:?}", resp))),
|
||||
resp => Err(Error::unexpected_rpc_message(resp)),
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
@ -450,6 +450,8 @@ pub fn print_block_info(
|
|||
|
||||
if refcount != nondeleted_count {
|
||||
println!();
|
||||
println!("Warning: refcount does not match number of non-deleted versions");
|
||||
println!(
|
||||
"Warning: refcount does not match number of non-deleted versions (see issue #644)."
|
||||
);
|
||||
}
|
||||
}
|
||||
|
|
Loading…
Reference in a new issue