cli: improvements to the layout commands when multiple layouts are live
This commit is contained in:
parent
91b874c4ef
commit
7f2541101f
4 changed files with 49 additions and 29 deletions
|
@ -274,8 +274,7 @@ impl AdminRpcHandler {
|
||||||
fn gather_cluster_stats(&self) -> String {
|
fn gather_cluster_stats(&self) -> String {
|
||||||
let mut ret = String::new();
|
let mut ret = String::new();
|
||||||
|
|
||||||
// Gather storage node and free space statistics
|
// Gather storage node and free space statistics for current nodes
|
||||||
// TODO: not only layout.current() ???
|
|
||||||
let layout = &self.garage.system.cluster_layout();
|
let layout = &self.garage.system.cluster_layout();
|
||||||
let mut node_partition_count = HashMap::<Uuid, u64>::new();
|
let mut node_partition_count = HashMap::<Uuid, u64>::new();
|
||||||
for short_id in layout.current().ring_assignment_data.iter() {
|
for short_id in layout.current().ring_assignment_data.iter() {
|
||||||
|
|
|
@ -179,7 +179,7 @@ pub async fn cmd_status(rpc_cli: &Endpoint<SystemRpc, ()>, rpc_host: NodeID) ->
|
||||||
println!("Your cluster is expecting to drain data from nodes that are currently unavailable.");
|
println!("Your cluster is expecting to drain data from nodes that are currently unavailable.");
|
||||||
println!("If these nodes are definitely dead, please review the layout history with");
|
println!("If these nodes are definitely dead, please review the layout history with");
|
||||||
println!(
|
println!(
|
||||||
"`garage layout history` and use `garage layout assume-sync` to force progress."
|
"`garage layout history` and use `garage layout skip-dead-nodes` to force progress."
|
||||||
);
|
);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -274,6 +274,6 @@ pub async fn fetch_status(
|
||||||
.await??
|
.await??
|
||||||
{
|
{
|
||||||
SystemRpc::ReturnKnownNodes(nodes) => Ok(nodes),
|
SystemRpc::ReturnKnownNodes(nodes) => Ok(nodes),
|
||||||
resp => Err(Error::Message(format!("Invalid RPC response: {:?}", resp))),
|
resp => Err(Error::unexpected_rpc_message(resp)),
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -354,10 +354,14 @@ pub async fn cmd_layout_history(
|
||||||
));
|
));
|
||||||
}
|
}
|
||||||
format_table(table);
|
format_table(table);
|
||||||
|
|
||||||
println!();
|
println!();
|
||||||
|
|
||||||
|
if layout.versions.len() > 1 {
|
||||||
println!("==== UPDATE TRACKERS ====");
|
println!("==== UPDATE TRACKERS ====");
|
||||||
println!("This is the internal data that Garage stores to know which nodes have what data.");
|
println!("Several layout versions are currently live in the cluster, and data is being migrated.");
|
||||||
|
println!(
|
||||||
|
"This is the internal data that Garage stores to know which nodes have what data."
|
||||||
|
);
|
||||||
println!();
|
println!();
|
||||||
let mut table = vec!["Node\tAck\tSync\tSync_ack".to_string()];
|
let mut table = vec!["Node\tAck\tSync\tSync_ack".to_string()];
|
||||||
let all_nodes = layout.get_all_nodes();
|
let all_nodes = layout.get_all_nodes();
|
||||||
|
@ -373,16 +377,21 @@ pub async fn cmd_layout_history(
|
||||||
table[1..].sort();
|
table[1..].sort();
|
||||||
format_table(table);
|
format_table(table);
|
||||||
|
|
||||||
if layout.versions.len() > 1 {
|
|
||||||
println!();
|
println!();
|
||||||
println!(
|
println!(
|
||||||
"If some nodes are not catching up to the latest layout version in the update tracker,"
|
"If some nodes are not catching up to the latest layout version in the update trackers,"
|
||||||
);
|
);
|
||||||
println!("it might be because they are offline or unable to complete a sync successfully.");
|
println!("it might be because they are offline or unable to complete a sync successfully.");
|
||||||
println!(
|
println!(
|
||||||
"You may force progress using `garage layout assume-sync --version {}`",
|
"You may force progress using `garage layout skip-dead-nodes --version {}`",
|
||||||
layout.current().version
|
layout.current().version
|
||||||
);
|
);
|
||||||
|
} else {
|
||||||
|
println!("Your cluster is currently in a stable state with a single live layout version.");
|
||||||
|
println!("No metadata migration is in progress. Note that the migration of data blocks is not tracked,");
|
||||||
|
println!(
|
||||||
|
"so you might want to keep old nodes online until their data directories become empty."
|
||||||
|
);
|
||||||
}
|
}
|
||||||
|
|
||||||
Ok(())
|
Ok(())
|
||||||
|
@ -415,6 +424,7 @@ pub async fn cmd_layout_skip_dead_nodes(
|
||||||
}
|
}
|
||||||
|
|
||||||
let all_nodes = layout.get_all_nodes();
|
let all_nodes = layout.get_all_nodes();
|
||||||
|
let mut did_something = false;
|
||||||
for node in all_nodes.iter() {
|
for node in all_nodes.iter() {
|
||||||
if status.iter().any(|x| x.id == *node && x.is_up) {
|
if status.iter().any(|x| x.id == *node && x.is_up) {
|
||||||
continue;
|
continue;
|
||||||
|
@ -422,19 +432,28 @@ pub async fn cmd_layout_skip_dead_nodes(
|
||||||
|
|
||||||
if layout.update_trackers.ack_map.set_max(*node, opt.version) {
|
if layout.update_trackers.ack_map.set_max(*node, opt.version) {
|
||||||
println!("Increased the ACK tracker for node {:?}", node);
|
println!("Increased the ACK tracker for node {:?}", node);
|
||||||
|
did_something = true;
|
||||||
}
|
}
|
||||||
|
|
||||||
if opt.allow_missing_data {
|
if opt.allow_missing_data {
|
||||||
if layout.update_trackers.sync_map.set_max(*node, opt.version) {
|
if layout.update_trackers.sync_map.set_max(*node, opt.version) {
|
||||||
println!("Increased the SYNC tracker for node {:?}", node);
|
println!("Increased the SYNC tracker for node {:?}", node);
|
||||||
|
did_something = true;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if did_something {
|
||||||
send_layout(rpc_cli, rpc_host, layout).await?;
|
send_layout(rpc_cli, rpc_host, layout).await?;
|
||||||
println!("Success.");
|
println!("Success.");
|
||||||
|
|
||||||
Ok(())
|
Ok(())
|
||||||
|
} else if !opt.allow_missing_data {
|
||||||
|
Err(Error::Message("Nothing was done, try passing the `--allow-missing-data` flag to force progress even when not enough nodes can complete a metadata sync.".into()))
|
||||||
|
} else {
|
||||||
|
Err(Error::Message(
|
||||||
|
"Sorry, there is nothing I can do for you. Please wait patiently. If you ask for help, please send the output of the `garage layout history` command.".into(),
|
||||||
|
))
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// --- utility ---
|
// --- utility ---
|
||||||
|
@ -448,7 +467,7 @@ pub async fn fetch_layout(
|
||||||
.await??
|
.await??
|
||||||
{
|
{
|
||||||
SystemRpc::AdvertiseClusterLayout(t) => Ok(t),
|
SystemRpc::AdvertiseClusterLayout(t) => Ok(t),
|
||||||
resp => Err(Error::Message(format!("Invalid RPC response: {:?}", resp))),
|
resp => Err(Error::unexpected_rpc_message(resp)),
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -450,6 +450,8 @@ pub fn print_block_info(
|
||||||
|
|
||||||
if refcount != nondeleted_count {
|
if refcount != nondeleted_count {
|
||||||
println!();
|
println!();
|
||||||
println!("Warning: refcount does not match number of non-deleted versions");
|
println!(
|
||||||
|
"Warning: refcount does not match number of non-deleted versions (see issue #644)."
|
||||||
|
);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
Loading…
Reference in a new issue