layout cli: safer skip-dead-nodes command
This commit is contained in:
parent
d90de365b3
commit
aa59059a91
3 changed files with 49 additions and 21 deletions
|
@ -49,13 +49,7 @@ pub async fn cli_command_dispatch(
|
|||
}
|
||||
|
||||
pub async fn cmd_status(rpc_cli: &Endpoint<SystemRpc, ()>, rpc_host: NodeID) -> Result<(), Error> {
|
||||
let status = match rpc_cli
|
||||
.call(&rpc_host, SystemRpc::GetKnownNodes, PRIO_NORMAL)
|
||||
.await??
|
||||
{
|
||||
SystemRpc::ReturnKnownNodes(nodes) => nodes,
|
||||
resp => return Err(Error::Message(format!("Invalid RPC response: {:?}", resp))),
|
||||
};
|
||||
let status = fetch_status(rpc_cli, rpc_host).await?;
|
||||
let layout = fetch_layout(rpc_cli, rpc_host).await?;
|
||||
|
||||
println!("==== HEALTHY NODES ====");
|
||||
|
@ -268,3 +262,18 @@ pub async fn cmd_admin(
|
|||
}
|
||||
Ok(())
|
||||
}
|
||||
|
||||
// ---- utility ----
|
||||
|
||||
pub async fn fetch_status(
|
||||
rpc_cli: &Endpoint<SystemRpc, ()>,
|
||||
rpc_host: NodeID,
|
||||
) -> Result<Vec<KnownNodeInfo>, Error> {
|
||||
match rpc_cli
|
||||
.call(&rpc_host, SystemRpc::GetKnownNodes, PRIO_NORMAL)
|
||||
.await??
|
||||
{
|
||||
SystemRpc::ReturnKnownNodes(nodes) => Ok(nodes),
|
||||
resp => Err(Error::Message(format!("Invalid RPC response: {:?}", resp))),
|
||||
}
|
||||
}
|
||||
|
|
|
@ -33,8 +33,8 @@ pub async fn cli_layout_command_dispatch(
|
|||
cmd_config_layout(system_rpc_endpoint, rpc_host, config_opt).await
|
||||
}
|
||||
LayoutOperation::History => cmd_layout_history(system_rpc_endpoint, rpc_host).await,
|
||||
LayoutOperation::AssumeSync(assume_sync_opt) => {
|
||||
cmd_layout_assume_sync(system_rpc_endpoint, rpc_host, assume_sync_opt).await
|
||||
LayoutOperation::SkipDeadNodes(assume_sync_opt) => {
|
||||
cmd_layout_skip_dead_nodes(system_rpc_endpoint, rpc_host, assume_sync_opt).await
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -388,13 +388,21 @@ pub async fn cmd_layout_history(
|
|||
Ok(())
|
||||
}
|
||||
|
||||
pub async fn cmd_layout_assume_sync(
|
||||
pub async fn cmd_layout_skip_dead_nodes(
|
||||
rpc_cli: &Endpoint<SystemRpc, ()>,
|
||||
rpc_host: NodeID,
|
||||
opt: AssumeSyncOpt,
|
||||
opt: SkipDeadNodesOpt,
|
||||
) -> Result<(), Error> {
|
||||
let status = fetch_status(rpc_cli, rpc_host).await?;
|
||||
let mut layout = fetch_layout(rpc_cli, rpc_host).await?;
|
||||
|
||||
if layout.versions.len() == 1 {
|
||||
return Err(Error::Message(
|
||||
"This command cannot be called when there is only one live cluster layout version"
|
||||
.into(),
|
||||
));
|
||||
}
|
||||
|
||||
let min_v = layout.min_stored();
|
||||
if opt.version <= min_v || opt.version > layout.current().version {
|
||||
return Err(Error::Message(format!(
|
||||
|
@ -408,12 +416,19 @@ pub async fn cmd_layout_assume_sync(
|
|||
|
||||
let all_nodes = layout.get_all_nodes();
|
||||
for node in all_nodes.iter() {
|
||||
layout.update_trackers.ack_map.set_max(*node, opt.version);
|
||||
layout.update_trackers.sync_map.set_max(*node, opt.version);
|
||||
layout
|
||||
.update_trackers
|
||||
.sync_ack_map
|
||||
.set_max(*node, opt.version);
|
||||
if status.iter().any(|x| x.id == *node && x.is_up) {
|
||||
continue;
|
||||
}
|
||||
|
||||
if layout.update_trackers.ack_map.set_max(*node, opt.version) {
|
||||
println!("Increased the ACK tracker for node {:?}", node);
|
||||
}
|
||||
|
||||
if opt.allow_missing_data {
|
||||
if layout.update_trackers.sync_map.set_max(*node, opt.version) {
|
||||
println!("Increased the SYNC tracker for node {:?}", node);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
send_layout(rpc_cli, rpc_host, layout).await?;
|
||||
|
|
|
@ -117,9 +117,9 @@ pub enum LayoutOperation {
|
|||
#[structopt(name = "history", version = garage_version())]
|
||||
History,
|
||||
|
||||
/// Assume all nodes are synchronized up to a certain layout version
|
||||
#[structopt(name = "assume-sync", version = garage_version())]
|
||||
AssumeSync(AssumeSyncOpt),
|
||||
/// Skip dead nodes when awaiting for a new layout version to be synchronized
|
||||
#[structopt(name = "skip-dead-nodes", version = garage_version())]
|
||||
SkipDeadNodes(SkipDeadNodesOpt),
|
||||
}
|
||||
|
||||
#[derive(StructOpt, Debug)]
|
||||
|
@ -178,11 +178,15 @@ pub struct RevertLayoutOpt {
|
|||
}
|
||||
|
||||
#[derive(StructOpt, Debug)]
|
||||
pub struct AssumeSyncOpt {
|
||||
pub struct SkipDeadNodesOpt {
|
||||
/// Version number of the layout to assume is currently up-to-date.
|
||||
/// This will generally be the current layout version.
|
||||
#[structopt(long = "version")]
|
||||
pub(crate) version: u64,
|
||||
/// Allow the skip even if a quorum of ndoes could not be found for
|
||||
/// the data among the remaining nodes
|
||||
#[structopt(long = "allow-missing-data")]
|
||||
pub(crate) allow_missing_data: bool,
|
||||
}
|
||||
|
||||
#[derive(Serialize, Deserialize, StructOpt, Debug)]
|
||||
|
|
Loading…
Reference in a new issue