admin api: implement ClusterLayoutSkipDeadNodes and use it in CLI
All checks were successful
ci/woodpecker/push/debug Pipeline was successful
All checks were successful
ci/woodpecker/push/debug Pipeline was successful
This commit is contained in:
parent
3d94eb8d4b
commit
0951b5db75
10 changed files with 190 additions and 126 deletions
|
@ -157,6 +157,40 @@
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
|
"/v2/ClusterLayoutSkipDeadNodes": {
|
||||||
|
"post": {
|
||||||
|
"tags": [
|
||||||
|
"Cluster layout"
|
||||||
|
],
|
||||||
|
"description": "Force progress in layout update trackers",
|
||||||
|
"operationId": "ClusterLayoutSkipDeadNodes",
|
||||||
|
"requestBody": {
|
||||||
|
"content": {
|
||||||
|
"application/json": {
|
||||||
|
"schema": {
|
||||||
|
"$ref": "#/components/schemas/ClusterLayoutSkipDeadNodesRequest"
|
||||||
|
}
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"required": true
|
||||||
|
},
|
||||||
|
"responses": {
|
||||||
|
"200": {
|
||||||
|
"description": "Request has been taken into account",
|
||||||
|
"content": {
|
||||||
|
"application/json": {
|
||||||
|
"schema": {
|
||||||
|
"$ref": "#/components/schemas/ClusterLayoutSkipDeadNodesResponse"
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"500": {
|
||||||
|
"description": "Internal server error"
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
},
|
||||||
"/v2/ConnectClusterNodes": {
|
"/v2/ConnectClusterNodes": {
|
||||||
"post": {
|
"post": {
|
||||||
"tags": [
|
"tags": [
|
||||||
|
@ -1624,6 +1658,44 @@
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
|
"ClusterLayoutSkipDeadNodesRequest": {
|
||||||
|
"type": "object",
|
||||||
|
"required": [
|
||||||
|
"version",
|
||||||
|
"allowMissingData"
|
||||||
|
],
|
||||||
|
"properties": {
|
||||||
|
"allowMissingData": {
|
||||||
|
"type": "boolean"
|
||||||
|
},
|
||||||
|
"version": {
|
||||||
|
"type": "integer",
|
||||||
|
"format": "int64",
|
||||||
|
"minimum": 0
|
||||||
|
}
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"ClusterLayoutSkipDeadNodesResponse": {
|
||||||
|
"type": "object",
|
||||||
|
"required": [
|
||||||
|
"ackUpdated",
|
||||||
|
"syncUpdated"
|
||||||
|
],
|
||||||
|
"properties": {
|
||||||
|
"ackUpdated": {
|
||||||
|
"type": "array",
|
||||||
|
"items": {
|
||||||
|
"type": "string"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"syncUpdated": {
|
||||||
|
"type": "array",
|
||||||
|
"items": {
|
||||||
|
"type": "string"
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
},
|
||||||
"ClusterLayoutVersion": {
|
"ClusterLayoutVersion": {
|
||||||
"type": "object",
|
"type": "object",
|
||||||
"required": [
|
"required": [
|
||||||
|
|
|
@ -56,6 +56,7 @@ admin_endpoints![
|
||||||
PreviewClusterLayoutChanges,
|
PreviewClusterLayoutChanges,
|
||||||
ApplyClusterLayout,
|
ApplyClusterLayout,
|
||||||
RevertClusterLayout,
|
RevertClusterLayout,
|
||||||
|
ClusterLayoutSkipDeadNodes,
|
||||||
|
|
||||||
// Access key operations
|
// Access key operations
|
||||||
ListKeys,
|
ListKeys,
|
||||||
|
@ -422,6 +423,22 @@ pub struct RevertClusterLayoutRequest;
|
||||||
#[derive(Debug, Clone, Serialize, Deserialize, ToSchema)]
|
#[derive(Debug, Clone, Serialize, Deserialize, ToSchema)]
|
||||||
pub struct RevertClusterLayoutResponse(pub GetClusterLayoutResponse);
|
pub struct RevertClusterLayoutResponse(pub GetClusterLayoutResponse);
|
||||||
|
|
||||||
|
// ---- ClusterLayoutSkipDeadNodes ----
|
||||||
|
|
||||||
|
#[derive(Debug, Clone, Serialize, Deserialize, ToSchema)]
|
||||||
|
#[serde(rename_all = "camelCase")]
|
||||||
|
pub struct ClusterLayoutSkipDeadNodesRequest {
|
||||||
|
pub version: u64,
|
||||||
|
pub allow_missing_data: bool,
|
||||||
|
}
|
||||||
|
|
||||||
|
#[derive(Debug, Clone, Serialize, Deserialize, ToSchema)]
|
||||||
|
#[serde(rename_all = "camelCase")]
|
||||||
|
pub struct ClusterLayoutSkipDeadNodesResponse {
|
||||||
|
pub ack_updated: Vec<String>,
|
||||||
|
pub sync_updated: Vec<String>,
|
||||||
|
}
|
||||||
|
|
||||||
// **********************************************
|
// **********************************************
|
||||||
// Access key operations
|
// Access key operations
|
||||||
// **********************************************
|
// **********************************************
|
||||||
|
|
|
@ -465,6 +465,67 @@ impl RequestHandler for RevertClusterLayoutRequest {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
impl RequestHandler for ClusterLayoutSkipDeadNodesRequest {
|
||||||
|
type Response = ClusterLayoutSkipDeadNodesResponse;
|
||||||
|
|
||||||
|
async fn handle(
|
||||||
|
self,
|
||||||
|
garage: &Arc<Garage>,
|
||||||
|
_admin: &Admin,
|
||||||
|
) -> Result<ClusterLayoutSkipDeadNodesResponse, Error> {
|
||||||
|
let status = garage.system.get_known_nodes();
|
||||||
|
|
||||||
|
let mut layout = garage.system.cluster_layout().inner().clone();
|
||||||
|
let mut ack_updated = vec![];
|
||||||
|
let mut sync_updated = vec![];
|
||||||
|
|
||||||
|
if layout.versions.len() == 1 {
|
||||||
|
return Err(Error::bad_request(
|
||||||
|
"This command cannot be called when there is only one live cluster layout version",
|
||||||
|
));
|
||||||
|
}
|
||||||
|
|
||||||
|
let min_v = layout.min_stored();
|
||||||
|
if self.version <= min_v || self.version > layout.current().version {
|
||||||
|
return Err(Error::bad_request(format!(
|
||||||
|
"Invalid version, you may use the following version numbers: {}",
|
||||||
|
(min_v + 1..=layout.current().version)
|
||||||
|
.map(|x| x.to_string())
|
||||||
|
.collect::<Vec<_>>()
|
||||||
|
.join(" ")
|
||||||
|
)));
|
||||||
|
}
|
||||||
|
|
||||||
|
let all_nodes = layout.get_all_nodes();
|
||||||
|
for node in all_nodes.iter() {
|
||||||
|
// Update ACK tracker for dead nodes or for all nodes if --allow-missing-data
|
||||||
|
if self.allow_missing_data || !status.iter().any(|x| x.id == *node && x.is_up) {
|
||||||
|
if layout.update_trackers.ack_map.set_max(*node, self.version) {
|
||||||
|
ack_updated.push(hex::encode(node));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// If --allow-missing-data, update SYNC tracker for all nodes.
|
||||||
|
if self.allow_missing_data {
|
||||||
|
if layout.update_trackers.sync_map.set_max(*node, self.version) {
|
||||||
|
sync_updated.push(hex::encode(node));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
garage
|
||||||
|
.system
|
||||||
|
.layout_manager
|
||||||
|
.update_cluster_layout(&layout)
|
||||||
|
.await?;
|
||||||
|
|
||||||
|
Ok(ClusterLayoutSkipDeadNodesResponse {
|
||||||
|
ack_updated,
|
||||||
|
sync_updated,
|
||||||
|
})
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
// ----
|
// ----
|
||||||
|
|
||||||
impl From<layout::ZoneRedundancy> for ZoneRedundancy {
|
impl From<layout::ZoneRedundancy> for ZoneRedundancy {
|
||||||
|
|
|
@ -172,6 +172,18 @@ fn ApplyClusterLayout() -> () {}
|
||||||
)]
|
)]
|
||||||
fn RevertClusterLayout() -> () {}
|
fn RevertClusterLayout() -> () {}
|
||||||
|
|
||||||
|
#[utoipa::path(post,
|
||||||
|
path = "/v2/ClusterLayoutSkipDeadNodes",
|
||||||
|
tag = "Cluster layout",
|
||||||
|
description = "Force progress in layout update trackers",
|
||||||
|
request_body = ClusterLayoutSkipDeadNodesRequest,
|
||||||
|
responses(
|
||||||
|
(status = 200, description = "Request has been taken into account", body = ClusterLayoutSkipDeadNodesResponse),
|
||||||
|
(status = 500, description = "Internal server error")
|
||||||
|
),
|
||||||
|
)]
|
||||||
|
fn ClusterLayoutSkipDeadNodes() -> () {}
|
||||||
|
|
||||||
// **********************************************
|
// **********************************************
|
||||||
// Access key operations
|
// Access key operations
|
||||||
// **********************************************
|
// **********************************************
|
||||||
|
@ -718,6 +730,7 @@ impl Modify for SecurityAddon {
|
||||||
PreviewClusterLayoutChanges,
|
PreviewClusterLayoutChanges,
|
||||||
ApplyClusterLayout,
|
ApplyClusterLayout,
|
||||||
RevertClusterLayout,
|
RevertClusterLayout,
|
||||||
|
ClusterLayoutSkipDeadNodes,
|
||||||
// Key operations
|
// Key operations
|
||||||
ListKeys,
|
ListKeys,
|
||||||
GetKeyInfo,
|
GetKeyInfo,
|
||||||
|
|
|
@ -41,6 +41,7 @@ impl AdminApiRequest {
|
||||||
POST PreviewClusterLayoutChanges (),
|
POST PreviewClusterLayoutChanges (),
|
||||||
POST ApplyClusterLayout (body),
|
POST ApplyClusterLayout (body),
|
||||||
POST RevertClusterLayout (),
|
POST RevertClusterLayout (),
|
||||||
|
POST ClusterLayoutSkipDeadNodes (body),
|
||||||
// API key endpoints
|
// API key endpoints
|
||||||
GET GetKeyInfo (query_opt::id, query_opt::search, parse_default(false)::show_secret_key),
|
GET GetKeyInfo (query_opt::id, query_opt::search, parse_default(false)::show_secret_key),
|
||||||
POST UpdateKey (body_field, query::id),
|
POST UpdateKey (body_field, query::id),
|
||||||
|
|
|
@ -1,109 +0,0 @@
|
||||||
use garage_util::error::*;
|
|
||||||
|
|
||||||
use garage_rpc::layout::*;
|
|
||||||
use garage_rpc::system::*;
|
|
||||||
use garage_rpc::*;
|
|
||||||
|
|
||||||
use crate::cli::structs::*;
|
|
||||||
|
|
||||||
pub async fn cmd_layout_skip_dead_nodes(
|
|
||||||
rpc_cli: &Endpoint<SystemRpc, ()>,
|
|
||||||
rpc_host: NodeID,
|
|
||||||
opt: SkipDeadNodesOpt,
|
|
||||||
) -> Result<(), Error> {
|
|
||||||
let status = fetch_status(rpc_cli, rpc_host).await?;
|
|
||||||
let mut layout = fetch_layout(rpc_cli, rpc_host).await?;
|
|
||||||
|
|
||||||
if layout.versions.len() == 1 {
|
|
||||||
return Err(Error::Message(
|
|
||||||
"This command cannot be called when there is only one live cluster layout version"
|
|
||||||
.into(),
|
|
||||||
));
|
|
||||||
}
|
|
||||||
|
|
||||||
let min_v = layout.min_stored();
|
|
||||||
if opt.version <= min_v || opt.version > layout.current().version {
|
|
||||||
return Err(Error::Message(format!(
|
|
||||||
"Invalid version, you may use the following version numbers: {}",
|
|
||||||
(min_v + 1..=layout.current().version)
|
|
||||||
.map(|x| x.to_string())
|
|
||||||
.collect::<Vec<_>>()
|
|
||||||
.join(" ")
|
|
||||||
)));
|
|
||||||
}
|
|
||||||
|
|
||||||
let all_nodes = layout.get_all_nodes();
|
|
||||||
let mut did_something = false;
|
|
||||||
for node in all_nodes.iter() {
|
|
||||||
// Update ACK tracker for dead nodes or for all nodes if --allow-missing-data
|
|
||||||
if opt.allow_missing_data || !status.iter().any(|x| x.id == *node && x.is_up) {
|
|
||||||
if layout.update_trackers.ack_map.set_max(*node, opt.version) {
|
|
||||||
println!("Increased the ACK tracker for node {:?}", node);
|
|
||||||
did_something = true;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// If --allow-missing-data, update SYNC tracker for all nodes.
|
|
||||||
if opt.allow_missing_data {
|
|
||||||
if layout.update_trackers.sync_map.set_max(*node, opt.version) {
|
|
||||||
println!("Increased the SYNC tracker for node {:?}", node);
|
|
||||||
did_something = true;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
if did_something {
|
|
||||||
send_layout(rpc_cli, rpc_host, layout).await?;
|
|
||||||
println!("Success.");
|
|
||||||
Ok(())
|
|
||||||
} else if !opt.allow_missing_data {
|
|
||||||
Err(Error::Message("Nothing was done, try passing the `--allow-missing-data` flag to force progress even when not enough nodes can complete a metadata sync.".into()))
|
|
||||||
} else {
|
|
||||||
Err(Error::Message(
|
|
||||||
"Sorry, there is nothing I can do for you. Please wait patiently. If you ask for help, please send the output of the `garage layout history` command.".into(),
|
|
||||||
))
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// --- utility ---
|
|
||||||
|
|
||||||
async fn fetch_status(
|
|
||||||
rpc_cli: &Endpoint<SystemRpc, ()>,
|
|
||||||
rpc_host: NodeID,
|
|
||||||
) -> Result<Vec<KnownNodeInfo>, Error> {
|
|
||||||
match rpc_cli
|
|
||||||
.call(&rpc_host, SystemRpc::GetKnownNodes, PRIO_NORMAL)
|
|
||||||
.await??
|
|
||||||
{
|
|
||||||
SystemRpc::ReturnKnownNodes(nodes) => Ok(nodes),
|
|
||||||
resp => Err(Error::unexpected_rpc_message(resp)),
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
async fn fetch_layout(
|
|
||||||
rpc_cli: &Endpoint<SystemRpc, ()>,
|
|
||||||
rpc_host: NodeID,
|
|
||||||
) -> Result<LayoutHistory, Error> {
|
|
||||||
match rpc_cli
|
|
||||||
.call(&rpc_host, SystemRpc::PullClusterLayout, PRIO_NORMAL)
|
|
||||||
.await??
|
|
||||||
{
|
|
||||||
SystemRpc::AdvertiseClusterLayout(t) => Ok(t),
|
|
||||||
resp => Err(Error::unexpected_rpc_message(resp)),
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
async fn send_layout(
|
|
||||||
rpc_cli: &Endpoint<SystemRpc, ()>,
|
|
||||||
rpc_host: NodeID,
|
|
||||||
layout: LayoutHistory,
|
|
||||||
) -> Result<(), Error> {
|
|
||||||
rpc_cli
|
|
||||||
.call(
|
|
||||||
&rpc_host,
|
|
||||||
SystemRpc::AdvertiseClusterLayout(layout),
|
|
||||||
PRIO_NORMAL,
|
|
||||||
)
|
|
||||||
.await??;
|
|
||||||
Ok(())
|
|
||||||
}
|
|
|
@ -3,5 +3,3 @@ pub(crate) mod structs;
|
||||||
pub(crate) mod convert_db;
|
pub(crate) mod convert_db;
|
||||||
pub(crate) mod init;
|
pub(crate) mod init;
|
||||||
pub(crate) mod repair;
|
pub(crate) mod repair;
|
||||||
|
|
||||||
pub(crate) mod layout;
|
|
||||||
|
|
|
@ -6,7 +6,6 @@ use garage_util::error::*;
|
||||||
use garage_api_admin::api::*;
|
use garage_api_admin::api::*;
|
||||||
use garage_rpc::layout;
|
use garage_rpc::layout;
|
||||||
|
|
||||||
use crate::cli::layout as cli_v1;
|
|
||||||
use crate::cli::structs::*;
|
use crate::cli::structs::*;
|
||||||
use crate::cli_v2::*;
|
use crate::cli_v2::*;
|
||||||
|
|
||||||
|
@ -20,16 +19,7 @@ impl Cli {
|
||||||
LayoutOperation::Apply(apply_opt) => self.cmd_apply_layout(apply_opt).await,
|
LayoutOperation::Apply(apply_opt) => self.cmd_apply_layout(apply_opt).await,
|
||||||
LayoutOperation::Revert(revert_opt) => self.cmd_revert_layout(revert_opt).await,
|
LayoutOperation::Revert(revert_opt) => self.cmd_revert_layout(revert_opt).await,
|
||||||
LayoutOperation::History => self.cmd_layout_history().await,
|
LayoutOperation::History => self.cmd_layout_history().await,
|
||||||
|
LayoutOperation::SkipDeadNodes(opt) => self.cmd_skip_dead_nodes(opt).await,
|
||||||
// TODO
|
|
||||||
LayoutOperation::SkipDeadNodes(assume_sync_opt) => {
|
|
||||||
cli_v1::cmd_layout_skip_dead_nodes(
|
|
||||||
&self.system_rpc_endpoint,
|
|
||||||
self.rpc_host,
|
|
||||||
assume_sync_opt,
|
|
||||||
)
|
|
||||||
.await
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -304,6 +294,31 @@ To know the correct value of the new layout version, invoke `garage layout show`
|
||||||
|
|
||||||
Ok(())
|
Ok(())
|
||||||
}
|
}
|
||||||
|
|
||||||
|
pub async fn cmd_skip_dead_nodes(&self, opt: SkipDeadNodesOpt) -> Result<(), Error> {
|
||||||
|
let res = self
|
||||||
|
.api_request(ClusterLayoutSkipDeadNodesRequest {
|
||||||
|
version: opt.version,
|
||||||
|
allow_missing_data: opt.allow_missing_data,
|
||||||
|
})
|
||||||
|
.await?;
|
||||||
|
|
||||||
|
if !res.sync_updated.is_empty() || !res.ack_updated.is_empty() {
|
||||||
|
for node in res.ack_updated.iter() {
|
||||||
|
println!("Increased the ACK tracker for node {:.16}", node);
|
||||||
|
}
|
||||||
|
for node in res.sync_updated.iter() {
|
||||||
|
println!("Increased the SYNC tracker for node {:.16}", node);
|
||||||
|
}
|
||||||
|
Ok(())
|
||||||
|
} else if !opt.allow_missing_data {
|
||||||
|
Err(Error::Message("Nothing was done, try passing the `--allow-missing-data` flag to force progress even when not enough nodes can complete a metadata sync.".into()))
|
||||||
|
} else {
|
||||||
|
Err(Error::Message(
|
||||||
|
"Sorry, there is nothing I can do for you. Please wait patiently. If you ask for help, please send the output of the `garage layout history` command.".into(),
|
||||||
|
))
|
||||||
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// --------------------------
|
// --------------------------
|
||||||
|
|
|
@ -13,7 +13,6 @@ use std::time::Duration;
|
||||||
|
|
||||||
use garage_util::error::*;
|
use garage_util::error::*;
|
||||||
|
|
||||||
use garage_rpc::system::*;
|
|
||||||
use garage_rpc::*;
|
use garage_rpc::*;
|
||||||
|
|
||||||
use garage_api_admin::api::*;
|
use garage_api_admin::api::*;
|
||||||
|
@ -23,7 +22,6 @@ use garage_api_admin::RequestHandler;
|
||||||
use crate::cli::structs::*;
|
use crate::cli::structs::*;
|
||||||
|
|
||||||
pub struct Cli {
|
pub struct Cli {
|
||||||
pub system_rpc_endpoint: Arc<Endpoint<SystemRpc, ()>>,
|
|
||||||
pub proxy_rpc_endpoint: Arc<Endpoint<ProxyRpc, ()>>,
|
pub proxy_rpc_endpoint: Arc<Endpoint<ProxyRpc, ()>>,
|
||||||
pub rpc_host: NodeID,
|
pub rpc_host: NodeID,
|
||||||
}
|
}
|
||||||
|
|
|
@ -289,11 +289,9 @@ async fn cli_command(opt: Opt) -> Result<(), Error> {
|
||||||
Err(e).err_context("Unable to connect to destination RPC host. Check that you are using the same value of rpc_secret as them, and that you have their correct full-length node ID (public key).")?;
|
Err(e).err_context("Unable to connect to destination RPC host. Check that you are using the same value of rpc_secret as them, and that you have their correct full-length node ID (public key).")?;
|
||||||
}
|
}
|
||||||
|
|
||||||
let system_rpc_endpoint = netapp.endpoint::<SystemRpc, ()>(SYSTEM_RPC_PATH.into());
|
|
||||||
let proxy_rpc_endpoint = netapp.endpoint::<ProxyRpc, ()>(PROXY_RPC_PATH.into());
|
let proxy_rpc_endpoint = netapp.endpoint::<ProxyRpc, ()>(PROXY_RPC_PATH.into());
|
||||||
|
|
||||||
let cli = cli_v2::Cli {
|
let cli = cli_v2::Cli {
|
||||||
system_rpc_endpoint,
|
|
||||||
proxy_rpc_endpoint,
|
proxy_rpc_endpoint,
|
||||||
rpc_host: id,
|
rpc_host: id,
|
||||||
};
|
};
|
||||||
|
|
Loading…
Add table
Reference in a new issue