From 52376d47caf747f5cf93a21e5c15e4e6b8d991ee Mon Sep 17 00:00:00 2001 From: Alex Auvolat Date: Wed, 14 Jun 2023 13:45:27 +0200 Subject: [PATCH] admin api: change cluster status/layout to use lists and not maps (fix #377) --- doc/drafts/admin-api.md | 119 +++++++++++++++++++++++++------------ src/api/admin/cluster.rs | 125 ++++++++++++++++++++++++++++----------- src/api/admin/router.rs | 6 +- 3 files changed, 175 insertions(+), 75 deletions(-) diff --git a/doc/drafts/admin-api.md b/doc/drafts/admin-api.md index e0252f71f..b1a8f402c 100644 --- a/doc/drafts/admin-api.md +++ b/doc/drafts/admin-api.md @@ -56,7 +56,7 @@ See `/v0/health` for an API that also returns JSON output. ### Cluster operations -#### GetClusterStatus `GET /v0/status` +#### GetClusterStatus `GET /v1/status` Returns the cluster's current status in JSON, including: @@ -70,67 +70,93 @@ Example response body: ```json { "node": "ec79480e0ce52ae26fd00c9da684e4fa56658d9c64cdcecb094e936de0bfe71f", - "garage_version": "git:v0.8.0", - "knownNodes": { - "ec79480e0ce52ae26fd00c9da684e4fa56658d9c64cdcecb094e936de0bfe71f": { + "garageVersion": "git:v0.9.0-dev", + "garageFeatures": [ + "k2v", + "sled", + "lmdb", + "sqlite", + "metrics", + "bundled-libs" + ], + "rustVersion": "1.68.0", + "dbEngine": "LMDB (using Heed crate)", + "knownNodes": [ + { + "id": "ec79480e0ce52ae26fd00c9da684e4fa56658d9c64cdcecb094e936de0bfe71f", "addr": "10.0.0.11:3901", "is_up": true, "last_seen_secs_ago": 9, "hostname": "node1" }, - "4a6ae5a1d0d33bf895f5bb4f0a418b7dc94c47c0dd2eb108d1158f3c8f60b0ff": { + { + "id": "4a6ae5a1d0d33bf895f5bb4f0a418b7dc94c47c0dd2eb108d1158f3c8f60b0ff", "addr": "10.0.0.12:3901", "is_up": true, "last_seen_secs_ago": 1, "hostname": "node2" }, - "23ffd0cdd375ebff573b20cc5cef38996b51c1a7d6dbcf2c6e619876e507cf27": { + { + "id": "23ffd0cdd375ebff573b20cc5cef38996b51c1a7d6dbcf2c6e619876e507cf27", "addr": "10.0.0.21:3901", "is_up": true, "last_seen_secs_ago": 7, "hostname": "node3" }, - "e2ee7984ee65b260682086ec70026165903c86e601a4a5a501c1900afe28d84b": { + { + "id": "e2ee7984ee65b260682086ec70026165903c86e601a4a5a501c1900afe28d84b", "addr": "10.0.0.22:3901", "is_up": true, "last_seen_secs_ago": 1, "hostname": "node4" } - }, + ], "layout": { "version": 12, - "roles": { - "ec79480e0ce52ae26fd00c9da684e4fa56658d9c64cdcecb094e936de0bfe71f": { + "roles": [ + { + "id": "ec79480e0ce52ae26fd00c9da684e4fa56658d9c64cdcecb094e936de0bfe71f", "zone": "dc1", - "capacity": 4, + "capacity": 10737418240, "tags": [ "node1" ] }, - "4a6ae5a1d0d33bf895f5bb4f0a418b7dc94c47c0dd2eb108d1158f3c8f60b0ff": { + { + "id": "4a6ae5a1d0d33bf895f5bb4f0a418b7dc94c47c0dd2eb108d1158f3c8f60b0ff", "zone": "dc1", - "capacity": 6, + "capacity": 10737418240, "tags": [ "node2" ] }, - "23ffd0cdd375ebff573b20cc5cef38996b51c1a7d6dbcf2c6e619876e507cf27": { + { + "id": "23ffd0cdd375ebff573b20cc5cef38996b51c1a7d6dbcf2c6e619876e507cf27", "zone": "dc2", - "capacity": 10, + "capacity": 10737418240, "tags": [ "node3" ] } - }, - "stagedRoleChanges": { - "e2ee7984ee65b260682086ec70026165903c86e601a4a5a501c1900afe28d84b": { + ], + "stagedRoleChanges": [ + { + "id": "e2ee7984ee65b260682086ec70026165903c86e601a4a5a501c1900afe28d84b", + "remove": false, "zone": "dc2", - "capacity": 5, + "capacity": 10737418240, "tags": [ "node4" ] } - } + { + "id": "23ffd0cdd375ebff573b20cc5cef38996b51c1a7d6dbcf2c6e619876e507cf27", + "remove": true, + "zone": null, + "capacity": null, + "tags": null, + } + ] } } ``` @@ -198,7 +224,7 @@ Example response: ] ``` -#### GetClusterLayout `GET /v0/layout` +#### GetClusterLayout `GET /v1/layout` Returns the cluster's current layout in JSON, including: @@ -212,42 +238,54 @@ Example response body: ```json { "version": 12, - "roles": { - "ec79480e0ce52ae26fd00c9da684e4fa56658d9c64cdcecb094e936de0bfe71f": { + "roles": [ + { + "id": "ec79480e0ce52ae26fd00c9da684e4fa56658d9c64cdcecb094e936de0bfe71f", "zone": "dc1", - "capacity": 4, + "capacity": 10737418240, "tags": [ "node1" ] }, - "4a6ae5a1d0d33bf895f5bb4f0a418b7dc94c47c0dd2eb108d1158f3c8f60b0ff": { + { + "id": "4a6ae5a1d0d33bf895f5bb4f0a418b7dc94c47c0dd2eb108d1158f3c8f60b0ff", "zone": "dc1", - "capacity": 6, + "capacity": 10737418240, "tags": [ "node2" ] }, - "23ffd0cdd375ebff573b20cc5cef38996b51c1a7d6dbcf2c6e619876e507cf27": { + { + "id": "23ffd0cdd375ebff573b20cc5cef38996b51c1a7d6dbcf2c6e619876e507cf27", "zone": "dc2", - "capacity": 10, + "capacity": 10737418240, "tags": [ "node3" ] } - }, - "stagedRoleChanges": { - "e2ee7984ee65b260682086ec70026165903c86e601a4a5a501c1900afe28d84b": { + ], + "stagedRoleChanges": [ + { + "id": "e2ee7984ee65b260682086ec70026165903c86e601a4a5a501c1900afe28d84b", + "remove": false, "zone": "dc2", - "capacity": 5, + "capacity": 10737418240, "tags": [ "node4" ] } - } + { + "id": "23ffd0cdd375ebff573b20cc5cef38996b51c1a7d6dbcf2c6e619876e507cf27", + "remove": true, + "zone": null, + "capacity": null, + "tags": null, + } + ] } ``` -#### UpdateClusterLayout `POST /v0/layout` +#### UpdateClusterLayout `POST /v1/layout` Send modifications to the cluster layout. These modifications will be included in the staged role changes, visible in subsequent calls @@ -259,8 +297,9 @@ the layout. Request body format: ```json -{ - : { +[ + { + "id": , "capacity": , "zone": , "tags": [ @@ -268,9 +307,11 @@ Request body format: ... ] }, - : null, - ... -} + { + "id": , + "remove": true + } +] ``` Contrary to the CLI that may update only a subset of the fields diff --git a/src/api/admin/cluster.rs b/src/api/admin/cluster.rs index a2c97ee5a..8a208a2c9 100644 --- a/src/api/admin/cluster.rs +++ b/src/api/admin/cluster.rs @@ -1,4 +1,3 @@ -use std::collections::HashMap; use std::net::SocketAddr; use std::sync::Arc; @@ -8,7 +7,7 @@ use serde::{Deserialize, Serialize}; use garage_util::crdt::*; use garage_util::data::*; -use garage_rpc::layout::*; +use garage_rpc::layout; use garage_model::garage::Garage; @@ -26,16 +25,12 @@ pub async fn handle_get_cluster_status(garage: &Arc) -> Result) -> Result) -> GetClusterLayoutResponse { let layout = garage.system.get_cluster_layout(); + let roles = layout + .roles + .items() + .iter() + .filter_map(|(k, _, v)| v.0.clone().map(|x| (k, x))) + .map(|(k, v)| NodeRoleResp { + id: hex::encode(k), + zone: v.zone.clone(), + capacity: v.capacity, + tags: v.tags.clone(), + }) + .collect::>(); + + let staged_role_changes = layout + .staging_roles + .items() + .iter() + .filter(|(k, _, v)| layout.roles.get(k) != Some(v)) + .map(|(k, _, v)| match &v.0 { + None => NodeRoleChange { + id: hex::encode(k), + remove: true, + ..Default::default() + }, + Some(r) => NodeRoleChange { + id: hex::encode(k), + remove: false, + zone: Some(r.zone.clone()), + capacity: r.capacity, + tags: Some(r.tags.clone()), + }, + }) + .collect::>(); + GetClusterLayoutResponse { version: layout.version, - roles: layout - .roles - .items() - .iter() - .filter(|(_, _, v)| v.0.is_some()) - .map(|(k, _, v)| (hex::encode(k), v.0.clone())) - .collect(), - staged_role_changes: layout - .staging_roles - .items() - .iter() - .filter(|(k, _, v)| layout.roles.get(k) != Some(v)) - .map(|(k, _, v)| (hex::encode(k), v.0.clone())) - .collect(), + roles, + staged_role_changes, } } +// ---- + #[derive(Serialize)] #[serde(rename_all = "camelCase")] struct GetClusterStatusResponse { @@ -109,7 +128,7 @@ struct GetClusterStatusResponse { garage_features: Option<&'static [&'static str]>, rust_version: &'static str, db_engine: String, - known_nodes: HashMap, + known_nodes: Vec, layout: GetClusterLayoutResponse, } @@ -124,19 +143,31 @@ struct ConnectClusterNodesResponse { #[serde(rename_all = "camelCase")] struct GetClusterLayoutResponse { version: u64, - roles: HashMap>, - staged_role_changes: HashMap>, + roles: Vec, + staged_role_changes: Vec, +} + +#[derive(Serialize)] +#[serde(rename_all = "camelCase")] +struct NodeRoleResp { + id: String, + zone: String, + capacity: Option, + tags: Vec, } #[derive(Serialize)] #[serde(rename_all = "camelCase")] struct KnownNodeResp { + id: String, addr: SocketAddr, is_up: bool, last_seen_secs_ago: Option, hostname: String, } +// ---- update functions ---- + pub async fn handle_update_cluster_layout( garage: &Arc, req: Request, @@ -148,13 +179,23 @@ pub async fn handle_update_cluster_layout( let mut roles = layout.roles.clone(); roles.merge(&layout.staging_roles); - for (node, role) in updates { - let node = hex::decode(node).ok_or_bad_request("Invalid node identifier")?; + for change in updates { + let node = hex::decode(&change.id).ok_or_bad_request("Invalid node identifier")?; let node = Uuid::try_from(&node).ok_or_bad_request("Invalid node identifier")?; + let new_role = match (change.remove, change.zone, change.capacity, change.tags) { + (true, None, None, None) => None, + (false, Some(zone), capacity, Some(tags)) => Some(layout::NodeRole { + zone, + capacity, + tags, + }), + _ => return Err(Error::bad_request("Invalid layout change")), + }; + layout .staging_roles - .merge(&roles.update_mutator(node, NodeRoleV(role))); + .merge(&roles.update_mutator(node, layout::NodeRoleV(new_role))); } garage.system.update_cluster_layout(&layout).await?; @@ -196,10 +237,28 @@ pub async fn handle_revert_cluster_layout( .body(Body::empty())?) } -type UpdateClusterLayoutRequest = HashMap>; +// ---- + +type UpdateClusterLayoutRequest = Vec; #[derive(Deserialize)] #[serde(rename_all = "camelCase")] struct ApplyRevertLayoutRequest { version: u64, } + +// ---- + +#[derive(Serialize, Deserialize, Default)] +#[serde(rename_all = "camelCase")] +struct NodeRoleChange { + id: String, + #[serde(default)] + remove: bool, + #[serde(default)] + zone: Option, + #[serde(default)] + capacity: Option, + #[serde(default)] + tags: Option>, +} diff --git a/src/api/admin/router.rs b/src/api/admin/router.rs index 0dcb1546e..5af3ffb58 100644 --- a/src/api/admin/router.rs +++ b/src/api/admin/router.rs @@ -95,12 +95,12 @@ impl Endpoint { GET "/check" => CheckWebsiteEnabled, GET "/health" => Health, GET "/metrics" => Metrics, - GET "/v0/status" => GetClusterStatus, + GET "/v1/status" => GetClusterStatus, GET "/v0/health" => GetClusterHealth, POST "/v0/connect" => ConnectClusterNodes, // Layout endpoints - GET "/v0/layout" => GetClusterLayout, - POST "/v0/layout" => UpdateClusterLayout, + GET "/v1/layout" => GetClusterLayout, + POST "/v1/layout" => UpdateClusterLayout, POST "/v0/layout/apply" => ApplyClusterLayout, POST "/v0/layout/revert" => RevertClusterLayout, // API key endpoints