From 35c108b85d2b70ad28cd93bfd412607a89b9acf9 Mon Sep 17 00:00:00 2001 From: Alex Auvolat Date: Wed, 14 Jun 2023 13:53:19 +0200 Subject: [PATCH] admin api: switch GetClusterHealth to camelcase (fix #381 again) --- doc/drafts/admin-api.md | 40 ++++++++++++++++++++-------------------- src/api/admin/cluster.rs | 28 ++++++++++++++++++++++++++++ src/api/admin/router.rs | 2 +- src/rpc/system.rs | 4 ++-- 4 files changed, 51 insertions(+), 23 deletions(-) diff --git a/doc/drafts/admin-api.md b/doc/drafts/admin-api.md index b1a8f402..c80147ef 100644 --- a/doc/drafts/admin-api.md +++ b/doc/drafts/admin-api.md @@ -52,7 +52,7 @@ Returns an HTTP status 200 if the node is ready to answer user's requests, and an HTTP status 503 (Service Unavailable) if there are some partitions for which a quorum of nodes is not available. A simple textual message is also returned in a body with content-type `text/plain`. -See `/v0/health` for an API that also returns JSON output. +See `/v1/health` for an API that also returns JSON output. 
### Cluster operations @@ -161,21 +161,21 @@ Example response body: } ``` -#### GetClusterHealth `GET /v0/health` +#### GetClusterHealth `GET /v1/health` Returns the cluster's current health in JSON format, with the following variables: -- `status`: one of `Healthy`, `Degraded` or `Unavailable`: - - Healthy: Garage node is connected to all storage nodes - - Degraded: Garage node is not connected to all storage nodes, but a quorum of write nodes is available for all partitions - - Unavailable: a quorum of write nodes is not available for some partitions -- `known_nodes`: the number of nodes this Garage node has had a TCP connection to since the daemon started -- `connected_nodes`: the nubmer of nodes this Garage node currently has an open connection to -- `storage_nodes`: the number of storage nodes currently registered in the cluster layout -- `storage_nodes_ok`: the number of storage nodes to which a connection is currently open +- `status`: one of `healthy`, `degraded` or `unavailable`: + - healthy: Garage node is connected to all storage nodes + - degraded: Garage node is not connected to all storage nodes, but a quorum of write nodes is available for all partitions + - unavailable: a quorum of write nodes is not available for some partitions +- `knownNodes`: the number of nodes this Garage node has had a TCP connection to since the daemon started +- `connectedNodes`: the number of nodes this Garage node currently has an open connection to +- `storageNodes`: the number of storage nodes currently registered in the cluster layout +- `storageNodesOk`: the number of storage nodes to which a connection is currently open - `partitions`: the total number of partitions of the data (currently always 256) -- `partitions_quorum`: the number of partitions for which a quorum of write nodes is available -- `partitions_all_ok`: the number of partitions for which we are connected to all storage nodes responsible of storing it +- `partitionsQuorum`: the number of partitions for 
which a quorum of write nodes is available +- `partitionsAllOk`: the number of partitions for which we are connected to all storage nodes responsible for storing it Contrarily to `GET /health`, this endpoint always returns a 200 OK HTTP response code. @@ -183,14 +183,14 @@ Example response body: ```json { - "status": "Degraded", - "known_nodes": 3, - "connected_nodes": 2, - "storage_nodes": 3, - "storage_nodes_ok": 2, - "partitions": 256, - "partitions_quorum": 256, - "partitions_all_ok": 0 + "status": "degraded", + "knownNodes": 3, + "connectedNodes": 3, + "storageNodes": 4, + "storageNodesOk": 3, + "partitions": 256, + "partitionsQuorum": 256, + "partitionsAllOk": 64 } ``` diff --git a/src/api/admin/cluster.rs b/src/api/admin/cluster.rs index 8a208a2c..90203043 100644 --- a/src/api/admin/cluster.rs +++ b/src/api/admin/cluster.rs @@ -40,7 +40,22 @@ pub async fn handle_get_cluster_status(garage: &Arc<Garage>) -> Result<Response< pub async fn handle_get_cluster_health(garage: &Arc<Garage>) -> Result<Response<Body>, Error> { + use garage_rpc::system::ClusterHealthStatus; let health = garage.system.health(); + let health = ClusterHealth { + status: match health.status { + ClusterHealthStatus::Healthy => "healthy", + ClusterHealthStatus::Degraded => "degraded", + ClusterHealthStatus::Unavailable => "unavailable", + }, + known_nodes: health.known_nodes, + connected_nodes: health.connected_nodes, + storage_nodes: health.storage_nodes, + storage_nodes_ok: health.storage_nodes_ok, + partitions: health.partitions, + partitions_quorum: health.partitions_quorum, + partitions_all_ok: health.partitions_all_ok, + }; Ok(json_ok_response(&health)?) 
} @@ -120,6 +135,19 @@ fn get_cluster_layout(garage: &Arc<Garage>) -> GetClusterLayoutResponse { // ---- +#[derive(Debug, Clone, Copy, Serialize)] +#[serde(rename_all = "camelCase")] +pub struct ClusterHealth { + pub status: &'static str, + pub known_nodes: usize, + pub connected_nodes: usize, + pub storage_nodes: usize, + pub storage_nodes_ok: usize, + pub partitions: usize, + pub partitions_quorum: usize, + pub partitions_all_ok: usize, +} + #[derive(Serialize)] #[serde(rename_all = "camelCase")] struct GetClusterStatusResponse { diff --git a/src/api/admin/router.rs b/src/api/admin/router.rs index 5af3ffb5..b98db284 100644 --- a/src/api/admin/router.rs +++ b/src/api/admin/router.rs @@ -96,7 +96,7 @@ impl Endpoint { GET "/health" => Health, GET "/metrics" => Metrics, GET "/v1/status" => GetClusterStatus, - GET "/v0/health" => GetClusterHealth, + GET "/v1/health" => GetClusterHealth, POST "/v0/connect" => ConnectClusterNodes, // Layout endpoints GET "/v1/layout" => GetClusterLayout, diff --git a/src/rpc/system.rs b/src/rpc/system.rs index c549d8fc..1675e70e 100644 --- a/src/rpc/system.rs +++ b/src/rpc/system.rs @@ -151,7 +151,7 @@ pub struct KnownNodeInfo { pub status: NodeStatus, } -#[derive(Debug, Clone, Copy, Serialize, Deserialize)] +#[derive(Debug, Clone, Copy)] pub struct ClusterHealth { /// The current health status of the cluster (see below) pub status: ClusterHealthStatus, @@ -171,7 +171,7 @@ pub struct ClusterHealth { pub partitions_all_ok: usize, } -#[derive(Debug, Clone, Copy, Serialize, Deserialize)] +#[derive(Debug, Clone, Copy)] pub enum ClusterHealthStatus { /// All nodes are available Healthy,