admin api: switch GetClusterHealth to camelcase (fix #381 again)

2023-06-14 13:53:19 +02:00 · 2023-06-14 13:53:19 +02:00 · 35c108b85d
commit 35c108b85d
parent 52376d47ca
4 changed files with 51 additions and 23 deletions
--- a/doc/drafts/admin-api.md
+++ b/doc/drafts/admin-api.md
@ -52,7 +52,7 @@ Returns an HTTP status 200 if the node is ready to answer user's requests,
 and an HTTP status 503 (Service Unavailable) if there are some partitions
 for which a quorum of nodes is not available.
 A simple textual message is also returned in a body with content-type `text/plain`.
-See `/v0/health` for an API that also returns JSON output.
+See `/v1/health` for an API that also returns JSON output.

 ### Cluster operations

@ -161,21 +161,21 @@ Example response body:
 }
 ```

-#### GetClusterHealth `GET /v0/health`
+#### GetClusterHealth `GET /v1/health`

 Returns the cluster's current health in JSON format, with the following variables:

- `status`: one of `Healthy`, `Degraded` or `Unavailable`:
-  - Healthy: Garage node is connected to all storage nodes
-  - Degraded: Garage node is not connected to all storage nodes, but a quorum of write nodes is available for all partitions
-  - Unavailable: a quorum of write nodes is not available for some partitions
- `known_nodes`: the number of nodes this Garage node has had a TCP connection to since the daemon started
- `connected_nodes`: the nubmer of nodes this Garage node currently has an open connection to
- `storage_nodes`: the number of storage nodes currently registered in the cluster layout
- `storage_nodes_ok`: the number of storage nodes to which a connection is currently open
+- `status`: one of `healthy`, `degraded` or `unavailable`:
+  - healthy: Garage node is connected to all storage nodes
+  - degraded: Garage node is not connected to all storage nodes, but a quorum of write nodes is available for all partitions
+  - unavailable: a quorum of write nodes is not available for some partitions
+- `knownNodes`: the number of nodes this Garage node has had a TCP connection to since the daemon started
+- `connectedNodes`: the number of nodes this Garage node currently has an open connection to
+- `storageNodes`: the number of storage nodes currently registered in the cluster layout
+- `storageNodesOk`: the number of storage nodes to which a connection is currently open
 - `partitions`: the total number of partitions of the data (currently always 256)
- `partitions_quorum`: the number of partitions for which a quorum of write nodes is available
- `partitions_all_ok`: the number of partitions for which we are connected to all storage nodes responsible of storing it
+- `partitionsQuorum`: the number of partitions for which a quorum of write nodes is available
+- `partitionsAllOk`: the number of partitions for which we are connected to all storage nodes responsible for storing them

 Contrarily to `GET /health`, this endpoint always returns a 200 OK HTTP response code.

@ -183,14 +183,14 @@ Example response body:

 ```json
 {
-    "status": "Degraded",
-    "known_nodes": 3,
-    "connected_nodes": 2,
-    "storage_nodes": 3,
-    "storage_nodes_ok": 2,
-    "partitions": 256,
-    "partitions_quorum": 256,
-    "partitions_all_ok": 0
+  "status": "degraded",
+  "knownNodes": 3,
+  "connectedNodes": 3,
+  "storageNodes": 4,
+  "storageNodesOk": 3,
+  "partitions": 256,
+  "partitionsQuorum": 256,
+  "partitionsAllOk": 64
 }
 ```

--- a/src/api/admin/cluster.rs
+++ b/src/api/admin/cluster.rs
@ -40,7 +40,22 @@ pub async fn handle_get_cluster_status(garage: &Arc<Garage>) -> Result<Response<
 }

 pub async fn handle_get_cluster_health(garage: &Arc<Garage>) -> Result<Response<Body>, Error> {
+	use garage_rpc::system::ClusterHealthStatus;
 	let health = garage.system.health();
+	let health = ClusterHealth {
+		status: match health.status {
+			ClusterHealthStatus::Healthy => "healthy",
+			ClusterHealthStatus::Degraded => "degraded",
+			ClusterHealthStatus::Unavailable => "unavailable",
+		},
+		known_nodes: health.known_nodes,
+		connected_nodes: health.connected_nodes,
+		storage_nodes: health.storage_nodes,
+		storage_nodes_ok: health.storage_nodes_ok,
+		partitions: health.partitions,
+		partitions_quorum: health.partitions_quorum,
+		partitions_all_ok: health.partitions_all_ok,
+	};
 	Ok(json_ok_response(&health)?)
 }

@ -120,6 +135,19 @@ fn get_cluster_layout(garage: &Arc<Garage>) -> GetClusterLayoutResponse {

 // ----

+#[derive(Debug, Clone, Copy, Serialize)]
+#[serde(rename_all = "camelCase")]
+pub struct ClusterHealth {
+	pub status: &'static str,
+	pub known_nodes: usize,
+	pub connected_nodes: usize,
+	pub storage_nodes: usize,
+	pub storage_nodes_ok: usize,
+	pub partitions: usize,
+	pub partitions_quorum: usize,
+	pub partitions_all_ok: usize,
+}
+
 #[derive(Serialize)]
 #[serde(rename_all = "camelCase")]
 struct GetClusterStatusResponse {
--- a/src/api/admin/router.rs
+++ b/src/api/admin/router.rs
@ -96,7 +96,7 @@ impl Endpoint {
 			GET "/health" => Health,
 			GET "/metrics" => Metrics,
 			GET "/v1/status" => GetClusterStatus,
-			GET "/v0/health" => GetClusterHealth,
+			GET "/v1/health" => GetClusterHealth,
 			POST "/v0/connect" => ConnectClusterNodes,
 			// Layout endpoints
 			GET "/v1/layout" => GetClusterLayout,
--- a/src/rpc/system.rs
+++ b/src/rpc/system.rs
@ -151,7 +151,7 @@ pub struct KnownNodeInfo {
 	pub status: NodeStatus,
 }

-#[derive(Debug, Clone, Copy, Serialize, Deserialize)]
+#[derive(Debug, Clone, Copy)]
 pub struct ClusterHealth {
 	/// The current health status of the cluster (see below)
 	pub status: ClusterHealthStatus,
@ -171,7 +171,7 @@ pub struct ClusterHealth {
 	pub partitions_all_ok: usize,
 }

-#[derive(Debug, Clone, Copy, Serialize, Deserialize)]
+#[derive(Debug, Clone, Copy)]
 pub enum ClusterHealthStatus {
 	/// All nodes are available
 	Healthy,