Garage v0.9 #473
4 changed files with 51 additions and 23 deletions
|
@ -52,7 +52,7 @@ Returns an HTTP status 200 if the node is ready to answer user's requests,
|
||||||
and an HTTP status 503 (Service Unavailable) if there are some partitions
|
and an HTTP status 503 (Service Unavailable) if there are some partitions
|
||||||
for which a quorum of nodes is not available.
|
for which a quorum of nodes is not available.
|
||||||
A simple textual message is also returned in a body with content-type `text/plain`.
|
A simple textual message is also returned in a body with content-type `text/plain`.
|
||||||
See `/v0/health` for an API that also returns JSON output.
|
See `/v1/health` for an API that also returns JSON output.
|
||||||
|
|
||||||
### Cluster operations
|
### Cluster operations
|
||||||
|
|
||||||
|
@ -161,21 +161,21 @@ Example response body:
|
||||||
}
|
}
|
||||||
```
|
```
|
||||||
|
|
||||||
#### GetClusterHealth `GET /v0/health`
|
#### GetClusterHealth `GET /v1/health`
|
||||||
|
|
||||||
Returns the cluster's current health in JSON format, with the following variables:
|
Returns the cluster's current health in JSON format, with the following variables:
|
||||||
|
|
||||||
- `status`: one of `Healthy`, `Degraded` or `Unavailable`:
|
- `status`: one of `healthy`, `degraded` or `unavailable`:
|
||||||
- Healthy: Garage node is connected to all storage nodes
|
- healthy: Garage node is connected to all storage nodes
|
||||||
- Degraded: Garage node is not connected to all storage nodes, but a quorum of write nodes is available for all partitions
|
- degraded: Garage node is not connected to all storage nodes, but a quorum of write nodes is available for all partitions
|
||||||
- Unavailable: a quorum of write nodes is not available for some partitions
|
- unavailable: a quorum of write nodes is not available for some partitions
|
||||||
- `known_nodes`: the number of nodes this Garage node has had a TCP connection to since the daemon started
|
- `knownNodes`: the number of nodes this Garage node has had a TCP connection to since the daemon started
|
||||||
- `connected_nodes`: the number of nodes this Garage node currently has an open connection to
|
- `connectedNodes`: the number of nodes this Garage node currently has an open connection to
|
||||||
- `storage_nodes`: the number of storage nodes currently registered in the cluster layout
|
- `storageNodes`: the number of storage nodes currently registered in the cluster layout
|
||||||
- `storage_nodes_ok`: the number of storage nodes to which a connection is currently open
|
- `storageNodesOk`: the number of storage nodes to which a connection is currently open
|
||||||
- `partitions`: the total number of partitions of the data (currently always 256)
|
- `partitions`: the total number of partitions of the data (currently always 256)
|
||||||
- `partitions_quorum`: the number of partitions for which a quorum of write nodes is available
|
- `partitionsQuorum`: the number of partitions for which a quorum of write nodes is available
|
||||||
- `partitions_all_ok`: the number of partitions for which we are connected to all storage nodes responsible for storing it
|
- `partitionsAllOk`: the number of partitions for which we are connected to all storage nodes responsible for storing it
|
||||||
|
|
||||||
Contrary to `GET /health`, this endpoint always returns a 200 OK HTTP response code.
|
Contrary to `GET /health`, this endpoint always returns a 200 OK HTTP response code.
|
||||||
|
|
||||||
|
@ -183,14 +183,14 @@ Example response body:
|
||||||
|
|
||||||
```json
|
```json
|
||||||
{
|
{
|
||||||
"status": "Degraded",
|
"status": "degraded",
|
||||||
"known_nodes": 3,
|
"knownNodes": 3,
|
||||||
"connected_nodes": 2,
|
"connectedNodes": 3,
|
||||||
"storage_nodes": 3,
|
"storageNodes": 4,
|
||||||
"storage_nodes_ok": 2,
|
"storageNodesOk": 3,
|
||||||
"partitions": 256,
|
"partitions": 256,
|
||||||
"partitions_quorum": 256,
|
"partitionsQuorum": 256,
|
||||||
"partitions_all_ok": 0
|
"partitionsAllOk": 64
|
||||||
}
|
}
|
||||||
```
|
```
|
||||||
|
|
||||||
|
|
|
@ -40,7 +40,22 @@ pub async fn handle_get_cluster_status(garage: &Arc<Garage>) -> Result<Response<
|
||||||
}
|
}
|
||||||
|
|
||||||
pub async fn handle_get_cluster_health(garage: &Arc<Garage>) -> Result<Response<Body>, Error> {
|
pub async fn handle_get_cluster_health(garage: &Arc<Garage>) -> Result<Response<Body>, Error> {
|
||||||
|
use garage_rpc::system::ClusterHealthStatus;
|
||||||
let health = garage.system.health();
|
let health = garage.system.health();
|
||||||
|
let health = ClusterHealth {
|
||||||
|
status: match health.status {
|
||||||
|
ClusterHealthStatus::Healthy => "healthy",
|
||||||
|
ClusterHealthStatus::Degraded => "degraded",
|
||||||
|
ClusterHealthStatus::Unavailable => "unavailable",
|
||||||
|
},
|
||||||
|
known_nodes: health.known_nodes,
|
||||||
|
connected_nodes: health.connected_nodes,
|
||||||
|
storage_nodes: health.storage_nodes,
|
||||||
|
storage_nodes_ok: health.storage_nodes_ok,
|
||||||
|
partitions: health.partitions,
|
||||||
|
partitions_quorum: health.partitions_quorum,
|
||||||
|
partitions_all_ok: health.partitions_all_ok,
|
||||||
|
};
|
||||||
Ok(json_ok_response(&health)?)
|
Ok(json_ok_response(&health)?)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -120,6 +135,19 @@ fn get_cluster_layout(garage: &Arc<Garage>) -> GetClusterLayoutResponse {
|
||||||
|
|
||||||
// ----
|
// ----
|
||||||
|
|
||||||
|
#[derive(Debug, Clone, Copy, Serialize)]
|
||||||
|
#[serde(rename_all = "camelCase")]
|
||||||
|
pub struct ClusterHealth {
|
||||||
|
pub status: &'static str,
|
||||||
|
pub known_nodes: usize,
|
||||||
|
pub connected_nodes: usize,
|
||||||
|
pub storage_nodes: usize,
|
||||||
|
pub storage_nodes_ok: usize,
|
||||||
|
pub partitions: usize,
|
||||||
|
pub partitions_quorum: usize,
|
||||||
|
pub partitions_all_ok: usize,
|
||||||
|
}
|
||||||
|
|
||||||
#[derive(Serialize)]
|
#[derive(Serialize)]
|
||||||
#[serde(rename_all = "camelCase")]
|
#[serde(rename_all = "camelCase")]
|
||||||
struct GetClusterStatusResponse {
|
struct GetClusterStatusResponse {
|
||||||
|
|
|
@ -96,7 +96,7 @@ impl Endpoint {
|
||||||
GET "/health" => Health,
|
GET "/health" => Health,
|
||||||
GET "/metrics" => Metrics,
|
GET "/metrics" => Metrics,
|
||||||
GET "/v1/status" => GetClusterStatus,
|
GET "/v1/status" => GetClusterStatus,
|
||||||
GET "/v0/health" => GetClusterHealth,
|
GET "/v1/health" => GetClusterHealth,
|
||||||
POST "/v0/connect" => ConnectClusterNodes,
|
POST "/v0/connect" => ConnectClusterNodes,
|
||||||
// Layout endpoints
|
// Layout endpoints
|
||||||
GET "/v1/layout" => GetClusterLayout,
|
GET "/v1/layout" => GetClusterLayout,
|
||||||
|
|
|
@ -151,7 +151,7 @@ pub struct KnownNodeInfo {
|
||||||
pub status: NodeStatus,
|
pub status: NodeStatus,
|
||||||
}
|
}
|
||||||
|
|
||||||
#[derive(Debug, Clone, Copy, Serialize, Deserialize)]
|
#[derive(Debug, Clone, Copy)]
|
||||||
pub struct ClusterHealth {
|
pub struct ClusterHealth {
|
||||||
/// The current health status of the cluster (see below)
|
/// The current health status of the cluster (see below)
|
||||||
pub status: ClusterHealthStatus,
|
pub status: ClusterHealthStatus,
|
||||||
|
@ -171,7 +171,7 @@ pub struct ClusterHealth {
|
||||||
pub partitions_all_ok: usize,
|
pub partitions_all_ok: usize,
|
||||||
}
|
}
|
||||||
|
|
||||||
#[derive(Debug, Clone, Copy, Serialize, Deserialize)]
|
#[derive(Debug, Clone, Copy)]
|
||||||
pub enum ClusterHealthStatus {
|
pub enum ClusterHealthStatus {
|
||||||
/// All nodes are available
|
/// All nodes are available
|
||||||
Healthy,
|
Healthy,
|
||||||
|
|
Loading…
Reference in a new issue