Merge pull request 'Implement /health admin API endpoint to check node health' (#440) from admin-health-api into main
All checks were successful
continuous-integration/drone/push Build is passing

Reviewed-on: #440
This commit is contained in:
Alex 2022-12-11 17:25:28 +00:00
commit defd7d9e63
9 changed files with 833 additions and 10 deletions

686
doc/drafts/admin-api.md Normal file
View file

@ -0,0 +1,686 @@
+++
title = "Administration API"
weight = 60
+++
The Garage administration API is accessible through a dedicated server whose
listen address is specified in the `[admin]` section of the configuration
file (see [configuration file
reference](@/documentation/reference-manual/configuration.md))
**WARNING.** At this point, there is no comittement to stability of the APIs described in this document.
We will bump the version numbers prefixed to each API endpoint at each time the syntax
or semantics change, meaning that code that relies on these endpoint will break
when changes are introduced.
The Garage administration API was introduced in version 0.7.2, this document
does not apply to older versions of Garage.
## Access control
The admin API uses two different tokens for acces control, that are specified in the config file's `[admin]` section:
- `metrics_token`: the token for accessing the Metrics endpoint (if this token
is not set in the config file, the Metrics endpoint can be accessed without
access control);
- `admin_token`: the token for accessing all of the other administration
endpoints (if this token is not set in the config file, access to these
endpoints is disabled entirely).
These tokens are used as simple HTTP bearer tokens. In other words, to
authenticate access to an admin API endpoint, add the following HTTP header
to your request:
```
Authorization: Bearer <token>
```
## Administration API endpoints
### Metrics-related endpoints
#### Metrics `GET /metrics`
Returns internal Garage metrics in Prometheus format.
#### Health `GET /health`
Used for simple health checks in a cluster setting with an orchestrator.
Returns an HTTP status 200 if the node is ready to answer user's requests,
and an HTTP status 503 (Service Unavailable) if there are some partitions
for which a quorum of nodes is not available.
A simple textual message is also returned in a body with content-type `text/plain`.
See `/v0/health` for an API that also returns JSON output.
### Cluster operations
#### GetClusterStatus `GET /v0/status`
Returns the cluster's current status in JSON, including:
- ID of the node being queried and its version of the Garage daemon
- Live nodes
- Currently configured cluster layout
- Staged changes to the cluster layout
Example response body:
```json
{
"node": "ec79480e0ce52ae26fd00c9da684e4fa56658d9c64cdcecb094e936de0bfe71f",
"garage_version": "git:v0.8.0",
"knownNodes": {
"ec79480e0ce52ae26fd00c9da684e4fa56658d9c64cdcecb094e936de0bfe71f": {
"addr": "10.0.0.11:3901",
"is_up": true,
"last_seen_secs_ago": 9,
"hostname": "node1"
},
"4a6ae5a1d0d33bf895f5bb4f0a418b7dc94c47c0dd2eb108d1158f3c8f60b0ff": {
"addr": "10.0.0.12:3901",
"is_up": true,
"last_seen_secs_ago": 1,
"hostname": "node2"
},
"23ffd0cdd375ebff573b20cc5cef38996b51c1a7d6dbcf2c6e619876e507cf27": {
"addr": "10.0.0.21:3901",
"is_up": true,
"last_seen_secs_ago": 7,
"hostname": "node3"
},
"e2ee7984ee65b260682086ec70026165903c86e601a4a5a501c1900afe28d84b": {
"addr": "10.0.0.22:3901",
"is_up": true,
"last_seen_secs_ago": 1,
"hostname": "node4"
}
},
"layout": {
"version": 12,
"roles": {
"ec79480e0ce52ae26fd00c9da684e4fa56658d9c64cdcecb094e936de0bfe71f": {
"zone": "dc1",
"capacity": 4,
"tags": [
"node1"
]
},
"4a6ae5a1d0d33bf895f5bb4f0a418b7dc94c47c0dd2eb108d1158f3c8f60b0ff": {
"zone": "dc1",
"capacity": 6,
"tags": [
"node2"
]
},
"23ffd0cdd375ebff573b20cc5cef38996b51c1a7d6dbcf2c6e619876e507cf27": {
"zone": "dc2",
"capacity": 10,
"tags": [
"node3"
]
}
},
"stagedRoleChanges": {
"e2ee7984ee65b260682086ec70026165903c86e601a4a5a501c1900afe28d84b": {
"zone": "dc2",
"capacity": 5,
"tags": [
"node4"
]
}
}
}
}
```
#### GetClusterHealth `GET /v0/health`
Returns the cluster's current health in JSON format, with the following variables:
- `status`: one of `Healthy`, `Degraded` or `Unavailable`:
- Healthy: Garage node is connected to all storage nodes
- Degraded: Garage node is not connected to all storage nodes, but a quorum of write nodes is available for all partitions
- Unavailable: a quorum of write nodes is not available for some partitions
- `known_nodes`: the number of nodes this Garage node has had a TCP connection to since the daemon started
- `connected_nodes`: the nubmer of nodes this Garage node currently has an open connection to
- `storage_nodes`: the number of storage nodes currently registered in the cluster layout
- `storage_nodes_ok`: the number of storage nodes to which a connection is currently open
- `partitions`: the total number of partitions of the data (currently always 256)
- `partitions_quorum`: the number of partitions for which a quorum of write nodes is available
- `partitions_all_ok`: the number of partitions for which we are connected to all storage nodes responsible of storing it
Contrarily to `GET /health`, this endpoint always returns a 200 OK HTTP response code.
Example response body:
```json
{
"status": "Degraded",
"known_nodes": 3,
"connected_nodes": 2,
"storage_nodes": 3,
"storage_nodes_ok": 2,
"partitions": 256,
"partitions_quorum": 256,
"partitions_all_ok": 0
}
```
#### ConnectClusterNodes `POST /v0/connect`
Instructs this Garage node to connect to other Garage nodes at specified addresses.
Example request body:
```json
[
"ec79480e0ce52ae26fd00c9da684e4fa56658d9c64cdcecb094e936de0bfe71f@10.0.0.11:3901",
"4a6ae5a1d0d33bf895f5bb4f0a418b7dc94c47c0dd2eb108d1158f3c8f60b0ff@10.0.0.12:3901"
]
```
The format of the string for a node to connect to is: `<node ID>@<ip address>:<port>`, same as in the `garage node connect` CLI call.
Example response:
```json
[
{
"success": true,
"error": null
},
{
"success": false,
"error": "Handshake error"
}
]
```
#### GetClusterLayout `GET /v0/layout`
Returns the cluster's current layout in JSON, including:
- Currently configured cluster layout
- Staged changes to the cluster layout
(the info returned by this endpoint is a subset of the info returned by GetClusterStatus)
Example response body:
```json
{
"version": 12,
"roles": {
"ec79480e0ce52ae26fd00c9da684e4fa56658d9c64cdcecb094e936de0bfe71f": {
"zone": "dc1",
"capacity": 4,
"tags": [
"node1"
]
},
"4a6ae5a1d0d33bf895f5bb4f0a418b7dc94c47c0dd2eb108d1158f3c8f60b0ff": {
"zone": "dc1",
"capacity": 6,
"tags": [
"node2"
]
},
"23ffd0cdd375ebff573b20cc5cef38996b51c1a7d6dbcf2c6e619876e507cf27": {
"zone": "dc2",
"capacity": 10,
"tags": [
"node3"
]
}
},
"stagedRoleChanges": {
"e2ee7984ee65b260682086ec70026165903c86e601a4a5a501c1900afe28d84b": {
"zone": "dc2",
"capacity": 5,
"tags": [
"node4"
]
}
}
}
```
#### UpdateClusterLayout `POST /v0/layout`
Send modifications to the cluster layout. These modifications will
be included in the staged role changes, visible in subsequent calls
of `GetClusterLayout`. Once the set of staged changes is satisfactory,
the user may call `ApplyClusterLayout` to apply the changed changes,
or `Revert ClusterLayout` to clear all of the staged changes in
the layout.
Request body format:
```json
{
<node_id>: {
"capacity": <new_capacity>,
"zone": <new_zone>,
"tags": [
<new_tag>,
...
]
},
<node_id_to_remove>: null,
...
}
```
Contrary to the CLI that may update only a subset of the fields
`capacity`, `zone` and `tags`, when calling this API all of these
values must be specified.
#### ApplyClusterLayout `POST /v0/layout/apply`
Applies to the cluster the layout changes currently registered as
staged layout changes.
Request body format:
```json
{
"version": 13
}
```
Similarly to the CLI, the body must include the version of the new layout
that will be created, which MUST be 1 + the value of the currently
existing layout in the cluster.
#### RevertClusterLayout `POST /v0/layout/revert`
Clears all of the staged layout changes.
Request body format:
```json
{
"version": 13
}
```
Reverting the staged changes is done by incrementing the version number
and clearing the contents of the staged change list.
Similarly to the CLI, the body must include the incremented
version number, which MUST be 1 + the value of the currently
existing layout in the cluster.
### Access key operations
#### ListKeys `GET /v0/key`
Returns all API access keys in the cluster.
Example response:
```json
[
{
"id": "GK31c2f218a2e44f485b94239e",
"name": "test"
},
{
"id": "GKe10061ac9c2921f09e4c5540",
"name": "test2"
}
]
```
#### CreateKey `POST /v0/key`
Creates a new API access key.
Request body format:
```json
{
"name": "NameOfMyKey"
}
```
#### ImportKey `POST /v0/key/import`
Imports an existing API key.
Request body format:
```json
{
"accessKeyId": "GK31c2f218a2e44f485b94239e",
"secretAccessKey": "b892c0665f0ada8a4755dae98baa3b133590e11dae3bcc1f9d769d67f16c3835",
"name": "NameOfMyKey"
}
```
#### GetKeyInfo `GET /v0/key?id=<acces key id>`
#### GetKeyInfo `GET /v0/key?search=<pattern>`
Returns information about the requested API access key.
If `id` is set, the key is looked up using its exact identifier (faster).
If `search` is set, the key is looked up using its name or prefix
of identifier (slower, all keys are enumerated to do this).
Example response:
```json
{
"name": "test",
"accessKeyId": "GK31c2f218a2e44f485b94239e",
"secretAccessKey": "b892c0665f0ada8a4755dae98baa3b133590e11dae3bcc1f9d769d67f16c3835",
"permissions": {
"createBucket": false
},
"buckets": [
{
"id": "70dc3bed7fe83a75e46b66e7ddef7d56e65f3c02f9f80b6749fb97eccb5e1033",
"globalAliases": [
"test2"
],
"localAliases": [],
"permissions": {
"read": true,
"write": true,
"owner": false
}
},
{
"id": "d7452a935e663fc1914f3a5515163a6d3724010ce8dfd9e4743ca8be5974f995",
"globalAliases": [
"test3"
],
"localAliases": [],
"permissions": {
"read": true,
"write": true,
"owner": false
}
},
{
"id": "e6a14cd6a27f48684579ec6b381c078ab11697e6bc8513b72b2f5307e25fff9b",
"globalAliases": [],
"localAliases": [
"test"
],
"permissions": {
"read": true,
"write": true,
"owner": true
}
},
{
"id": "96470e0df00ec28807138daf01915cfda2bee8eccc91dea9558c0b4855b5bf95",
"globalAliases": [
"alex"
],
"localAliases": [],
"permissions": {
"read": true,
"write": true,
"owner": true
}
}
]
}
```
#### DeleteKey `DELETE /v0/key?id=<acces key id>`
Deletes an API access key.
#### UpdateKey `POST /v0/key?id=<acces key id>`
Updates information about the specified API access key.
Request body format:
```json
{
"name": "NameOfMyKey",
"allow": {
"createBucket": true,
},
"deny": {}
}
```
All fields (`name`, `allow` and `deny`) are optionnal.
If they are present, the corresponding modifications are applied to the key, otherwise nothing is changed.
The possible flags in `allow` and `deny` are: `createBucket`.
### Bucket operations
#### ListBuckets `GET /v0/bucket`
Returns all storage buckets in the cluster.
Example response:
```json
[
{
"id": "70dc3bed7fe83a75e46b66e7ddef7d56e65f3c02f9f80b6749fb97eccb5e1033",
"globalAliases": [
"test2"
],
"localAliases": []
},
{
"id": "96470e0df00ec28807138daf01915cfda2bee8eccc91dea9558c0b4855b5bf95",
"globalAliases": [
"alex"
],
"localAliases": []
},
{
"id": "d7452a935e663fc1914f3a5515163a6d3724010ce8dfd9e4743ca8be5974f995",
"globalAliases": [
"test3"
],
"localAliases": []
},
{
"id": "e6a14cd6a27f48684579ec6b381c078ab11697e6bc8513b72b2f5307e25fff9b",
"globalAliases": [],
"localAliases": [
{
"accessKeyId": "GK31c2f218a2e44f485b94239e",
"alias": "test"
}
]
}
]
```
#### GetBucketInfo `GET /v0/bucket?id=<bucket id>`
#### GetBucketInfo `GET /v0/bucket?globalAlias=<alias>`
Returns information about the requested storage bucket.
If `id` is set, the bucket is looked up using its exact identifier.
If `globalAlias` is set, the bucket is looked up using its global alias.
(both are fast)
Example response:
```json
{
"id": "afa8f0a22b40b1247ccd0affb869b0af5cff980924a20e4b5e0720a44deb8d39",
"globalAliases": [],
"websiteAccess": false,
"websiteConfig": null,
"keys": [
{
"accessKeyId": "GK31c2f218a2e44f485b94239e",
"name": "Imported key",
"permissions": {
"read": true,
"write": true,
"owner": true
},
"bucketLocalAliases": [
"debug"
]
}
],
"objects": 14827,
"bytes": 13189855625,
"unfinshedUploads": 0,
"quotas": {
"maxSize": null,
"maxObjects": null
}
}
```
#### CreateBucket `POST /v0/bucket`
Creates a new storage bucket.
Request body format:
```json
{
"globalAlias": "NameOfMyBucket"
}
```
OR
```json
{
"localAlias": {
"accessKeyId": "GK31c2f218a2e44f485b94239e",
"alias": "NameOfMyBucket",
"allow": {
"read": true,
"write": true,
"owner": false
}
}
}
```
OR
```json
{}
```
Creates a new bucket, either with a global alias, a local one,
or no alias at all.
Technically, you can also specify both `globalAlias` and `localAlias` and that would create
two aliases, but I don't see why you would want to do that.
#### DeleteBucket `DELETE /v0/bucket?id=<bucket id>`
Deletes a storage bucket. A bucket cannot be deleted if it is not empty.
Warning: this will delete all aliases associated with the bucket!
#### UpdateBucket `PUT /v0/bucket?id=<bucket id>`
Updates configuration of the given bucket.
Request body format:
```json
{
"websiteAccess": {
"enabled": true,
"indexDocument": "index.html",
"errorDocument": "404.html"
},
"quotas": {
"maxSize": 19029801,
"maxObjects": null,
}
}
```
All fields (`websiteAccess` and `quotas`) are optionnal.
If they are present, the corresponding modifications are applied to the bucket, otherwise nothing is changed.
In `websiteAccess`: if `enabled` is `true`, `indexDocument` must be specified.
The field `errorDocument` is optional, if no error document is set a generic
error message is displayed when errors happen. Conversely, if `enabled` is
`false`, neither `indexDocument` nor `errorDocument` must be specified.
In `quotas`: new values of `maxSize` and `maxObjects` must both be specified, or set to `null`
to remove the quotas. An absent value will be considered the same as a `null`. It is not possible
to change only one of the two quotas.
### Operations on permissions for keys on buckets
#### BucketAllowKey `POST /v0/bucket/allow`
Allows a key to do read/write/owner operations on a bucket.
Request body format:
```json
{
"bucketId": "e6a14cd6a27f48684579ec6b381c078ab11697e6bc8513b72b2f5307e25fff9b",
"accessKeyId": "GK31c2f218a2e44f485b94239e",
"permissions": {
"read": true,
"write": true,
"owner": true
},
}
```
Flags in `permissions` which have the value `true` will be activated.
Other flags will remain unchanged.
#### BucketDenyKey `POST /v0/bucket/deny`
Denies a key from doing read/write/owner operations on a bucket.
Request body format:
```json
{
"bucketId": "e6a14cd6a27f48684579ec6b381c078ab11697e6bc8513b72b2f5307e25fff9b",
"accessKeyId": "GK31c2f218a2e44f485b94239e",
"permissions": {
"read": false,
"write": false,
"owner": true
},
}
```
Flags in `permissions` which have the value `true` will be deactivated.
Other flags will remain unchanged.
### Operations on bucket aliases
#### GlobalAliasBucket `PUT /v0/bucket/alias/global?id=<bucket id>&alias=<global alias>`
Empty body. Creates a global alias for a bucket.
#### GlobalUnaliasBucket `DELETE /v0/bucket/alias/global?id=<bucket id>&alias=<global alias>`
Removes a global alias for a bucket.
#### LocalAliasBucket `PUT /v0/bucket/alias/local?id=<bucket id>&accessKeyId=<access key ID>&alias=<local alias>`
Empty body. Creates a local alias for a bucket in the namespace of a specific access key.
#### LocalUnaliasBucket `DELETE /v0/bucket/alias/local?id=<bucket id>&accessKeyId<access key ID>&alias=<local alias>`
Removes a local alias for a bucket in the namespace of a specific access key.

View file

@ -15,6 +15,7 @@ use opentelemetry_prometheus::PrometheusExporter;
use prometheus::{Encoder, TextEncoder}; use prometheus::{Encoder, TextEncoder};
use garage_model::garage::Garage; use garage_model::garage::Garage;
use garage_rpc::system::ClusterHealthStatus;
use garage_util::error::Error as GarageError; use garage_util::error::Error as GarageError;
use crate::generic_server::*; use crate::generic_server::*;
@ -76,6 +77,31 @@ impl AdminApiServer {
.body(Body::empty())?) .body(Body::empty())?)
} }
fn handle_health(&self) -> Result<Response<Body>, Error> {
let health = self.garage.system.health();
let (status, status_str) = match health.status {
ClusterHealthStatus::Healthy => (StatusCode::OK, "Garage is fully operational"),
ClusterHealthStatus::Degraded => (
StatusCode::OK,
"Garage is operational but some storage nodes are unavailable",
),
ClusterHealthStatus::Unavailable => (
StatusCode::SERVICE_UNAVAILABLE,
"Quorum is not available for some/all partitions, reads and writes will fail",
),
};
let status_str = format!(
"{}\nConsult the full health check API endpoint at /v0/health for more details\n",
status_str
);
Ok(Response::builder()
.status(status)
.header(http::header::CONTENT_TYPE, "text/plain")
.body(Body::from(status_str))?)
}
fn handle_metrics(&self) -> Result<Response<Body>, Error> { fn handle_metrics(&self) -> Result<Response<Body>, Error> {
#[cfg(feature = "metrics")] #[cfg(feature = "metrics")]
{ {
@ -124,6 +150,7 @@ impl ApiHandler for AdminApiServer {
) -> Result<Response<Body>, Error> { ) -> Result<Response<Body>, Error> {
let expected_auth_header = let expected_auth_header =
match endpoint.authorization_type() { match endpoint.authorization_type() {
Authorization::None => None,
Authorization::MetricsToken => self.metrics_token.as_ref(), Authorization::MetricsToken => self.metrics_token.as_ref(),
Authorization::AdminToken => match &self.admin_token { Authorization::AdminToken => match &self.admin_token {
None => return Err(Error::forbidden( None => return Err(Error::forbidden(
@ -147,8 +174,10 @@ impl ApiHandler for AdminApiServer {
match endpoint { match endpoint {
Endpoint::Options => self.handle_options(&req), Endpoint::Options => self.handle_options(&req),
Endpoint::Health => self.handle_health(),
Endpoint::Metrics => self.handle_metrics(), Endpoint::Metrics => self.handle_metrics(),
Endpoint::GetClusterStatus => handle_get_cluster_status(&self.garage).await, Endpoint::GetClusterStatus => handle_get_cluster_status(&self.garage).await,
Endpoint::GetClusterHealth => handle_get_cluster_health(&self.garage).await,
Endpoint::ConnectClusterNodes => handle_connect_cluster_nodes(&self.garage, req).await, Endpoint::ConnectClusterNodes => handle_connect_cluster_nodes(&self.garage, req).await,
// Layout // Layout
Endpoint::GetClusterLayout => handle_get_cluster_layout(&self.garage).await, Endpoint::GetClusterLayout => handle_get_cluster_layout(&self.garage).await,

View file

@ -43,6 +43,11 @@ pub async fn handle_get_cluster_status(garage: &Arc<Garage>) -> Result<Response<
Ok(json_ok_response(&res)?) Ok(json_ok_response(&res)?)
} }
pub async fn handle_get_cluster_health(garage: &Arc<Garage>) -> Result<Response<Body>, Error> {
let health = garage.system.health();
Ok(json_ok_response(&health)?)
}
pub async fn handle_connect_cluster_nodes( pub async fn handle_connect_cluster_nodes(
garage: &Arc<Garage>, garage: &Arc<Garage>,
req: Request<Body>, req: Request<Body>,

View file

@ -6,6 +6,7 @@ use crate::admin::error::*;
use crate::router_macros::*; use crate::router_macros::*;
pub enum Authorization { pub enum Authorization {
None,
MetricsToken, MetricsToken,
AdminToken, AdminToken,
} }
@ -16,8 +17,10 @@ router_match! {@func
#[derive(Debug, Clone, PartialEq, Eq)] #[derive(Debug, Clone, PartialEq, Eq)]
pub enum Endpoint { pub enum Endpoint {
Options, Options,
Health,
Metrics, Metrics,
GetClusterStatus, GetClusterStatus,
GetClusterHealth,
ConnectClusterNodes, ConnectClusterNodes,
// Layout // Layout
GetClusterLayout, GetClusterLayout,
@ -88,8 +91,10 @@ impl Endpoint {
let res = router_match!(@gen_path_parser (req.method(), path, query) [ let res = router_match!(@gen_path_parser (req.method(), path, query) [
OPTIONS _ => Options, OPTIONS _ => Options,
GET "/health" => Health,
GET "/metrics" => Metrics, GET "/metrics" => Metrics,
GET "/v0/status" => GetClusterStatus, GET "/v0/status" => GetClusterStatus,
GET "/v0/health" => GetClusterHealth,
POST "/v0/connect" => ConnectClusterNodes, POST "/v0/connect" => ConnectClusterNodes,
// Layout endpoints // Layout endpoints
GET "/v0/layout" => GetClusterLayout, GET "/v0/layout" => GetClusterLayout,
@ -130,6 +135,7 @@ impl Endpoint {
/// Get the kind of authorization which is required to perform the operation. /// Get the kind of authorization which is required to perform the operation.
pub fn authorization_type(&self) -> Authorization { pub fn authorization_type(&self) -> Authorization {
match self { match self {
Self::Health => Authorization::None,
Self::Metrics => Authorization::MetricsToken, Self::Metrics => Authorization::MetricsToken,
_ => Authorization::AdminToken, _ => Authorization::AdminToken,
} }
@ -137,6 +143,7 @@ impl Endpoint {
} }
generateQueryParameters! { generateQueryParameters! {
"format" => format,
"id" => id, "id" => id,
"search" => search, "search" => search,
"globalAlias" => global_alias, "globalAlias" => global_alias,

View file

@ -8,10 +8,10 @@ use garage_util::background::*;
use garage_util::config::*; use garage_util::config::*;
use garage_util::error::*; use garage_util::error::*;
use garage_rpc::replication_mode::ReplicationMode;
use garage_rpc::system::System; use garage_rpc::system::System;
use garage_block::manager::*; use garage_block::manager::*;
use garage_table::replication::ReplicationMode;
use garage_table::replication::TableFullReplication; use garage_table::replication::TableFullReplication;
use garage_table::replication::TableShardedReplication; use garage_table::replication::TableShardedReplication;
use garage_table::*; use garage_table::*;
@ -34,6 +34,9 @@ pub struct Garage {
/// The parsed configuration Garage is running /// The parsed configuration Garage is running
pub config: Config, pub config: Config,
/// The replication mode of this cluster
pub replication_mode: ReplicationMode,
/// The local database /// The local database
pub db: db::Db, pub db: db::Db,
/// A background job runner /// A background job runner
@ -164,12 +167,7 @@ impl Garage {
.expect("Invalid replication_mode in config file."); .expect("Invalid replication_mode in config file.");
info!("Initialize membership management system..."); info!("Initialize membership management system...");
let system = System::new( let system = System::new(network_key, background.clone(), replication_mode, &config)?;
network_key,
background.clone(),
replication_mode.replication_factor(),
&config,
)?;
let data_rep_param = TableShardedReplication { let data_rep_param = TableShardedReplication {
system: system.clone(), system: system.clone(),
@ -258,6 +256,7 @@ impl Garage {
// -- done -- // -- done --
Ok(Arc::new(Self { Ok(Arc::new(Self {
config, config,
replication_mode,
db, db,
background, background,
system, system,

View file

@ -9,6 +9,7 @@ mod consul;
mod kubernetes; mod kubernetes;
pub mod layout; pub mod layout;
pub mod replication_mode;
pub mod ring; pub mod ring;
pub mod system; pub mod system;

View file

@ -1,3 +1,4 @@
#[derive(Clone, Copy)]
pub enum ReplicationMode { pub enum ReplicationMode {
None, None,
TwoWay, TwoWay,

View file

@ -35,6 +35,7 @@ use crate::consul::ConsulDiscovery;
#[cfg(feature = "kubernetes-discovery")] #[cfg(feature = "kubernetes-discovery")]
use crate::kubernetes::*; use crate::kubernetes::*;
use crate::layout::*; use crate::layout::*;
use crate::replication_mode::*;
use crate::ring::*; use crate::ring::*;
use crate::rpc_helper::*; use crate::rpc_helper::*;
@ -102,6 +103,7 @@ pub struct System {
#[cfg(feature = "kubernetes-discovery")] #[cfg(feature = "kubernetes-discovery")]
kubernetes_discovery: Option<KubernetesDiscoveryConfig>, kubernetes_discovery: Option<KubernetesDiscoveryConfig>,
replication_mode: ReplicationMode,
replication_factor: usize, replication_factor: usize,
/// The ring /// The ring
@ -136,6 +138,37 @@ pub struct KnownNodeInfo {
pub status: NodeStatus, pub status: NodeStatus,
} }
#[derive(Debug, Clone, Copy, Serialize, Deserialize)]
pub struct ClusterHealth {
/// The current health status of the cluster (see below)
pub status: ClusterHealthStatus,
/// Number of nodes already seen once in the cluster
pub known_nodes: usize,
/// Number of nodes currently connected
pub connected_nodes: usize,
/// Number of storage nodes declared in the current layout
pub storage_nodes: usize,
/// Number of storage nodes currently connected
pub storage_nodes_ok: usize,
/// Number of partitions in the layout
pub partitions: usize,
/// Number of partitions for which we have a quorum of connected nodes
pub partitions_quorum: usize,
/// Number of partitions for which all storage nodes are connected
pub partitions_all_ok: usize,
}
#[derive(Debug, Clone, Copy, Serialize, Deserialize)]
pub enum ClusterHealthStatus {
/// All nodes are available
Healthy,
/// Some storage nodes are unavailable, but quorum is stil
/// achieved for all partitions
Degraded,
/// Quorum is not available for some partitions
Unavailable,
}
pub fn read_node_id(metadata_dir: &Path) -> Result<NodeID, Error> { pub fn read_node_id(metadata_dir: &Path) -> Result<NodeID, Error> {
let mut pubkey_file = metadata_dir.to_path_buf(); let mut pubkey_file = metadata_dir.to_path_buf();
pubkey_file.push("node_key.pub"); pubkey_file.push("node_key.pub");
@ -200,9 +233,11 @@ impl System {
pub fn new( pub fn new(
network_key: NetworkKey, network_key: NetworkKey,
background: Arc<BackgroundRunner>, background: Arc<BackgroundRunner>,
replication_factor: usize, replication_mode: ReplicationMode,
config: &Config, config: &Config,
) -> Result<Arc<Self>, Error> { ) -> Result<Arc<Self>, Error> {
let replication_factor = replication_mode.replication_factor();
let node_key = let node_key =
gen_node_key(&config.metadata_dir).expect("Unable to read or generate node ID"); gen_node_key(&config.metadata_dir).expect("Unable to read or generate node ID");
info!( info!(
@ -324,6 +359,7 @@ impl System {
config.rpc_timeout_msec.map(Duration::from_millis), config.rpc_timeout_msec.map(Duration::from_millis),
), ),
system_endpoint, system_endpoint,
replication_mode,
replication_factor, replication_factor,
rpc_listen_addr: config.rpc_bind_addr, rpc_listen_addr: config.rpc_bind_addr,
#[cfg(any(feature = "consul-discovery", feature = "kubernetes-discovery"))] #[cfg(any(feature = "consul-discovery", feature = "kubernetes-discovery"))]
@ -429,6 +465,67 @@ impl System {
} }
} }
pub fn health(&self) -> ClusterHealth {
let ring: Arc<_> = self.ring.borrow().clone();
let quorum = self.replication_mode.write_quorum();
let replication_factor = self.replication_factor;
let nodes = self
.get_known_nodes()
.into_iter()
.map(|n| (n.id, n))
.collect::<HashMap<Uuid, _>>();
let connected_nodes = nodes.iter().filter(|(_, n)| n.is_up).count();
let storage_nodes = ring
.layout
.roles
.items()
.iter()
.filter(|(_, _, v)| matches!(v, NodeRoleV(Some(r)) if r.capacity.is_some()))
.collect::<Vec<_>>();
let storage_nodes_ok = storage_nodes
.iter()
.filter(|(x, _, _)| nodes.get(x).map(|n| n.is_up).unwrap_or(false))
.count();
let partitions = ring.partitions();
let partitions_n_up = partitions
.iter()
.map(|(_, h)| {
let pn = ring.get_nodes(h, ring.replication_factor);
pn.iter()
.filter(|x| nodes.get(x).map(|n| n.is_up).unwrap_or(false))
.count()
})
.collect::<Vec<usize>>();
let partitions_all_ok = partitions_n_up
.iter()
.filter(|c| **c == replication_factor)
.count();
let partitions_quorum = partitions_n_up.iter().filter(|c| **c >= quorum).count();
let status =
if partitions_quorum == partitions.len() && storage_nodes_ok == storage_nodes.len() {
ClusterHealthStatus::Healthy
} else if partitions_quorum == partitions.len() {
ClusterHealthStatus::Degraded
} else {
ClusterHealthStatus::Unavailable
};
ClusterHealth {
status,
known_nodes: nodes.len(),
connected_nodes,
storage_nodes: storage_nodes.len(),
storage_nodes_ok,
partitions: partitions.len(),
partitions_quorum,
partitions_all_ok,
}
}
// ---- INTERNALS ---- // ---- INTERNALS ----
#[cfg(feature = "consul-discovery")] #[cfg(feature = "consul-discovery")]

View file

@ -1,10 +1,8 @@
mod parameters; mod parameters;
mod fullcopy; mod fullcopy;
mod mode;
mod sharded; mod sharded;
pub use fullcopy::TableFullReplication; pub use fullcopy::TableFullReplication;
pub use mode::ReplicationMode;
pub use parameters::*; pub use parameters::*;
pub use sharded::TableShardedReplication; pub use sharded::TableShardedReplication;