From 2065f011ca3f7c736feecffd108c89d3f8019e85 Mon Sep 17 00:00:00 2001 From: Alex Auvolat Date: Mon, 5 Dec 2022 14:59:15 +0100 Subject: [PATCH 1/5] Implement /health admin API endpoint to check node health --- src/api/admin/api_server.rs | 94 +++++++++++++++++++++++++++++++++++ src/api/admin/router.rs | 4 ++ src/model/garage.rs | 4 ++ src/table/replication/mode.rs | 1 + 4 files changed, 103 insertions(+) diff --git a/src/api/admin/api_server.rs b/src/api/admin/api_server.rs index 2896d058..9beeda1f 100644 --- a/src/api/admin/api_server.rs +++ b/src/api/admin/api_server.rs @@ -1,3 +1,5 @@ +use std::collections::HashMap; +use std::fmt::Write; use std::net::SocketAddr; use std::sync::Arc; @@ -15,6 +17,8 @@ use opentelemetry_prometheus::PrometheusExporter; use prometheus::{Encoder, TextEncoder}; use garage_model::garage::Garage; +use garage_rpc::layout::NodeRoleV; +use garage_util::data::Uuid; use garage_util::error::Error as GarageError; use crate::generic_server::*; @@ -76,6 +80,94 @@ impl AdminApiServer { .body(Body::empty())?) } + fn handle_health(&self) -> Result, Error> { + let ring: Arc<_> = self.garage.system.ring.borrow().clone(); + let quorum = self.garage.replication_mode.write_quorum(); + let replication_factor = self.garage.replication_mode.replication_factor(); + + let nodes = self + .garage + .system + .get_known_nodes() + .into_iter() + .map(|n| (n.id, n)) + .collect::>(); + let n_nodes_connected = nodes.iter().filter(|(_, n)| n.is_up).count(); + + let storage_nodes = ring + .layout + .roles + .items() + .iter() + .filter(|(_, _, v)| matches!(v, NodeRoleV(Some(r)) if r.capacity.is_some())) + .collect::>(); + let n_storage_nodes_ok = storage_nodes + .iter() + .filter(|(x, _, _)| nodes.get(x).map(|n| n.is_up).unwrap_or(false)) + .count(); + + let partitions = ring.partitions(); + let partitions_n_up = partitions + .iter() + .map(|(_, h)| { + let pn = ring.get_nodes(h, ring.replication_factor); + pn.iter() + .filter(|x| nodes.get(x).map(|n| n.is_up).unwrap_or(false)) + .count() + }) + .collect::>(); + let n_partitions_full_ok = partitions_n_up + .iter() + .filter(|c| **c == replication_factor) + .count(); + let n_partitions_quorum = partitions_n_up.iter().filter(|c| **c >= quorum).count(); + + let (status, status_str) = if n_partitions_quorum == partitions.len() + && n_storage_nodes_ok == storage_nodes.len() + { + (StatusCode::OK, "Garage is fully operational") + } else if n_partitions_quorum == partitions.len() { + ( + StatusCode::OK, + "Garage is operational but some storage nodes are unavailable", + ) + } else { + ( + StatusCode::SERVICE_UNAVAILABLE, + "Quorum is not available for some/all partitions, reads and writes will fail", + ) + }; + + let mut buf = status_str.to_string(); + writeln!( + &mut buf, + "\nAll nodes: {} connected, {} known", + n_nodes_connected, + nodes.len() + ) + .unwrap(); + writeln!( + &mut buf, + "Storage nodes: {} connected, {} in layout", + n_storage_nodes_ok, + storage_nodes.len() + ) + .unwrap(); + writeln!(&mut buf, "Number of partitions: {}", partitions.len()).unwrap(); + writeln!(&mut buf, "Partitions with quorum: {}", n_partitions_quorum).unwrap(); + writeln!( + &mut buf, + "Partitions with all nodes available: {}", + n_partitions_full_ok + ) + .unwrap(); + + Ok(Response::builder() + .status(status) + .header(http::header::CONTENT_TYPE, "text/plain") + .body(Body::from(buf))?) + } + fn handle_metrics(&self) -> Result, Error> { #[cfg(feature = "metrics")] { @@ -124,6 +216,7 @@ impl ApiHandler for AdminApiServer { ) -> Result, Error> { let expected_auth_header = match endpoint.authorization_type() { + Authorization::None => None, Authorization::MetricsToken => self.metrics_token.as_ref(), Authorization::AdminToken => match &self.admin_token { None => return Err(Error::forbidden( @@ -147,6 +240,7 @@ impl ApiHandler for AdminApiServer { match endpoint { Endpoint::Options => self.handle_options(&req), + Endpoint::Health => self.handle_health(), Endpoint::Metrics => self.handle_metrics(), Endpoint::GetClusterStatus => handle_get_cluster_status(&self.garage).await, Endpoint::ConnectClusterNodes => handle_connect_cluster_nodes(&self.garage, req).await, diff --git a/src/api/admin/router.rs b/src/api/admin/router.rs index 3eee8b67..14411f75 100644 --- a/src/api/admin/router.rs +++ b/src/api/admin/router.rs @@ -6,6 +6,7 @@ use crate::admin::error::*; use crate::router_macros::*; pub enum Authorization { + None, MetricsToken, AdminToken, } @@ -16,6 +17,7 @@ router_match! {@func #[derive(Debug, Clone, PartialEq, Eq)] pub enum Endpoint { Options, + Health, Metrics, GetClusterStatus, ConnectClusterNodes, @@ -88,6 +90,7 @@ impl Endpoint { let res = router_match!(@gen_path_parser (req.method(), path, query) [ OPTIONS _ => Options, + GET "/health" => Health, GET "/metrics" => Metrics, GET "/v0/status" => GetClusterStatus, POST "/v0/connect" => ConnectClusterNodes, @@ -130,6 +133,7 @@ impl Endpoint { /// Get the kind of authorization which is required to perform the operation. pub fn authorization_type(&self) -> Authorization { match self { + Self::Health => Authorization::None, Self::Metrics => Authorization::MetricsToken, _ => Authorization::AdminToken, } diff --git a/src/model/garage.rs b/src/model/garage.rs index 75012952..c2aabea1 100644 --- a/src/model/garage.rs +++ b/src/model/garage.rs @@ -34,6 +34,9 @@ pub struct Garage { /// The parsed configuration Garage is running pub config: Config, + /// The replication mode of this cluster + pub replication_mode: ReplicationMode, + /// The local database pub db: db::Db, /// A background job runner @@ -258,6 +261,7 @@ impl Garage { // -- done -- Ok(Arc::new(Self { config, + replication_mode, db, background, system, diff --git a/src/table/replication/mode.rs b/src/table/replication/mode.rs index c6f84c45..e244e063 100644 --- a/src/table/replication/mode.rs +++ b/src/table/replication/mode.rs @@ -1,3 +1,4 @@ +#[derive(Clone, Copy)] pub enum ReplicationMode { None, TwoWay, -- 2.43.0 From 280d1be7b1fde13d23e47f75aa8acd2f90efb81f Mon Sep 17 00:00:00 2001 From: Alex Auvolat Date: Mon, 5 Dec 2022 15:28:57 +0100 Subject: [PATCH 2/5] Refactor health check and add ability to return it in json --- src/api/admin/api_server.rs | 131 +++++++----------- src/api/admin/router.rs | 9 +- src/model/garage.rs | 9 +- src/rpc/lib.rs | 1 + .../mode.rs => rpc/replication_mode.rs} | 0 src/rpc/system.rs | 99 ++++++++++++- src/table/replication/mod.rs | 2 - 7 files changed, 156 insertions(+), 95 deletions(-) rename src/{table/replication/mode.rs => rpc/replication_mode.rs} (100%) diff --git a/src/api/admin/api_server.rs b/src/api/admin/api_server.rs index 9beeda1f..f86ed599 100644 --- a/src/api/admin/api_server.rs +++ b/src/api/admin/api_server.rs @@ -1,4 +1,3 @@ -use std::collections::HashMap; use std::fmt::Write; use std::net::SocketAddr; use std::sync::Arc; @@ -17,8 +16,7 @@ use opentelemetry_prometheus::PrometheusExporter; use prometheus::{Encoder, TextEncoder}; use garage_model::garage::Garage; -use garage_rpc::layout::NodeRoleV; -use garage_util::data::Uuid; +use garage_rpc::system::ClusterHealthStatus; use garage_util::error::Error as GarageError; use crate::generic_server::*; @@ -80,92 +78,61 @@ impl AdminApiServer { .body(Body::empty())?) } - fn handle_health(&self) -> Result, Error> { - let ring: Arc<_> = self.garage.system.ring.borrow().clone(); - let quorum = self.garage.replication_mode.write_quorum(); - let replication_factor = self.garage.replication_mode.replication_factor(); + fn handle_health(&self, format: Option<&str>) -> Result, Error> { + let health = self.garage.system.health(); - let nodes = self - .garage - .system - .get_known_nodes() - .into_iter() - .map(|n| (n.id, n)) - .collect::>(); - let n_nodes_connected = nodes.iter().filter(|(_, n)| n.is_up).count(); - - let storage_nodes = ring - .layout - .roles - .items() - .iter() - .filter(|(_, _, v)| matches!(v, NodeRoleV(Some(r)) if r.capacity.is_some())) - .collect::>(); - let n_storage_nodes_ok = storage_nodes - .iter() - .filter(|(x, _, _)| nodes.get(x).map(|n| n.is_up).unwrap_or(false)) - .count(); - - let partitions = ring.partitions(); - let partitions_n_up = partitions - .iter() - .map(|(_, h)| { - let pn = ring.get_nodes(h, ring.replication_factor); - pn.iter() - .filter(|x| nodes.get(x).map(|n| n.is_up).unwrap_or(false)) - .count() - }) - .collect::>(); - let n_partitions_full_ok = partitions_n_up - .iter() - .filter(|c| **c == replication_factor) - .count(); - let n_partitions_quorum = partitions_n_up.iter().filter(|c| **c >= quorum).count(); - - let (status, status_str) = if n_partitions_quorum == partitions.len() - && n_storage_nodes_ok == storage_nodes.len() - { - (StatusCode::OK, "Garage is fully operational") - } else if n_partitions_quorum == partitions.len() { - ( + let (status, status_str) = match health.status { + ClusterHealthStatus::Healthy => (StatusCode::OK, "Garage is fully operational"), + ClusterHealthStatus::Degraded => ( StatusCode::OK, "Garage is operational but some storage nodes are unavailable", - ) - } else { - ( + ), + ClusterHealthStatus::Unavailable => ( StatusCode::SERVICE_UNAVAILABLE, "Quorum is not available for some/all partitions, reads and writes will fail", - ) + ), }; - let mut buf = status_str.to_string(); - writeln!( - &mut buf, - "\nAll nodes: {} connected, {} known", - n_nodes_connected, - nodes.len() - ) - .unwrap(); - writeln!( - &mut buf, - "Storage nodes: {} connected, {} in layout", - n_storage_nodes_ok, - storage_nodes.len() - ) - .unwrap(); - writeln!(&mut buf, "Number of partitions: {}", partitions.len()).unwrap(); - writeln!(&mut buf, "Partitions with quorum: {}", n_partitions_quorum).unwrap(); - writeln!( - &mut buf, - "Partitions with all nodes available: {}", - n_partitions_full_ok - ) - .unwrap(); + let resp = Response::builder().status(status); - Ok(Response::builder() - .status(status) - .header(http::header::CONTENT_TYPE, "text/plain") - .body(Body::from(buf))?) + if matches!(format, Some("json")) { + let resp_json = + serde_json::to_string_pretty(&health).map_err(garage_util::error::Error::from)?; + Ok(resp + .header(http::header::CONTENT_TYPE, "application/json") + .body(Body::from(resp_json))?) + } else { + let mut buf = status_str.to_string(); + writeln!( + &mut buf, + "\nAll nodes: {} connected, {} known", + health.connected_nodes, health.known_nodes, + ) + .unwrap(); + writeln!( + &mut buf, + "Storage nodes: {} connected, {} in layout", + health.storage_nodes_ok, health.storage_nodes + ) + .unwrap(); + writeln!(&mut buf, "Number of partitions: {}", health.partitions).unwrap(); + writeln!( + &mut buf, + "Partitions with quorum: {}", + health.partitions_quorum + ) + .unwrap(); + writeln!( + &mut buf, + "Partitions with all nodes available: {}", + health.partitions_all_ok + ) + .unwrap(); + + Ok(resp + .header(http::header::CONTENT_TYPE, "text/plain") + .body(Body::from(buf))?) + } } fn handle_metrics(&self) -> Result, Error> { @@ -240,7 +207,7 @@ impl ApiHandler for AdminApiServer { match endpoint { Endpoint::Options => self.handle_options(&req), - Endpoint::Health => self.handle_health(), + Endpoint::Health { format } => self.handle_health(format.as_deref()), Endpoint::Metrics => self.handle_metrics(), Endpoint::GetClusterStatus => handle_get_cluster_status(&self.garage).await, Endpoint::ConnectClusterNodes => handle_connect_cluster_nodes(&self.garage, req).await, diff --git a/src/api/admin/router.rs b/src/api/admin/router.rs index 14411f75..6ffcc131 100644 --- a/src/api/admin/router.rs +++ b/src/api/admin/router.rs @@ -17,7 +17,9 @@ router_match! {@func #[derive(Debug, Clone, PartialEq, Eq)] pub enum Endpoint { Options, - Health, + Health { + format: Option, + }, Metrics, GetClusterStatus, ConnectClusterNodes, @@ -90,7 +92,7 @@ impl Endpoint { let res = router_match!(@gen_path_parser (req.method(), path, query) [ OPTIONS _ => Options, - GET "/health" => Health, + GET "/health" => Health (query_opt::format), GET "/metrics" => Metrics, GET "/v0/status" => GetClusterStatus, POST "/v0/connect" => ConnectClusterNodes, @@ -133,7 +135,7 @@ impl Endpoint { /// Get the kind of authorization which is required to perform the operation. pub fn authorization_type(&self) -> Authorization { match self { - Self::Health => Authorization::None, + Self::Health { .. } => Authorization::None, Self::Metrics => Authorization::MetricsToken, _ => Authorization::AdminToken, } @@ -141,6 +143,7 @@ impl Endpoint { } generateQueryParameters! { + "format" => format, "id" => id, "search" => search, "globalAlias" => global_alias, diff --git a/src/model/garage.rs b/src/model/garage.rs index c2aabea1..e34d034f 100644 --- a/src/model/garage.rs +++ b/src/model/garage.rs @@ -8,10 +8,10 @@ use garage_util::background::*; use garage_util::config::*; use garage_util::error::*; +use garage_rpc::replication_mode::ReplicationMode; use garage_rpc::system::System; use garage_block::manager::*; -use garage_table::replication::ReplicationMode; use garage_table::replication::TableFullReplication; use garage_table::replication::TableShardedReplication; use garage_table::*; @@ -167,12 +167,7 @@ impl Garage { .expect("Invalid replication_mode in config file."); info!("Initialize membership management system..."); - let system = System::new( - network_key, - background.clone(), - replication_mode.replication_factor(), - &config, - )?; + let system = System::new(network_key, background.clone(), replication_mode, &config)?; let data_rep_param = TableShardedReplication { system: system.clone(), diff --git a/src/rpc/lib.rs b/src/rpc/lib.rs index 92caf75d..86f63568 100644 --- a/src/rpc/lib.rs +++ b/src/rpc/lib.rs @@ -9,6 +9,7 @@ mod consul; mod kubernetes; pub mod layout; +pub mod replication_mode; pub mod ring; pub mod system; diff --git a/src/table/replication/mode.rs b/src/rpc/replication_mode.rs similarity index 100% rename from src/table/replication/mode.rs rename to src/rpc/replication_mode.rs diff --git a/src/rpc/system.rs b/src/rpc/system.rs index d6576f20..2c6f14fd 100644 --- a/src/rpc/system.rs +++ b/src/rpc/system.rs @@ -35,6 +35,7 @@ use crate::consul::ConsulDiscovery; #[cfg(feature = "kubernetes-discovery")] use crate::kubernetes::*; use crate::layout::*; +use crate::replication_mode::*; use crate::ring::*; use crate::rpc_helper::*; @@ -102,6 +103,7 @@ pub struct System { #[cfg(feature = "kubernetes-discovery")] kubernetes_discovery: Option, + replication_mode: ReplicationMode, replication_factor: usize, /// The ring @@ -136,6 +138,37 @@ pub struct KnownNodeInfo { pub status: NodeStatus, } +#[derive(Debug, Clone, Copy, Serialize, Deserialize)] +pub struct ClusterHealth { + /// The current health status of the cluster (see below) + pub status: ClusterHealthStatus, + /// Number of nodes already seen once in the cluster + pub known_nodes: usize, + /// Number of nodes currently connected + pub connected_nodes: usize, + /// Number of storage nodes declared in the current layout + pub storage_nodes: usize, + /// Number of storage nodes currently connected + pub storage_nodes_ok: usize, + /// Number of partitions in the layout + pub partitions: usize, + /// Number of partitions for which we have a quorum of connected nodes + pub partitions_quorum: usize, + /// Number of partitions for which all storage nodes are connected + pub partitions_all_ok: usize, +} + +#[derive(Debug, Clone, Copy, Serialize, Deserialize)] +pub enum ClusterHealthStatus { + /// All nodes are available + Healthy, + /// Some storage nodes are unavailable, but quorum is stil + /// achieved for all partitions + Degraded, + /// Quorum is not available for some partitions + Unavailable, +} + pub fn read_node_id(metadata_dir: &Path) -> Result { let mut pubkey_file = metadata_dir.to_path_buf(); pubkey_file.push("node_key.pub"); @@ -200,9 +233,11 @@ impl System { pub fn new( network_key: NetworkKey, background: Arc, - replication_factor: usize, + replication_mode: ReplicationMode, config: &Config, ) -> Result, Error> { + let replication_factor = replication_mode.replication_factor(); + let node_key = gen_node_key(&config.metadata_dir).expect("Unable to read or generate node ID"); info!( @@ -324,6 +359,7 @@ impl System { config.rpc_timeout_msec.map(Duration::from_millis), ), system_endpoint, + replication_mode, replication_factor, rpc_listen_addr: config.rpc_bind_addr, #[cfg(any(feature = "consul-discovery", feature = "kubernetes-discovery"))] @@ -429,6 +465,67 @@ impl System { } } + pub fn health(&self) -> ClusterHealth { + let ring: Arc<_> = self.ring.borrow().clone(); + let quorum = self.replication_mode.write_quorum(); + let replication_factor = self.replication_factor; + + let nodes = self + .get_known_nodes() + .into_iter() + .map(|n| (n.id, n)) + .collect::>(); + let connected_nodes = nodes.iter().filter(|(_, n)| n.is_up).count(); + + let storage_nodes = ring + .layout + .roles + .items() + .iter() + .filter(|(_, _, v)| matches!(v, NodeRoleV(Some(r)) if r.capacity.is_some())) + .collect::>(); + let storage_nodes_ok = storage_nodes + .iter() + .filter(|(x, _, _)| nodes.get(x).map(|n| n.is_up).unwrap_or(false)) + .count(); + + let partitions = ring.partitions(); + let partitions_n_up = partitions + .iter() + .map(|(_, h)| { + let pn = ring.get_nodes(h, ring.replication_factor); + pn.iter() + .filter(|x| nodes.get(x).map(|n| n.is_up).unwrap_or(false)) + .count() + }) + .collect::>(); + let partitions_all_ok = partitions_n_up + .iter() + .filter(|c| **c == replication_factor) + .count(); + let partitions_quorum = partitions_n_up.iter().filter(|c| **c >= quorum).count(); + + let status = + if partitions_quorum == partitions.len() && storage_nodes_ok == storage_nodes.len() { + ClusterHealthStatus::Healthy + } else if partitions_quorum == partitions.len() { + ClusterHealthStatus::Degraded + } else { + ClusterHealthStatus::Unavailable + }; + + ClusterHealth { + status, + known_nodes: nodes.len(), + connected_nodes, + storage_nodes: storage_nodes.len(), + storage_nodes_ok, + partitions: partitions.len(), + partitions_quorum, + partitions_all_ok, + } + } + // ---- INTERNALS ---- #[cfg(feature = "consul-discovery")] diff --git a/src/table/replication/mod.rs b/src/table/replication/mod.rs index 19e6772f..dfcb026a 100644 --- a/src/table/replication/mod.rs +++ b/src/table/replication/mod.rs @@ -1,10 +1,8 @@ mod parameters; mod fullcopy; -mod mode; mod sharded; pub use fullcopy::TableFullReplication; -pub use mode::ReplicationMode; pub use parameters::*; pub use sharded::TableShardedReplication; -- 2.43.0 From d7868c48a4d8d5831051a0be088fe7bbec259bca Mon Sep 17 00:00:00 2001 From: Alex Auvolat Date: Mon, 5 Dec 2022 15:38:32 +0100 Subject: [PATCH 3/5] Separate /health (simple text answer) and /v0/health (full json answer, authenticated) --- src/api/admin/api_server.rs | 54 ++++++++----------------------------- src/api/admin/cluster.rs | 17 ++++++++++++ src/api/admin/router.rs | 10 +++---- 3 files changed, 33 insertions(+), 48 deletions(-) diff --git a/src/api/admin/api_server.rs b/src/api/admin/api_server.rs index f86ed599..2d325fb1 100644 --- a/src/api/admin/api_server.rs +++ b/src/api/admin/api_server.rs @@ -1,4 +1,3 @@ -use std::fmt::Write; use std::net::SocketAddr; use std::sync::Arc; @@ -78,7 +77,7 @@ impl AdminApiServer { .body(Body::empty())?) } - fn handle_health(&self, format: Option<&str>) -> Result, Error> { + fn handle_health(&self) -> Result, Error> { let health = self.garage.system.health(); let (status, status_str) = match health.status { @@ -92,47 +91,15 @@ impl AdminApiServer { "Quorum is not available for some/all partitions, reads and writes will fail", ), }; + let status_str = format!( + "{}\nConsult the full health check API endpoint at /v0/health for more details\n", + status_str + ); - let resp = Response::builder().status(status); - - if matches!(format, Some("json")) { - let resp_json = - serde_json::to_string_pretty(&health).map_err(garage_util::error::Error::from)?; - Ok(resp - .header(http::header::CONTENT_TYPE, "application/json") - .body(Body::from(resp_json))?) - } else { - let mut buf = status_str.to_string(); - writeln!( - &mut buf, - "\nAll nodes: {} connected, {} known", - health.connected_nodes, health.known_nodes, - ) - .unwrap(); - writeln!( - &mut buf, - "Storage nodes: {} connected, {} in layout", - health.storage_nodes_ok, health.storage_nodes - ) - .unwrap(); - writeln!(&mut buf, "Number of partitions: {}", health.partitions).unwrap(); - writeln!( - &mut buf, - "Partitions with quorum: {}", - health.partitions_quorum - ) - .unwrap(); - writeln!( - &mut buf, - "Partitions with all nodes available: {}", - health.partitions_all_ok - ) - .unwrap(); - - Ok(resp - .header(http::header::CONTENT_TYPE, "text/plain") - .body(Body::from(buf))?) - } + Ok(Response::builder() + .status(status) + .header(http::header::CONTENT_TYPE, "text/plain") + .body(Body::from(status_str))?) } fn handle_metrics(&self) -> Result, Error> { @@ -207,9 +174,10 @@ impl ApiHandler for AdminApiServer { match endpoint { Endpoint::Options => self.handle_options(&req), - Endpoint::Health { format } => self.handle_health(format.as_deref()), + Endpoint::Health => self.handle_health(), Endpoint::Metrics => self.handle_metrics(), Endpoint::GetClusterStatus => handle_get_cluster_status(&self.garage).await, + Endpoint::GetClusterHealth => handle_get_cluster_health(&self.garage).await, Endpoint::ConnectClusterNodes => handle_connect_cluster_nodes(&self.garage, req).await, // Layout Endpoint::GetClusterLayout => handle_get_cluster_layout(&self.garage).await, diff --git a/src/api/admin/cluster.rs b/src/api/admin/cluster.rs index 706db727..a773e27a 100644 --- a/src/api/admin/cluster.rs +++ b/src/api/admin/cluster.rs @@ -9,6 +9,7 @@ use garage_util::crdt::*; use garage_util::data::*; use garage_rpc::layout::*; +use garage_rpc::system::ClusterHealthStatus; use garage_model::garage::Garage; @@ -43,6 +44,22 @@ pub async fn handle_get_cluster_status(garage: &Arc) -> Result) -> Result, Error> { + let health = garage.system.health(); + + let status = match health.status { + ClusterHealthStatus::Unavailable => StatusCode::SERVICE_UNAVAILABLE, + _ => StatusCode::OK, + }; + + let resp_json = + serde_json::to_string_pretty(&health).map_err(garage_util::error::Error::from)?; + Ok(Response::builder() + .status(status) + .header(http::header::CONTENT_TYPE, "application/json") + .body(Body::from(resp_json))?) +} + pub async fn handle_connect_cluster_nodes( garage: &Arc, req: Request, diff --git a/src/api/admin/router.rs b/src/api/admin/router.rs index 6ffcc131..3fa07b3c 100644 --- a/src/api/admin/router.rs +++ b/src/api/admin/router.rs @@ -17,11 +17,10 @@ router_match! {@func #[derive(Debug, Clone, PartialEq, Eq)] pub enum Endpoint { Options, - Health { - format: Option, - }, + Health, Metrics, GetClusterStatus, + GetClusterHealth, ConnectClusterNodes, // Layout GetClusterLayout, @@ -92,9 +91,10 @@ impl Endpoint { let res = router_match!(@gen_path_parser (req.method(), path, query) [ OPTIONS _ => Options, - GET "/health" => Health (query_opt::format), + GET "/health" => Health, GET "/metrics" => Metrics, GET "/v0/status" => GetClusterStatus, + GET "/v0/health" => GetClusterHealth, POST "/v0/connect" => ConnectClusterNodes, // Layout endpoints GET "/v0/layout" => GetClusterLayout, @@ -135,7 +135,7 @@ impl Endpoint { /// Get the kind of authorization which is required to perform the operation. pub fn authorization_type(&self) -> Authorization { match self { - Self::Health { .. } => Authorization::None, + Self::Health => Authorization::None, Self::Metrics => Authorization::MetricsToken, _ => Authorization::AdminToken, } -- 2.43.0 From 5ea5fd2130ee16a2030f6f74cd1a8b641b1c2487 Mon Sep 17 00:00:00 2001 From: Alex Auvolat Date: Sun, 11 Dec 2022 18:11:12 +0100 Subject: [PATCH 4/5] Always return 200 OK on /v0/health, reinstate admin api doc as draft and complete it --- doc/drafts/admin-api.md | 686 +++++++++++++++++++++++++++++++++++++++ src/api/admin/cluster.rs | 7 +- 2 files changed, 687 insertions(+), 6 deletions(-) create mode 100644 doc/drafts/admin-api.md diff --git a/doc/drafts/admin-api.md b/doc/drafts/admin-api.md new file mode 100644 index 00000000..9a697a59 --- /dev/null +++ b/doc/drafts/admin-api.md @@ -0,0 +1,686 @@ ++++ +title = "Administration API" +weight = 60 ++++ + +The Garage administration API is accessible through a dedicated server whose +listen address is specified in the `[admin]` section of the configuration +file (see [configuration file +reference](@/documentation/reference-manual/configuration.md)) + +**WARNING.** At this point, there is no comittement to stability of the APIs described in this document. +We will bump the version numbers prefixed to each API endpoint at each time the syntax +or semantics change, meaning that code that relies on these endpoint will break +when changes are introduced. + +The Garage administration API was introduced in version 0.7.2, this document +does not apply to older versions of Garage. + + +## Access control + +The admin API uses two different tokens for acces control, that are specified in the config file's `[admin]` section: + +- `metrics_token`: the token for accessing the Metrics endpoint (if this token + is not set in the config file, the Metrics endpoint can be accessed without + access control); + +- `admin_token`: the token for accessing all of the other administration + endpoints (if this token is not set in the config file, access to these + endpoints is disabled entirely). + +These tokens are used as simple HTTP bearer tokens. In other words, to +authenticate access to an admin API endpoint, add the following HTTP header +to your request: + +``` +Authorization: Bearer +``` + +## Administration API endpoints + +### Metrics-related endpoints + +#### Metrics `GET /metrics` + +Returns internal Garage metrics in Prometheus format. + +#### Health `GET /health` + +Used for simple health checks in a cluster setting with an orchestrator. +Returns an HTTP status 200 if the node is ready to answer user's requests, +and an HTTP status 503 (Service Unavailable) if there are some partitions +for which a quorum of nodes is not available. +A simple textual message is also returned in a body with content-type `text/plain`. +See `/v0/health` for an API that also returns JSON output. + +### Cluster operations + +#### GetClusterStatus `GET /v0/status` + +Returns the cluster's current status in JSON, including: + +- ID of the node being queried and its version of the Garage daemon +- Live nodes +- Currently configured cluster layout +- Staged changes to the cluster layout + +Example response body: + +```json +{ + "node": "ec79480e0ce52ae26fd00c9da684e4fa56658d9c64cdcecb094e936de0bfe71f", + "garage_version": "git:v0.8.0", + "knownNodes": { + "ec79480e0ce52ae26fd00c9da684e4fa56658d9c64cdcecb094e936de0bfe71f": { + "addr": "10.0.0.11:3901", + "is_up": true, + "last_seen_secs_ago": 9, + "hostname": "node1" + }, + "4a6ae5a1d0d33bf895f5bb4f0a418b7dc94c47c0dd2eb108d1158f3c8f60b0ff": { + "addr": "10.0.0.12:3901", + "is_up": true, + "last_seen_secs_ago": 1, + "hostname": "node2" + }, + "23ffd0cdd375ebff573b20cc5cef38996b51c1a7d6dbcf2c6e619876e507cf27": { + "addr": "10.0.0.21:3901", + "is_up": true, + "last_seen_secs_ago": 7, + "hostname": "node3" + }, + "e2ee7984ee65b260682086ec70026165903c86e601a4a5a501c1900afe28d84b": { + "addr": "10.0.0.22:3901", + "is_up": true, + "last_seen_secs_ago": 1, + "hostname": "node4" + } + }, + "layout": { + "version": 12, + "roles": { + "ec79480e0ce52ae26fd00c9da684e4fa56658d9c64cdcecb094e936de0bfe71f": { + "zone": "dc1", + "capacity": 4, + "tags": [ + "node1" + ] + }, + "4a6ae5a1d0d33bf895f5bb4f0a418b7dc94c47c0dd2eb108d1158f3c8f60b0ff": { + "zone": "dc1", + "capacity": 6, + "tags": [ + "node2" + ] + }, + "23ffd0cdd375ebff573b20cc5cef38996b51c1a7d6dbcf2c6e619876e507cf27": { + "zone": "dc2", + "capacity": 10, + "tags": [ + "node3" + ] + } + }, + "stagedRoleChanges": { + "e2ee7984ee65b260682086ec70026165903c86e601a4a5a501c1900afe28d84b": { + "zone": "dc2", + "capacity": 5, + "tags": [ + "node4" + ] + } + } + } +} +``` + +#### GetClusterHealth `GET /v0/health` + +Returns the cluster's current health in JSON format, with the following variables: + +- `status`: one of `Healthy`, `Degraded` or `Unavailable`: + - Healthy: Garage node is connected to all storage nodes + - Degraded: Garage node is not connected to all storage nodes, but a quorum of write nodes is available for all partitions + - Unavailable: a quorum of write nodes is not available for some partitions +- `known_nodes`: the number of nodes this Garage node has had a TCP connection to since the daemon started +- `connected_nodes`: the nubmer of nodes this Garage node currently has an open connection to +- `storage_nodes`: the number of storage nodes currently registered in the cluster layout +- `storage_nodes_ok`: the number of storage nodes to which a connection is currently open +- `partitions`: the total number of partitions of the data (currently always 256) +- `partitions_quorum`: the number of partitions for which a quorum of write nodes is available +- `partitions_all_ok`: the number of partitions for which we are connected to all storage nodes responsible of storing it + +Contrarily to `GET /health`, this endpoint always returns a 200 OK HTTP response code. + +Example response body: + +```json +{ + "status": "Degraded", + "known_nodes": 3, + "connected_nodes": 2, + "storage_nodes": 3, + "storage_nodes_ok": 2, + "partitions": 256, + "partitions_quorum": 256, + "partitions_all_ok": 0 +} +``` + +#### ConnectClusterNodes `POST /v0/connect` + +Instructs this Garage node to connect to other Garage nodes at specified addresses. + +Example request body: + +```json +[ + "ec79480e0ce52ae26fd00c9da684e4fa56658d9c64cdcecb094e936de0bfe71f@10.0.0.11:3901", + "4a6ae5a1d0d33bf895f5bb4f0a418b7dc94c47c0dd2eb108d1158f3c8f60b0ff@10.0.0.12:3901" +] +``` + +The format of the string for a node to connect to is: `@:`, same as in the `garage node connect` CLI call. + +Example response: + +```json +[ + { + "success": true, + "error": null + }, + { + "success": false, + "error": "Handshake error" + } +] +``` + +#### GetClusterLayout `GET /v0/layout` + +Returns the cluster's current layout in JSON, including: + +- Currently configured cluster layout +- Staged changes to the cluster layout + +(the info returned by this endpoint is a subset of the info returned by GetClusterStatus) + +Example response body: + +```json +{ + "version": 12, + "roles": { + "ec79480e0ce52ae26fd00c9da684e4fa56658d9c64cdcecb094e936de0bfe71f": { + "zone": "dc1", + "capacity": 4, + "tags": [ + "node1" + ] + }, + "4a6ae5a1d0d33bf895f5bb4f0a418b7dc94c47c0dd2eb108d1158f3c8f60b0ff": { + "zone": "dc1", + "capacity": 6, + "tags": [ + "node2" + ] + }, + "23ffd0cdd375ebff573b20cc5cef38996b51c1a7d6dbcf2c6e619876e507cf27": { + "zone": "dc2", + "capacity": 10, + "tags": [ + "node3" + ] + } + }, + "stagedRoleChanges": { + "e2ee7984ee65b260682086ec70026165903c86e601a4a5a501c1900afe28d84b": { + "zone": "dc2", + "capacity": 5, + "tags": [ + "node4" + ] + } + } +} +``` + +#### UpdateClusterLayout `POST /v0/layout` + +Send modifications to the cluster layout. These modifications will +be included in the staged role changes, visible in subsequent calls +of `GetClusterLayout`. Once the set of staged changes is satisfactory, +the user may call `ApplyClusterLayout` to apply the changed changes, +or `Revert ClusterLayout` to clear all of the staged changes in +the layout. + +Request body format: + +```json +{ + : { + "capacity": , + "zone": , + "tags": [ + , + ... + ] + }, + : null, + ... +} +``` + +Contrary to the CLI that may update only a subset of the fields +`capacity`, `zone` and `tags`, when calling this API all of these +values must be specified. + + +#### ApplyClusterLayout `POST /v0/layout/apply` + +Applies to the cluster the layout changes currently registered as +staged layout changes. + +Request body format: + +```json +{ + "version": 13 +} +``` + +Similarly to the CLI, the body must include the version of the new layout +that will be created, which MUST be 1 + the value of the currently +existing layout in the cluster. + +#### RevertClusterLayout `POST /v0/layout/revert` + +Clears all of the staged layout changes. + +Request body format: + +```json +{ + "version": 13 +} +``` + +Reverting the staged changes is done by incrementing the version number +and clearing the contents of the staged change list. +Similarly to the CLI, the body must include the incremented +version number, which MUST be 1 + the value of the currently +existing layout in the cluster. + + +### Access key operations + +#### ListKeys `GET /v0/key` + +Returns all API access keys in the cluster. + +Example response: + +```json +[ + { + "id": "GK31c2f218a2e44f485b94239e", + "name": "test" + }, + { + "id": "GKe10061ac9c2921f09e4c5540", + "name": "test2" + } +] +``` + +#### CreateKey `POST /v0/key` + +Creates a new API access key. + +Request body format: + +```json +{ + "name": "NameOfMyKey" +} +``` + +#### ImportKey `POST /v0/key/import` + +Imports an existing API key. + +Request body format: + +```json +{ + "accessKeyId": "GK31c2f218a2e44f485b94239e", + "secretAccessKey": "b892c0665f0ada8a4755dae98baa3b133590e11dae3bcc1f9d769d67f16c3835", + "name": "NameOfMyKey" +} +``` + +#### GetKeyInfo `GET /v0/key?id=` +#### GetKeyInfo `GET /v0/key?search=` + +Returns information about the requested API access key. + +If `id` is set, the key is looked up using its exact identifier (faster). +If `search` is set, the key is looked up using its name or prefix +of identifier (slower, all keys are enumerated to do this). + +Example response: + +```json +{ + "name": "test", + "accessKeyId": "GK31c2f218a2e44f485b94239e", + "secretAccessKey": "b892c0665f0ada8a4755dae98baa3b133590e11dae3bcc1f9d769d67f16c3835", + "permissions": { + "createBucket": false + }, + "buckets": [ + { + "id": "70dc3bed7fe83a75e46b66e7ddef7d56e65f3c02f9f80b6749fb97eccb5e1033", + "globalAliases": [ + "test2" + ], + "localAliases": [], + "permissions": { + "read": true, + "write": true, + "owner": false + } + }, + { + "id": "d7452a935e663fc1914f3a5515163a6d3724010ce8dfd9e4743ca8be5974f995", + "globalAliases": [ + "test3" + ], + "localAliases": [], + "permissions": { + "read": true, + "write": true, + "owner": false + } + }, + { + "id": "e6a14cd6a27f48684579ec6b381c078ab11697e6bc8513b72b2f5307e25fff9b", + "globalAliases": [], + "localAliases": [ + "test" + ], + "permissions": { + "read": true, + "write": true, + "owner": true + } + }, + { + "id": "96470e0df00ec28807138daf01915cfda2bee8eccc91dea9558c0b4855b5bf95", + "globalAliases": [ + "alex" + ], + "localAliases": [], + "permissions": { + "read": true, + "write": true, + "owner": true + } + } + ] +} +``` + +#### DeleteKey `DELETE /v0/key?id=` + +Deletes an API access key. + +#### UpdateKey `POST /v0/key?id=` + +Updates information about the specified API access key. + +Request body format: + +```json +{ + "name": "NameOfMyKey", + "allow": { + "createBucket": true, + }, + "deny": {} +} +``` + +All fields (`name`, `allow` and `deny`) are optionnal. +If they are present, the corresponding modifications are applied to the key, otherwise nothing is changed. +The possible flags in `allow` and `deny` are: `createBucket`. + + +### Bucket operations + +#### ListBuckets `GET /v0/bucket` + +Returns all storage buckets in the cluster. + +Example response: + +```json +[ + { + "id": "70dc3bed7fe83a75e46b66e7ddef7d56e65f3c02f9f80b6749fb97eccb5e1033", + "globalAliases": [ + "test2" + ], + "localAliases": [] + }, + { + "id": "96470e0df00ec28807138daf01915cfda2bee8eccc91dea9558c0b4855b5bf95", + "globalAliases": [ + "alex" + ], + "localAliases": [] + }, + { + "id": "d7452a935e663fc1914f3a5515163a6d3724010ce8dfd9e4743ca8be5974f995", + "globalAliases": [ + "test3" + ], + "localAliases": [] + }, + { + "id": "e6a14cd6a27f48684579ec6b381c078ab11697e6bc8513b72b2f5307e25fff9b", + "globalAliases": [], + "localAliases": [ + { + "accessKeyId": "GK31c2f218a2e44f485b94239e", + "alias": "test" + } + ] + } +] +``` + +#### GetBucketInfo `GET /v0/bucket?id=` +#### GetBucketInfo `GET /v0/bucket?globalAlias=` + +Returns information about the requested storage bucket. + +If `id` is set, the bucket is looked up using its exact identifier. +If `globalAlias` is set, the bucket is looked up using its global alias. +(both are fast) + +Example response: + +```json +{ + "id": "afa8f0a22b40b1247ccd0affb869b0af5cff980924a20e4b5e0720a44deb8d39", + "globalAliases": [], + "websiteAccess": false, + "websiteConfig": null, + "keys": [ + { + "accessKeyId": "GK31c2f218a2e44f485b94239e", + "name": "Imported key", + "permissions": { + "read": true, + "write": true, + "owner": true + }, + "bucketLocalAliases": [ + "debug" + ] + } + ], + "objects": 14827, + "bytes": 13189855625, + "unfinshedUploads": 0, + "quotas": { + "maxSize": null, + "maxObjects": null + } +} +``` + +#### CreateBucket `POST /v0/bucket` + +Creates a new storage bucket. + +Request body format: + +```json +{ + "globalAlias": "NameOfMyBucket" +} +``` + +OR + +```json +{ + "localAlias": { + "accessKeyId": "GK31c2f218a2e44f485b94239e", + "alias": "NameOfMyBucket", + "allow": { + "read": true, + "write": true, + "owner": false + } + } +} +``` + +OR + +```json +{} +``` + +Creates a new bucket, either with a global alias, a local one, +or no alias at all. + +Technically, you can also specify both `globalAlias` and `localAlias` and that would create +two aliases, but I don't see why you would want to do that. + +#### DeleteBucket `DELETE /v0/bucket?id=` + +Deletes a storage bucket. A bucket cannot be deleted if it is not empty. + +Warning: this will delete all aliases associated with the bucket! + +#### UpdateBucket `PUT /v0/bucket?id=` + +Updates configuration of the given bucket. + +Request body format: + +```json +{ + "websiteAccess": { + "enabled": true, + "indexDocument": "index.html", + "errorDocument": "404.html" + }, + "quotas": { + "maxSize": 19029801, + "maxObjects": null, + } +} +``` + +All fields (`websiteAccess` and `quotas`) are optionnal. +If they are present, the corresponding modifications are applied to the bucket, otherwise nothing is changed. + +In `websiteAccess`: if `enabled` is `true`, `indexDocument` must be specified. +The field `errorDocument` is optional, if no error document is set a generic +error message is displayed when errors happen. Conversely, if `enabled` is +`false`, neither `indexDocument` nor `errorDocument` must be specified. + +In `quotas`: new values of `maxSize` and `maxObjects` must both be specified, or set to `null` +to remove the quotas. An absent value will be considered the same as a `null`. It is not possible +to change only one of the two quotas. + +### Operations on permissions for keys on buckets + +#### BucketAllowKey `POST /v0/bucket/allow` + +Allows a key to do read/write/owner operations on a bucket. + +Request body format: + +```json +{ + "bucketId": "e6a14cd6a27f48684579ec6b381c078ab11697e6bc8513b72b2f5307e25fff9b", + "accessKeyId": "GK31c2f218a2e44f485b94239e", + "permissions": { + "read": true, + "write": true, + "owner": true + }, +} +``` + +Flags in `permissions` which have the value `true` will be activated. +Other flags will remain unchanged. + +#### BucketDenyKey `POST /v0/bucket/deny` + +Denies a key from doing read/write/owner operations on a bucket. + +Request body format: + +```json +{ + "bucketId": "e6a14cd6a27f48684579ec6b381c078ab11697e6bc8513b72b2f5307e25fff9b", + "accessKeyId": "GK31c2f218a2e44f485b94239e", + "permissions": { + "read": false, + "write": false, + "owner": true + }, +} +``` + +Flags in `permissions` which have the value `true` will be deactivated. +Other flags will remain unchanged. + + +### Operations on bucket aliases + +#### GlobalAliasBucket `PUT /v0/bucket/alias/global?id=&alias=` + +Empty body. Creates a global alias for a bucket. + +#### GlobalUnaliasBucket `DELETE /v0/bucket/alias/global?id=&alias=` + +Removes a global alias for a bucket. + +#### LocalAliasBucket `PUT /v0/bucket/alias/local?id=&accessKeyId=&alias=` + +Empty body. Creates a local alias for a bucket in the namespace of a specific access key. + +#### LocalUnaliasBucket `DELETE /v0/bucket/alias/local?id=&accessKeyId&alias=` + +Removes a local alias for a bucket in the namespace of a specific access key. + diff --git a/src/api/admin/cluster.rs b/src/api/admin/cluster.rs index a773e27a..f4bf8b7b 100644 --- a/src/api/admin/cluster.rs +++ b/src/api/admin/cluster.rs @@ -47,15 +47,10 @@ pub async fn handle_get_cluster_status(garage: &Arc) -> Result) -> Result, Error> { let health = garage.system.health(); - let status = match health.status { - ClusterHealthStatus::Unavailable => StatusCode::SERVICE_UNAVAILABLE, - _ => StatusCode::OK, - }; - let resp_json = serde_json::to_string_pretty(&health).map_err(garage_util::error::Error::from)?; Ok(Response::builder() - .status(status) + .status(StatusCode::OK) .header(http::header::CONTENT_TYPE, "application/json") .body(Body::from(resp_json))?) } -- 2.43.0 From 533afcf4e13022c46fd21ec51ca2a9969692ef4c Mon Sep 17 00:00:00 2001 From: Alex Auvolat Date: Sun, 11 Dec 2022 18:17:08 +0100 Subject: [PATCH 5/5] simplify --- src/api/admin/cluster.rs | 9 +-------- 1 file changed, 1 insertion(+), 8 deletions(-) diff --git a/src/api/admin/cluster.rs b/src/api/admin/cluster.rs index f4bf8b7b..182a4f6f 100644 --- a/src/api/admin/cluster.rs +++ b/src/api/admin/cluster.rs @@ -9,7 +9,6 @@ use garage_util::crdt::*; use garage_util::data::*; use garage_rpc::layout::*; -use garage_rpc::system::ClusterHealthStatus; use garage_model::garage::Garage; @@ -46,13 +45,7 @@ pub async fn handle_get_cluster_status(garage: &Arc) -> Result) -> Result, Error> { let health = garage.system.health(); - - let resp_json = - serde_json::to_string_pretty(&health).map_err(garage_util::error::Error::from)?; - Ok(Response::builder() - .status(StatusCode::OK) - .header(http::header::CONTENT_TYPE, "application/json") - .body(Body::from(resp_json))?) + Ok(json_ok_response(&health)?) } pub async fn handle_connect_cluster_nodes( -- 2.43.0