Implement /health admin API endpoint to check node health #440
3 changed files with 33 additions and 48 deletions
|
@ -1,4 +1,3 @@
|
||||||
use std::fmt::Write;
|
|
||||||
use std::net::SocketAddr;
|
use std::net::SocketAddr;
|
||||||
use std::sync::Arc;
|
use std::sync::Arc;
|
||||||
|
|
||||||
|
@ -78,7 +77,7 @@ impl AdminApiServer {
|
||||||
.body(Body::empty())?)
|
.body(Body::empty())?)
|
||||||
}
|
}
|
||||||
|
|
||||||
fn handle_health(&self, format: Option<&str>) -> Result<Response<Body>, Error> {
|
fn handle_health(&self) -> Result<Response<Body>, Error> {
|
||||||
let health = self.garage.system.health();
|
let health = self.garage.system.health();
|
||||||
|
|
||||||
let (status, status_str) = match health.status {
|
let (status, status_str) = match health.status {
|
||||||
|
@ -92,47 +91,15 @@ impl AdminApiServer {
|
||||||
"Quorum is not available for some/all partitions, reads and writes will fail",
|
"Quorum is not available for some/all partitions, reads and writes will fail",
|
||||||
),
|
),
|
||||||
};
|
};
|
||||||
|
let status_str = format!(
|
||||||
|
"{}\nConsult the full health check API endpoint at /v0/health for more details\n",
|
||||||
|
status_str
|
||||||
|
);
|
||||||
|
|
||||||
let resp = Response::builder().status(status);
|
Ok(Response::builder()
|
||||||
|
.status(status)
|
||||||
if matches!(format, Some("json")) {
|
.header(http::header::CONTENT_TYPE, "text/plain")
|
||||||
let resp_json =
|
.body(Body::from(status_str))?)
|
||||||
serde_json::to_string_pretty(&health).map_err(garage_util::error::Error::from)?;
|
|
||||||
Ok(resp
|
|
||||||
.header(http::header::CONTENT_TYPE, "application/json")
|
|
||||||
.body(Body::from(resp_json))?)
|
|
||||||
} else {
|
|
||||||
let mut buf = status_str.to_string();
|
|
||||||
writeln!(
|
|
||||||
&mut buf,
|
|
||||||
"\nAll nodes: {} connected, {} known",
|
|
||||||
health.connected_nodes, health.known_nodes,
|
|
||||||
)
|
|
||||||
.unwrap();
|
|
||||||
writeln!(
|
|
||||||
&mut buf,
|
|
||||||
"Storage nodes: {} connected, {} in layout",
|
|
||||||
health.storage_nodes_ok, health.storage_nodes
|
|
||||||
)
|
|
||||||
.unwrap();
|
|
||||||
writeln!(&mut buf, "Number of partitions: {}", health.partitions).unwrap();
|
|
||||||
writeln!(
|
|
||||||
&mut buf,
|
|
||||||
"Partitions with quorum: {}",
|
|
||||||
health.partitions_quorum
|
|
||||||
)
|
|
||||||
.unwrap();
|
|
||||||
writeln!(
|
|
||||||
&mut buf,
|
|
||||||
"Partitions with all nodes available: {}",
|
|
||||||
health.partitions_all_ok
|
|
||||||
)
|
|
||||||
.unwrap();
|
|
||||||
|
|
||||||
Ok(resp
|
|
||||||
.header(http::header::CONTENT_TYPE, "text/plain")
|
|
||||||
.body(Body::from(buf))?)
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
fn handle_metrics(&self) -> Result<Response<Body>, Error> {
|
fn handle_metrics(&self) -> Result<Response<Body>, Error> {
|
||||||
|
@ -207,9 +174,10 @@ impl ApiHandler for AdminApiServer {
|
||||||
|
|
||||||
match endpoint {
|
match endpoint {
|
||||||
Endpoint::Options => self.handle_options(&req),
|
Endpoint::Options => self.handle_options(&req),
|
||||||
Endpoint::Health { format } => self.handle_health(format.as_deref()),
|
Endpoint::Health => self.handle_health(),
|
||||||
Endpoint::Metrics => self.handle_metrics(),
|
Endpoint::Metrics => self.handle_metrics(),
|
||||||
Endpoint::GetClusterStatus => handle_get_cluster_status(&self.garage).await,
|
Endpoint::GetClusterStatus => handle_get_cluster_status(&self.garage).await,
|
||||||
|
Endpoint::GetClusterHealth => handle_get_cluster_health(&self.garage).await,
|
||||||
Endpoint::ConnectClusterNodes => handle_connect_cluster_nodes(&self.garage, req).await,
|
Endpoint::ConnectClusterNodes => handle_connect_cluster_nodes(&self.garage, req).await,
|
||||||
// Layout
|
// Layout
|
||||||
Endpoint::GetClusterLayout => handle_get_cluster_layout(&self.garage).await,
|
Endpoint::GetClusterLayout => handle_get_cluster_layout(&self.garage).await,
|
||||||
|
|
|
@ -9,6 +9,7 @@ use garage_util::crdt::*;
|
||||||
use garage_util::data::*;
|
use garage_util::data::*;
|
||||||
|
|
||||||
use garage_rpc::layout::*;
|
use garage_rpc::layout::*;
|
||||||
|
use garage_rpc::system::ClusterHealthStatus;
|
||||||
|
|
||||||
use garage_model::garage::Garage;
|
use garage_model::garage::Garage;
|
||||||
|
|
||||||
|
@ -43,6 +44,22 @@ pub async fn handle_get_cluster_status(garage: &Arc<Garage>) -> Result<Response<
|
||||||
Ok(json_ok_response(&res)?)
|
Ok(json_ok_response(&res)?)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Builds the JSON response describing cluster health.
///
/// Reads the current health report from `garage.system.health()` and returns
/// it pretty-printed as JSON with `Content-Type: application/json`.
///
/// The HTTP status is `503 Service Unavailable` when the report's status is
/// `ClusterHealthStatus::Unavailable`; every other status yields `200 OK`.
///
/// # Errors
///
/// Returns an error if the report cannot be serialized to JSON (converted
/// through `garage_util::error::Error`) or if the response cannot be built.
pub async fn handle_get_cluster_health(garage: &Arc<Garage>) -> Result<Response<Body>, Error> {
|
||||||
|
let health = garage.system.health();
|
||||||
|
|
||||||
|
// Only `Unavailable` is reported as a failure; the catch-all arm maps any
// other status to 200.
let status = match health.status {
|
||||||
|
ClusterHealthStatus::Unavailable => StatusCode::SERVICE_UNAVAILABLE,
|
||||||
|
_ => StatusCode::OK,
|
||||||
|
};
|
||||||
|
|
||||||
|
// The response is built by hand so that the status computed above can be set.
let resp_json =
|
||||||
|
serde_json::to_string_pretty(&health).map_err(garage_util::error::Error::from)?;
|
||||||
|
Ok(Response::builder()
|
||||||
|
.status(status)
|
||||||
|
.header(http::header::CONTENT_TYPE, "application/json")
|
||||||
|
.body(Body::from(resp_json))?)
|
||||||
|
}
|
||||||
|
|
||||||
pub async fn handle_connect_cluster_nodes(
|
pub async fn handle_connect_cluster_nodes(
|
||||||
garage: &Arc<Garage>,
|
garage: &Arc<Garage>,
|
||||||
req: Request<Body>,
|
req: Request<Body>,
|
||||||
|
|
|
@ -17,11 +17,10 @@ router_match! {@func
|
||||||
#[derive(Debug, Clone, PartialEq, Eq)]
|
#[derive(Debug, Clone, PartialEq, Eq)]
|
||||||
pub enum Endpoint {
|
pub enum Endpoint {
|
||||||
Options,
|
Options,
|
||||||
Health {
|
Health,
|
||||||
format: Option<String>,
|
|
||||||
},
|
|
||||||
Metrics,
|
Metrics,
|
||||||
GetClusterStatus,
|
GetClusterStatus,
|
||||||
|
GetClusterHealth,
|
||||||
ConnectClusterNodes,
|
ConnectClusterNodes,
|
||||||
// Layout
|
// Layout
|
||||||
GetClusterLayout,
|
GetClusterLayout,
|
||||||
|
@ -92,9 +91,10 @@ impl Endpoint {
|
||||||
|
|
||||||
let res = router_match!(@gen_path_parser (req.method(), path, query) [
|
let res = router_match!(@gen_path_parser (req.method(), path, query) [
|
||||||
OPTIONS _ => Options,
|
OPTIONS _ => Options,
|
||||||
GET "/health" => Health (query_opt::format),
|
GET "/health" => Health,
|
||||||
GET "/metrics" => Metrics,
|
GET "/metrics" => Metrics,
|
||||||
GET "/v0/status" => GetClusterStatus,
|
GET "/v0/status" => GetClusterStatus,
|
||||||
|
GET "/v0/health" => GetClusterHealth,
|
||||||
POST "/v0/connect" => ConnectClusterNodes,
|
POST "/v0/connect" => ConnectClusterNodes,
|
||||||
// Layout endpoints
|
// Layout endpoints
|
||||||
GET "/v0/layout" => GetClusterLayout,
|
GET "/v0/layout" => GetClusterLayout,
|
||||||
|
@ -135,7 +135,7 @@ impl Endpoint {
|
||||||
/// Get the kind of authorization which is required to perform the operation.
|
/// Get the kind of authorization which is required to perform the operation.
|
||||||
pub fn authorization_type(&self) -> Authorization {
|
pub fn authorization_type(&self) -> Authorization {
|
||||||
match self {
|
match self {
|
||||||
Self::Health { .. } => Authorization::None,
|
Self::Health => Authorization::None,
|
||||||
Self::Metrics => Authorization::MetricsToken,
|
Self::Metrics => Authorization::MetricsToken,
|
||||||
_ => Authorization::AdminToken,
|
_ => Authorization::AdminToken,
|
||||||
}
|
}
|
||||||
|
|
Loading…
Reference in a new issue