From 6a1079c4129157ae6c6e2a94b10d9c2b8f91c5b6 Mon Sep 17 00:00:00 2001 From: Alex Auvolat Date: Fri, 31 Jan 2025 17:51:50 +0100 Subject: [PATCH] admin api: impl RequestHandler for MetricsRequest --- src/api/admin/api_server.rs | 36 +----------- src/api/admin/block.rs | 9 +-- src/api/admin/special.rs | 110 ++++++++++++++++++++++++------------ src/garage/cli_v2/block.rs | 2 +- 4 files changed, 84 insertions(+), 73 deletions(-) diff --git a/src/api/admin/api_server.rs b/src/api/admin/api_server.rs index e865d199..ecc538e4 100644 --- a/src/api/admin/api_server.rs +++ b/src/api/admin/api_server.rs @@ -5,7 +5,7 @@ use argon2::password_hash::PasswordHash; use async_trait::async_trait; use http::header::{HeaderValue, ACCESS_CONTROL_ALLOW_ORIGIN, AUTHORIZATION}; -use hyper::{body::Incoming as IncomingBody, Request, Response, StatusCode}; +use hyper::{body::Incoming as IncomingBody, Request, Response}; use serde::{Deserialize, Serialize}; use tokio::sync::watch; @@ -13,8 +13,6 @@ use opentelemetry::trace::SpanRef; #[cfg(feature = "metrics")] use opentelemetry_prometheus::PrometheusExporter; -#[cfg(feature = "metrics")] -use prometheus::{Encoder, TextEncoder}; use garage_model::garage::Garage; use garage_rpc::{Endpoint as RpcEndpoint, *}; @@ -100,7 +98,7 @@ pub type ResBody = BoxBody; pub struct AdminApiServer { garage: Arc, #[cfg(feature = "metrics")] - exporter: PrometheusExporter, + pub(crate) exporter: PrometheusExporter, metrics_token: Option, admin_token: Option, pub(crate) background: Arc, @@ -148,34 +146,6 @@ impl AdminApiServer { .run_server(bind_addr, Some(0o220), must_exit) .await } - - fn handle_metrics(&self) -> Result, Error> { - #[cfg(feature = "metrics")] - { - use opentelemetry::trace::Tracer; - - let mut buffer = vec![]; - let encoder = TextEncoder::new(); - - let tracer = opentelemetry::global::tracer("garage"); - let metric_families = tracer.in_span("admin/gather_metrics", |_| { - self.exporter.registry().gather() - }); - - encoder - .encode(&metric_families, &mut buffer) - .ok_or_internal_error("Could not serialize metrics")?; - - Ok(Response::builder() - .status(StatusCode::OK) - .header(http::header::CONTENT_TYPE, encoder.format_type()) - .body(bytes_body(buffer.into()))?) - } - #[cfg(not(feature = "metrics"))] - Err(Error::bad_request( - "Garage was built without the metrics feature".to_string(), - )) - } } #[async_trait] @@ -246,7 +216,7 @@ impl AdminApiServer { AdminApiRequest::Options(req) => req.handle(&self.garage, &self).await, AdminApiRequest::CheckDomain(req) => req.handle(&self.garage, &self).await, AdminApiRequest::Health(req) => req.handle(&self.garage, &self).await, - AdminApiRequest::Metrics(_req) => self.handle_metrics(), + AdminApiRequest::Metrics(req) => req.handle(&self.garage, &self).await, req => { let res = req.handle(&self.garage, &self).await?; let mut res = json_ok_response(&res)?; diff --git a/src/api/admin/block.rs b/src/api/admin/block.rs index cf143a71..8f0e63eb 100644 --- a/src/api/admin/block.rs +++ b/src/api/admin/block.rs @@ -12,10 +12,11 @@ use garage_model::garage::Garage; use garage_model::s3::object_table::*; use garage_model::s3::version_table::*; -use crate::admin::api::*; -use crate::admin::error::*; -use crate::admin::{Admin, RequestHandler}; -use crate::common_error::CommonErrorDerivative; +use garage_api_common::common_error::CommonErrorDerivative; + +use crate::api::*; +use crate::error::*; +use crate::{Admin, RequestHandler}; #[async_trait] impl RequestHandler for LocalListBlockErrorsRequest { diff --git a/src/api/admin/special.rs b/src/api/admin/special.rs index 4717238d..79f1f4d7 100644 --- a/src/api/admin/special.rs +++ b/src/api/admin/special.rs @@ -7,12 +7,15 @@ use http::header::{ }; use hyper::{Response, StatusCode}; +#[cfg(feature = "metrics")] +use prometheus::{Encoder, TextEncoder}; + use garage_model::garage::Garage; use garage_rpc::system::ClusterHealthStatus; use garage_api_common::helpers::*; -use crate::api::{CheckDomainRequest, HealthRequest, OptionsRequest}; +use crate::api::{CheckDomainRequest, HealthRequest, MetricsRequest, OptionsRequest}; use crate::api_server::ResBody; use crate::error::*; use crate::{Admin, RequestHandler}; @@ -36,6 +39,77 @@ impl RequestHandler for OptionsRequest { } } +#[async_trait] +impl RequestHandler for MetricsRequest { + type Response = Response; + + async fn handle( + self, + _garage: &Arc, + admin: &Admin, + ) -> Result, Error> { + #[cfg(feature = "metrics")] + { + use opentelemetry::trace::Tracer; + + let mut buffer = vec![]; + let encoder = TextEncoder::new(); + + let tracer = opentelemetry::global::tracer("garage"); + let metric_families = tracer.in_span("admin/gather_metrics", |_| { + admin.exporter.registry().gather() + }); + + encoder + .encode(&metric_families, &mut buffer) + .ok_or_internal_error("Could not serialize metrics")?; + + Ok(Response::builder() + .status(StatusCode::OK) + .header(http::header::CONTENT_TYPE, encoder.format_type()) + .body(bytes_body(buffer.into()))?) + } + #[cfg(not(feature = "metrics"))] + Err(Error::bad_request( + "Garage was built without the metrics feature".to_string(), + )) + } +} + +#[async_trait] +impl RequestHandler for HealthRequest { + type Response = Response; + + async fn handle( + self, + garage: &Arc, + _admin: &Admin, + ) -> Result, Error> { + let health = garage.system.health(); + + let (status, status_str) = match health.status { + ClusterHealthStatus::Healthy => (StatusCode::OK, "Garage is fully operational"), + ClusterHealthStatus::Degraded => ( + StatusCode::OK, + "Garage is operational but some storage nodes are unavailable", + ), + ClusterHealthStatus::Unavailable => ( + StatusCode::SERVICE_UNAVAILABLE, + "Quorum is not available for some/all partitions, reads and writes will fail", + ), + }; + let status_str = format!( + "{}\nConsult the full health check API endpoint at /v2/GetClusterHealth for more details\n", + status_str + ); + + Ok(Response::builder() + .status(status) + .header(http::header::CONTENT_TYPE, "text/plain") + .body(string_body(status_str))?) + } +} + #[async_trait] impl RequestHandler for CheckDomainRequest { type Response = Response; @@ -109,37 +183,3 @@ async fn check_domain(garage: &Arc, domain: &str) -> Result None => Ok(false), } } - -#[async_trait] -impl RequestHandler for HealthRequest { - type Response = Response; - - async fn handle( - self, - garage: &Arc, - _admin: &Admin, - ) -> Result, Error> { - let health = garage.system.health(); - - let (status, status_str) = match health.status { - ClusterHealthStatus::Healthy => (StatusCode::OK, "Garage is fully operational"), - ClusterHealthStatus::Degraded => ( - StatusCode::OK, - "Garage is operational but some storage nodes are unavailable", - ), - ClusterHealthStatus::Unavailable => ( - StatusCode::SERVICE_UNAVAILABLE, - "Quorum is not available for some/all partitions, reads and writes will fail", - ), - }; - let status_str = format!( - "{}\nConsult the full health check API endpoint at /v2/GetClusterHealth for more details\n", - status_str - ); - - Ok(Response::builder() - .status(status) - .header(http::header::CONTENT_TYPE, "text/plain") - .body(string_body(status_str))?) - } -} diff --git a/src/garage/cli_v2/block.rs b/src/garage/cli_v2/block.rs index 7d4595eb..bfc0db4a 100644 --- a/src/garage/cli_v2/block.rs +++ b/src/garage/cli_v2/block.rs @@ -3,7 +3,7 @@ use format_table::format_table; use garage_util::error::*; -use garage_api::admin::api::*; +use garage_api_admin::api::*; use crate::cli::structs::*; use crate::cli_v2::*;