admin api: impl RequestHandler for MetricsRequest
All checks were successful
ci/woodpecker/push/debug Pipeline was successful
ci/woodpecker/pr/debug Pipeline was successful

This commit is contained in:
Alex 2025-01-31 17:51:50 +01:00
parent c35456ea56
commit 92d10bb47d
2 changed files with 78 additions and 68 deletions

View file

@ -5,7 +5,7 @@ use argon2::password_hash::PasswordHash;
use async_trait::async_trait;
use http::header::{HeaderValue, ACCESS_CONTROL_ALLOW_ORIGIN, AUTHORIZATION};
use hyper::{body::Incoming as IncomingBody, Request, Response, StatusCode};
use hyper::{body::Incoming as IncomingBody, Request, Response};
use serde::{Deserialize, Serialize};
use tokio::sync::watch;
@ -13,8 +13,6 @@ use opentelemetry::trace::SpanRef;
#[cfg(feature = "metrics")]
use opentelemetry_prometheus::PrometheusExporter;
#[cfg(feature = "metrics")]
use prometheus::{Encoder, TextEncoder};
use garage_model::garage::Garage;
use garage_rpc::{Endpoint as RpcEndpoint, *};
@ -100,7 +98,7 @@ pub type ResBody = BoxBody<Error>;
pub struct AdminApiServer {
garage: Arc<Garage>,
#[cfg(feature = "metrics")]
exporter: PrometheusExporter,
pub(crate) exporter: PrometheusExporter,
metrics_token: Option<String>,
admin_token: Option<String>,
pub(crate) background: Arc<BackgroundRunner>,
@ -146,34 +144,6 @@ impl AdminApiServer {
.run_server(bind_addr, Some(0o220), must_exit)
.await
}
fn handle_metrics(&self) -> Result<Response<ResBody>, Error> {
#[cfg(feature = "metrics")]
{
use opentelemetry::trace::Tracer;
let mut buffer = vec![];
let encoder = TextEncoder::new();
let tracer = opentelemetry::global::tracer("garage");
let metric_families = tracer.in_span("admin/gather_metrics", |_| {
self.exporter.registry().gather()
});
encoder
.encode(&metric_families, &mut buffer)
.ok_or_internal_error("Could not serialize metrics")?;
Ok(Response::builder()
.status(StatusCode::OK)
.header(http::header::CONTENT_TYPE, encoder.format_type())
.body(bytes_body(buffer.into()))?)
}
#[cfg(not(feature = "metrics"))]
Err(Error::bad_request(
"Garage was built without the metrics feature".to_string(),
))
}
}
#[async_trait]
@ -234,7 +204,7 @@ impl ApiHandler for Arc<AdminApiServer> {
AdminApiRequest::Options(req) => req.handle(&self.garage, &self).await,
AdminApiRequest::CheckDomain(req) => req.handle(&self.garage, &self).await,
AdminApiRequest::Health(req) => req.handle(&self.garage, &self).await,
AdminApiRequest::Metrics(_req) => self.handle_metrics(),
AdminApiRequest::Metrics(req) => req.handle(&self.garage, &self).await,
req => {
let res = req.handle(&self.garage, &self).await?;
let mut res = json_ok_response(&res)?;

View file

@ -7,10 +7,13 @@ use http::header::{
};
use hyper::{Response, StatusCode};
#[cfg(feature = "metrics")]
use prometheus::{Encoder, TextEncoder};
use garage_model::garage::Garage;
use garage_rpc::system::ClusterHealthStatus;
use crate::admin::api::{CheckDomainRequest, HealthRequest, OptionsRequest};
use crate::admin::api::{CheckDomainRequest, HealthRequest, MetricsRequest, OptionsRequest};
use crate::admin::api_server::ResBody;
use crate::admin::error::*;
use crate::admin::{Admin, RequestHandler};
@ -35,6 +38,77 @@ impl RequestHandler for OptionsRequest {
}
}
#[async_trait]
impl RequestHandler for MetricsRequest {
type Response = Response<ResBody>;
async fn handle(
self,
_garage: &Arc<Garage>,
admin: &Admin,
) -> Result<Response<ResBody>, Error> {
#[cfg(feature = "metrics")]
{
use opentelemetry::trace::Tracer;
let mut buffer = vec![];
let encoder = TextEncoder::new();
let tracer = opentelemetry::global::tracer("garage");
let metric_families = tracer.in_span("admin/gather_metrics", |_| {
admin.exporter.registry().gather()
});
encoder
.encode(&metric_families, &mut buffer)
.ok_or_internal_error("Could not serialize metrics")?;
Ok(Response::builder()
.status(StatusCode::OK)
.header(http::header::CONTENT_TYPE, encoder.format_type())
.body(bytes_body(buffer.into()))?)
}
#[cfg(not(feature = "metrics"))]
Err(Error::bad_request(
"Garage was built without the metrics feature".to_string(),
))
}
}
#[async_trait]
impl RequestHandler for HealthRequest {
type Response = Response<ResBody>;
async fn handle(
self,
garage: &Arc<Garage>,
_admin: &Admin,
) -> Result<Response<ResBody>, Error> {
let health = garage.system.health();
let (status, status_str) = match health.status {
ClusterHealthStatus::Healthy => (StatusCode::OK, "Garage is fully operational"),
ClusterHealthStatus::Degraded => (
StatusCode::OK,
"Garage is operational but some storage nodes are unavailable",
),
ClusterHealthStatus::Unavailable => (
StatusCode::SERVICE_UNAVAILABLE,
"Quorum is not available for some/all partitions, reads and writes will fail",
),
};
let status_str = format!(
"{}\nConsult the full health check API endpoint at /v2/GetClusterHealth for more details\n",
status_str
);
Ok(Response::builder()
.status(status)
.header(http::header::CONTENT_TYPE, "text/plain")
.body(string_body(status_str))?)
}
}
#[async_trait]
impl RequestHandler for CheckDomainRequest {
type Response = Response<ResBody>;
@ -108,37 +182,3 @@ async fn check_domain(garage: &Arc<Garage>, domain: &str) -> Result<bool, Error>
None => Ok(false),
}
}
#[async_trait]
impl RequestHandler for HealthRequest {
type Response = Response<ResBody>;
async fn handle(
self,
garage: &Arc<Garage>,
_admin: &Admin,
) -> Result<Response<ResBody>, Error> {
let health = garage.system.health();
let (status, status_str) = match health.status {
ClusterHealthStatus::Healthy => (StatusCode::OK, "Garage is fully operational"),
ClusterHealthStatus::Degraded => (
StatusCode::OK,
"Garage is operational but some storage nodes are unavailable",
),
ClusterHealthStatus::Unavailable => (
StatusCode::SERVICE_UNAVAILABLE,
"Quorum is not available for some/all partitions, reads and writes will fail",
),
};
let status_str = format!(
"{}\nConsult the full health check API endpoint at /v2/GetClusterHealth for more details\n",
status_str
);
Ok(Response::builder()
.status(status)
.header(http::header::CONTENT_TYPE, "text/plain")
.body(string_body(status_str))?)
}
}