admin api: impl RequestHandler for MetricsRequest

This commit is contained in:
Alex 2025-01-31 17:51:50 +01:00
parent b1629dd355
commit 6a1079c412
4 changed files with 84 additions and 73 deletions

View file

@ -5,7 +5,7 @@ use argon2::password_hash::PasswordHash;
use async_trait::async_trait;
use http::header::{HeaderValue, ACCESS_CONTROL_ALLOW_ORIGIN, AUTHORIZATION};
use hyper::{body::Incoming as IncomingBody, Request, Response, StatusCode};
use hyper::{body::Incoming as IncomingBody, Request, Response};
use serde::{Deserialize, Serialize};
use tokio::sync::watch;
@ -13,8 +13,6 @@ use opentelemetry::trace::SpanRef;
#[cfg(feature = "metrics")]
use opentelemetry_prometheus::PrometheusExporter;
#[cfg(feature = "metrics")]
use prometheus::{Encoder, TextEncoder};
use garage_model::garage::Garage;
use garage_rpc::{Endpoint as RpcEndpoint, *};
@ -100,7 +98,7 @@ pub type ResBody = BoxBody<Error>;
pub struct AdminApiServer {
garage: Arc<Garage>,
#[cfg(feature = "metrics")]
exporter: PrometheusExporter,
pub(crate) exporter: PrometheusExporter,
metrics_token: Option<String>,
admin_token: Option<String>,
pub(crate) background: Arc<BackgroundRunner>,
@ -148,34 +146,6 @@ impl AdminApiServer {
.run_server(bind_addr, Some(0o220), must_exit)
.await
}
fn handle_metrics(&self) -> Result<Response<ResBody>, Error> {
#[cfg(feature = "metrics")]
{
use opentelemetry::trace::Tracer;
let mut buffer = vec![];
let encoder = TextEncoder::new();
let tracer = opentelemetry::global::tracer("garage");
let metric_families = tracer.in_span("admin/gather_metrics", |_| {
self.exporter.registry().gather()
});
encoder
.encode(&metric_families, &mut buffer)
.ok_or_internal_error("Could not serialize metrics")?;
Ok(Response::builder()
.status(StatusCode::OK)
.header(http::header::CONTENT_TYPE, encoder.format_type())
.body(bytes_body(buffer.into()))?)
}
#[cfg(not(feature = "metrics"))]
Err(Error::bad_request(
"Garage was built without the metrics feature".to_string(),
))
}
}
#[async_trait]
@ -246,7 +216,7 @@ impl AdminApiServer {
AdminApiRequest::Options(req) => req.handle(&self.garage, &self).await,
AdminApiRequest::CheckDomain(req) => req.handle(&self.garage, &self).await,
AdminApiRequest::Health(req) => req.handle(&self.garage, &self).await,
AdminApiRequest::Metrics(_req) => self.handle_metrics(),
AdminApiRequest::Metrics(req) => req.handle(&self.garage, &self).await,
req => {
let res = req.handle(&self.garage, &self).await?;
let mut res = json_ok_response(&res)?;

View file

@ -12,10 +12,11 @@ use garage_model::garage::Garage;
use garage_model::s3::object_table::*;
use garage_model::s3::version_table::*;
use crate::admin::api::*;
use crate::admin::error::*;
use crate::admin::{Admin, RequestHandler};
use crate::common_error::CommonErrorDerivative;
use garage_api_common::common_error::CommonErrorDerivative;
use crate::api::*;
use crate::error::*;
use crate::{Admin, RequestHandler};
#[async_trait]
impl RequestHandler for LocalListBlockErrorsRequest {

View file

@ -7,12 +7,15 @@ use http::header::{
};
use hyper::{Response, StatusCode};
#[cfg(feature = "metrics")]
use prometheus::{Encoder, TextEncoder};
use garage_model::garage::Garage;
use garage_rpc::system::ClusterHealthStatus;
use garage_api_common::helpers::*;
use crate::api::{CheckDomainRequest, HealthRequest, OptionsRequest};
use crate::api::{CheckDomainRequest, HealthRequest, MetricsRequest, OptionsRequest};
use crate::api_server::ResBody;
use crate::error::*;
use crate::{Admin, RequestHandler};
@ -36,6 +39,77 @@ impl RequestHandler for OptionsRequest {
}
}
#[async_trait]
impl RequestHandler for MetricsRequest {
type Response = Response<ResBody>;
async fn handle(
self,
_garage: &Arc<Garage>,
admin: &Admin,
) -> Result<Response<ResBody>, Error> {
#[cfg(feature = "metrics")]
{
use opentelemetry::trace::Tracer;
let mut buffer = vec![];
let encoder = TextEncoder::new();
let tracer = opentelemetry::global::tracer("garage");
let metric_families = tracer.in_span("admin/gather_metrics", |_| {
admin.exporter.registry().gather()
});
encoder
.encode(&metric_families, &mut buffer)
.ok_or_internal_error("Could not serialize metrics")?;
Ok(Response::builder()
.status(StatusCode::OK)
.header(http::header::CONTENT_TYPE, encoder.format_type())
.body(bytes_body(buffer.into()))?)
}
#[cfg(not(feature = "metrics"))]
Err(Error::bad_request(
"Garage was built without the metrics feature".to_string(),
))
}
}
#[async_trait]
impl RequestHandler for HealthRequest {
type Response = Response<ResBody>;
async fn handle(
self,
garage: &Arc<Garage>,
_admin: &Admin,
) -> Result<Response<ResBody>, Error> {
let health = garage.system.health();
let (status, status_str) = match health.status {
ClusterHealthStatus::Healthy => (StatusCode::OK, "Garage is fully operational"),
ClusterHealthStatus::Degraded => (
StatusCode::OK,
"Garage is operational but some storage nodes are unavailable",
),
ClusterHealthStatus::Unavailable => (
StatusCode::SERVICE_UNAVAILABLE,
"Quorum is not available for some/all partitions, reads and writes will fail",
),
};
let status_str = format!(
"{}\nConsult the full health check API endpoint at /v2/GetClusterHealth for more details\n",
status_str
);
Ok(Response::builder()
.status(status)
.header(http::header::CONTENT_TYPE, "text/plain")
.body(string_body(status_str))?)
}
}
#[async_trait]
impl RequestHandler for CheckDomainRequest {
type Response = Response<ResBody>;
@ -109,37 +183,3 @@ async fn check_domain(garage: &Arc<Garage>, domain: &str) -> Result<bool, Error>
None => Ok(false),
}
}
#[async_trait]
impl RequestHandler for HealthRequest {
type Response = Response<ResBody>;
async fn handle(
self,
garage: &Arc<Garage>,
_admin: &Admin,
) -> Result<Response<ResBody>, Error> {
let health = garage.system.health();
let (status, status_str) = match health.status {
ClusterHealthStatus::Healthy => (StatusCode::OK, "Garage is fully operational"),
ClusterHealthStatus::Degraded => (
StatusCode::OK,
"Garage is operational but some storage nodes are unavailable",
),
ClusterHealthStatus::Unavailable => (
StatusCode::SERVICE_UNAVAILABLE,
"Quorum is not available for some/all partitions, reads and writes will fail",
),
};
let status_str = format!(
"{}\nConsult the full health check API endpoint at /v2/GetClusterHealth for more details\n",
status_str
);
Ok(Response::builder()
.status(status)
.header(http::header::CONTENT_TYPE, "text/plain")
.body(string_body(status_str))?)
}
}

View file

@ -3,7 +3,7 @@ use format_table::format_table;
use garage_util::error::*;
use garage_api::admin::api::*;
use garage_api_admin::api::*;
use crate::cli::structs::*;
use crate::cli_v2::*;