diff --git a/src/rpc/system.rs b/src/rpc/system.rs index b42e49fc..016a406c 100644 --- a/src/rpc/system.rs +++ b/src/rpc/system.rs @@ -3,7 +3,6 @@ use std::collections::HashMap; use std::io::{Read, Write}; use std::net::{IpAddr, SocketAddr}; use std::path::{Path, PathBuf}; -use std::sync::atomic::Ordering; use std::sync::{Arc, RwLock}; use std::time::{Duration, Instant}; @@ -11,6 +10,7 @@ use arc_swap::ArcSwap; use async_trait::async_trait; use futures::{join, select}; use futures_util::future::*; +use opentelemetry::{Context, KeyValue}; use serde::{Deserialize, Serialize}; use sodiumoxide::crypto::sign::ed25519; use tokio::sync::watch; @@ -248,7 +248,12 @@ impl System { replication_mode: ReplicationMode, config: &Config, ) -> Result, Error> { + let metrics = SystemMetrics::new(); + let replication_factor = replication_mode.replication_factor(); + metrics + ._replication_factor + .observe(&Context::current(), replication_factor as u64, &[]); let node_key = gen_node_key(&config.metadata_dir).expect("Unable to read or generate node ID"); @@ -281,7 +286,14 @@ impl System { } }; - let metrics = SystemMetrics::new(replication_factor); + metrics._garage_build_info.observe( + &Context::current(), + 1, + &[KeyValue::new( + "version", + garage_util::version::garage_version(), + )], + ); let mut local_status = NodeStatus::initial(replication_factor, &cluster_layout); local_status.update_disk_usage(&config.metadata_dir, &config.data_dir, &metrics); @@ -892,6 +904,7 @@ impl NodeStatus { fn update_disk_usage(&mut self, meta_dir: &Path, data_dir: &Path, metrics: &SystemMetrics) { use systemstat::{Platform, System}; + let mounts = System::new().mounts().unwrap_or_default(); let mount_avail = |path: &Path| { @@ -906,24 +919,28 @@ impl NodeStatus { self.data_disk_avail = mount_avail(data_dir); if let Some((avail, total)) = self.meta_disk_avail { - metrics - .values - .meta_disk_avail - .store(avail, Ordering::Relaxed); - metrics - .values - .meta_disk_total - .store(total, Ordering::Relaxed); + metrics._disk_avail.observe( + &Context::current(), + avail, + &[KeyValue::new("volume", "meta")], + ); + metrics._disk_total.observe( + &Context::current(), + total, + &[KeyValue::new("volume", "meta")], + ); } if let Some((avail, total)) = self.data_disk_avail { - metrics - .values - .data_disk_avail - .store(avail, Ordering::Relaxed); - metrics - .values - .data_disk_total - .store(total, Ordering::Relaxed); + metrics._disk_avail.observe( + &Context::current(), + avail, + &[KeyValue::new("volume", "data")], + ); + metrics._disk_total.observe( + &Context::current(), + total, + &[KeyValue::new("volume", "data")], + ); } } } diff --git a/src/rpc/system_metrics.rs b/src/rpc/system_metrics.rs index 83f5fa97..9c3d039b 100644 --- a/src/rpc/system_metrics.rs +++ b/src/rpc/system_metrics.rs @@ -1,77 +1,34 @@ -use std::sync::atomic::{AtomicU64, Ordering}; -use std::sync::Arc; - -use opentelemetry::{global, metrics::*, KeyValue}; +use opentelemetry::{global, metrics::*}; /// TableMetrics reference all counter used for metrics pub struct SystemMetrics { - pub(crate) _garage_build_info: ValueObserver, - pub(crate) _replication_factor: ValueObserver, - pub(crate) _disk_avail: ValueObserver, - pub(crate) _disk_total: ValueObserver, - pub(crate) values: Arc, -} - -#[derive(Default)] -pub struct SystemMetricsValues { - pub(crate) data_disk_total: AtomicU64, - pub(crate) data_disk_avail: AtomicU64, - pub(crate) meta_disk_total: AtomicU64, - pub(crate) meta_disk_avail: AtomicU64, + pub(crate) _garage_build_info: ObservableGauge, + pub(crate) _replication_factor: ObservableGauge, + pub(crate) _disk_avail: ObservableGauge, + pub(crate) _disk_total: ObservableGauge, } impl SystemMetrics { - pub fn new(replication_factor: usize) -> Self { + pub fn new() -> Self { let meter = global::meter("garage_system"); - let values = Arc::new(SystemMetricsValues::default()); - let values1 = values.clone(); - let values2 = values.clone(); + Self { _garage_build_info: meter - .u64_value_observer("garage_build_info", move |observer| { - observer.observe( - 1, - &[KeyValue::new( - "version", - garage_util::version::garage_version(), - )], - ) - }) + .u64_observable_gauge("garage_build_info") .with_description("Garage build info") .init(), _replication_factor: meter - .u64_value_observer("garage_replication_factor", move |observer| { - observer.observe(replication_factor as u64, &[]) - }) + .u64_observable_gauge("garage_replication_factor") .with_description("Garage replication factor setting") .init(), _disk_avail: meter - .u64_value_observer("garage_local_disk_avail", move |observer| { - match values1.data_disk_avail.load(Ordering::Relaxed) { - 0 => (), - x => observer.observe(x, &[KeyValue::new("volume", "data")]), - }; - match values1.meta_disk_avail.load(Ordering::Relaxed) { - 0 => (), - x => observer.observe(x, &[KeyValue::new("volume", "metadata")]), - }; - }) + .u64_observable_gauge("garage_local_disk_avail") .with_description("Garage available disk space on each node") .init(), _disk_total: meter - .u64_value_observer("garage_local_disk_total", move |observer| { - match values2.data_disk_total.load(Ordering::Relaxed) { - 0 => (), - x => observer.observe(x, &[KeyValue::new("volume", "data")]), - }; - match values2.meta_disk_total.load(Ordering::Relaxed) { - 0 => (), - x => observer.observe(x, &[KeyValue::new("volume", "metadata")]), - }; - }) + .u64_observable_gauge("garage_local_disk_total") .with_description("Garage total disk space on each node") .init(), - values, } } }