admin: more info in admin GetClusterStatus
This commit is contained in:
parent
539af6eac4
commit
c04dd8788a
5 changed files with 192 additions and 96 deletions
|
@ -69,8 +69,8 @@ Example response body:
|
|||
|
||||
```json
|
||||
{
|
||||
"node": "ec79480e0ce52ae26fd00c9da684e4fa56658d9c64cdcecb094e936de0bfe71f",
|
||||
"garageVersion": "git:v0.9.0-dev",
|
||||
"node": "b10c110e4e854e5aa3f4637681befac755154b20059ec163254ddbfae86b09df",
|
||||
"garageVersion": "v0.10.0",
|
||||
"garageFeatures": [
|
||||
"k2v",
|
||||
"sled",
|
||||
|
@ -81,84 +81,93 @@ Example response body:
|
|||
],
|
||||
"rustVersion": "1.68.0",
|
||||
"dbEngine": "LMDB (using Heed crate)",
|
||||
"knownNodes": [
|
||||
"layoutVersion": 5,
|
||||
"nodes": [
|
||||
{
|
||||
"id": "ec79480e0ce52ae26fd00c9da684e4fa56658d9c64cdcecb094e936de0bfe71f",
|
||||
"addr": "10.0.0.11:3901",
|
||||
"isUp": true,
|
||||
"lastSeenSecsAgo": 9,
|
||||
"hostname": "node1"
|
||||
},
|
||||
{
|
||||
"id": "4a6ae5a1d0d33bf895f5bb4f0a418b7dc94c47c0dd2eb108d1158f3c8f60b0ff",
|
||||
"addr": "10.0.0.12:3901",
|
||||
"isUp": true,
|
||||
"lastSeenSecsAgo": 1,
|
||||
"hostname": "node2"
|
||||
},
|
||||
{
|
||||
"id": "23ffd0cdd375ebff573b20cc5cef38996b51c1a7d6dbcf2c6e619876e507cf27",
|
||||
"addr": "10.0.0.21:3901",
|
||||
"isUp": true,
|
||||
"lastSeenSecsAgo": 7,
|
||||
"hostname": "node3"
|
||||
},
|
||||
{
|
||||
"id": "e2ee7984ee65b260682086ec70026165903c86e601a4a5a501c1900afe28d84b",
|
||||
"addr": "10.0.0.22:3901",
|
||||
"isUp": true,
|
||||
"lastSeenSecsAgo": 1,
|
||||
"hostname": "node4"
|
||||
}
|
||||
],
|
||||
"layout": {
|
||||
"version": 12,
|
||||
"roles": [
|
||||
{
|
||||
"id": "ec79480e0ce52ae26fd00c9da684e4fa56658d9c64cdcecb094e936de0bfe71f",
|
||||
"id": "62b218d848e86a64f7fe1909735f29a4350547b54c4b204f91246a14eb0a1a8c",
|
||||
"role": {
|
||||
"id": "62b218d848e86a64f7fe1909735f29a4350547b54c4b204f91246a14eb0a1a8c",
|
||||
"zone": "dc1",
|
||||
"capacity": 10737418240,
|
||||
"tags": [
|
||||
"node1"
|
||||
]
|
||||
"capacity": 100000000000,
|
||||
"tags": []
|
||||
},
|
||||
"addr": "10.0.0.3:3901",
|
||||
"hostname": "node3",
|
||||
"isUp": true,
|
||||
"lastSeenSecsAgo": 12,
|
||||
"draining": false,
|
||||
"dataPartition": {
|
||||
"available": 660270088192,
|
||||
"total": 873862266880
|
||||
},
|
||||
"metadataPartition": {
|
||||
"available": 660270088192,
|
||||
"total": 873862266880
|
||||
}
|
||||
},
|
||||
{
|
||||
"id": "4a6ae5a1d0d33bf895f5bb4f0a418b7dc94c47c0dd2eb108d1158f3c8f60b0ff",
|
||||
"id": "a11c7cf18af297379eff8688360155fe68d9061654449ba0ce239252f5a7487f",
|
||||
"role": null,
|
||||
"addr": "10.0.0.2:3901",
|
||||
"hostname": "node2",
|
||||
"isUp": true,
|
||||
"lastSeenSecsAgo": 11,
|
||||
"draining": true,
|
||||
"dataPartition": {
|
||||
"available": 660270088192,
|
||||
"total": 873862266880
|
||||
},
|
||||
"metadataPartition": {
|
||||
"available": 660270088192,
|
||||
"total": 873862266880
|
||||
}
|
||||
},
|
||||
{
|
||||
"id": "a235ac7695e0c54d7b403943025f57504d500fdcc5c3e42c71c5212faca040a2",
|
||||
"role": {
|
||||
"id": "a235ac7695e0c54d7b403943025f57504d500fdcc5c3e42c71c5212faca040a2",
|
||||
"zone": "dc1",
|
||||
"capacity": 10737418240,
|
||||
"tags": [
|
||||
"node2"
|
||||
]
|
||||
"capacity": 100000000000,
|
||||
"tags": []
|
||||
},
|
||||
"addr": "127.0.0.1:3904",
|
||||
"hostname": "lindy",
|
||||
"isUp": true,
|
||||
"lastSeenSecsAgo": 2,
|
||||
"draining": false,
|
||||
"dataPartition": {
|
||||
"available": 660270088192,
|
||||
"total": 873862266880
|
||||
},
|
||||
"metadataPartition": {
|
||||
"available": 660270088192,
|
||||
"total": 873862266880
|
||||
}
|
||||
},
|
||||
{
|
||||
"id": "23ffd0cdd375ebff573b20cc5cef38996b51c1a7d6dbcf2c6e619876e507cf27",
|
||||
"zone": "dc2",
|
||||
"capacity": 10737418240,
|
||||
"tags": [
|
||||
"node3"
|
||||
]
|
||||
"id": "b10c110e4e854e5aa3f4637681befac755154b20059ec163254ddbfae86b09df",
|
||||
"role": {
|
||||
"id": "b10c110e4e854e5aa3f4637681befac755154b20059ec163254ddbfae86b09df",
|
||||
"zone": "dc1",
|
||||
"capacity": 100000000000,
|
||||
"tags": []
|
||||
},
|
||||
"addr": "10.0.0.1:3901",
|
||||
"hostname": "node1",
|
||||
"isUp": true,
|
||||
"lastSeenSecsAgo": 3,
|
||||
"draining": false,
|
||||
"dataPartition": {
|
||||
"available": 660270088192,
|
||||
"total": 873862266880
|
||||
},
|
||||
"metadataPartition": {
|
||||
"available": 660270088192,
|
||||
"total": 873862266880
|
||||
}
|
||||
],
|
||||
"stagedRoleChanges": [
|
||||
{
|
||||
"id": "e2ee7984ee65b260682086ec70026165903c86e601a4a5a501c1900afe28d84b",
|
||||
"remove": false,
|
||||
"zone": "dc2",
|
||||
"capacity": 10737418240,
|
||||
"tags": [
|
||||
"node4"
|
||||
]
|
||||
}
|
||||
{
|
||||
"id": "23ffd0cdd375ebff573b20cc5cef38996b51c1a7d6dbcf2c6e619876e507cf27",
|
||||
"remove": true,
|
||||
"zone": null,
|
||||
"capacity": null,
|
||||
"tags": null,
|
||||
}
|
||||
]
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
#### GetClusterHealth `GET /v1/health`
|
||||
|
|
|
@ -1,3 +1,4 @@
|
|||
use std::collections::HashMap;
|
||||
use std::net::SocketAddr;
|
||||
use std::sync::Arc;
|
||||
|
||||
|
@ -15,25 +16,95 @@ use crate::admin::error::*;
|
|||
use crate::helpers::{json_ok_response, parse_json_body};
|
||||
|
||||
pub async fn handle_get_cluster_status(garage: &Arc<Garage>) -> Result<Response<Body>, Error> {
|
||||
let layout = garage.system.cluster_layout();
|
||||
let mut nodes = garage
|
||||
.system
|
||||
.get_known_nodes()
|
||||
.into_iter()
|
||||
.map(|i| {
|
||||
(
|
||||
i.id,
|
||||
NodeResp {
|
||||
id: hex::encode(i.id),
|
||||
addr: Some(i.addr),
|
||||
hostname: i.status.hostname,
|
||||
is_up: i.is_up,
|
||||
last_seen_secs_ago: i.last_seen_secs_ago,
|
||||
data_partition: i
|
||||
.status
|
||||
.data_disk_avail
|
||||
.map(|(avail, total)| FreeSpaceResp {
|
||||
available: avail,
|
||||
total,
|
||||
}),
|
||||
metadata_partition: i.status.meta_disk_avail.map(|(avail, total)| {
|
||||
FreeSpaceResp {
|
||||
available: avail,
|
||||
total,
|
||||
}
|
||||
}),
|
||||
..Default::default()
|
||||
},
|
||||
)
|
||||
})
|
||||
.collect::<HashMap<_, _>>();
|
||||
|
||||
for (id, _, role) in layout.current().roles.items().iter() {
|
||||
if let layout::NodeRoleV(Some(r)) = role {
|
||||
let role = NodeRoleResp {
|
||||
id: hex::encode(id),
|
||||
zone: r.zone.to_string(),
|
||||
capacity: r.capacity,
|
||||
tags: r.tags.clone(),
|
||||
};
|
||||
match nodes.get_mut(id) {
|
||||
None => {
|
||||
nodes.insert(
|
||||
*id,
|
||||
NodeResp {
|
||||
id: hex::encode(id),
|
||||
role: Some(role),
|
||||
..Default::default()
|
||||
},
|
||||
);
|
||||
}
|
||||
Some(n) => {
|
||||
if n.role.is_none() {
|
||||
n.role = Some(role);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
for ver in layout.versions.iter().rev().skip(1) {
|
||||
for (id, _, role) in ver.roles.items().iter() {
|
||||
if let layout::NodeRoleV(Some(r)) = role {
|
||||
if !nodes.contains_key(id) && r.capacity.is_some() {
|
||||
nodes.insert(
|
||||
*id,
|
||||
NodeResp {
|
||||
id: hex::encode(id),
|
||||
draining: true,
|
||||
..Default::default()
|
||||
},
|
||||
);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
let mut nodes = nodes.into_iter().map(|(_, v)| v).collect::<Vec<_>>();
|
||||
nodes.sort_by(|x, y| x.id.cmp(&y.id));
|
||||
|
||||
let res = GetClusterStatusResponse {
|
||||
node: hex::encode(garage.system.id),
|
||||
garage_version: garage_util::version::garage_version(),
|
||||
garage_features: garage_util::version::garage_features(),
|
||||
rust_version: garage_util::version::rust_version(),
|
||||
db_engine: garage.db.engine(),
|
||||
known_nodes: garage
|
||||
.system
|
||||
.get_known_nodes()
|
||||
.into_iter()
|
||||
.map(|i| KnownNodeResp {
|
||||
id: hex::encode(i.id),
|
||||
addr: i.addr,
|
||||
is_up: i.is_up,
|
||||
last_seen_secs_ago: i.last_seen_secs_ago,
|
||||
hostname: i.status.hostname,
|
||||
})
|
||||
.collect(),
|
||||
layout: format_cluster_layout(&garage.system.cluster_layout()),
|
||||
layout_version: layout.current().version,
|
||||
nodes,
|
||||
};
|
||||
|
||||
Ok(json_ok_response(&res)?)
|
||||
|
@ -157,8 +228,8 @@ struct GetClusterStatusResponse {
|
|||
garage_features: Option<&'static [&'static str]>,
|
||||
rust_version: &'static str,
|
||||
db_engine: String,
|
||||
known_nodes: Vec<KnownNodeResp>,
|
||||
layout: GetClusterLayoutResponse,
|
||||
layout_version: u64,
|
||||
nodes: Vec<NodeResp>,
|
||||
}
|
||||
|
||||
#[derive(Serialize)]
|
||||
|
@ -192,14 +263,27 @@ struct NodeRoleResp {
|
|||
tags: Vec<String>,
|
||||
}
|
||||
|
||||
#[derive(Serialize)]
|
||||
#[derive(Serialize, Default)]
|
||||
#[serde(rename_all = "camelCase")]
|
||||
struct KnownNodeResp {
|
||||
struct FreeSpaceResp {
|
||||
available: u64,
|
||||
total: u64,
|
||||
}
|
||||
|
||||
#[derive(Serialize, Default)]
|
||||
#[serde(rename_all = "camelCase")]
|
||||
struct NodeResp {
|
||||
id: String,
|
||||
addr: SocketAddr,
|
||||
role: Option<NodeRoleResp>,
|
||||
addr: Option<SocketAddr>,
|
||||
hostname: Option<String>,
|
||||
is_up: bool,
|
||||
last_seen_secs_ago: Option<u64>,
|
||||
hostname: String,
|
||||
draining: bool,
|
||||
#[serde(skip_serializing_if = "Option::is_none")]
|
||||
data_partition: Option<FreeSpaceResp>,
|
||||
#[serde(skip_serializing_if = "Option::is_none")]
|
||||
metadata_partition: Option<FreeSpaceResp>,
|
||||
}
|
||||
|
||||
// ---- update functions ----
|
||||
|
|
|
@ -295,7 +295,7 @@ impl AdminRpcHandler {
|
|||
let info = node_info.get(id);
|
||||
let status = info.map(|x| &x.status);
|
||||
let role = layout.current().roles.get(id).and_then(|x| x.0.as_ref());
|
||||
let hostname = status.map(|x| x.hostname.as_str()).unwrap_or("?");
|
||||
let hostname = status.and_then(|x| x.hostname.as_deref()).unwrap_or("?");
|
||||
let zone = role.map(|x| x.zone.as_str()).unwrap_or("?");
|
||||
let capacity = role
|
||||
.map(|x| x.capacity_string())
|
||||
|
|
|
@ -62,6 +62,7 @@ pub async fn cmd_status(rpc_cli: &Endpoint<SystemRpc, ()>, rpc_host: NodeID) ->
|
|||
let mut healthy_nodes =
|
||||
vec!["ID\tHostname\tAddress\tTags\tZone\tCapacity\tDataAvail".to_string()];
|
||||
for adv in status.iter().filter(|adv| adv.is_up) {
|
||||
let host = adv.status.hostname.as_deref().unwrap_or("?");
|
||||
if let Some(NodeRoleV(Some(cfg))) = layout.current().roles.get(&adv.id) {
|
||||
let data_avail = match &adv.status.data_disk_avail {
|
||||
_ if cfg.capacity.is_none() => "N/A".into(),
|
||||
|
@ -75,7 +76,7 @@ pub async fn cmd_status(rpc_cli: &Endpoint<SystemRpc, ()>, rpc_host: NodeID) ->
|
|||
healthy_nodes.push(format!(
|
||||
"{id:?}\t{host}\t{addr}\t[{tags}]\t{zone}\t{capacity}\t{data_avail}",
|
||||
id = adv.id,
|
||||
host = adv.status.hostname,
|
||||
host = host,
|
||||
addr = adv.addr,
|
||||
tags = cfg.tags.join(","),
|
||||
zone = cfg.zone,
|
||||
|
@ -95,7 +96,7 @@ pub async fn cmd_status(rpc_cli: &Endpoint<SystemRpc, ()>, rpc_host: NodeID) ->
|
|||
healthy_nodes.push(format!(
|
||||
"{id:?}\t{host}\t{addr}\t[{tags}]\t{zone}\tdraining metadata...",
|
||||
id = adv.id,
|
||||
host = adv.status.hostname,
|
||||
host = host,
|
||||
addr = adv.addr,
|
||||
tags = cfg.tags.join(","),
|
||||
zone = cfg.zone,
|
||||
|
@ -108,7 +109,7 @@ pub async fn cmd_status(rpc_cli: &Endpoint<SystemRpc, ()>, rpc_host: NodeID) ->
|
|||
healthy_nodes.push(format!(
|
||||
"{id:?}\t{h}\t{addr}\t\t\t{new_role}",
|
||||
id = adv.id,
|
||||
h = adv.status.hostname,
|
||||
h = host,
|
||||
addr = adv.addr,
|
||||
new_role = new_role,
|
||||
));
|
||||
|
@ -149,7 +150,7 @@ pub async fn cmd_status(rpc_cli: &Endpoint<SystemRpc, ()>, rpc_host: NodeID) ->
|
|||
// it is in a failed state, add proper line to the output
|
||||
let (host, addr, last_seen) = match adv {
|
||||
Some(adv) => (
|
||||
adv.status.hostname.as_str(),
|
||||
adv.status.hostname.as_deref().unwrap_or("?"),
|
||||
adv.addr.to_string(),
|
||||
adv.last_seen_secs_ago
|
||||
.map(|s| tf.convert(Duration::from_secs(s)))
|
||||
|
|
|
@ -126,7 +126,7 @@ pub struct System {
|
|||
#[derive(Debug, Clone, Serialize, Deserialize)]
|
||||
pub struct NodeStatus {
|
||||
/// Hostname of the node
|
||||
pub hostname: String,
|
||||
pub hostname: Option<String>,
|
||||
|
||||
/// Replication factor configured on the node
|
||||
pub replication_factor: usize,
|
||||
|
@ -765,9 +765,11 @@ impl EndpointHandler<SystemRpc> for System {
|
|||
impl NodeStatus {
|
||||
fn initial(replication_factor: usize, layout_manager: &LayoutManager) -> Self {
|
||||
NodeStatus {
|
||||
hostname: gethostname::gethostname()
|
||||
hostname: Some(
|
||||
gethostname::gethostname()
|
||||
.into_string()
|
||||
.unwrap_or_else(|_| "<invalid utf-8>".to_string()),
|
||||
),
|
||||
replication_factor,
|
||||
layout_digest: layout_manager.layout().digest(),
|
||||
meta_disk_avail: None,
|
||||
|
@ -777,7 +779,7 @@ impl NodeStatus {
|
|||
|
||||
fn unknown() -> Self {
|
||||
NodeStatus {
|
||||
hostname: "?".to_string(),
|
||||
hostname: None,
|
||||
replication_factor: 0,
|
||||
layout_digest: Default::default(),
|
||||
meta_disk_avail: None,
|
||||
|
|
Loading…
Reference in a new issue