admin: more info in admin GetClusterStatus
This commit is contained in:
parent
539af6eac4
commit
c04dd8788a
5 changed files with 192 additions and 96 deletions
|
@ -69,8 +69,8 @@ Example response body:
|
||||||
|
|
||||||
```json
|
```json
|
||||||
{
|
{
|
||||||
"node": "ec79480e0ce52ae26fd00c9da684e4fa56658d9c64cdcecb094e936de0bfe71f",
|
"node": "b10c110e4e854e5aa3f4637681befac755154b20059ec163254ddbfae86b09df",
|
||||||
"garageVersion": "git:v0.9.0-dev",
|
"garageVersion": "v0.10.0",
|
||||||
"garageFeatures": [
|
"garageFeatures": [
|
||||||
"k2v",
|
"k2v",
|
||||||
"sled",
|
"sled",
|
||||||
|
@ -81,84 +81,93 @@ Example response body:
|
||||||
],
|
],
|
||||||
"rustVersion": "1.68.0",
|
"rustVersion": "1.68.0",
|
||||||
"dbEngine": "LMDB (using Heed crate)",
|
"dbEngine": "LMDB (using Heed crate)",
|
||||||
"knownNodes": [
|
"layoutVersion": 5,
|
||||||
|
"nodes": [
|
||||||
{
|
{
|
||||||
"id": "ec79480e0ce52ae26fd00c9da684e4fa56658d9c64cdcecb094e936de0bfe71f",
|
"id": "62b218d848e86a64f7fe1909735f29a4350547b54c4b204f91246a14eb0a1a8c",
|
||||||
"addr": "10.0.0.11:3901",
|
"role": {
|
||||||
"isUp": true,
|
"id": "62b218d848e86a64f7fe1909735f29a4350547b54c4b204f91246a14eb0a1a8c",
|
||||||
"lastSeenSecsAgo": 9,
|
|
||||||
"hostname": "node1"
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"id": "4a6ae5a1d0d33bf895f5bb4f0a418b7dc94c47c0dd2eb108d1158f3c8f60b0ff",
|
|
||||||
"addr": "10.0.0.12:3901",
|
|
||||||
"isUp": true,
|
|
||||||
"lastSeenSecsAgo": 1,
|
|
||||||
"hostname": "node2"
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"id": "23ffd0cdd375ebff573b20cc5cef38996b51c1a7d6dbcf2c6e619876e507cf27",
|
|
||||||
"addr": "10.0.0.21:3901",
|
|
||||||
"isUp": true,
|
|
||||||
"lastSeenSecsAgo": 7,
|
|
||||||
"hostname": "node3"
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"id": "e2ee7984ee65b260682086ec70026165903c86e601a4a5a501c1900afe28d84b",
|
|
||||||
"addr": "10.0.0.22:3901",
|
|
||||||
"isUp": true,
|
|
||||||
"lastSeenSecsAgo": 1,
|
|
||||||
"hostname": "node4"
|
|
||||||
}
|
|
||||||
],
|
|
||||||
"layout": {
|
|
||||||
"version": 12,
|
|
||||||
"roles": [
|
|
||||||
{
|
|
||||||
"id": "ec79480e0ce52ae26fd00c9da684e4fa56658d9c64cdcecb094e936de0bfe71f",
|
|
||||||
"zone": "dc1",
|
"zone": "dc1",
|
||||||
"capacity": 10737418240,
|
"capacity": 100000000000,
|
||||||
"tags": [
|
"tags": []
|
||||||
"node1"
|
},
|
||||||
]
|
"addr": "10.0.0.3:3901",
|
||||||
|
"hostname": "node3",
|
||||||
|
"isUp": true,
|
||||||
|
"lastSeenSecsAgo": 12,
|
||||||
|
"draining": false,
|
||||||
|
"dataPartition": {
|
||||||
|
"available": 660270088192,
|
||||||
|
"total": 873862266880
|
||||||
|
},
|
||||||
|
"metadataPartition": {
|
||||||
|
"available": 660270088192,
|
||||||
|
"total": 873862266880
|
||||||
|
}
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"id": "4a6ae5a1d0d33bf895f5bb4f0a418b7dc94c47c0dd2eb108d1158f3c8f60b0ff",
|
"id": "a11c7cf18af297379eff8688360155fe68d9061654449ba0ce239252f5a7487f",
|
||||||
|
"role": null,
|
||||||
|
"addr": "10.0.0.2:3901",
|
||||||
|
"hostname": "node2",
|
||||||
|
"isUp": true,
|
||||||
|
"lastSeenSecsAgo": 11,
|
||||||
|
"draining": true,
|
||||||
|
"dataPartition": {
|
||||||
|
"available": 660270088192,
|
||||||
|
"total": 873862266880
|
||||||
|
},
|
||||||
|
"metadataPartition": {
|
||||||
|
"available": 660270088192,
|
||||||
|
"total": 873862266880
|
||||||
|
}
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"id": "a235ac7695e0c54d7b403943025f57504d500fdcc5c3e42c71c5212faca040a2",
|
||||||
|
"role": {
|
||||||
|
"id": "a235ac7695e0c54d7b403943025f57504d500fdcc5c3e42c71c5212faca040a2",
|
||||||
"zone": "dc1",
|
"zone": "dc1",
|
||||||
"capacity": 10737418240,
|
"capacity": 100000000000,
|
||||||
"tags": [
|
"tags": []
|
||||||
"node2"
|
},
|
||||||
]
|
"addr": "127.0.0.1:3904",
|
||||||
|
"hostname": "lindy",
|
||||||
|
"isUp": true,
|
||||||
|
"lastSeenSecsAgo": 2,
|
||||||
|
"draining": false,
|
||||||
|
"dataPartition": {
|
||||||
|
"available": 660270088192,
|
||||||
|
"total": 873862266880
|
||||||
|
},
|
||||||
|
"metadataPartition": {
|
||||||
|
"available": 660270088192,
|
||||||
|
"total": 873862266880
|
||||||
|
}
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"id": "23ffd0cdd375ebff573b20cc5cef38996b51c1a7d6dbcf2c6e619876e507cf27",
|
"id": "b10c110e4e854e5aa3f4637681befac755154b20059ec163254ddbfae86b09df",
|
||||||
"zone": "dc2",
|
"role": {
|
||||||
"capacity": 10737418240,
|
"id": "b10c110e4e854e5aa3f4637681befac755154b20059ec163254ddbfae86b09df",
|
||||||
"tags": [
|
"zone": "dc1",
|
||||||
"node3"
|
"capacity": 100000000000,
|
||||||
]
|
"tags": []
|
||||||
|
},
|
||||||
|
"addr": "10.0.0.1:3901",
|
||||||
|
"hostname": "node1",
|
||||||
|
"isUp": true,
|
||||||
|
"lastSeenSecsAgo": 3,
|
||||||
|
"draining": false,
|
||||||
|
"dataPartition": {
|
||||||
|
"available": 660270088192,
|
||||||
|
"total": 873862266880
|
||||||
|
},
|
||||||
|
"metadataPartition": {
|
||||||
|
"available": 660270088192,
|
||||||
|
"total": 873862266880
|
||||||
}
|
}
|
||||||
],
|
|
||||||
"stagedRoleChanges": [
|
|
||||||
{
|
|
||||||
"id": "e2ee7984ee65b260682086ec70026165903c86e601a4a5a501c1900afe28d84b",
|
|
||||||
"remove": false,
|
|
||||||
"zone": "dc2",
|
|
||||||
"capacity": 10737418240,
|
|
||||||
"tags": [
|
|
||||||
"node4"
|
|
||||||
]
|
|
||||||
}
|
|
||||||
{
|
|
||||||
"id": "23ffd0cdd375ebff573b20cc5cef38996b51c1a7d6dbcf2c6e619876e507cf27",
|
|
||||||
"remove": true,
|
|
||||||
"zone": null,
|
|
||||||
"capacity": null,
|
|
||||||
"tags": null,
|
|
||||||
}
|
}
|
||||||
]
|
]
|
||||||
}
|
}
|
||||||
}
|
|
||||||
```
|
```
|
||||||
|
|
||||||
#### GetClusterHealth `GET /v1/health`
|
#### GetClusterHealth `GET /v1/health`
|
||||||
|
|
|
@ -1,3 +1,4 @@
|
||||||
|
use std::collections::HashMap;
|
||||||
use std::net::SocketAddr;
|
use std::net::SocketAddr;
|
||||||
use std::sync::Arc;
|
use std::sync::Arc;
|
||||||
|
|
||||||
|
@ -15,25 +16,95 @@ use crate::admin::error::*;
|
||||||
use crate::helpers::{json_ok_response, parse_json_body};
|
use crate::helpers::{json_ok_response, parse_json_body};
|
||||||
|
|
||||||
pub async fn handle_get_cluster_status(garage: &Arc<Garage>) -> Result<Response<Body>, Error> {
|
pub async fn handle_get_cluster_status(garage: &Arc<Garage>) -> Result<Response<Body>, Error> {
|
||||||
|
let layout = garage.system.cluster_layout();
|
||||||
|
let mut nodes = garage
|
||||||
|
.system
|
||||||
|
.get_known_nodes()
|
||||||
|
.into_iter()
|
||||||
|
.map(|i| {
|
||||||
|
(
|
||||||
|
i.id,
|
||||||
|
NodeResp {
|
||||||
|
id: hex::encode(i.id),
|
||||||
|
addr: Some(i.addr),
|
||||||
|
hostname: i.status.hostname,
|
||||||
|
is_up: i.is_up,
|
||||||
|
last_seen_secs_ago: i.last_seen_secs_ago,
|
||||||
|
data_partition: i
|
||||||
|
.status
|
||||||
|
.data_disk_avail
|
||||||
|
.map(|(avail, total)| FreeSpaceResp {
|
||||||
|
available: avail,
|
||||||
|
total,
|
||||||
|
}),
|
||||||
|
metadata_partition: i.status.meta_disk_avail.map(|(avail, total)| {
|
||||||
|
FreeSpaceResp {
|
||||||
|
available: avail,
|
||||||
|
total,
|
||||||
|
}
|
||||||
|
}),
|
||||||
|
..Default::default()
|
||||||
|
},
|
||||||
|
)
|
||||||
|
})
|
||||||
|
.collect::<HashMap<_, _>>();
|
||||||
|
|
||||||
|
for (id, _, role) in layout.current().roles.items().iter() {
|
||||||
|
if let layout::NodeRoleV(Some(r)) = role {
|
||||||
|
let role = NodeRoleResp {
|
||||||
|
id: hex::encode(id),
|
||||||
|
zone: r.zone.to_string(),
|
||||||
|
capacity: r.capacity,
|
||||||
|
tags: r.tags.clone(),
|
||||||
|
};
|
||||||
|
match nodes.get_mut(id) {
|
||||||
|
None => {
|
||||||
|
nodes.insert(
|
||||||
|
*id,
|
||||||
|
NodeResp {
|
||||||
|
id: hex::encode(id),
|
||||||
|
role: Some(role),
|
||||||
|
..Default::default()
|
||||||
|
},
|
||||||
|
);
|
||||||
|
}
|
||||||
|
Some(n) => {
|
||||||
|
if n.role.is_none() {
|
||||||
|
n.role = Some(role);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
for ver in layout.versions.iter().rev().skip(1) {
|
||||||
|
for (id, _, role) in ver.roles.items().iter() {
|
||||||
|
if let layout::NodeRoleV(Some(r)) = role {
|
||||||
|
if !nodes.contains_key(id) && r.capacity.is_some() {
|
||||||
|
nodes.insert(
|
||||||
|
*id,
|
||||||
|
NodeResp {
|
||||||
|
id: hex::encode(id),
|
||||||
|
draining: true,
|
||||||
|
..Default::default()
|
||||||
|
},
|
||||||
|
);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
let mut nodes = nodes.into_iter().map(|(_, v)| v).collect::<Vec<_>>();
|
||||||
|
nodes.sort_by(|x, y| x.id.cmp(&y.id));
|
||||||
|
|
||||||
let res = GetClusterStatusResponse {
|
let res = GetClusterStatusResponse {
|
||||||
node: hex::encode(garage.system.id),
|
node: hex::encode(garage.system.id),
|
||||||
garage_version: garage_util::version::garage_version(),
|
garage_version: garage_util::version::garage_version(),
|
||||||
garage_features: garage_util::version::garage_features(),
|
garage_features: garage_util::version::garage_features(),
|
||||||
rust_version: garage_util::version::rust_version(),
|
rust_version: garage_util::version::rust_version(),
|
||||||
db_engine: garage.db.engine(),
|
db_engine: garage.db.engine(),
|
||||||
known_nodes: garage
|
layout_version: layout.current().version,
|
||||||
.system
|
nodes,
|
||||||
.get_known_nodes()
|
|
||||||
.into_iter()
|
|
||||||
.map(|i| KnownNodeResp {
|
|
||||||
id: hex::encode(i.id),
|
|
||||||
addr: i.addr,
|
|
||||||
is_up: i.is_up,
|
|
||||||
last_seen_secs_ago: i.last_seen_secs_ago,
|
|
||||||
hostname: i.status.hostname,
|
|
||||||
})
|
|
||||||
.collect(),
|
|
||||||
layout: format_cluster_layout(&garage.system.cluster_layout()),
|
|
||||||
};
|
};
|
||||||
|
|
||||||
Ok(json_ok_response(&res)?)
|
Ok(json_ok_response(&res)?)
|
||||||
|
@ -157,8 +228,8 @@ struct GetClusterStatusResponse {
|
||||||
garage_features: Option<&'static [&'static str]>,
|
garage_features: Option<&'static [&'static str]>,
|
||||||
rust_version: &'static str,
|
rust_version: &'static str,
|
||||||
db_engine: String,
|
db_engine: String,
|
||||||
known_nodes: Vec<KnownNodeResp>,
|
layout_version: u64,
|
||||||
layout: GetClusterLayoutResponse,
|
nodes: Vec<NodeResp>,
|
||||||
}
|
}
|
||||||
|
|
||||||
#[derive(Serialize)]
|
#[derive(Serialize)]
|
||||||
|
@ -192,14 +263,27 @@ struct NodeRoleResp {
|
||||||
tags: Vec<String>,
|
tags: Vec<String>,
|
||||||
}
|
}
|
||||||
|
|
||||||
#[derive(Serialize)]
|
#[derive(Serialize, Default)]
|
||||||
#[serde(rename_all = "camelCase")]
|
#[serde(rename_all = "camelCase")]
|
||||||
struct KnownNodeResp {
|
struct FreeSpaceResp {
|
||||||
|
available: u64,
|
||||||
|
total: u64,
|
||||||
|
}
|
||||||
|
|
||||||
|
#[derive(Serialize, Default)]
|
||||||
|
#[serde(rename_all = "camelCase")]
|
||||||
|
struct NodeResp {
|
||||||
id: String,
|
id: String,
|
||||||
addr: SocketAddr,
|
role: Option<NodeRoleResp>,
|
||||||
|
addr: Option<SocketAddr>,
|
||||||
|
hostname: Option<String>,
|
||||||
is_up: bool,
|
is_up: bool,
|
||||||
last_seen_secs_ago: Option<u64>,
|
last_seen_secs_ago: Option<u64>,
|
||||||
hostname: String,
|
draining: bool,
|
||||||
|
#[serde(skip_serializing_if = "Option::is_none")]
|
||||||
|
data_partition: Option<FreeSpaceResp>,
|
||||||
|
#[serde(skip_serializing_if = "Option::is_none")]
|
||||||
|
metadata_partition: Option<FreeSpaceResp>,
|
||||||
}
|
}
|
||||||
|
|
||||||
// ---- update functions ----
|
// ---- update functions ----
|
||||||
|
|
|
@ -295,7 +295,7 @@ impl AdminRpcHandler {
|
||||||
let info = node_info.get(id);
|
let info = node_info.get(id);
|
||||||
let status = info.map(|x| &x.status);
|
let status = info.map(|x| &x.status);
|
||||||
let role = layout.current().roles.get(id).and_then(|x| x.0.as_ref());
|
let role = layout.current().roles.get(id).and_then(|x| x.0.as_ref());
|
||||||
let hostname = status.map(|x| x.hostname.as_str()).unwrap_or("?");
|
let hostname = status.and_then(|x| x.hostname.as_deref()).unwrap_or("?");
|
||||||
let zone = role.map(|x| x.zone.as_str()).unwrap_or("?");
|
let zone = role.map(|x| x.zone.as_str()).unwrap_or("?");
|
||||||
let capacity = role
|
let capacity = role
|
||||||
.map(|x| x.capacity_string())
|
.map(|x| x.capacity_string())
|
||||||
|
|
|
@ -62,6 +62,7 @@ pub async fn cmd_status(rpc_cli: &Endpoint<SystemRpc, ()>, rpc_host: NodeID) ->
|
||||||
let mut healthy_nodes =
|
let mut healthy_nodes =
|
||||||
vec!["ID\tHostname\tAddress\tTags\tZone\tCapacity\tDataAvail".to_string()];
|
vec!["ID\tHostname\tAddress\tTags\tZone\tCapacity\tDataAvail".to_string()];
|
||||||
for adv in status.iter().filter(|adv| adv.is_up) {
|
for adv in status.iter().filter(|adv| adv.is_up) {
|
||||||
|
let host = adv.status.hostname.as_deref().unwrap_or("?");
|
||||||
if let Some(NodeRoleV(Some(cfg))) = layout.current().roles.get(&adv.id) {
|
if let Some(NodeRoleV(Some(cfg))) = layout.current().roles.get(&adv.id) {
|
||||||
let data_avail = match &adv.status.data_disk_avail {
|
let data_avail = match &adv.status.data_disk_avail {
|
||||||
_ if cfg.capacity.is_none() => "N/A".into(),
|
_ if cfg.capacity.is_none() => "N/A".into(),
|
||||||
|
@ -75,7 +76,7 @@ pub async fn cmd_status(rpc_cli: &Endpoint<SystemRpc, ()>, rpc_host: NodeID) ->
|
||||||
healthy_nodes.push(format!(
|
healthy_nodes.push(format!(
|
||||||
"{id:?}\t{host}\t{addr}\t[{tags}]\t{zone}\t{capacity}\t{data_avail}",
|
"{id:?}\t{host}\t{addr}\t[{tags}]\t{zone}\t{capacity}\t{data_avail}",
|
||||||
id = adv.id,
|
id = adv.id,
|
||||||
host = adv.status.hostname,
|
host = host,
|
||||||
addr = adv.addr,
|
addr = adv.addr,
|
||||||
tags = cfg.tags.join(","),
|
tags = cfg.tags.join(","),
|
||||||
zone = cfg.zone,
|
zone = cfg.zone,
|
||||||
|
@ -95,7 +96,7 @@ pub async fn cmd_status(rpc_cli: &Endpoint<SystemRpc, ()>, rpc_host: NodeID) ->
|
||||||
healthy_nodes.push(format!(
|
healthy_nodes.push(format!(
|
||||||
"{id:?}\t{host}\t{addr}\t[{tags}]\t{zone}\tdraining metadata...",
|
"{id:?}\t{host}\t{addr}\t[{tags}]\t{zone}\tdraining metadata...",
|
||||||
id = adv.id,
|
id = adv.id,
|
||||||
host = adv.status.hostname,
|
host = host,
|
||||||
addr = adv.addr,
|
addr = adv.addr,
|
||||||
tags = cfg.tags.join(","),
|
tags = cfg.tags.join(","),
|
||||||
zone = cfg.zone,
|
zone = cfg.zone,
|
||||||
|
@ -108,7 +109,7 @@ pub async fn cmd_status(rpc_cli: &Endpoint<SystemRpc, ()>, rpc_host: NodeID) ->
|
||||||
healthy_nodes.push(format!(
|
healthy_nodes.push(format!(
|
||||||
"{id:?}\t{h}\t{addr}\t\t\t{new_role}",
|
"{id:?}\t{h}\t{addr}\t\t\t{new_role}",
|
||||||
id = adv.id,
|
id = adv.id,
|
||||||
h = adv.status.hostname,
|
h = host,
|
||||||
addr = adv.addr,
|
addr = adv.addr,
|
||||||
new_role = new_role,
|
new_role = new_role,
|
||||||
));
|
));
|
||||||
|
@ -149,7 +150,7 @@ pub async fn cmd_status(rpc_cli: &Endpoint<SystemRpc, ()>, rpc_host: NodeID) ->
|
||||||
// it is in a failed state, add proper line to the output
|
// it is in a failed state, add proper line to the output
|
||||||
let (host, addr, last_seen) = match adv {
|
let (host, addr, last_seen) = match adv {
|
||||||
Some(adv) => (
|
Some(adv) => (
|
||||||
adv.status.hostname.as_str(),
|
adv.status.hostname.as_deref().unwrap_or("?"),
|
||||||
adv.addr.to_string(),
|
adv.addr.to_string(),
|
||||||
adv.last_seen_secs_ago
|
adv.last_seen_secs_ago
|
||||||
.map(|s| tf.convert(Duration::from_secs(s)))
|
.map(|s| tf.convert(Duration::from_secs(s)))
|
||||||
|
|
|
@ -126,7 +126,7 @@ pub struct System {
|
||||||
#[derive(Debug, Clone, Serialize, Deserialize)]
|
#[derive(Debug, Clone, Serialize, Deserialize)]
|
||||||
pub struct NodeStatus {
|
pub struct NodeStatus {
|
||||||
/// Hostname of the node
|
/// Hostname of the node
|
||||||
pub hostname: String,
|
pub hostname: Option<String>,
|
||||||
|
|
||||||
/// Replication factor configured on the node
|
/// Replication factor configured on the node
|
||||||
pub replication_factor: usize,
|
pub replication_factor: usize,
|
||||||
|
@ -765,9 +765,11 @@ impl EndpointHandler<SystemRpc> for System {
|
||||||
impl NodeStatus {
|
impl NodeStatus {
|
||||||
fn initial(replication_factor: usize, layout_manager: &LayoutManager) -> Self {
|
fn initial(replication_factor: usize, layout_manager: &LayoutManager) -> Self {
|
||||||
NodeStatus {
|
NodeStatus {
|
||||||
hostname: gethostname::gethostname()
|
hostname: Some(
|
||||||
|
gethostname::gethostname()
|
||||||
.into_string()
|
.into_string()
|
||||||
.unwrap_or_else(|_| "<invalid utf-8>".to_string()),
|
.unwrap_or_else(|_| "<invalid utf-8>".to_string()),
|
||||||
|
),
|
||||||
replication_factor,
|
replication_factor,
|
||||||
layout_digest: layout_manager.layout().digest(),
|
layout_digest: layout_manager.layout().digest(),
|
||||||
meta_disk_avail: None,
|
meta_disk_avail: None,
|
||||||
|
@ -777,7 +779,7 @@ impl NodeStatus {
|
||||||
|
|
||||||
fn unknown() -> Self {
|
fn unknown() -> Self {
|
||||||
NodeStatus {
|
NodeStatus {
|
||||||
hostname: "?".to_string(),
|
hostname: None,
|
||||||
replication_factor: 0,
|
replication_factor: 0,
|
||||||
layout_digest: Default::default(),
|
layout_digest: Default::default(),
|
||||||
meta_disk_avail: None,
|
meta_disk_avail: None,
|
||||||
|
|
Loading…
Reference in a new issue