cli: new worker info command
This commit is contained in:
parent
a51e8d94c6
commit
9d82196945
6 changed files with 86 additions and 11 deletions
|
@ -67,14 +67,17 @@ impl Worker for RepairWorker {
|
||||||
idx_bytes
|
idx_bytes
|
||||||
};
|
};
|
||||||
WorkerStatus {
|
WorkerStatus {
|
||||||
progress: Some("Phase 1".into()),
|
progress: Some("0.00%".into()),
|
||||||
freeform: vec![format!("Now at: {}", hex::encode(idx_bytes))],
|
freeform: vec![format!(
|
||||||
|
"Currently in phase 1, iterator position: {}",
|
||||||
|
hex::encode(idx_bytes)
|
||||||
|
)],
|
||||||
..Default::default()
|
..Default::default()
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
Some(bi) => WorkerStatus {
|
Some(bi) => WorkerStatus {
|
||||||
progress: Some(format!("{:.2}%", bi.progress() * 100.)),
|
progress: Some(format!("{:.2}%", bi.progress() * 100.)),
|
||||||
freeform: vec!["Phase 2".into()],
|
freeform: vec!["Currently in phase 2".into()],
|
||||||
..Default::default()
|
..Default::default()
|
||||||
},
|
},
|
||||||
}
|
}
|
||||||
|
@ -291,11 +294,11 @@ impl Worker for ScrubWorker {
|
||||||
}
|
}
|
||||||
ScrubWorkerState::Paused(bsi, rt) => {
|
ScrubWorkerState::Paused(bsi, rt) => {
|
||||||
s.progress = Some(format!("{:.2}%", bsi.progress() * 100.));
|
s.progress = Some(format!("{:.2}%", bsi.progress() * 100.));
|
||||||
s.freeform = vec![format!("Paused, resumes at {}", msec_to_rfc3339(*rt))];
|
s.freeform = vec![format!("Scrub paused, resumes at {}", msec_to_rfc3339(*rt))];
|
||||||
}
|
}
|
||||||
ScrubWorkerState::Finished => {
|
ScrubWorkerState::Finished => {
|
||||||
s.freeform = vec![format!(
|
s.freeform = vec![format!(
|
||||||
"Completed {}",
|
"Last scrub completed at {}",
|
||||||
msec_to_rfc3339(self.persisted.time_last_complete_scrub)
|
msec_to_rfc3339(self.persisted.time_last_complete_scrub)
|
||||||
)];
|
)];
|
||||||
}
|
}
|
||||||
|
|
|
@ -257,7 +257,7 @@ impl BlockResyncManager {
|
||||||
|
|
||||||
if let Err(e) = &res {
|
if let Err(e) = &res {
|
||||||
manager.metrics.resync_error_counter.add(1);
|
manager.metrics.resync_error_counter.add(1);
|
||||||
warn!("Error when resyncing {:?}: {}", hash, e);
|
error!("Error when resyncing {:?}: {}", hash, e);
|
||||||
|
|
||||||
let err_counter = match self.errors.get(hash.as_slice())? {
|
let err_counter = match self.errors.get(hash.as_slice())? {
|
||||||
Some(ec) => ErrorCounter::decode(&ec).add1(now + 1),
|
Some(ec) => ErrorCounter::decode(&ec).add1(now + 1),
|
||||||
|
@ -482,7 +482,7 @@ impl Worker for ResyncWorker {
|
||||||
|
|
||||||
if self.index >= persisted.n_workers {
|
if self.index >= persisted.n_workers {
|
||||||
return WorkerStatus {
|
return WorkerStatus {
|
||||||
freeform: vec!["(unused)".into()],
|
freeform: vec!["This worker is currently disabled".into()],
|
||||||
..Default::default()
|
..Default::default()
|
||||||
};
|
};
|
||||||
}
|
}
|
||||||
|
|
|
@ -54,6 +54,7 @@ pub enum AdminRpc {
|
||||||
HashMap<usize, garage_util::background::WorkerInfo>,
|
HashMap<usize, garage_util::background::WorkerInfo>,
|
||||||
WorkerListOpt,
|
WorkerListOpt,
|
||||||
),
|
),
|
||||||
|
WorkerInfo(usize, garage_util::background::WorkerInfo),
|
||||||
}
|
}
|
||||||
|
|
||||||
impl Rpc for AdminRpc {
|
impl Rpc for AdminRpc {
|
||||||
|
@ -880,6 +881,16 @@ impl AdminRpcHandler {
|
||||||
let workers = self.garage.background.get_worker_info();
|
let workers = self.garage.background.get_worker_info();
|
||||||
Ok(AdminRpc::WorkerList(workers, opt))
|
Ok(AdminRpc::WorkerList(workers, opt))
|
||||||
}
|
}
|
||||||
|
WorkerCmd::Info { tid } => {
|
||||||
|
let info = self
|
||||||
|
.garage
|
||||||
|
.background
|
||||||
|
.get_worker_info()
|
||||||
|
.get(&tid)
|
||||||
|
.ok_or_bad_request(format!("No worker with TID {}", tid))?
|
||||||
|
.clone();
|
||||||
|
Ok(AdminRpc::WorkerInfo(tid, info))
|
||||||
|
}
|
||||||
WorkerCmd::Set { opt } => match opt {
|
WorkerCmd::Set { opt } => match opt {
|
||||||
WorkerSetCmd::ScrubTranquility { tranquility } => {
|
WorkerSetCmd::ScrubTranquility { tranquility } => {
|
||||||
let scrub_command = ScrubWorkerCommand::SetTranquility(tranquility);
|
let scrub_command = ScrubWorkerCommand::SetTranquility(tranquility);
|
||||||
|
|
|
@ -186,7 +186,10 @@ pub async fn cmd_admin(
|
||||||
print_key_info(&key, &rb);
|
print_key_info(&key, &rb);
|
||||||
}
|
}
|
||||||
AdminRpc::WorkerList(wi, wlo) => {
|
AdminRpc::WorkerList(wi, wlo) => {
|
||||||
print_worker_info(wi, wlo);
|
print_worker_list(wi, wlo);
|
||||||
|
}
|
||||||
|
AdminRpc::WorkerInfo(tid, wi) => {
|
||||||
|
print_worker_info(tid, wi);
|
||||||
}
|
}
|
||||||
r => {
|
r => {
|
||||||
error!("Unexpected response: {:?}", r);
|
error!("Unexpected response: {:?}", r);
|
||||||
|
|
|
@ -516,6 +516,9 @@ pub enum WorkerCmd {
|
||||||
#[structopt(flatten)]
|
#[structopt(flatten)]
|
||||||
opt: WorkerListOpt,
|
opt: WorkerListOpt,
|
||||||
},
|
},
|
||||||
|
/// Get detailed information about a worker
|
||||||
|
#[structopt(name = "info", version = garage_version())]
|
||||||
|
Info { tid: usize },
|
||||||
/// Set worker parameter
|
/// Set worker parameter
|
||||||
#[structopt(name = "set", version = garage_version())]
|
#[structopt(name = "set", version = garage_version())]
|
||||||
Set {
|
Set {
|
||||||
|
|
|
@ -241,7 +241,7 @@ pub fn find_matching_node(
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
pub fn print_worker_info(wi: HashMap<usize, WorkerInfo>, wlo: WorkerListOpt) {
|
pub fn print_worker_list(wi: HashMap<usize, WorkerInfo>, wlo: WorkerListOpt) {
|
||||||
let mut wi = wi.into_iter().collect::<Vec<_>>();
|
let mut wi = wi.into_iter().collect::<Vec<_>>();
|
||||||
wi.sort_by_key(|(tid, info)| {
|
wi.sort_by_key(|(tid, info)| {
|
||||||
(
|
(
|
||||||
|
@ -284,13 +284,13 @@ pub fn print_worker_info(wi: HashMap<usize, WorkerInfo>, wlo: WorkerListOpt) {
|
||||||
.tranquility
|
.tranquility
|
||||||
.as_ref()
|
.as_ref()
|
||||||
.map(ToString::to_string)
|
.map(ToString::to_string)
|
||||||
.unwrap_or("-".into()),
|
.unwrap_or_else(|| "-".into()),
|
||||||
info.status.progress.as_deref().unwrap_or("-"),
|
info.status.progress.as_deref().unwrap_or("-"),
|
||||||
info.status
|
info.status
|
||||||
.queue_length
|
.queue_length
|
||||||
.as_ref()
|
.as_ref()
|
||||||
.map(ToString::to_string)
|
.map(ToString::to_string)
|
||||||
.unwrap_or("-".into()),
|
.unwrap_or_else(|| "-".into()),
|
||||||
total_err,
|
total_err,
|
||||||
consec_err,
|
consec_err,
|
||||||
err_ago,
|
err_ago,
|
||||||
|
@ -298,3 +298,58 @@ pub fn print_worker_info(wi: HashMap<usize, WorkerInfo>, wlo: WorkerListOpt) {
|
||||||
}
|
}
|
||||||
format_table(table);
|
format_table(table);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
pub fn print_worker_info(tid: usize, info: WorkerInfo) {
|
||||||
|
let mut table = vec![];
|
||||||
|
table.push(format!("Task id:\t{}", tid));
|
||||||
|
table.push(format!("Worker name:\t{}", info.name));
|
||||||
|
match info.state {
|
||||||
|
WorkerState::Throttled(t) => {
|
||||||
|
table.push(format!(
|
||||||
|
"Worker state:\tBusy (throttled, paused for {:.3}s)",
|
||||||
|
t
|
||||||
|
));
|
||||||
|
}
|
||||||
|
s => {
|
||||||
|
table.push(format!("Worker state:\t{}", s));
|
||||||
|
}
|
||||||
|
};
|
||||||
|
if let Some(tql) = info.status.tranquility {
|
||||||
|
table.push(format!("Tranquility:\t{}", tql));
|
||||||
|
}
|
||||||
|
|
||||||
|
table.push("".into());
|
||||||
|
table.push(format!("Total errors:\t{}", info.errors));
|
||||||
|
table.push(format!("Consecutive errs:\t{}", info.consecutive_errors));
|
||||||
|
if let Some((s, t)) = info.last_error {
|
||||||
|
table.push(format!("Last error:\t{}", s));
|
||||||
|
let tf = timeago::Formatter::new();
|
||||||
|
table.push(format!(
|
||||||
|
"Last error time:\t{}",
|
||||||
|
tf.convert(Duration::from_millis(now_msec() - t))
|
||||||
|
));
|
||||||
|
}
|
||||||
|
|
||||||
|
table.push("".into());
|
||||||
|
if let Some(p) = info.status.progress {
|
||||||
|
table.push(format!("Progress:\t{}", p));
|
||||||
|
}
|
||||||
|
if let Some(ql) = info.status.queue_length {
|
||||||
|
table.push(format!("Queue length:\t{}", ql));
|
||||||
|
}
|
||||||
|
if let Some(pe) = info.status.persistent_errors {
|
||||||
|
table.push(format!("Persistent errors:\t{}", pe));
|
||||||
|
}
|
||||||
|
|
||||||
|
for (i, s) in info.status.freeform.iter().enumerate() {
|
||||||
|
if i == 0 {
|
||||||
|
if table.last() != Some(&"".into()) {
|
||||||
|
table.push("".into());
|
||||||
|
}
|
||||||
|
table.push(format!("Message:\t{}", s));
|
||||||
|
} else {
|
||||||
|
table.push(format!("\t{}", s));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
format_table(table);
|
||||||
|
}
|
||||||
|
|
Loading…
Reference in a new issue