From 1d5142dcea81eead76dc8ff8be736a7385bd8ced Mon Sep 17 00:00:00 2001 From: Baptiste Jonglez Date: Fri, 24 Jan 2025 19:24:09 +0100 Subject: [PATCH 1/2] db-snapshot: Fix error reporting when using "garage meta snapshot --all" Snapshot errors on remote nodes were not reported at all. We now get proper error output such as: 0fa0f35be69528ab error: Internal error: DB error: LMDB: No space left on device (os error 28) 88d92e2971d14bae ok Fix #920 --- src/garage/admin/mod.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/garage/admin/mod.rs b/src/garage/admin/mod.rs index e2468143..823afc59 100644 --- a/src/garage/admin/mod.rs +++ b/src/garage/admin/mod.rs @@ -482,7 +482,7 @@ impl AdminRpcHandler { AdminRpc::MetaOperation(MetaOperation::Snapshot { all: false }), PRIO_NORMAL, ) - .await + .await? })) .await; -- 2.45.3 From 8ff2aa729bfea9b7af96934966e7bb113c316f89 Mon Sep 17 00:00:00 2001 From: Baptiste Jonglez Date: Fri, 24 Jan 2025 19:21:08 +0100 Subject: [PATCH 2/2] db-snapshot: propagate any node snapshot error through RPC call In particular, it means that "garage meta snapshot --all" will get an exit code of 1 if any node fails to snapshot. This makes sure that any external tool trying to snapshot nodes (e.g. from cron) will be aware of the failure. Fix #920 --- src/garage/admin/mod.rs | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/src/garage/admin/mod.rs b/src/garage/admin/mod.rs index 823afc59..ab530ebf 100644 --- a/src/garage/admin/mod.rs +++ b/src/garage/admin/mod.rs @@ -495,7 +495,14 @@ impl AdminRpcHandler { ret.push(format!("{:?}\t{}", to, res_str)); } - Ok(AdminRpc::Ok(format_table_to_string(ret))) + if resps.iter().any(|resp| match resp { + Err(_) => true, + Ok(_) => false, + }) { + Err(Error::BadRequest(format_table_to_string(ret)).into()) + } else { + Ok(AdminRpc::Ok(format_table_to_string(ret))) + } } MetaOperation::Snapshot { all: false } => { garage_model::snapshot::async_snapshot_metadata(&self.garage).await?; -- 2.45.3