From 06aa4b604fe0a9b5230bc0626d883e5b37953dec Mon Sep 17 00:00:00 2001 From: Baptiste Jonglez Date: Fri, 24 Jan 2025 19:24:09 +0100 Subject: [PATCH 1/2] db-snapshot: Fix error reporting when using "garage meta snapshot --all" Snapshot errors on remote nodes were not reported at all. We now get proper error output such as: 0fa0f35be69528ab error: Internal error: DB error: LMDB: No space left on device (os error 28) 88d92e2971d14bae ok Fix #920 --- src/garage/admin/mod.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/garage/admin/mod.rs b/src/garage/admin/mod.rs index ea414b56..176911fb 100644 --- a/src/garage/admin/mod.rs +++ b/src/garage/admin/mod.rs @@ -484,7 +484,7 @@ impl AdminRpcHandler { AdminRpc::MetaOperation(MetaOperation::Snapshot { all: false }), PRIO_NORMAL, ) - .await + .await? })) .await; From a2e134f036a5bdeca55ae0ce6d731d1ec37a454c Mon Sep 17 00:00:00 2001 From: Baptiste Jonglez Date: Fri, 24 Jan 2025 19:21:08 +0100 Subject: [PATCH 2/2] db-snapshot: propagate any node snapshot error through RPC call In particular, it means that "garage meta snapshot --all" will get an exit code of 1 if any node fails to snapshot. This makes sure that any external tool trying to snapshot nodes (e.g. from cron) will be aware of the failure. Fix #920 --- src/garage/admin/mod.rs | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/src/garage/admin/mod.rs b/src/garage/admin/mod.rs index 176911fb..1a4ff853 100644 --- a/src/garage/admin/mod.rs +++ b/src/garage/admin/mod.rs @@ -497,7 +497,11 @@ impl AdminRpcHandler { ret.push(format!("{:?}\t{}", to, res_str)); } - Ok(AdminRpc::Ok(format_table_to_string(ret))) + if resps.iter().any(Result::is_err) { + Err(GarageError::Message(format_table_to_string(ret)).into()) + } else { + Ok(AdminRpc::Ok(format_table_to_string(ret))) + } } MetaOperation::Snapshot { all: false } => { garage_model::snapshot::async_snapshot_metadata(&self.garage).await?;