WIP: fix crash in layout computation when changing all nodes of a zone to gateway mode
All checks were successful
ci/woodpecker/pr/debug Pipeline was successful

This change is probably not a proper fix; somebody with more expertise on
this code should look at it.

Here is how to reproduce the crash:

- start with a layout with two zones
- move all nodes of a zone to gateway mode: `garage layout assign fea54bcc081f318 -g`
- `garage layout show` will panic with a backtrace

Fortunately, the crash is only on the RPC client side, not on the Garage
server itself, and `garage layout revert` still works to go back to the
previous state.

As far as I can tell, this bug has been present since Garage 0.9.0, which
includes the new layout assignation algorithm:

  #296
This commit is contained in:
Baptiste Jonglez 2025-01-26 20:40:02 +01:00
parent d4e3e60920
commit 6d798c640f

View file

@ -650,8 +650,11 @@ impl LayoutVersion {
let mut cost = CostFunction::new();
for (p, assoc_p) in prev_assign.iter().enumerate() {
for n in assoc_p.iter() {
let node_zone = zone_to_id[self.expect_get_node_zone(&self.node_id_vec[*n])];
cost.insert((Vertex::PZ(p, node_zone), Vertex::N(*n)), -1);
if let Some(&node_zone) =
zone_to_id.get(self.expect_get_node_zone(&self.node_id_vec[*n]))
{
cost.insert((Vertex::PZ(p, node_zone), Vertex::N(*n)), -1);
}
}
}
@ -751,8 +754,11 @@ impl LayoutVersion {
if let Some(prev_assign) = prev_assign_opt {
let mut old_zones_of_p = Vec::<usize>::new();
for n in prev_assign[p].iter() {
old_zones_of_p
.push(zone_to_id[self.expect_get_node_zone(&self.node_id_vec[*n])]);
if let Some(&zone_id) =
zone_to_id.get(self.expect_get_node_zone(&self.node_id_vec[*n]))
{
old_zones_of_p.push(zone_id);
}
}
if !old_zones_of_p.contains(&z) {
new_partitions_zone[z] += 1;