Garage v0.9 #473

Merged
lx merged 175 commits from next into main 2023-10-10 13:28:29 +00:00
5 changed files with 271 additions and 277 deletions
Showing only changes of commit ea5afc2511


@ -86,7 +86,7 @@ fn get_cluster_layout(garage: &Arc<Garage>) -> GetClusterLayoutResponse {
.map(|(k, _, v)| (hex::encode(k), v.0.clone()))
.collect(),
staged_role_changes: layout
.staging
.staging_roles
.items()
.iter()
.filter(|(k, _, v)| layout.roles.get(k) != Some(v))
@ -137,14 +137,14 @@ pub async fn handle_update_cluster_layout(
let mut layout = garage.system.get_cluster_layout();
let mut roles = layout.roles.clone();
roles.merge(&layout.staging);
roles.merge(&layout.staging_roles);
for (node, role) in updates {
let node = hex::decode(node).ok_or_bad_request("Invalid node identifier")?;
let node = Uuid::try_from(&node).ok_or_bad_request("Invalid node identifier")?;
layout
.staging
.staging_roles
.merge(&roles.update_mutator(node, NodeRoleV(role)));
}


@ -71,7 +71,7 @@ pub async fn cmd_status(rpc_cli: &Endpoint<SystemRpc, ()>, rpc_host: NodeID) ->
));
}
_ => {
let new_role = match layout.staging.get(&adv.id) {
let new_role = match layout.staging_roles.get(&adv.id) {
Some(NodeRoleV(Some(_))) => "(pending)",
_ => "NO ROLE ASSIGNED",
};


@ -63,14 +63,14 @@ pub async fn cmd_assign_role(
.collect::<Result<Vec<_>, _>>()?;
let mut roles = layout.roles.clone();
roles.merge(&layout.staging);
roles.merge(&layout.staging_roles);
for replaced in args.replace.iter() {
let replaced_node = find_matching_node(layout.node_ids().iter().cloned(), replaced)?;
match roles.get(&replaced_node) {
Some(NodeRoleV(Some(_))) => {
layout
.staging
.staging_roles
.merge(&roles.update_mutator(replaced_node, NodeRoleV(None)));
}
_ => {
@ -128,7 +128,7 @@ pub async fn cmd_assign_role(
};
layout
.staging
.staging_roles
.merge(&roles.update_mutator(added_node, NodeRoleV(Some(new_entry))));
}
@ -148,13 +148,13 @@ pub async fn cmd_remove_role(
let mut layout = fetch_layout(rpc_cli, rpc_host).await?;
let mut roles = layout.roles.clone();
roles.merge(&layout.staging);
roles.merge(&layout.staging_roles);
let deleted_node =
find_matching_node(roles.items().iter().map(|(id, _, _)| *id), &args.node_id)?;
layout
.staging
.staging_roles
.merge(&roles.update_mutator(deleted_node, NodeRoleV(None)));
send_layout(rpc_cli, rpc_host, layout).await?;
@ -278,7 +278,7 @@ pub async fn cmd_config_layout(
println!("The zone redundancy must be at least 1.");
} else {
layout
.staged_parameters
.staging_parameters
.update(LayoutParameters { zone_redundancy: r });
println!("The new zone redundancy has been saved ({}).", r);
}
@ -352,13 +352,13 @@ pub fn print_cluster_layout(layout: &ClusterLayout) -> bool {
}
pub fn print_staging_parameters_changes(layout: &ClusterLayout) -> bool {
let has_changes = layout.staged_parameters.get().clone() != layout.parameters;
let has_changes = layout.staging_parameters.get().clone() != layout.parameters;
if has_changes {
println!();
println!("==== NEW LAYOUT PARAMETERS ====");
println!(
"Zone redundancy: {}",
layout.staged_parameters.get().zone_redundancy
layout.staging_parameters.get().zone_redundancy
);
println!();
}
@ -367,7 +367,7 @@ pub fn print_staging_parameters_changes(layout: &ClusterLayout) -> bool {
pub fn print_staging_role_changes(layout: &ClusterLayout) -> bool {
let has_changes = layout
.staging
.staging_roles
.items()
.iter()
.any(|(k, _, v)| layout.roles.get(k) != Some(v));
@ -376,7 +376,7 @@ pub fn print_staging_role_changes(layout: &ClusterLayout) -> bool {
println!();
println!("==== STAGED ROLE CHANGES ====");
let mut table = vec!["ID\tTags\tZone\tCapacity".to_string()];
for (id, _, role) in layout.staging.items().iter() {
for (id, _, role) in layout.staging_roles.items().iter() {
if layout.roles.get(id) == Some(role) {
continue;
}
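
The CLI commands above (cmd_assign_role, cmd_remove_role, cmd_config_layout) all follow one pattern: clone the applied roles, merge the staged role changes on top, and only then compute the new mutation, so the change is staged relative to the effective future layout (including anything already staged) rather than only the applied one. The toy sketch below models that pattern with plain HashMaps instead of Garage's Lww/LwwMap CRDTs, so timestamps and conflict resolution are deliberately ignored; every name in it is illustrative, not Garage's API.

use std::collections::HashMap;

// Toy model: role assignment per node id; None means "staged for removal".
type Roles = HashMap<&'static str, Option<&'static str>>;

// Rough analogue of `roles.merge(&layout.staging_roles)`: staged entries win.
fn effective_roles(applied: &Roles, staged: &Roles) -> Roles {
    let mut merged = applied.clone();
    for (node, role) in staged {
        merged.insert(*node, *role);
    }
    merged
}

fn main() {
    let applied: Roles = HashMap::from([("node-a", Some("zone1"))]);
    let mut staged: Roles = HashMap::from([("node-a", None)]); // removal already staged

    // A new mutation must look at the merged view: node-a is effectively gone,
    // so replacing it would be rejected, as in cmd_assign_role's handling of args.replace.
    let view = effective_roles(&applied, &staged);
    match view.get("node-a") {
        Some(Some(_)) => println!("node-a still has a role, replacement is possible"),
        _ => println!("node-a has no effective role, nothing to replace"),
    }

    // Stage a brand-new node instead.
    staged.insert("node-b", Some("zone2"));
    assert_eq!(effective_roles(&applied, &staged)["node-b"], Some("zone2"));
}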


@ -44,8 +44,8 @@ impl Edge for WeightedEdge {}
/// provide user friendly Vertex enum to address vertices, and to use internally usize
/// indices and Vec instead of HashMap in the graph algorithm to optimize execution speed.
pub struct Graph<E: Edge> {
vertextoid: HashMap<Vertex, usize>,
idtovertex: Vec<Vertex>,
vertex_to_id: HashMap<Vertex, usize>,
id_to_vertex: Vec<Vertex>,
// The graph is stored as an adjacency list
graph: Vec<Vec<E>>,
@ -60,22 +60,30 @@ impl<E: Edge> Graph<E> {
map.insert(*vert, i);
}
Graph::<E> {
vertextoid: map,
idtovertex: vertices.to_vec(),
vertex_to_id: map,
id_to_vertex: vertices.to_vec(),
graph: vec![Vec::<E>::new(); vertices.len()],
}
}
fn get_vertex_id(&self, v: &Vertex) -> Result<usize, String> {
self.vertex_to_id
.get(v)
.cloned()
.ok_or_else(|| format!("The graph does not contain vertex {:?}", v))
}
}
impl Graph<FlowEdge> {
/// This function adds a directed edge to the graph with capacity c, and the
/// corresponding reversed edge with capacity 0.
pub fn add_edge(&mut self, u: Vertex, v: Vertex, c: u32) -> Result<(), String> {
if !self.vertextoid.contains_key(&u) || !self.vertextoid.contains_key(&v) {
return Err("The graph does not contain the provided vertex.".to_string());
let idu = self.get_vertex_id(&u)?;
let idv = self.get_vertex_id(&v)?;
if idu == idv {
return Err("Cannot add edge from vertex to itself in flow graph".into());
}
let idu = self.vertextoid[&u];
let idv = self.vertextoid[&v];
let rev_u = self.graph[idu].len();
let rev_v = self.graph[idv].len();
self.graph[idu].push(FlowEdge {
@ -96,14 +104,11 @@ impl Graph<FlowEdge> {
/// This function returns the list of vertices that receive a positive flow from
/// vertex v.
pub fn get_positive_flow_from(&self, v: Vertex) -> Result<Vec<Vertex>, String> {
if !self.vertextoid.contains_key(&v) {
return Err("The graph does not contain the provided vertex.".to_string());
}
let idv = self.vertextoid[&v];
let idv = self.get_vertex_id(&v)?;
let mut result = Vec::<Vertex>::new();
for edge in self.graph[idv].iter() {
if edge.flow > 0 {
result.push(self.idtovertex[edge.dest]);
result.push(self.id_to_vertex[edge.dest]);
}
}
Ok(result)
@ -111,10 +116,7 @@ impl Graph<FlowEdge> {
/// This function returns the value of the flow incoming to v.
pub fn get_inflow(&self, v: Vertex) -> Result<i32, String> {
if !self.vertextoid.contains_key(&v) {
return Err("The graph does not contain the provided vertex.".to_string());
}
let idv = self.vertextoid[&v];
let idv = self.get_vertex_id(&v)?;
let mut result = 0;
for edge in self.graph[idv].iter() {
result += max(0, self.graph[edge.dest][edge.rev].flow);
@ -124,10 +126,7 @@ impl Graph<FlowEdge> {
/// This function returns the value of the flow outgoing from v.
pub fn get_outflow(&self, v: Vertex) -> Result<i32, String> {
if !self.vertextoid.contains_key(&v) {
return Err("The graph does not contain the provided vertex.".to_string());
}
let idv = self.vertextoid[&v];
let idv = self.get_vertex_id(&v)?;
let mut result = 0;
for edge in self.graph[idv].iter() {
result += max(0, edge.flow);
@ -157,32 +156,25 @@ impl Graph<FlowEdge> {
}
/// Computes an upper bound of the flow on the graph
pub fn flow_upper_bound(&self) -> u32 {
let idsource = self.vertextoid[&Vertex::Source];
pub fn flow_upper_bound(&self) -> Result<u32, String> {
let idsource = self.get_vertex_id(&Vertex::Source)?;
let mut flow_upper_bound = 0;
for edge in self.graph[idsource].iter() {
flow_upper_bound += edge.cap;
}
flow_upper_bound
Ok(flow_upper_bound)
}
/// This function computes the maximal flow using Dinic's algorithm. It starts with
/// the flow values already present in the graph. So it is possible to add some edge to
/// the graph, compute a flow, add other edges, update the flow.
pub fn compute_maximal_flow(&mut self) -> Result<(), String> {
if !self.vertextoid.contains_key(&Vertex::Source) {
return Err("The graph does not contain a source.".to_string());
}
if !self.vertextoid.contains_key(&Vertex::Sink) {
return Err("The graph does not contain a sink.".to_string());
}
let idsource = self.vertextoid[&Vertex::Source];
let idsink = self.vertextoid[&Vertex::Sink];
let idsource = self.get_vertex_id(&Vertex::Source)?;
let idsink = self.get_vertex_id(&Vertex::Sink)?;
let nb_vertices = self.graph.len();
let flow_upper_bound = self.flow_upper_bound();
let flow_upper_bound = self.flow_upper_bound()?;
// To ensure the dispersion of the associations generated by the
// assignation, we shuffle the neighbours of the nodes. Hence,
@ -196,8 +188,7 @@ impl Graph<FlowEdge> {
let mut fifo = VecDeque::new();
fifo.push_back((idsource, 0));
while !fifo.is_empty() {
if let Some((id, lvl)) = fifo.pop_front() {
while let Some((id, lvl)) = fifo.pop_front() {
if level[id] == None {
// it means id has not yet been reached
level[id] = Some(lvl);
@ -208,40 +199,37 @@ impl Graph<FlowEdge> {
}
}
}
}
if level[idsink] == None {
// There is no residual flow
break;
}
// Now we run DFS respecting the level array
let mut next_nbd = vec![0; nb_vertices];
let mut lifo = VecDeque::new();
let mut lifo = Vec::new();
lifo.push_back((idsource, flow_upper_bound));
lifo.push((idsource, flow_upper_bound));
while let Some((id_tmp, f_tmp)) = lifo.back() {
let id = *id_tmp;
let f = *f_tmp;
while let Some((id, f)) = lifo.last().cloned() {
if id == idsink {
// The DFS reached the sink, we can add a
// residual flow.
lifo.pop_back();
while let Some((id, _)) = lifo.pop_back() {
lifo.pop();
while let Some((id, _)) = lifo.pop() {
let nbd = next_nbd[id];
self.graph[id][nbd].flow += f as i32;
let id_rev = self.graph[id][nbd].dest;
let nbd_rev = self.graph[id][nbd].rev;
self.graph[id_rev][nbd_rev].flow -= f as i32;
}
lifo.push_back((idsource, flow_upper_bound));
lifo.push((idsource, flow_upper_bound));
continue;
}
// else we did not reach the sink
let nbd = next_nbd[id];
if nbd >= self.graph[id].len() {
// There is nothing to explore from id anymore
lifo.pop_back();
if let Some((parent, _)) = lifo.back() {
lifo.pop();
if let Some((parent, _)) = lifo.last() {
next_nbd[*parent] += 1;
}
continue;
@ -263,7 +251,7 @@ impl Graph<FlowEdge> {
}
}
// otherwise, we send flow to nbd.
lifo.push_back((self.graph[id][nbd].dest, new_flow));
lifo.push((self.graph[id][nbd].dest, new_flow));
}
}
Ok(())
@ -287,8 +275,8 @@ impl Graph<FlowEdge> {
for c in cycles.iter() {
for i in 0..c.len() {
// We add one flow unit to the edge (u,v) of cycle c
let idu = self.vertextoid[&c[i]];
let idv = self.vertextoid[&c[(i + 1) % c.len()]];
let idu = self.vertex_to_id[&c[i]];
let idv = self.vertex_to_id[&c[(i + 1) % c.len()]];
for j in 0..self.graph[idu].len() {
// since idu appears at most once in the cycles, we enumerate every
// edge at most once.
@ -310,14 +298,14 @@ impl Graph<FlowEdge> {
/// Construct the weighted graph G_f from the flow and the cost function
fn build_cost_graph(&self, cost: &CostFunction) -> Result<Graph<WeightedEdge>, String> {
let mut g = Graph::<WeightedEdge>::new(&self.idtovertex);
let nb_vertices = self.idtovertex.len();
let mut g = Graph::<WeightedEdge>::new(&self.id_to_vertex);
let nb_vertices = self.id_to_vertex.len();
for i in 0..nb_vertices {
for edge in self.graph[i].iter() {
if edge.cap as i32 - edge.flow > 0 {
// It is possible to send overflow through this edge
let u = self.idtovertex[i];
let v = self.idtovertex[edge.dest];
let u = self.id_to_vertex[i];
let v = self.id_to_vertex[edge.dest];
if cost.contains_key(&(u, v)) {
g.add_edge(u, v, cost[&(u, v)])?;
} else if cost.contains_key(&(v, u)) {
@ -335,11 +323,8 @@ impl Graph<FlowEdge> {
impl Graph<WeightedEdge> {
/// This function adds a single directed weighted edge to the graph.
pub fn add_edge(&mut self, u: Vertex, v: Vertex, w: i32) -> Result<(), String> {
if !self.vertextoid.contains_key(&u) || !self.vertextoid.contains_key(&v) {
return Err("The graph does not contain the provided vertex.".to_string());
}
let idu = self.vertextoid[&u];
let idv = self.vertextoid[&v];
let idu = self.get_vertex_id(&u)?;
let idv = self.get_vertex_id(&v)?;
self.graph[idu].push(WeightedEdge { w, dest: idv });
Ok(())
}
@ -378,7 +363,13 @@ impl Graph<WeightedEdge> {
// in the graph. Thus the .rev().
return cycles_prev
.iter()
.map(|cycle| cycle.iter().rev().map(|id| self.idtovertex[*id]).collect())
.map(|cycle| {
cycle
.iter()
.rev()
.map(|id| self.id_to_vertex[*id])
.collect()
})
.collect();
}
}
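
For context on the refactored graph API above: the sketch below drives compute_maximal_flow end to end (build a graph over a fixed vertex set, add capacitated edges, run Dinic's algorithm, read the flow back). It is a minimal illustration, not code from this PR: it assumes Graph, FlowEdge and Vertex are in scope (their module path is not shown in this diff), uses only the Source and Sink variants of Vertex, and assumes Graph::new takes a slice of vertices, as its body suggests.

// Hypothetical driver for the flow graph API shown above.
fn tiny_flow_example() -> Result<u32, String> {
    // The vertex set is fixed at construction time; internally the vertices
    // are mapped to usize indices (vertex_to_id / id_to_vertex).
    let mut g = Graph::<FlowEdge>::new(&[Vertex::Source, Vertex::Sink]);

    // One forward edge of capacity 5; add_edge also creates the reverse
    // edge with capacity 0.
    g.add_edge(Vertex::Source, Vertex::Sink, 5)?;

    // flow_upper_bound now returns a Result, since the Source lookup can fail.
    assert_eq!(g.flow_upper_bound()?, 5);

    // Dinic's algorithm starts from whatever flow is already present in the
    // graph, so it can be re-run after more edges are added later.
    g.compute_maximal_flow()?;

    // The single edge should be saturated.
    let out = g.get_outflow(Vertex::Source)?;
    assert_eq!(out, 5);
    Ok(out as u32)
}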


@ -53,9 +53,9 @@ pub struct ClusterLayout {
pub ring_assignation_data: Vec<CompactNodeType>,
/// Parameters to be used in the next partition assignation computation.
pub staged_parameters: Lww<LayoutParameters>,
pub staging_parameters: Lww<LayoutParameters>,
/// Role changes which are staged for the next version of the layout
pub staging: LwwMap<Uuid, NodeRoleV>,
pub staging_roles: LwwMap<Uuid, NodeRoleV>,
pub staging_hash: Hash,
}
@ -122,12 +122,11 @@ impl ClusterLayout {
let parameters = LayoutParameters {
zone_redundancy: replication_factor,
};
let staged_parameters = Lww::<LayoutParameters>::new(parameters.clone());
let staging_parameters = Lww::<LayoutParameters>::new(parameters.clone());
let empty_lwwmap = LwwMap::new();
let empty_lwwmap_hash = blake2sum(&rmp_to_vec_all_named(&empty_lwwmap).unwrap()[..]);
ClusterLayout {
let mut ret = ClusterLayout {
version: 0,
replication_factor,
partition_size: 0,
@ -135,10 +134,17 @@ impl ClusterLayout {
node_id_vec: Vec::new(),
ring_assignation_data: Vec::new(),
parameters,
staged_parameters,
staging: empty_lwwmap,
staging_hash: empty_lwwmap_hash,
staging_parameters,
staging_roles: empty_lwwmap,
staging_hash: [0u8; 32].into(),
};
ret.staging_hash = ret.calculate_staging_hash();
ret
}
fn calculate_staging_hash(&self) -> Hash {
let hashed_tuple = (&self.staging_roles, &self.staging_parameters);
blake2sum(&rmp_to_vec_all_named(&hashed_tuple).unwrap()[..])
}
pub fn merge(&mut self, other: &ClusterLayout) -> bool {
@ -148,16 +154,15 @@ impl ClusterLayout {
true
}
Ordering::Equal => {
let param_changed = self.staged_parameters.get() != other.staged_parameters.get();
self.staged_parameters.merge(&other.staged_parameters);
self.staging.merge(&other.staging);
self.staging_parameters.merge(&other.staging_parameters);
self.staging_roles.merge(&other.staging_roles);
let new_staging_hash = blake2sum(&rmp_to_vec_all_named(&self.staging).unwrap()[..]);
let stage_changed = new_staging_hash != self.staging_hash;
let new_staging_hash = self.calculate_staging_hash();
let changed = new_staging_hash != self.staging_hash;
self.staging_hash = new_staging_hash;
stage_changed || param_changed
changed
}
Ordering::Less => false,
}
@ -179,13 +184,14 @@ To know the correct value of the new layout version, invoke `garage layout show`
}
}
self.roles.merge(&self.staging);
self.roles.merge(&self.staging_roles);
self.roles.retain(|(_, _, v)| v.0.is_some());
self.parameters = self.staging_parameters.get().clone();
let msg = self.calculate_partition_assignation()?;
self.staging.clear();
self.staging_hash = blake2sum(&rmp_to_vec_all_named(&self.staging).unwrap()[..]);
self.staging_roles.clear();
self.staging_hash = self.calculate_staging_hash();
self.version += 1;
@ -208,9 +214,9 @@ To know the correct value of the new layout version, invoke `garage layout show`
}
}
self.staging.clear();
self.staging_hash = blake2sum(&rmp_to_vec_all_named(&self.staging).unwrap()[..]);
self.staged_parameters.update(self.parameters.clone());
self.staging_roles.clear();
self.staging_hash = self.calculate_staging_hash();
self.staging_parameters.update(self.parameters.clone());
self.version += 1;
@ -307,7 +313,7 @@ To know the correct value of the new layout version, invoke `garage layout show`
/// returns true if consistent, false if error
pub fn check(&self) -> bool {
// Check that the hash of the staging data is correct
let staging_hash = blake2sum(&rmp_to_vec_all_named(&self.staging).unwrap()[..]);
let staging_hash = self.calculate_staging_hash();
if staging_hash != self.staging_hash {
return false;
}
@ -403,16 +409,13 @@ impl ClusterLayout {
/// Among such optimal assignation, it minimizes the distance to
/// the former assignation (if any) to minimize the amount of
/// data to be moved.
// Staged role changes must be merged with nodes roles before calling this function,
// hence it must only be called from apply_staged_changes() and hence is not public.
/// Staged role changes must be merged with nodes roles before calling this function,
/// hence it must only be called from apply_staged_changes() and hence is not public.
fn calculate_partition_assignation(&mut self) -> Result<Message, Error> {
// We update the node ids, since the node role list might have changed with the
// changes in the layout. We retrieve the old_assignation reframed with new ids
let old_assignation_opt = self.update_node_id_vec()?;
//We update the parameters
self.parameters = self.staged_parameters.get().clone();
let mut msg = Message::new();
msg.push("==== COMPUTATION OF A NEW PARTITION ASSIGNATION ====".into());
msg.push("".into());
@ -1049,7 +1052,7 @@ mod tests {
cl.node_id_vec.push(x);
}
let update = cl.staging.update_mutator(
let update = cl.staging_roles.update_mutator(
cl.node_id_vec[i],
NodeRoleV(Some(NodeRole {
zone: (node_zone_vec[i].to_string()),
@ -1057,10 +1060,10 @@ mod tests {
tags: (vec![]),
})),
);
cl.staging.merge(&update);
cl.staging_roles.merge(&update);
}
cl.staging_hash = blake2sum(&rmp_to_vec_all_named(&cl.staging).unwrap()[..]);
cl.staged_parameters
cl.staging_hash = cl.calculate_staging_hash();
cl.staging_parameters
.update(LayoutParameters { zone_redundancy });
}
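
A note on the staging hash changes above: before this commit, staging_hash only covered the staged role map, so merge() had to track parameter changes separately (the param_changed flag). Since calculate_staging_hash hashes the (staging_roles, staging_parameters) tuple, a single hash comparison now detects both kinds of staged changes. The sketch below shows the calling pattern the test code uses after mutating staged data; it is an illustration, not code from this PR, written as if it lived inside the layout module (calculate_staging_hash is private), and it assumes a ClusterLayout value and a node Uuid are already available.

// Hypothetical same-module helper; ClusterLayout, Uuid and NodeRoleV are the
// types from the hunks above.
fn stage_role_removal(layout: &mut ClusterLayout, node: Uuid) {
    // Stage the change: NodeRoleV(None) marks a removal, as in cmd_remove_role.
    let mutation = layout.staging_roles.update_mutator(node, NodeRoleV(None));
    layout.staging_roles.merge(&mutation);

    // Keep staging_hash in sync with both staged fields, the same way
    // ClusterLayout::new() and the test code now do; check() recomputes this
    // hash and treats a mismatch as an inconsistent layout.
    layout.staging_hash = layout.calculate_staging_hash();
}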