Garage v0.9 #473

Merged
lx merged 175 commits from next into main 2023-10-10 13:28:29 +00:00
2 changed files with 111 additions and 63 deletions
Showing only changes of commit e5664c9822 - Show all commits

View file

@ -169,7 +169,7 @@ pub async fn cmd_show_layout(
rpc_cli: &Endpoint<SystemRpc, ()>, rpc_cli: &Endpoint<SystemRpc, ()>,
rpc_host: NodeID, rpc_host: NodeID,
) -> Result<(), Error> { ) -> Result<(), Error> {
let mut layout = fetch_layout(rpc_cli, rpc_host).await?; let layout = fetch_layout(rpc_cli, rpc_host).await?;
println!("==== CURRENT CLUSTER LAYOUT ===="); println!("==== CURRENT CLUSTER LAYOUT ====");
if !print_cluster_layout(&layout) { if !print_cluster_layout(&layout) {
@ -179,9 +179,16 @@ pub async fn cmd_show_layout(
println!(); println!();
println!("Current cluster layout version: {}", layout.version); println!("Current cluster layout version: {}", layout.version);
if print_staging_role_changes(&layout) { let has_role_changes = print_staging_role_changes(&layout);
layout.roles.merge(&layout.staging); let has_param_changes = print_staging_parameters_changes(&layout);
if has_role_changes || has_param_changes {
let v = layout.version;
let res_apply = layout.apply_staged_changes(Some(v + 1));
// this will print the stats of what partitions
// will move around when we apply
match res_apply {
Ok((layout, msg)) => {
println!(); println!();
println!("==== NEW CLUSTER LAYOUT AFTER APPLYING CHANGES ===="); println!("==== NEW CLUSTER LAYOUT AFTER APPLYING CHANGES ====");
if !print_cluster_layout(&layout) { if !print_cluster_layout(&layout) {
@ -189,31 +196,23 @@ pub async fn cmd_show_layout(
} }
println!(); println!();
println!("==== PARAMETERS OF THE LAYOUT COMPUTATION ====");
println!(
"Zone redundancy: {}",
layout.staged_parameters.get().zone_redundancy
);
println!();
// this will print the stats of what partitions
// will move around when we apply
match layout.calculate_partition_assignation() {
Ok(msg) => {
for line in msg.iter() { for line in msg.iter() {
println!("{}", line); println!("{}", line);
} }
println!("To enact the staged role changes, type:"); println!("To enact the staged role changes, type:");
println!(); println!();
println!(" garage layout apply --version {}", layout.version + 1); println!(" garage layout apply --version {}", v + 1);
println!(); println!();
println!( println!(
"You can also revert all proposed changes with: garage layout revert --version {}", "You can also revert all proposed changes with: garage layout revert --version {}",
layout.version + 1) v + 1)
} }
Err(Error::Message(s)) => { Err(Error::Message(s)) => {
println!("Error while trying to compute the assignation: {}", s); println!("Error while trying to compute the assignation: {}", s);
println!("This new layout cannot yet be applied."); println!("This new layout cannot yet be applied.");
println!(
"You can also revert all proposed changes with: garage layout revert --version {}",
v + 1)
} }
_ => { _ => {
println!("Unknown Error"); println!("Unknown Error");
@ -321,21 +320,29 @@ pub async fn send_layout(
} }
pub fn print_cluster_layout(layout: &ClusterLayout) -> bool { pub fn print_cluster_layout(layout: &ClusterLayout) -> bool {
let mut table = vec!["ID\tTags\tZone\tCapacity".to_string()]; let mut table = vec!["ID\tTags\tZone\tCapacity\tUsable".to_string()];
for (id, _, role) in layout.roles.items().iter() { for (id, _, role) in layout.roles.items().iter() {
let role = match &role.0 { let role = match &role.0 {
Some(r) => r, Some(r) => r,
_ => continue, _ => continue,
}; };
let tags = role.tags.join(","); let tags = role.tags.join(",");
let usage = layout.get_node_usage(id).unwrap_or(0);
let capacity = layout.get_node_capacity(id).unwrap_or(1);
table.push(format!( table.push(format!(
"{:?}\t{}\t{}\t{}", "{:?}\t{}\t{}\t{}\t{} ({:.1}%)",
id, id,
tags, tags,
role.zone, role.zone,
role.capacity_string() role.capacity_string(),
usage as u32 * layout.partition_size,
(100.0 * usage as f32 * layout.partition_size as f32) / (capacity as f32)
)); ));
} }
println!();
println!("Parameters of the layout computation:");
println!("Zone redundancy: {}", layout.parameters.zone_redundancy);
println!();
if table.len() == 1 { if table.len() == 1 {
false false
} else { } else {
@ -344,6 +351,20 @@ pub fn print_cluster_layout(layout: &ClusterLayout) -> bool {
} }
} }
/// Prints the staged layout parameters when they differ from the
/// parameters currently stored in the layout.
///
/// Returns `true` if the staged parameters differ from the current ones
/// (in which case they have been printed), `false` otherwise (nothing is
/// printed).
pub fn print_staging_parameters_changes(layout: &ClusterLayout) -> bool {
    let staged = layout.staged_parameters.get();

    // Nothing to report when the staged parameters equal the current ones.
    if staged.clone() == layout.parameters {
        return false;
    }

    println!();
    println!("==== NEW LAYOUT PARAMETERS ====");
    println!("Zone redundancy: {}", staged.zone_redundancy);
    println!();
    true
}
pub fn print_staging_role_changes(layout: &ClusterLayout) -> bool { pub fn print_staging_role_changes(layout: &ClusterLayout) -> bool {
let has_changes = layout let has_changes = layout
.staging .staging

View file

@ -205,6 +205,7 @@ To know the correct value of the new layout version, invoke `garage layout show`
self.staging.clear(); self.staging.clear();
self.staging_hash = blake2sum(&rmp_to_vec_all_named(&self.staging).unwrap()[..]); self.staging_hash = blake2sum(&rmp_to_vec_all_named(&self.staging).unwrap()[..]);
self.staged_parameters.update(self.parameters.clone());
self.version += 1; self.version += 1;
@ -267,6 +268,26 @@ To know the correct value of the new layout version, invoke `garage layout show`
} }
} }
///Returns the number of partitions associated to this node in the ring
pub fn get_node_usage(&self, uuid: &Uuid) -> Result<usize, Error> {
for (i, id) in self.node_id_vec.iter().enumerate() {
if id == uuid {
let mut count = 0;
for nod in self.ring_assignation_data.iter() {
if i as u8 == *nod {
count += 1
}
}
return Ok(count);
}
}
Err(Error::Message(
"The Uuid does not correspond to a node present in the \
cluster or this node does not have a positive capacity."
.into(),
))
}
///Returns the sum of capacities of non gateway nodes in the cluster ///Returns the sum of capacities of non gateway nodes in the cluster
pub fn get_total_capacity(&self) -> Result<u32, Error> { pub fn get_total_capacity(&self) -> Result<u32, Error> {
let mut total_capacity = 0; let mut total_capacity = 0;
@ -357,11 +378,10 @@ To know the correct value of the new layout version, invoke `garage layout show`
//algorithm. //algorithm.
let cl2 = self.clone(); let cl2 = self.clone();
let (_, zone_to_id) = cl2.generate_useful_zone_ids().expect("Critical Error"); let (_, zone_to_id) = cl2.generate_useful_zone_ids().expect("Critical Error");
let partition_size = cl2 match cl2.compute_optimal_partition_size(&zone_to_id) {
.compute_optimal_partition_size(&zone_to_id) Ok(s) if s != self.partition_size => return false,
.expect("Critical Error"); Err(_) => return false,
if partition_size != self.partition_size { _ => (),
return false;
} }
true true
@ -376,8 +396,9 @@ impl ClusterLayout {
/// Among such optimal assignation, it minimizes the distance to /// Among such optimal assignation, it minimizes the distance to
/// the former assignation (if any) to minimize the amount of /// the former assignation (if any) to minimize the amount of
/// data to be moved. /// data to be moved.
/// Staged changes must be merged with nodes roles before calling this function. // Staged role changes must be merged with nodes roles before calling this function,
pub fn calculate_partition_assignation(&mut self) -> Result<Message, Error> { // hence it must only be called from apply_staged_changes() and it is not public.
fn calculate_partition_assignation(&mut self) -> Result<Message, Error> {
//The nodes might have been updated, some might have been deleted. //The nodes might have been updated, some might have been deleted.
//So we need to first update the list of nodes and retrieve the //So we need to first update the list of nodes and retrieve the
//assignation. //assignation.
@ -386,13 +407,15 @@ impl ClusterLayout {
//changes in the layout. We retrieve the old_assignation reframed with the new ids //changes in the layout. We retrieve the old_assignation reframed with the new ids
let old_assignation_opt = self.update_node_id_vec()?; let old_assignation_opt = self.update_node_id_vec()?;
let redundancy = self.staged_parameters.get().zone_redundancy; self.parameters = self.staged_parameters.get().clone();
let mut msg = Message::new(); let mut msg = Message::new();
msg.push("==== COMPUTATION OF A NEW PARTITION ASSIGNATION ====".into());
msg.push("".into());
msg.push(format!( msg.push(format!(
"Computation of a new cluster layout where partitions are \ "Partitions are \
replicated {} times on at least {} distinct zones.", replicated {} times on at least {} distinct zones.",
self.replication_factor, redundancy self.replication_factor, self.parameters.zone_redundancy
)); ));
//We generate for once numerical ids for the zones of non gateway nodes, //We generate for once numerical ids for the zones of non gateway nodes,
@ -400,11 +423,6 @@ impl ClusterLayout {
let (id_to_zone, zone_to_id) = self.generate_useful_zone_ids()?; let (id_to_zone, zone_to_id) = self.generate_useful_zone_ids()?;
let nb_useful_nodes = self.useful_nodes().len(); let nb_useful_nodes = self.useful_nodes().len();
msg.push(format!(
"The cluster contains {} nodes spread over {} zones.",
nb_useful_nodes,
id_to_zone.len()
));
if nb_useful_nodes < self.replication_factor { if nb_useful_nodes < self.replication_factor {
return Err(Error::Message(format!( return Err(Error::Message(format!(
"The number of nodes with positive \ "The number of nodes with positive \
@ -412,12 +430,12 @@ impl ClusterLayout {
nb_useful_nodes, self.replication_factor nb_useful_nodes, self.replication_factor
))); )));
} }
if id_to_zone.len() < redundancy { if id_to_zone.len() < self.parameters.zone_redundancy {
return Err(Error::Message(format!( return Err(Error::Message(format!(
"The number of zones with non-gateway \ "The number of zones with non-gateway \
nodes ({}) is smaller than the redundancy parameter ({})", nodes ({}) is smaller than the redundancy parameter ({})",
id_to_zone.len(), id_to_zone.len(),
redundancy self.parameters.zone_redundancy
))); )));
} }
@ -429,10 +447,8 @@ impl ClusterLayout {
if old_assignation_opt != None { if old_assignation_opt != None {
msg.push(format!( msg.push(format!(
"Given the replication and redundancy constraint, the \ "Optimal size of a partition: {} (was {} in the previous layout).",
optimal size of a partition is {}. In the previous layout, it used to \ partition_size, self.partition_size
be {} (the zone redundancy was {}).",
partition_size, self.partition_size, self.parameters.zone_redundancy
)); ));
} else { } else {
msg.push(format!( msg.push(format!(
@ -442,7 +458,6 @@ impl ClusterLayout {
)); ));
} }
self.partition_size = partition_size; self.partition_size = partition_size;
self.parameters = self.staged_parameters.get().clone();
if partition_size < 100 { if partition_size < 100 {
msg.push( msg.push(
@ -470,6 +485,13 @@ impl ClusterLayout {
//We update the layout structure //We update the layout structure
self.update_ring_from_flow(id_to_zone.len(), &gflow)?; self.update_ring_from_flow(id_to_zone.len(), &gflow)?;
if !self.check() {
return Err(Error::Message(
"Critical error: The computed layout happens to be incorrect".into(),
));
}
Ok(msg) Ok(msg)
} }
@ -553,12 +575,6 @@ impl ClusterLayout {
//We write the ring //We write the ring
self.ring_assignation_data = Vec::<CompactNodeType>::new(); self.ring_assignation_data = Vec::<CompactNodeType>::new();
if !self.check() {
return Err(Error::Message(
"Critical error: The computed layout happens to be incorrect".into(),
));
}
Ok(Some(old_assignation)) Ok(Some(old_assignation))
} }
@ -652,7 +668,7 @@ impl ClusterLayout {
ClusterLayout::generate_graph_vertices(zone_to_id.len(), self.useful_nodes().len()); ClusterLayout::generate_graph_vertices(zone_to_id.len(), self.useful_nodes().len());
let mut g = Graph::<FlowEdge>::new(&vertices); let mut g = Graph::<FlowEdge>::new(&vertices);
let nb_zones = zone_to_id.len(); let nb_zones = zone_to_id.len();
let redundancy = self.staged_parameters.get().zone_redundancy; let redundancy = self.parameters.zone_redundancy;
for p in 0..NB_PARTITIONS { for p in 0..NB_PARTITIONS {
g.add_edge(Vertex::Source, Vertex::Pup(p), redundancy as u32)?; g.add_edge(Vertex::Source, Vertex::Pup(p), redundancy as u32)?;
g.add_edge( g.add_edge(
@ -774,8 +790,9 @@ impl ClusterLayout {
let used_cap = self.partition_size * nb_partitions as u32 * self.replication_factor as u32; let used_cap = self.partition_size * nb_partitions as u32 * self.replication_factor as u32;
let total_cap = self.get_total_capacity()?; let total_cap = self.get_total_capacity()?;
let percent_cap = 100.0 * (used_cap as f32) / (total_cap as f32); let percent_cap = 100.0 * (used_cap as f32) / (total_cap as f32);
msg.push("".into());
msg.push(format!( msg.push(format!(
"Available capacity / Total cluster capacity: {} / {} ({:.1} %)", "Usable capacity / Total cluster capacity: {} / {} ({:.1} %)",
used_cap, total_cap, percent_cap used_cap, total_cap, percent_cap
)); ));
msg.push("".into()); msg.push("".into());
@ -878,7 +895,7 @@ impl ClusterLayout {
} }
let percent_cap_z = 100.0 * (available_cap_z as f32) / (total_cap_z as f32); let percent_cap_z = 100.0 * (available_cap_z as f32) / (total_cap_z as f32);
msg.push(format!( msg.push(format!(
" Available capacity / Total capacity: {}/{} ({:.1}%).", " Usable capacity / Total capacity: {}/{} ({:.1}%).",
available_cap_z, total_cap_z, percent_cap_z available_cap_z, total_cap_z, percent_cap_z
)); ));
@ -891,7 +908,7 @@ impl ClusterLayout {
.tags_string(); .tags_string();
msg.push(format!( msg.push(format!(
" Node {}: {} partitions ({} new) ; \ " Node {}: {} partitions ({} new) ; \
available/total capacity: {} / {} ({:.1}%) ; tags:{}", usable/total capacity: {} / {} ({:.1}%) ; tags:{}",
&self.node_id_vec[*n].to_vec()[0..2] &self.node_id_vec[*n].to_vec()[0..2]
.to_vec() .to_vec()
.encode_hex::<String>(), .encode_hex::<String>(),
@ -1008,7 +1025,7 @@ mod tests {
cl.node_id_vec.push(x); cl.node_id_vec.push(x);
} }
let update = cl.roles.update_mutator( let update = cl.staging.update_mutator(
cl.node_id_vec[i], cl.node_id_vec[i],
NodeRoleV(Some(NodeRole { NodeRoleV(Some(NodeRole {
zone: (node_zone_vec[i].to_string()), zone: (node_zone_vec[i].to_string()),
@ -1016,9 +1033,11 @@ mod tests {
tags: (vec![]), tags: (vec![]),
})), })),
); );
cl.roles.merge(&update); cl.staging.merge(&update);
} }
cl.staged_parameters = Lww::<LayoutParameters>::new(LayoutParameters { zone_redundancy }); cl.staging_hash = blake2sum(&rmp_to_vec_all_named(&cl.staging).unwrap()[..]);
cl.staged_parameters
.update(LayoutParameters { zone_redundancy });
} }
#[test] #[test]
@ -1032,7 +1051,9 @@ mod tests {
let mut cl = ClusterLayout::new(3); let mut cl = ClusterLayout::new(3);
update_layout(&mut cl, &node_id_vec, &node_capacity_vec, &node_zone_vec, 3); update_layout(&mut cl, &node_id_vec, &node_capacity_vec, &node_zone_vec, 3);
show_msg(&cl.calculate_partition_assignation().unwrap()); let v = cl.version;
let (mut cl, msg) = cl.apply_staged_changes(Some(v + 1)).unwrap();
show_msg(&msg);
assert!(cl.check()); assert!(cl.check());
assert!(matches!(check_against_naive(&cl), Ok(true))); assert!(matches!(check_against_naive(&cl), Ok(true)));
@ -1043,13 +1064,17 @@ mod tests {
.map(|x| x.to_string()) .map(|x| x.to_string())
.collect(); .collect();
update_layout(&mut cl, &node_id_vec, &node_capacity_vec, &node_zone_vec, 2); update_layout(&mut cl, &node_id_vec, &node_capacity_vec, &node_zone_vec, 2);
show_msg(&cl.calculate_partition_assignation().unwrap()); let v = cl.version;
let (mut cl, msg) = cl.apply_staged_changes(Some(v + 1)).unwrap();
show_msg(&msg);
assert!(cl.check()); assert!(cl.check());
assert!(matches!(check_against_naive(&cl), Ok(true))); assert!(matches!(check_against_naive(&cl), Ok(true)));
node_capacity_vec = vec![4000, 1000, 2000, 7000, 1000, 1000, 2000, 10000, 2000]; node_capacity_vec = vec![4000, 1000, 2000, 7000, 1000, 1000, 2000, 10000, 2000];
update_layout(&mut cl, &node_id_vec, &node_capacity_vec, &node_zone_vec, 3); update_layout(&mut cl, &node_id_vec, &node_capacity_vec, &node_zone_vec, 3);
show_msg(&cl.calculate_partition_assignation().unwrap()); let v = cl.version;
let (mut cl, msg) = cl.apply_staged_changes(Some(v + 1)).unwrap();
show_msg(&msg);
assert!(cl.check()); assert!(cl.check());
assert!(matches!(check_against_naive(&cl), Ok(true))); assert!(matches!(check_against_naive(&cl), Ok(true)));
@ -1057,7 +1082,9 @@ mod tests {
4000000, 4000000, 2000000, 7000000, 1000000, 9000000, 2000000, 10000, 2000000, 4000000, 4000000, 2000000, 7000000, 1000000, 9000000, 2000000, 10000, 2000000,
]; ];
update_layout(&mut cl, &node_id_vec, &node_capacity_vec, &node_zone_vec, 1); update_layout(&mut cl, &node_id_vec, &node_capacity_vec, &node_zone_vec, 1);
show_msg(&cl.calculate_partition_assignation().unwrap()); let v = cl.version;
let (cl, msg) = cl.apply_staged_changes(Some(v + 1)).unwrap();
show_msg(&msg);
assert!(cl.check()); assert!(cl.check());
assert!(matches!(check_against_naive(&cl), Ok(true))); assert!(matches!(check_against_naive(&cl), Ok(true)));
} }