forked from Deuxfleurs/garage
Corrected two bugs:
- self.node_id_vec was not properly updated when the previous ring was empty - ClusterLayout::merge was not considering changes in the layout parameters
This commit is contained in:
parent
a951b6c452
commit
9407df60cc
3 changed files with 41 additions and 22 deletions
|
@ -188,6 +188,10 @@ pub async fn cmd_show_layout(
|
||||||
println!("No nodes have a role in the new layout.");
|
println!("No nodes have a role in the new layout.");
|
||||||
}
|
}
|
||||||
println!();
|
println!();
|
||||||
|
|
||||||
|
println!("==== PARAMETERS OF THE LAYOUT COMPUTATION ====");
|
||||||
|
println!("Zone redundancy: {}", layout.parameters.get().zone_redundancy);
|
||||||
|
println!();
|
||||||
|
|
||||||
// this will print the stats of what partitions
|
// this will print the stats of what partitions
|
||||||
// will move around when we apply
|
// will move around when we apply
|
||||||
|
@ -267,7 +271,7 @@ pub async fn cmd_config_layout(
|
||||||
}
|
}
|
||||||
else {
|
else {
|
||||||
layout.parameters.update(LayoutParameters{ zone_redundancy: r });
|
layout.parameters.update(LayoutParameters{ zone_redundancy: r });
|
||||||
println!("The new zone redundancy has been staged.");
|
println!("The new zone redundancy has been saved ({}).", r);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -150,15 +150,17 @@ impl ClusterLayout {
|
||||||
true
|
true
|
||||||
}
|
}
|
||||||
Ordering::Equal => {
|
Ordering::Equal => {
|
||||||
|
let param_changed = self.parameters.get() != other.parameters.get();
|
||||||
self.parameters.merge(&other.parameters);
|
self.parameters.merge(&other.parameters);
|
||||||
self.staging.merge(&other.staging);
|
self.staging.merge(&other.staging);
|
||||||
|
|
||||||
|
|
||||||
let new_staging_hash = blake2sum(&rmp_to_vec_all_named(&self.staging).unwrap()[..]);
|
let new_staging_hash = blake2sum(&rmp_to_vec_all_named(&self.staging).unwrap()[..]);
|
||||||
let changed = new_staging_hash != self.staging_hash;
|
let stage_changed = new_staging_hash != self.staging_hash;
|
||||||
|
|
||||||
self.staging_hash = new_staging_hash;
|
self.staging_hash = new_staging_hash;
|
||||||
|
|
||||||
changed
|
stage_changed || param_changed
|
||||||
}
|
}
|
||||||
Ordering::Less => false,
|
Ordering::Less => false,
|
||||||
}
|
}
|
||||||
|
@ -352,7 +354,7 @@ To know the correct value of the new layout version, invoke `garage layout show`
|
||||||
//Check that the partition size stored is the one computed by the asignation
|
//Check that the partition size stored is the one computed by the asignation
|
||||||
//algorithm.
|
//algorithm.
|
||||||
let cl2 = self.clone();
|
let cl2 = self.clone();
|
||||||
let (_ , zone_to_id) = cl2.generate_zone_ids().expect("Critical Error");
|
let (_ , zone_to_id) = cl2.generate_useful_zone_ids().expect("Critical Error");
|
||||||
let partition_size = cl2.compute_optimal_partition_size(&zone_to_id).expect("Critical Error");
|
let partition_size = cl2.compute_optimal_partition_size(&zone_to_id).expect("Critical Error");
|
||||||
if partition_size != self.partition_size {
|
if partition_size != self.partition_size {
|
||||||
return false;
|
return false;
|
||||||
|
@ -371,13 +373,14 @@ impl ClusterLayout {
|
||||||
/// partition (assuming all partitions have the same size).
|
/// partition (assuming all partitions have the same size).
|
||||||
/// Among such optimal assignation, it minimizes the distance to
|
/// Among such optimal assignation, it minimizes the distance to
|
||||||
/// the former assignation (if any) to minimize the amount of
|
/// the former assignation (if any) to minimize the amount of
|
||||||
/// data to be moved.
|
/// data to be moved.
|
||||||
|
/// Staged changes must be merged with nodes roles before calling this function.
|
||||||
pub fn calculate_partition_assignation(&mut self) -> Result<Message,Error> {
|
pub fn calculate_partition_assignation(&mut self) -> Result<Message,Error> {
|
||||||
//The nodes might have been updated, some might have been deleted.
|
//The nodes might have been updated, some might have been deleted.
|
||||||
//So we need to first update the list of nodes and retrieve the
|
//So we need to first update the list of nodes and retrieve the
|
||||||
//assignation.
|
//assignation.
|
||||||
|
|
||||||
//We update the node ids, since the node list might have changed with the staged
|
//We update the node ids, since the node role list might have changed with the
|
||||||
//changes in the layout. We retrieve the old_assignation reframed with the new ids
|
//changes in the layout. We retrieve the old_assignation reframed with the new ids
|
||||||
let old_assignation_opt = self.update_node_id_vec()?;
|
let old_assignation_opt = self.update_node_id_vec()?;
|
||||||
|
|
||||||
|
@ -387,12 +390,23 @@ impl ClusterLayout {
|
||||||
msg.push(format!("Computation of a new cluster layout where partitions are \
|
msg.push(format!("Computation of a new cluster layout where partitions are \
|
||||||
replicated {} times on at least {} distinct zones.", self.replication_factor, redundancy));
|
replicated {} times on at least {} distinct zones.", self.replication_factor, redundancy));
|
||||||
|
|
||||||
//We generate for once numerical ids for the zone, to use them as indices in the
|
//We generate for once numerical ids for the zones of non gateway nodes,
|
||||||
//flow graphs.
|
//to use them as indices in the flow graphs.
|
||||||
let (id_to_zone , zone_to_id) = self.generate_zone_ids()?;
|
let (id_to_zone , zone_to_id) = self.generate_useful_zone_ids()?;
|
||||||
|
|
||||||
|
let nb_useful_nodes = self.useful_nodes().len();
|
||||||
msg.push(format!("The cluster contains {} nodes spread over {} zones.",
|
msg.push(format!("The cluster contains {} nodes spread over {} zones.",
|
||||||
self.useful_nodes().len(), id_to_zone.len()));
|
nb_useful_nodes, id_to_zone.len()));
|
||||||
|
if nb_useful_nodes < self.replication_factor{
|
||||||
|
return Err(Error::Message(format!("The number of nodes with positive \
|
||||||
|
capacity ({}) is smaller than the replication factor ({}).",
|
||||||
|
nb_useful_nodes, self.replication_factor)));
|
||||||
|
}
|
||||||
|
if id_to_zone.len() < redundancy {
|
||||||
|
return Err(Error::Message(format!("The number of zones with non-gateway \
|
||||||
|
nodes ({}) is smaller than the redundancy parameter ({})",
|
||||||
|
id_to_zone.len() , redundancy)));
|
||||||
|
}
|
||||||
|
|
||||||
//We compute the optimal partition size
|
//We compute the optimal partition size
|
||||||
//Capacities should be given in a unit so that partition size is at least 100.
|
//Capacities should be given in a unit so that partition size is at least 100.
|
||||||
|
@ -413,8 +427,7 @@ impl ClusterLayout {
|
||||||
|
|
||||||
if partition_size < 100 {
|
if partition_size < 100 {
|
||||||
msg.push("WARNING: The partition size is low (< 100), you might consider to \
|
msg.push("WARNING: The partition size is low (< 100), you might consider to \
|
||||||
give the nodes capacities in a smaller unit (e.g. Mb instead of Gb) to \
|
provide the nodes capacities in a smaller unit (e.g. Mb instead of Gb).".into());
|
||||||
achieve a more tailored use of your storage ressources.".into());
|
|
||||||
}
|
}
|
||||||
|
|
||||||
//We compute a first flow/assignment that is heuristically close to the previous
|
//We compute a first flow/assignment that is heuristically close to the previous
|
||||||
|
@ -456,12 +469,14 @@ impl ClusterLayout {
|
||||||
.filter(|(_, _, v)|
|
.filter(|(_, _, v)|
|
||||||
match v {NodeRoleV(Some(r)) if r.capacity == None => true, _=> false })
|
match v {NodeRoleV(Some(r)) if r.capacity == None => true, _=> false })
|
||||||
.map(|(k, _, _)| *k).collect();
|
.map(|(k, _, _)| *k).collect();
|
||||||
|
|
||||||
let nb_useful_nodes = new_non_gateway_nodes.len();
|
let nb_useful_nodes = new_non_gateway_nodes.len();
|
||||||
let mut new_node_id_vec = Vec::<Uuid>::new();
|
let mut new_node_id_vec = Vec::<Uuid>::new();
|
||||||
new_node_id_vec.append(&mut new_non_gateway_nodes);
|
new_node_id_vec.append(&mut new_non_gateway_nodes);
|
||||||
new_node_id_vec.append(&mut new_gateway_nodes);
|
new_node_id_vec.append(&mut new_gateway_nodes);
|
||||||
|
|
||||||
|
let old_node_id_vec = self.node_id_vec.clone();
|
||||||
|
self.node_id_vec = new_node_id_vec.clone();
|
||||||
|
|
||||||
// (2) We retrieve the old association
|
// (2) We retrieve the old association
|
||||||
//We rewrite the old association with the new indices. We only consider partition
|
//We rewrite the old association with the new indices. We only consider partition
|
||||||
|
@ -490,15 +505,14 @@ impl ClusterLayout {
|
||||||
let rf= self.replication_factor;
|
let rf= self.replication_factor;
|
||||||
for p in 0..nb_partitions {
|
for p in 0..nb_partitions {
|
||||||
for old_id in &self.ring_assignation_data[p*rf..(p+1)*rf] {
|
for old_id in &self.ring_assignation_data[p*rf..(p+1)*rf] {
|
||||||
let uuid = self.node_id_vec[*old_id as usize];
|
let uuid = old_node_id_vec[*old_id as usize];
|
||||||
if uuid_to_new_id.contains_key(&uuid) {
|
if uuid_to_new_id.contains_key(&uuid) {
|
||||||
old_assignation[p].push(uuid_to_new_id[&uuid]);
|
old_assignation[p].push(uuid_to_new_id[&uuid]);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
//We write the results
|
//We write the ring
|
||||||
self.node_id_vec = new_node_id_vec;
|
|
||||||
self.ring_assignation_data = Vec::<CompactNodeType>::new();
|
self.ring_assignation_data = Vec::<CompactNodeType>::new();
|
||||||
|
|
||||||
return Ok(Some(old_assignation));
|
return Ok(Some(old_assignation));
|
||||||
|
@ -507,11 +521,11 @@ impl ClusterLayout {
|
||||||
|
|
||||||
///This function generates ids for the zone of the nodes appearing in
|
///This function generates ids for the zone of the nodes appearing in
|
||||||
///self.node_id_vec.
|
///self.node_id_vec.
|
||||||
fn generate_zone_ids(&self) -> Result<(Vec<String>, HashMap<String, usize>),Error>{
|
fn generate_useful_zone_ids(&self) -> Result<(Vec<String>, HashMap<String, usize>),Error>{
|
||||||
let mut id_to_zone = Vec::<String>::new();
|
let mut id_to_zone = Vec::<String>::new();
|
||||||
let mut zone_to_id = HashMap::<String,usize>::new();
|
let mut zone_to_id = HashMap::<String,usize>::new();
|
||||||
|
|
||||||
for uuid in self.node_id_vec.iter() {
|
for uuid in self.useful_nodes().iter() {
|
||||||
if self.roles.get(uuid) == None {
|
if self.roles.get(uuid) == None {
|
||||||
return Err(Error::Message("The uuid was not found in the node roles (this should \
|
return Err(Error::Message("The uuid was not found in the node roles (this should \
|
||||||
not happen, it might be a critical error).".into()));
|
not happen, it might be a critical error).".into()));
|
||||||
|
@ -685,7 +699,7 @@ impl ClusterLayout {
|
||||||
storage capacities. \
|
storage capacities. \
|
||||||
You might want to rebalance the storage capacities or relax the constraints. \
|
You might want to rebalance the storage capacities or relax the constraints. \
|
||||||
See the detailed statistics below and look for saturated nodes/zones."));
|
See the detailed statistics below and look for saturated nodes/zones."));
|
||||||
msg.push(format!("Recall that because of the replication, the actual available \
|
msg.push(format!("Recall that because of the replication factor, the actual available \
|
||||||
storage capacity is {} / {} = {}.",
|
storage capacity is {} / {} = {}.",
|
||||||
used_cap , self.replication_factor ,
|
used_cap , self.replication_factor ,
|
||||||
used_cap/self.replication_factor as u32));
|
used_cap/self.replication_factor as u32));
|
||||||
|
@ -741,7 +755,7 @@ impl ClusterLayout {
|
||||||
transferred.", total_new_partitions));
|
transferred.", total_new_partitions));
|
||||||
}
|
}
|
||||||
msg.push(format!(""));
|
msg.push(format!(""));
|
||||||
msg.push(format!("Detailed statistics by zones and nodes."));
|
msg.push(format!("==== DETAILED STATISTICS BY ZONES AND NODES ===="));
|
||||||
|
|
||||||
for z in 0..id_to_zone.len(){
|
for z in 0..id_to_zone.len(){
|
||||||
let mut nodes_of_z = Vec::<usize>::new();
|
let mut nodes_of_z = Vec::<usize>::new();
|
||||||
|
|
|
@ -565,6 +565,7 @@ impl System {
|
||||||
return Err(Error::Message(msg));
|
return Err(Error::Message(msg));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
let update_ring = self.update_ring.lock().await;
|
let update_ring = self.update_ring.lock().await;
|
||||||
let mut layout: ClusterLayout = self.ring.borrow().layout.clone();
|
let mut layout: ClusterLayout = self.ring.borrow().layout.clone();
|
||||||
|
|
||||||
|
|
Loading…
Reference in a new issue