Garage v1.0 #683
4 changed files with 174 additions and 169 deletions
|
@ -18,7 +18,7 @@ impl LayoutHistory {
|
|||
};
|
||||
|
||||
let mut ret = LayoutHistory {
|
||||
versions: vec![version].into_boxed_slice().into(),
|
||||
versions: vec![version],
|
||||
update_trackers: Default::default(),
|
||||
trackers_hash: [0u8; 32].into(),
|
||||
staging: Lww::raw(0, staging),
|
||||
|
@ -211,6 +211,11 @@ To know the correct value of the new layout version, invoke `garage layout show`
|
|||
|
||||
let msg = new_version.calculate_partition_assignment()?;
|
||||
self.versions.push(new_version);
|
||||
if self.current().check().is_ok() {
|
||||
while self.versions.first().unwrap().check().is_err() {
|
||||
self.versions.remove(0);
|
||||
}
|
||||
}
|
||||
|
||||
// Reset the staged layout changes
|
||||
self.staging.update(LayoutStaging {
|
||||
|
@ -245,7 +250,7 @@ To know the correct value of the new layout version, invoke `garage layout show`
|
|||
version.check()?;
|
||||
}
|
||||
|
||||
// TODO: anythign more ?
|
||||
// TODO: anything more ?
|
||||
Ok(())
|
||||
}
|
||||
}
|
||||
|
|
|
@ -3,6 +3,9 @@ mod history;
|
|||
mod schema;
|
||||
mod version;
|
||||
|
||||
#[cfg(test)]
|
||||
mod test;
|
||||
|
||||
pub mod manager;
|
||||
|
||||
// ---- re-exports ----
|
||||
|
|
159
src/rpc/layout/test.rs
Normal file
159
src/rpc/layout/test.rs
Normal file
|
@ -0,0 +1,159 @@
|
|||
use std::cmp::min;
|
||||
use std::collections::HashMap;
|
||||
|
||||
use garage_util::crdt::Crdt;
|
||||
use garage_util::error::*;
|
||||
|
||||
use crate::layout::*;
|
||||
|
||||
// This function checks that the partition size S computed is at least better than the
|
||||
// one given by a very naive algorithm. To do so, we try to run the naive algorithm
|
||||
// assuming a partion size of S+1. If we succed, it means that the optimal assignment
|
||||
// was not optimal. The naive algorithm is the following :
|
||||
// - we compute the max number of partitions associated to every node, capped at the
|
||||
// partition number. It gives the number of tokens of every node.
|
||||
// - every zone has a number of tokens equal to the sum of the tokens of its nodes.
|
||||
// - we cycle over the partitions and associate zone tokens while respecting the
|
||||
// zone redundancy constraint.
|
||||
// NOTE: the naive algorithm is not optimal. Counter example:
|
||||
// take nb_partition = 3 ; replication_factor = 5; redundancy = 4;
|
||||
// number of tokens by zone : (A, 4), (B,1), (C,4), (D, 4), (E, 2)
|
||||
// With these parameters, the naive algo fails, whereas there is a solution:
|
||||
// (A,A,C,D,E) , (A,B,C,D,D) (A,C,C,D,E)
|
||||
fn check_against_naive(cl: &LayoutVersion) -> Result<bool, Error> {
|
||||
let over_size = cl.partition_size + 1;
|
||||
let mut zone_token = HashMap::<String, usize>::new();
|
||||
|
||||
let (zones, zone_to_id) = cl.generate_nongateway_zone_ids()?;
|
||||
|
||||
if zones.is_empty() {
|
||||
return Ok(false);
|
||||
}
|
||||
|
||||
for z in zones.iter() {
|
||||
zone_token.insert(z.clone(), 0);
|
||||
}
|
||||
for uuid in cl.nongateway_nodes() {
|
||||
let z = cl.get_node_zone(&uuid)?;
|
||||
let c = cl.get_node_capacity(&uuid)?;
|
||||
zone_token.insert(
|
||||
z.to_string(),
|
||||
zone_token[z] + min(NB_PARTITIONS, (c / over_size) as usize),
|
||||
);
|
||||
}
|
||||
|
||||
// For every partition, we count the number of zone already associated and
|
||||
// the name of the last zone associated
|
||||
|
||||
let mut id_zone_token = vec![0; zones.len()];
|
||||
for (z, t) in zone_token.iter() {
|
||||
id_zone_token[zone_to_id[z]] = *t;
|
||||
}
|
||||
|
||||
let mut nb_token = vec![0; NB_PARTITIONS];
|
||||
let mut last_zone = vec![zones.len(); NB_PARTITIONS];
|
||||
|
||||
let mut curr_zone = 0;
|
||||
|
||||
let redundancy = cl.effective_zone_redundancy();
|
||||
|
||||
for replic in 0..cl.replication_factor {
|
||||
for p in 0..NB_PARTITIONS {
|
||||
while id_zone_token[curr_zone] == 0
|
||||
|| (last_zone[p] == curr_zone
|
||||
&& redundancy - nb_token[p] <= cl.replication_factor - replic)
|
||||
{
|
||||
curr_zone += 1;
|
||||
if curr_zone >= zones.len() {
|
||||
return Ok(true);
|
||||
}
|
||||
}
|
||||
id_zone_token[curr_zone] -= 1;
|
||||
if last_zone[p] != curr_zone {
|
||||
nb_token[p] += 1;
|
||||
last_zone[p] = curr_zone;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return Ok(false);
|
||||
}
|
||||
|
||||
fn show_msg(msg: &Message) {
|
||||
for s in msg.iter() {
|
||||
println!("{}", s);
|
||||
}
|
||||
}
|
||||
|
||||
fn update_layout(
|
||||
cl: &mut LayoutHistory,
|
||||
node_capacity_vec: &[u64],
|
||||
node_zone_vec: &[&'static str],
|
||||
zone_redundancy: usize,
|
||||
) {
|
||||
let staging = cl.staging.get_mut();
|
||||
|
||||
for (i, (capacity, zone)) in node_capacity_vec
|
||||
.iter()
|
||||
.zip(node_zone_vec.iter())
|
||||
.enumerate()
|
||||
{
|
||||
let node_id = [i as u8; 32].into();
|
||||
|
||||
let update = staging.roles.update_mutator(
|
||||
node_id,
|
||||
NodeRoleV(Some(NodeRole {
|
||||
zone: zone.to_string(),
|
||||
capacity: Some(*capacity),
|
||||
tags: (vec![]),
|
||||
})),
|
||||
);
|
||||
staging.roles.merge(&update);
|
||||
}
|
||||
staging.parameters.update(LayoutParameters {
|
||||
zone_redundancy: ZoneRedundancy::AtLeast(zone_redundancy),
|
||||
});
|
||||
|
||||
cl.update_hashes();
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_assignment() {
|
||||
let mut node_capacity_vec = vec![4000, 1000, 2000];
|
||||
let mut node_zone_vec = vec!["A", "B", "C"];
|
||||
|
||||
let mut cl = LayoutHistory::new(3);
|
||||
update_layout(&mut cl, &node_capacity_vec, &node_zone_vec, 3);
|
||||
let v = cl.current().version;
|
||||
let (mut cl, msg) = cl.apply_staged_changes(Some(v + 1)).unwrap();
|
||||
show_msg(&msg);
|
||||
assert_eq!(cl.check(), Ok(()));
|
||||
assert!(check_against_naive(cl.current()).unwrap());
|
||||
|
||||
node_capacity_vec = vec![4000, 1000, 1000, 3000, 1000, 1000, 2000, 10000, 2000];
|
||||
node_zone_vec = vec!["A", "B", "C", "C", "C", "B", "G", "H", "I"];
|
||||
update_layout(&mut cl, &node_capacity_vec, &node_zone_vec, 2);
|
||||
let v = cl.current().version;
|
||||
let (mut cl, msg) = cl.apply_staged_changes(Some(v + 1)).unwrap();
|
||||
show_msg(&msg);
|
||||
assert_eq!(cl.check(), Ok(()));
|
||||
assert!(check_against_naive(cl.current()).unwrap());
|
||||
|
||||
node_capacity_vec = vec![4000, 1000, 2000, 7000, 1000, 1000, 2000, 10000, 2000];
|
||||
update_layout(&mut cl, &node_capacity_vec, &node_zone_vec, 3);
|
||||
let v = cl.current().version;
|
||||
let (mut cl, msg) = cl.apply_staged_changes(Some(v + 1)).unwrap();
|
||||
show_msg(&msg);
|
||||
assert_eq!(cl.check(), Ok(()));
|
||||
assert!(check_against_naive(cl.current()).unwrap());
|
||||
|
||||
node_capacity_vec = vec![
|
||||
4000000, 4000000, 2000000, 7000000, 1000000, 9000000, 2000000, 10000, 2000000,
|
||||
];
|
||||
update_layout(&mut cl, &node_capacity_vec, &node_zone_vec, 1);
|
||||
let v = cl.current().version;
|
||||
let (cl, msg) = cl.apply_staged_changes(Some(v + 1)).unwrap();
|
||||
show_msg(&msg);
|
||||
assert_eq!(cl.check(), Ok(()));
|
||||
assert!(check_against_naive(cl.current()).unwrap());
|
||||
}
|
|
@ -143,7 +143,7 @@ impl LayoutVersion {
|
|||
}
|
||||
|
||||
/// Given a node uuids, this function returns the label of its zone
|
||||
fn get_node_zone(&self, uuid: &Uuid) -> Result<&str, Error> {
|
||||
pub(crate) fn get_node_zone(&self, uuid: &Uuid) -> Result<&str, Error> {
|
||||
match self.node_role(uuid) {
|
||||
Some(role) => Ok(&role.zone),
|
||||
_ => Err(Error::Message(
|
||||
|
@ -162,7 +162,7 @@ impl LayoutVersion {
|
|||
}
|
||||
|
||||
/// Returns the effective value of the zone_redundancy parameter
|
||||
fn effective_zone_redundancy(&self) -> usize {
|
||||
pub(crate) fn effective_zone_redundancy(&self) -> usize {
|
||||
match self.parameters.zone_redundancy {
|
||||
ZoneRedundancy::AtLeast(v) => v,
|
||||
ZoneRedundancy::Maximum => {
|
||||
|
@ -472,7 +472,9 @@ impl LayoutVersion {
|
|||
|
||||
/// This function generates ids for the zone of the nodes appearing in
|
||||
/// self.node_id_vec.
|
||||
fn generate_nongateway_zone_ids(&self) -> Result<(Vec<String>, HashMap<String, usize>), Error> {
|
||||
pub(crate) fn generate_nongateway_zone_ids(
|
||||
&self,
|
||||
) -> Result<(Vec<String>, HashMap<String, usize>), Error> {
|
||||
let mut id_to_zone = Vec::<String>::new();
|
||||
let mut zone_to_id = HashMap::<String, usize>::new();
|
||||
|
||||
|
@ -838,167 +840,3 @@ impl LayoutVersion {
|
|||
Ok(msg)
|
||||
}
|
||||
}
|
||||
|
||||
// ====================================================================================
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use super::{Error, *};
|
||||
use std::cmp::min;
|
||||
|
||||
// This function checks that the partition size S computed is at least better than the
|
||||
// one given by a very naive algorithm. To do so, we try to run the naive algorithm
|
||||
// assuming a partion size of S+1. If we succed, it means that the optimal assignment
|
||||
// was not optimal. The naive algorithm is the following :
|
||||
// - we compute the max number of partitions associated to every node, capped at the
|
||||
// partition number. It gives the number of tokens of every node.
|
||||
// - every zone has a number of tokens equal to the sum of the tokens of its nodes.
|
||||
// - we cycle over the partitions and associate zone tokens while respecting the
|
||||
// zone redundancy constraint.
|
||||
// NOTE: the naive algorithm is not optimal. Counter example:
|
||||
// take nb_partition = 3 ; replication_factor = 5; redundancy = 4;
|
||||
// number of tokens by zone : (A, 4), (B,1), (C,4), (D, 4), (E, 2)
|
||||
// With these parameters, the naive algo fails, whereas there is a solution:
|
||||
// (A,A,C,D,E) , (A,B,C,D,D) (A,C,C,D,E)
|
||||
fn check_against_naive(cl: &LayoutVersion) -> Result<bool, Error> {
|
||||
let over_size = cl.partition_size + 1;
|
||||
let mut zone_token = HashMap::<String, usize>::new();
|
||||
|
||||
let (zones, zone_to_id) = cl.generate_nongateway_zone_ids()?;
|
||||
|
||||
if zones.is_empty() {
|
||||
return Ok(false);
|
||||
}
|
||||
|
||||
for z in zones.iter() {
|
||||
zone_token.insert(z.clone(), 0);
|
||||
}
|
||||
for uuid in cl.nongateway_nodes() {
|
||||
let z = cl.get_node_zone(&uuid)?;
|
||||
let c = cl.get_node_capacity(&uuid)?;
|
||||
zone_token.insert(
|
||||
z.clone(),
|
||||
zone_token[&z] + min(NB_PARTITIONS, (c / over_size) as usize),
|
||||
);
|
||||
}
|
||||
|
||||
// For every partition, we count the number of zone already associated and
|
||||
// the name of the last zone associated
|
||||
|
||||
let mut id_zone_token = vec![0; zones.len()];
|
||||
for (z, t) in zone_token.iter() {
|
||||
id_zone_token[zone_to_id[z]] = *t;
|
||||
}
|
||||
|
||||
let mut nb_token = vec![0; NB_PARTITIONS];
|
||||
let mut last_zone = vec![zones.len(); NB_PARTITIONS];
|
||||
|
||||
let mut curr_zone = 0;
|
||||
|
||||
let redundancy = cl.effective_zone_redundancy();
|
||||
|
||||
for replic in 0..cl.replication_factor {
|
||||
for p in 0..NB_PARTITIONS {
|
||||
while id_zone_token[curr_zone] == 0
|
||||
|| (last_zone[p] == curr_zone
|
||||
&& redundancy - nb_token[p] <= cl.replication_factor - replic)
|
||||
{
|
||||
curr_zone += 1;
|
||||
if curr_zone >= zones.len() {
|
||||
return Ok(true);
|
||||
}
|
||||
}
|
||||
id_zone_token[curr_zone] -= 1;
|
||||
if last_zone[p] != curr_zone {
|
||||
nb_token[p] += 1;
|
||||
last_zone[p] = curr_zone;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return Ok(false);
|
||||
}
|
||||
|
||||
fn show_msg(msg: &Message) {
|
||||
for s in msg.iter() {
|
||||
println!("{}", s);
|
||||
}
|
||||
}
|
||||
|
||||
fn update_layout(
|
||||
cl: &mut LayoutVersion,
|
||||
node_id_vec: &Vec<u8>,
|
||||
node_capacity_vec: &Vec<u64>,
|
||||
node_zone_vec: &Vec<String>,
|
||||
zone_redundancy: usize,
|
||||
) {
|
||||
for i in 0..node_id_vec.len() {
|
||||
if let Some(x) = FixedBytes32::try_from(&[i as u8; 32]) {
|
||||
cl.node_id_vec.push(x);
|
||||
}
|
||||
|
||||
let update = cl.staging_roles.update_mutator(
|
||||
cl.node_id_vec[i],
|
||||
NodeRoleV(Some(NodeRole {
|
||||
zone: (node_zone_vec[i].to_string()),
|
||||
capacity: (Some(node_capacity_vec[i])),
|
||||
tags: (vec![]),
|
||||
})),
|
||||
);
|
||||
cl.staging_roles.merge(&update);
|
||||
}
|
||||
cl.staging_parameters.update(LayoutParameters {
|
||||
zone_redundancy: ZoneRedundancy::AtLeast(zone_redundancy),
|
||||
});
|
||||
cl.staging_hash = cl.calculate_staging_hash();
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_assignment() {
|
||||
let mut node_id_vec = vec![1, 2, 3];
|
||||
let mut node_capacity_vec = vec![4000, 1000, 2000];
|
||||
let mut node_zone_vec = vec!["A", "B", "C"]
|
||||
.into_iter()
|
||||
.map(|x| x.to_string())
|
||||
.collect();
|
||||
|
||||
let mut cl = LayoutVersion::new(3);
|
||||
update_layout(&mut cl, &node_id_vec, &node_capacity_vec, &node_zone_vec, 3);
|
||||
let v = cl.version;
|
||||
let (mut cl, msg) = cl.apply_staged_changes(Some(v + 1)).unwrap();
|
||||
show_msg(&msg);
|
||||
assert_eq!(cl.check(), Ok(()));
|
||||
assert!(matches!(check_against_naive(&cl), Ok(true)));
|
||||
|
||||
node_id_vec = vec![1, 2, 3, 4, 5, 6, 7, 8, 9];
|
||||
node_capacity_vec = vec![4000, 1000, 1000, 3000, 1000, 1000, 2000, 10000, 2000];
|
||||
node_zone_vec = vec!["A", "B", "C", "C", "C", "B", "G", "H", "I"]
|
||||
.into_iter()
|
||||
.map(|x| x.to_string())
|
||||
.collect();
|
||||
update_layout(&mut cl, &node_id_vec, &node_capacity_vec, &node_zone_vec, 2);
|
||||
let v = cl.version;
|
||||
let (mut cl, msg) = cl.apply_staged_changes(Some(v + 1)).unwrap();
|
||||
show_msg(&msg);
|
||||
assert_eq!(cl.check(), Ok(()));
|
||||
assert!(matches!(check_against_naive(&cl), Ok(true)));
|
||||
|
||||
node_capacity_vec = vec![4000, 1000, 2000, 7000, 1000, 1000, 2000, 10000, 2000];
|
||||
update_layout(&mut cl, &node_id_vec, &node_capacity_vec, &node_zone_vec, 3);
|
||||
let v = cl.version;
|
||||
let (mut cl, msg) = cl.apply_staged_changes(Some(v + 1)).unwrap();
|
||||
show_msg(&msg);
|
||||
assert_eq!(cl.check(), Ok(()));
|
||||
assert!(matches!(check_against_naive(&cl), Ok(true)));
|
||||
|
||||
node_capacity_vec = vec![
|
||||
4000000, 4000000, 2000000, 7000000, 1000000, 9000000, 2000000, 10000, 2000000,
|
||||
];
|
||||
update_layout(&mut cl, &node_id_vec, &node_capacity_vec, &node_zone_vec, 1);
|
||||
let v = cl.version;
|
||||
let (cl, msg) = cl.apply_staged_changes(Some(v + 1)).unwrap();
|
||||
show_msg(&msg);
|
||||
assert_eq!(cl.check(), Ok(()));
|
||||
assert!(matches!(check_against_naive(&cl), Ok(true)));
|
||||
}
|
||||
}
|
||||
|
|
Loading…
Reference in a new issue