Garage v1.0 #683
4 changed files with 174 additions and 169 deletions
|
@ -18,7 +18,7 @@ impl LayoutHistory {
|
||||||
};
|
};
|
||||||
|
|
||||||
let mut ret = LayoutHistory {
|
let mut ret = LayoutHistory {
|
||||||
versions: vec![version].into_boxed_slice().into(),
|
versions: vec![version],
|
||||||
update_trackers: Default::default(),
|
update_trackers: Default::default(),
|
||||||
trackers_hash: [0u8; 32].into(),
|
trackers_hash: [0u8; 32].into(),
|
||||||
staging: Lww::raw(0, staging),
|
staging: Lww::raw(0, staging),
|
||||||
|
@ -211,6 +211,11 @@ To know the correct value of the new layout version, invoke `garage layout show`
|
||||||
|
|
||||||
let msg = new_version.calculate_partition_assignment()?;
|
let msg = new_version.calculate_partition_assignment()?;
|
||||||
self.versions.push(new_version);
|
self.versions.push(new_version);
|
||||||
|
if self.current().check().is_ok() {
|
||||||
|
while self.versions.first().unwrap().check().is_err() {
|
||||||
|
self.versions.remove(0);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
// Reset the staged layout changes
|
// Reset the staged layout changes
|
||||||
self.staging.update(LayoutStaging {
|
self.staging.update(LayoutStaging {
|
||||||
|
@ -245,7 +250,7 @@ To know the correct value of the new layout version, invoke `garage layout show`
|
||||||
version.check()?;
|
version.check()?;
|
||||||
}
|
}
|
||||||
|
|
||||||
// TODO: anythign more ?
|
// TODO: anything more ?
|
||||||
Ok(())
|
Ok(())
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -3,6 +3,9 @@ mod history;
|
||||||
mod schema;
|
mod schema;
|
||||||
mod version;
|
mod version;
|
||||||
|
|
||||||
|
#[cfg(test)]
|
||||||
|
mod test;
|
||||||
|
|
||||||
pub mod manager;
|
pub mod manager;
|
||||||
|
|
||||||
// ---- re-exports ----
|
// ---- re-exports ----
|
||||||
|
|
159
src/rpc/layout/test.rs
Normal file
159
src/rpc/layout/test.rs
Normal file
|
@ -0,0 +1,159 @@
|
||||||
|
use std::cmp::min;
|
||||||
|
use std::collections::HashMap;
|
||||||
|
|
||||||
|
use garage_util::crdt::Crdt;
|
||||||
|
use garage_util::error::*;
|
||||||
|
|
||||||
|
use crate::layout::*;
|
||||||
|
|
||||||
|
// This function checks that the partition size S computed is at least better than the
|
||||||
|
// one given by a very naive algorithm. To do so, we try to run the naive algorithm
|
||||||
|
// assuming a partion size of S+1. If we succed, it means that the optimal assignment
|
||||||
|
// was not optimal. The naive algorithm is the following :
|
||||||
|
// - we compute the max number of partitions associated to every node, capped at the
|
||||||
|
// partition number. It gives the number of tokens of every node.
|
||||||
|
// - every zone has a number of tokens equal to the sum of the tokens of its nodes.
|
||||||
|
// - we cycle over the partitions and associate zone tokens while respecting the
|
||||||
|
// zone redundancy constraint.
|
||||||
|
// NOTE: the naive algorithm is not optimal. Counter example:
|
||||||
|
// take nb_partition = 3 ; replication_factor = 5; redundancy = 4;
|
||||||
|
// number of tokens by zone : (A, 4), (B,1), (C,4), (D, 4), (E, 2)
|
||||||
|
// With these parameters, the naive algo fails, whereas there is a solution:
|
||||||
|
// (A,A,C,D,E) , (A,B,C,D,D) (A,C,C,D,E)
|
||||||
|
fn check_against_naive(cl: &LayoutVersion) -> Result<bool, Error> {
|
||||||
|
let over_size = cl.partition_size + 1;
|
||||||
|
let mut zone_token = HashMap::<String, usize>::new();
|
||||||
|
|
||||||
|
let (zones, zone_to_id) = cl.generate_nongateway_zone_ids()?;
|
||||||
|
|
||||||
|
if zones.is_empty() {
|
||||||
|
return Ok(false);
|
||||||
|
}
|
||||||
|
|
||||||
|
for z in zones.iter() {
|
||||||
|
zone_token.insert(z.clone(), 0);
|
||||||
|
}
|
||||||
|
for uuid in cl.nongateway_nodes() {
|
||||||
|
let z = cl.get_node_zone(&uuid)?;
|
||||||
|
let c = cl.get_node_capacity(&uuid)?;
|
||||||
|
zone_token.insert(
|
||||||
|
z.to_string(),
|
||||||
|
zone_token[z] + min(NB_PARTITIONS, (c / over_size) as usize),
|
||||||
|
);
|
||||||
|
}
|
||||||
|
|
||||||
|
// For every partition, we count the number of zone already associated and
|
||||||
|
// the name of the last zone associated
|
||||||
|
|
||||||
|
let mut id_zone_token = vec![0; zones.len()];
|
||||||
|
for (z, t) in zone_token.iter() {
|
||||||
|
id_zone_token[zone_to_id[z]] = *t;
|
||||||
|
}
|
||||||
|
|
||||||
|
let mut nb_token = vec![0; NB_PARTITIONS];
|
||||||
|
let mut last_zone = vec![zones.len(); NB_PARTITIONS];
|
||||||
|
|
||||||
|
let mut curr_zone = 0;
|
||||||
|
|
||||||
|
let redundancy = cl.effective_zone_redundancy();
|
||||||
|
|
||||||
|
for replic in 0..cl.replication_factor {
|
||||||
|
for p in 0..NB_PARTITIONS {
|
||||||
|
while id_zone_token[curr_zone] == 0
|
||||||
|
|| (last_zone[p] == curr_zone
|
||||||
|
&& redundancy - nb_token[p] <= cl.replication_factor - replic)
|
||||||
|
{
|
||||||
|
curr_zone += 1;
|
||||||
|
if curr_zone >= zones.len() {
|
||||||
|
return Ok(true);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
id_zone_token[curr_zone] -= 1;
|
||||||
|
if last_zone[p] != curr_zone {
|
||||||
|
nb_token[p] += 1;
|
||||||
|
last_zone[p] = curr_zone;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return Ok(false);
|
||||||
|
}
|
||||||
|
|
||||||
|
fn show_msg(msg: &Message) {
|
||||||
|
for s in msg.iter() {
|
||||||
|
println!("{}", s);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
fn update_layout(
|
||||||
|
cl: &mut LayoutHistory,
|
||||||
|
node_capacity_vec: &[u64],
|
||||||
|
node_zone_vec: &[&'static str],
|
||||||
|
zone_redundancy: usize,
|
||||||
|
) {
|
||||||
|
let staging = cl.staging.get_mut();
|
||||||
|
|
||||||
|
for (i, (capacity, zone)) in node_capacity_vec
|
||||||
|
.iter()
|
||||||
|
.zip(node_zone_vec.iter())
|
||||||
|
.enumerate()
|
||||||
|
{
|
||||||
|
let node_id = [i as u8; 32].into();
|
||||||
|
|
||||||
|
let update = staging.roles.update_mutator(
|
||||||
|
node_id,
|
||||||
|
NodeRoleV(Some(NodeRole {
|
||||||
|
zone: zone.to_string(),
|
||||||
|
capacity: Some(*capacity),
|
||||||
|
tags: (vec![]),
|
||||||
|
})),
|
||||||
|
);
|
||||||
|
staging.roles.merge(&update);
|
||||||
|
}
|
||||||
|
staging.parameters.update(LayoutParameters {
|
||||||
|
zone_redundancy: ZoneRedundancy::AtLeast(zone_redundancy),
|
||||||
|
});
|
||||||
|
|
||||||
|
cl.update_hashes();
|
||||||
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn test_assignment() {
|
||||||
|
let mut node_capacity_vec = vec![4000, 1000, 2000];
|
||||||
|
let mut node_zone_vec = vec!["A", "B", "C"];
|
||||||
|
|
||||||
|
let mut cl = LayoutHistory::new(3);
|
||||||
|
update_layout(&mut cl, &node_capacity_vec, &node_zone_vec, 3);
|
||||||
|
let v = cl.current().version;
|
||||||
|
let (mut cl, msg) = cl.apply_staged_changes(Some(v + 1)).unwrap();
|
||||||
|
show_msg(&msg);
|
||||||
|
assert_eq!(cl.check(), Ok(()));
|
||||||
|
assert!(check_against_naive(cl.current()).unwrap());
|
||||||
|
|
||||||
|
node_capacity_vec = vec![4000, 1000, 1000, 3000, 1000, 1000, 2000, 10000, 2000];
|
||||||
|
node_zone_vec = vec!["A", "B", "C", "C", "C", "B", "G", "H", "I"];
|
||||||
|
update_layout(&mut cl, &node_capacity_vec, &node_zone_vec, 2);
|
||||||
|
let v = cl.current().version;
|
||||||
|
let (mut cl, msg) = cl.apply_staged_changes(Some(v + 1)).unwrap();
|
||||||
|
show_msg(&msg);
|
||||||
|
assert_eq!(cl.check(), Ok(()));
|
||||||
|
assert!(check_against_naive(cl.current()).unwrap());
|
||||||
|
|
||||||
|
node_capacity_vec = vec![4000, 1000, 2000, 7000, 1000, 1000, 2000, 10000, 2000];
|
||||||
|
update_layout(&mut cl, &node_capacity_vec, &node_zone_vec, 3);
|
||||||
|
let v = cl.current().version;
|
||||||
|
let (mut cl, msg) = cl.apply_staged_changes(Some(v + 1)).unwrap();
|
||||||
|
show_msg(&msg);
|
||||||
|
assert_eq!(cl.check(), Ok(()));
|
||||||
|
assert!(check_against_naive(cl.current()).unwrap());
|
||||||
|
|
||||||
|
node_capacity_vec = vec![
|
||||||
|
4000000, 4000000, 2000000, 7000000, 1000000, 9000000, 2000000, 10000, 2000000,
|
||||||
|
];
|
||||||
|
update_layout(&mut cl, &node_capacity_vec, &node_zone_vec, 1);
|
||||||
|
let v = cl.current().version;
|
||||||
|
let (cl, msg) = cl.apply_staged_changes(Some(v + 1)).unwrap();
|
||||||
|
show_msg(&msg);
|
||||||
|
assert_eq!(cl.check(), Ok(()));
|
||||||
|
assert!(check_against_naive(cl.current()).unwrap());
|
||||||
|
}
|
|
@ -143,7 +143,7 @@ impl LayoutVersion {
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Given a node uuids, this function returns the label of its zone
|
/// Given a node uuids, this function returns the label of its zone
|
||||||
fn get_node_zone(&self, uuid: &Uuid) -> Result<&str, Error> {
|
pub(crate) fn get_node_zone(&self, uuid: &Uuid) -> Result<&str, Error> {
|
||||||
match self.node_role(uuid) {
|
match self.node_role(uuid) {
|
||||||
Some(role) => Ok(&role.zone),
|
Some(role) => Ok(&role.zone),
|
||||||
_ => Err(Error::Message(
|
_ => Err(Error::Message(
|
||||||
|
@ -162,7 +162,7 @@ impl LayoutVersion {
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Returns the effective value of the zone_redundancy parameter
|
/// Returns the effective value of the zone_redundancy parameter
|
||||||
fn effective_zone_redundancy(&self) -> usize {
|
pub(crate) fn effective_zone_redundancy(&self) -> usize {
|
||||||
match self.parameters.zone_redundancy {
|
match self.parameters.zone_redundancy {
|
||||||
ZoneRedundancy::AtLeast(v) => v,
|
ZoneRedundancy::AtLeast(v) => v,
|
||||||
ZoneRedundancy::Maximum => {
|
ZoneRedundancy::Maximum => {
|
||||||
|
@ -472,7 +472,9 @@ impl LayoutVersion {
|
||||||
|
|
||||||
/// This function generates ids for the zone of the nodes appearing in
|
/// This function generates ids for the zone of the nodes appearing in
|
||||||
/// self.node_id_vec.
|
/// self.node_id_vec.
|
||||||
fn generate_nongateway_zone_ids(&self) -> Result<(Vec<String>, HashMap<String, usize>), Error> {
|
pub(crate) fn generate_nongateway_zone_ids(
|
||||||
|
&self,
|
||||||
|
) -> Result<(Vec<String>, HashMap<String, usize>), Error> {
|
||||||
let mut id_to_zone = Vec::<String>::new();
|
let mut id_to_zone = Vec::<String>::new();
|
||||||
let mut zone_to_id = HashMap::<String, usize>::new();
|
let mut zone_to_id = HashMap::<String, usize>::new();
|
||||||
|
|
||||||
|
@ -838,167 +840,3 @@ impl LayoutVersion {
|
||||||
Ok(msg)
|
Ok(msg)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// ====================================================================================
|
|
||||||
|
|
||||||
#[cfg(test)]
|
|
||||||
mod tests {
|
|
||||||
use super::{Error, *};
|
|
||||||
use std::cmp::min;
|
|
||||||
|
|
||||||
// This function checks that the partition size S computed is at least better than the
|
|
||||||
// one given by a very naive algorithm. To do so, we try to run the naive algorithm
|
|
||||||
// assuming a partion size of S+1. If we succed, it means that the optimal assignment
|
|
||||||
// was not optimal. The naive algorithm is the following :
|
|
||||||
// - we compute the max number of partitions associated to every node, capped at the
|
|
||||||
// partition number. It gives the number of tokens of every node.
|
|
||||||
// - every zone has a number of tokens equal to the sum of the tokens of its nodes.
|
|
||||||
// - we cycle over the partitions and associate zone tokens while respecting the
|
|
||||||
// zone redundancy constraint.
|
|
||||||
// NOTE: the naive algorithm is not optimal. Counter example:
|
|
||||||
// take nb_partition = 3 ; replication_factor = 5; redundancy = 4;
|
|
||||||
// number of tokens by zone : (A, 4), (B,1), (C,4), (D, 4), (E, 2)
|
|
||||||
// With these parameters, the naive algo fails, whereas there is a solution:
|
|
||||||
// (A,A,C,D,E) , (A,B,C,D,D) (A,C,C,D,E)
|
|
||||||
fn check_against_naive(cl: &LayoutVersion) -> Result<bool, Error> {
|
|
||||||
let over_size = cl.partition_size + 1;
|
|
||||||
let mut zone_token = HashMap::<String, usize>::new();
|
|
||||||
|
|
||||||
let (zones, zone_to_id) = cl.generate_nongateway_zone_ids()?;
|
|
||||||
|
|
||||||
if zones.is_empty() {
|
|
||||||
return Ok(false);
|
|
||||||
}
|
|
||||||
|
|
||||||
for z in zones.iter() {
|
|
||||||
zone_token.insert(z.clone(), 0);
|
|
||||||
}
|
|
||||||
for uuid in cl.nongateway_nodes() {
|
|
||||||
let z = cl.get_node_zone(&uuid)?;
|
|
||||||
let c = cl.get_node_capacity(&uuid)?;
|
|
||||||
zone_token.insert(
|
|
||||||
z.clone(),
|
|
||||||
zone_token[&z] + min(NB_PARTITIONS, (c / over_size) as usize),
|
|
||||||
);
|
|
||||||
}
|
|
||||||
|
|
||||||
// For every partition, we count the number of zone already associated and
|
|
||||||
// the name of the last zone associated
|
|
||||||
|
|
||||||
let mut id_zone_token = vec![0; zones.len()];
|
|
||||||
for (z, t) in zone_token.iter() {
|
|
||||||
id_zone_token[zone_to_id[z]] = *t;
|
|
||||||
}
|
|
||||||
|
|
||||||
let mut nb_token = vec![0; NB_PARTITIONS];
|
|
||||||
let mut last_zone = vec![zones.len(); NB_PARTITIONS];
|
|
||||||
|
|
||||||
let mut curr_zone = 0;
|
|
||||||
|
|
||||||
let redundancy = cl.effective_zone_redundancy();
|
|
||||||
|
|
||||||
for replic in 0..cl.replication_factor {
|
|
||||||
for p in 0..NB_PARTITIONS {
|
|
||||||
while id_zone_token[curr_zone] == 0
|
|
||||||
|| (last_zone[p] == curr_zone
|
|
||||||
&& redundancy - nb_token[p] <= cl.replication_factor - replic)
|
|
||||||
{
|
|
||||||
curr_zone += 1;
|
|
||||||
if curr_zone >= zones.len() {
|
|
||||||
return Ok(true);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
id_zone_token[curr_zone] -= 1;
|
|
||||||
if last_zone[p] != curr_zone {
|
|
||||||
nb_token[p] += 1;
|
|
||||||
last_zone[p] = curr_zone;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
return Ok(false);
|
|
||||||
}
|
|
||||||
|
|
||||||
fn show_msg(msg: &Message) {
|
|
||||||
for s in msg.iter() {
|
|
||||||
println!("{}", s);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
fn update_layout(
|
|
||||||
cl: &mut LayoutVersion,
|
|
||||||
node_id_vec: &Vec<u8>,
|
|
||||||
node_capacity_vec: &Vec<u64>,
|
|
||||||
node_zone_vec: &Vec<String>,
|
|
||||||
zone_redundancy: usize,
|
|
||||||
) {
|
|
||||||
for i in 0..node_id_vec.len() {
|
|
||||||
if let Some(x) = FixedBytes32::try_from(&[i as u8; 32]) {
|
|
||||||
cl.node_id_vec.push(x);
|
|
||||||
}
|
|
||||||
|
|
||||||
let update = cl.staging_roles.update_mutator(
|
|
||||||
cl.node_id_vec[i],
|
|
||||||
NodeRoleV(Some(NodeRole {
|
|
||||||
zone: (node_zone_vec[i].to_string()),
|
|
||||||
capacity: (Some(node_capacity_vec[i])),
|
|
||||||
tags: (vec![]),
|
|
||||||
})),
|
|
||||||
);
|
|
||||||
cl.staging_roles.merge(&update);
|
|
||||||
}
|
|
||||||
cl.staging_parameters.update(LayoutParameters {
|
|
||||||
zone_redundancy: ZoneRedundancy::AtLeast(zone_redundancy),
|
|
||||||
});
|
|
||||||
cl.staging_hash = cl.calculate_staging_hash();
|
|
||||||
}
|
|
||||||
|
|
||||||
#[test]
|
|
||||||
fn test_assignment() {
|
|
||||||
let mut node_id_vec = vec![1, 2, 3];
|
|
||||||
let mut node_capacity_vec = vec![4000, 1000, 2000];
|
|
||||||
let mut node_zone_vec = vec!["A", "B", "C"]
|
|
||||||
.into_iter()
|
|
||||||
.map(|x| x.to_string())
|
|
||||||
.collect();
|
|
||||||
|
|
||||||
let mut cl = LayoutVersion::new(3);
|
|
||||||
update_layout(&mut cl, &node_id_vec, &node_capacity_vec, &node_zone_vec, 3);
|
|
||||||
let v = cl.version;
|
|
||||||
let (mut cl, msg) = cl.apply_staged_changes(Some(v + 1)).unwrap();
|
|
||||||
show_msg(&msg);
|
|
||||||
assert_eq!(cl.check(), Ok(()));
|
|
||||||
assert!(matches!(check_against_naive(&cl), Ok(true)));
|
|
||||||
|
|
||||||
node_id_vec = vec![1, 2, 3, 4, 5, 6, 7, 8, 9];
|
|
||||||
node_capacity_vec = vec![4000, 1000, 1000, 3000, 1000, 1000, 2000, 10000, 2000];
|
|
||||||
node_zone_vec = vec!["A", "B", "C", "C", "C", "B", "G", "H", "I"]
|
|
||||||
.into_iter()
|
|
||||||
.map(|x| x.to_string())
|
|
||||||
.collect();
|
|
||||||
update_layout(&mut cl, &node_id_vec, &node_capacity_vec, &node_zone_vec, 2);
|
|
||||||
let v = cl.version;
|
|
||||||
let (mut cl, msg) = cl.apply_staged_changes(Some(v + 1)).unwrap();
|
|
||||||
show_msg(&msg);
|
|
||||||
assert_eq!(cl.check(), Ok(()));
|
|
||||||
assert!(matches!(check_against_naive(&cl), Ok(true)));
|
|
||||||
|
|
||||||
node_capacity_vec = vec![4000, 1000, 2000, 7000, 1000, 1000, 2000, 10000, 2000];
|
|
||||||
update_layout(&mut cl, &node_id_vec, &node_capacity_vec, &node_zone_vec, 3);
|
|
||||||
let v = cl.version;
|
|
||||||
let (mut cl, msg) = cl.apply_staged_changes(Some(v + 1)).unwrap();
|
|
||||||
show_msg(&msg);
|
|
||||||
assert_eq!(cl.check(), Ok(()));
|
|
||||||
assert!(matches!(check_against_naive(&cl), Ok(true)));
|
|
||||||
|
|
||||||
node_capacity_vec = vec![
|
|
||||||
4000000, 4000000, 2000000, 7000000, 1000000, 9000000, 2000000, 10000, 2000000,
|
|
||||||
];
|
|
||||||
update_layout(&mut cl, &node_id_vec, &node_capacity_vec, &node_zone_vec, 1);
|
|
||||||
let v = cl.version;
|
|
||||||
let (cl, msg) = cl.apply_staged_changes(Some(v + 1)).unwrap();
|
|
||||||
show_msg(&msg);
|
|
||||||
assert_eq!(cl.check(), Ok(()));
|
|
||||||
assert!(matches!(check_against_naive(&cl), Ok(true)));
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
Loading…
Reference in a new issue