Garage v1.0 #683
4 changed files with 109 additions and 33 deletions
|
@ -1,3 +1,5 @@
|
||||||
|
use std::collections::HashSet;
|
||||||
|
|
||||||
use garage_util::crdt::{Crdt, Lww, LwwMap};
|
use garage_util::crdt::{Crdt, Lww, LwwMap};
|
||||||
use garage_util::data::*;
|
use garage_util::data::*;
|
||||||
use garage_util::encode::nonversioned_encode;
|
use garage_util::encode::nonversioned_encode;
|
||||||
|
@ -30,6 +32,14 @@ impl LayoutHistory {
|
||||||
self.versions.last().as_ref().unwrap()
|
self.versions.last().as_ref().unwrap()
|
||||||
}
|
}
|
||||||
|
|
||||||
|
pub fn all_storage_nodes(&self) -> HashSet<Uuid> {
|
||||||
|
self.versions
|
||||||
|
.iter()
|
||||||
|
.map(|x| x.nongateway_nodes())
|
||||||
|
.flatten()
|
||||||
|
.collect::<HashSet<_>>()
|
||||||
|
}
|
||||||
|
|
||||||
pub(crate) fn update_hashes(&mut self) {
|
pub(crate) fn update_hashes(&mut self) {
|
||||||
self.trackers_hash = self.calculate_trackers_hash();
|
self.trackers_hash = self.calculate_trackers_hash();
|
||||||
self.staging_hash = self.calculate_staging_hash();
|
self.staging_hash = self.calculate_staging_hash();
|
||||||
|
@ -43,6 +53,65 @@ impl LayoutHistory {
|
||||||
blake2sum(&nonversioned_encode(&self.staging).unwrap()[..])
|
blake2sum(&nonversioned_encode(&self.staging).unwrap()[..])
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// ------------------ update tracking ---------------
|
||||||
|
|
||||||
|
pub(crate) fn update_trackers(&mut self, node_id: Uuid) {
|
||||||
|
// Ensure trackers for this node's values are up-to-date
|
||||||
|
|
||||||
|
// 1. Acknowledge the last layout version in the history
|
||||||
|
self.ack_last(node_id);
|
||||||
|
|
||||||
|
// 2. Assume the data on this node is sync'ed up at least to
|
||||||
|
// the first layout version in the history
|
||||||
|
self.sync_first(node_id);
|
||||||
|
|
||||||
|
// 3. Acknowledge everyone has synced up to min(self.sync_map)
|
||||||
|
self.sync_ack(node_id);
|
||||||
|
|
||||||
|
// 4. Cleanup layout versions that are not needed anymore
|
||||||
|
self.cleanup_old_versions();
|
||||||
|
|
||||||
|
info!("ack_map: {:?}", self.update_trackers.ack_map);
|
||||||
|
info!("sync_map: {:?}", self.update_trackers.sync_map);
|
||||||
|
info!("sync_ack_map: {:?}", self.update_trackers.sync_ack_map);
|
||||||
|
|
||||||
|
// Finally, update hashes
|
||||||
|
self.update_hashes();
|
||||||
|
}
|
||||||
|
|
||||||
|
pub(crate) fn ack_last(&mut self, node: Uuid) {
|
||||||
|
let last_version = self.current().version;
|
||||||
|
self.update_trackers.ack_map.set_max(node, last_version);
|
||||||
|
}
|
||||||
|
|
||||||
|
pub(crate) fn sync_first(&mut self, node: Uuid) {
|
||||||
|
let first_version = self.versions.first().as_ref().unwrap().version;
|
||||||
|
self.update_trackers.sync_map.set_max(node, first_version);
|
||||||
|
}
|
||||||
|
|
||||||
|
pub(crate) fn sync_ack(&mut self, node: Uuid) {
|
||||||
|
self.update_trackers.sync_ack_map.set_max(
|
||||||
|
node,
|
||||||
|
self.calculate_global_min(&self.update_trackers.sync_map),
|
||||||
|
);
|
||||||
|
}
|
||||||
|
|
||||||
|
pub(crate) fn cleanup_old_versions(&mut self) {
|
||||||
|
let min_sync_ack = self.calculate_global_min(&self.update_trackers.sync_ack_map);
|
||||||
|
while self.versions.first().as_ref().unwrap().version < min_sync_ack {
|
||||||
|
self.versions.remove(0);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
pub(crate) fn calculate_global_min(&self, tracker: &UpdateTracker) -> u64 {
|
||||||
|
let storage_nodes = self.all_storage_nodes();
|
||||||
|
storage_nodes
|
||||||
|
.iter()
|
||||||
|
.map(|x| tracker.0.get(x).copied().unwrap_or(0))
|
||||||
|
.min()
|
||||||
|
.unwrap_or(0)
|
||||||
|
}
|
||||||
|
|
||||||
// ================== updates to layout, public interface ===================
|
// ================== updates to layout, public interface ===================
|
||||||
|
|
||||||
pub fn merge(&mut self, other: &LayoutHistory) -> bool {
|
pub fn merge(&mut self, other: &LayoutHistory) -> bool {
|
||||||
|
@ -78,11 +147,6 @@ impl LayoutHistory {
|
||||||
changed = true;
|
changed = true;
|
||||||
}
|
}
|
||||||
|
|
||||||
// Update hashes if there are changes
|
|
||||||
if changed {
|
|
||||||
self.update_hashes();
|
|
||||||
}
|
|
||||||
|
|
||||||
changed
|
changed
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -51,7 +51,7 @@ impl LayoutManager {
|
||||||
let persist_cluster_layout: Persister<LayoutHistory> =
|
let persist_cluster_layout: Persister<LayoutHistory> =
|
||||||
Persister::new(&config.metadata_dir, "cluster_layout");
|
Persister::new(&config.metadata_dir, "cluster_layout");
|
||||||
|
|
||||||
let cluster_layout = match persist_cluster_layout.load() {
|
let mut cluster_layout = match persist_cluster_layout.load() {
|
||||||
Ok(x) => {
|
Ok(x) => {
|
||||||
if x.current().replication_factor != replication_factor {
|
if x.current().replication_factor != replication_factor {
|
||||||
return Err(Error::Message(format!(
|
return Err(Error::Message(format!(
|
||||||
|
@ -71,6 +71,8 @@ impl LayoutManager {
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
|
cluster_layout.update_trackers(node_id.into());
|
||||||
|
|
||||||
let layout = Arc::new(RwLock::new(cluster_layout));
|
let layout = Arc::new(RwLock::new(cluster_layout));
|
||||||
let change_notify = Arc::new(Notify::new());
|
let change_notify = Arc::new(Notify::new());
|
||||||
|
|
||||||
|
@ -126,7 +128,7 @@ impl LayoutManager {
|
||||||
if prev_layout_check && layout.check().is_err() {
|
if prev_layout_check && layout.check().is_err() {
|
||||||
panic!("Merged two correct layouts and got an incorrect layout.");
|
panic!("Merged two correct layouts and got an incorrect layout.");
|
||||||
}
|
}
|
||||||
|
layout.update_trackers(self.node_id);
|
||||||
return Some(layout.clone());
|
return Some(layout.clone());
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -137,6 +139,7 @@ impl LayoutManager {
|
||||||
let mut layout = self.layout.write().unwrap();
|
let mut layout = self.layout.write().unwrap();
|
||||||
if layout.update_trackers != *adv {
|
if layout.update_trackers != *adv {
|
||||||
if layout.update_trackers.merge(adv) {
|
if layout.update_trackers.merge(adv) {
|
||||||
|
layout.update_trackers(self.node_id);
|
||||||
return Some(layout.update_trackers.clone());
|
return Some(layout.update_trackers.clone());
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -3,6 +3,7 @@ use std::fmt;
|
||||||
use bytesize::ByteSize;
|
use bytesize::ByteSize;
|
||||||
|
|
||||||
use garage_util::crdt::{AutoCrdt, Crdt};
|
use garage_util::crdt::{AutoCrdt, Crdt};
|
||||||
|
use garage_util::data::Uuid;
|
||||||
|
|
||||||
mod v08 {
|
mod v08 {
|
||||||
use crate::layout::CompactNodeType;
|
use crate::layout::CompactNodeType;
|
||||||
|
@ -276,8 +277,7 @@ mod v010 {
|
||||||
let update_tracker = UpdateTracker(
|
let update_tracker = UpdateTracker(
|
||||||
version
|
version
|
||||||
.nongateway_nodes()
|
.nongateway_nodes()
|
||||||
.iter()
|
.map(|x| (x, version.version))
|
||||||
.map(|x| (*x, version.version))
|
|
||||||
.collect::<HashMap<Uuid, u64>>(),
|
.collect::<HashMap<Uuid, u64>>(),
|
||||||
);
|
);
|
||||||
let staging = LayoutStaging {
|
let staging = LayoutStaging {
|
||||||
|
@ -375,8 +375,15 @@ impl UpdateTracker {
|
||||||
changed
|
changed
|
||||||
}
|
}
|
||||||
|
|
||||||
pub(crate) fn min(&self) -> u64 {
|
pub(crate) fn set_max(&mut self, peer: Uuid, value: u64) {
|
||||||
self.0.iter().map(|(_, v)| *v).min().unwrap_or(0)
|
match self.0.get_mut(&peer) {
|
||||||
|
Some(e) => {
|
||||||
|
*e = std::cmp::max(*e, value);
|
||||||
|
}
|
||||||
|
None => {
|
||||||
|
self.0.insert(peer, value);
|
||||||
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -134,15 +134,14 @@ impl LayoutVersion {
|
||||||
// ===================== internal information extractors ======================
|
// ===================== internal information extractors ======================
|
||||||
|
|
||||||
/// Returns the uuids of the non_gateway nodes in self.node_id_vec.
|
/// Returns the uuids of the non_gateway nodes in self.node_id_vec.
|
||||||
pub(crate) fn nongateway_nodes(&self) -> Vec<Uuid> {
|
pub(crate) fn nongateway_nodes(&self) -> impl Iterator<Item = Uuid> + '_ {
|
||||||
let mut result = Vec::<Uuid>::new();
|
self.node_id_vec
|
||||||
for uuid in self.node_id_vec.iter() {
|
.iter()
|
||||||
match self.node_role(uuid) {
|
.copied()
|
||||||
Some(role) if role.capacity.is_some() => result.push(*uuid),
|
.filter(move |uuid| match self.node_role(uuid) {
|
||||||
_ => (),
|
Some(role) if role.capacity.is_some() => true,
|
||||||
}
|
_ => false,
|
||||||
}
|
})
|
||||||
result
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Given a node uuids, this function returns the label of its zone
|
/// Given a node uuids, this function returns the label of its zone
|
||||||
|
@ -158,8 +157,8 @@ impl LayoutVersion {
|
||||||
/// Returns the sum of capacities of non gateway nodes in the cluster
|
/// Returns the sum of capacities of non gateway nodes in the cluster
|
||||||
fn get_total_capacity(&self) -> Result<u64, Error> {
|
fn get_total_capacity(&self) -> Result<u64, Error> {
|
||||||
let mut total_capacity = 0;
|
let mut total_capacity = 0;
|
||||||
for uuid in self.nongateway_nodes().iter() {
|
for uuid in self.nongateway_nodes() {
|
||||||
total_capacity += self.get_node_capacity(uuid)?;
|
total_capacity += self.get_node_capacity(&uuid)?;
|
||||||
}
|
}
|
||||||
Ok(total_capacity)
|
Ok(total_capacity)
|
||||||
}
|
}
|
||||||
|
@ -320,7 +319,7 @@ impl LayoutVersion {
|
||||||
// to use them as indices in the flow graphs.
|
// to use them as indices in the flow graphs.
|
||||||
let (id_to_zone, zone_to_id) = self.generate_nongateway_zone_ids()?;
|
let (id_to_zone, zone_to_id) = self.generate_nongateway_zone_ids()?;
|
||||||
|
|
||||||
let nb_nongateway_nodes = self.nongateway_nodes().len();
|
let nb_nongateway_nodes = self.nongateway_nodes().count();
|
||||||
if nb_nongateway_nodes < self.replication_factor {
|
if nb_nongateway_nodes < self.replication_factor {
|
||||||
return Err(Error::Message(format!(
|
return Err(Error::Message(format!(
|
||||||
"The number of nodes with positive \
|
"The number of nodes with positive \
|
||||||
|
@ -479,7 +478,8 @@ impl LayoutVersion {
|
||||||
let mut id_to_zone = Vec::<String>::new();
|
let mut id_to_zone = Vec::<String>::new();
|
||||||
let mut zone_to_id = HashMap::<String, usize>::new();
|
let mut zone_to_id = HashMap::<String, usize>::new();
|
||||||
|
|
||||||
for uuid in self.nongateway_nodes().iter() {
|
let nongateway_nodes = self.nongateway_nodes().collect::<Vec<_>>();
|
||||||
|
for uuid in nongateway_nodes.iter() {
|
||||||
let r = self.node_role(uuid).unwrap();
|
let r = self.node_role(uuid).unwrap();
|
||||||
if !zone_to_id.contains_key(&r.zone) && r.capacity.is_some() {
|
if !zone_to_id.contains_key(&r.zone) && r.capacity.is_some() {
|
||||||
zone_to_id.insert(r.zone.clone(), id_to_zone.len());
|
zone_to_id.insert(r.zone.clone(), id_to_zone.len());
|
||||||
|
@ -556,8 +556,10 @@ impl LayoutVersion {
|
||||||
exclude_assoc: &HashSet<(usize, usize)>,
|
exclude_assoc: &HashSet<(usize, usize)>,
|
||||||
zone_redundancy: usize,
|
zone_redundancy: usize,
|
||||||
) -> Result<Graph<FlowEdge>, Error> {
|
) -> Result<Graph<FlowEdge>, Error> {
|
||||||
let vertices =
|
let vertices = LayoutVersion::generate_graph_vertices(
|
||||||
LayoutVersion::generate_graph_vertices(zone_to_id.len(), self.nongateway_nodes().len());
|
zone_to_id.len(),
|
||||||
|
self.nongateway_nodes().count(),
|
||||||
|
);
|
||||||
let mut g = Graph::<FlowEdge>::new(&vertices);
|
let mut g = Graph::<FlowEdge>::new(&vertices);
|
||||||
let nb_zones = zone_to_id.len();
|
let nb_zones = zone_to_id.len();
|
||||||
for p in 0..NB_PARTITIONS {
|
for p in 0..NB_PARTITIONS {
|
||||||
|
@ -576,7 +578,7 @@ impl LayoutVersion {
|
||||||
)?;
|
)?;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
for n in 0..self.nongateway_nodes().len() {
|
for n in 0..self.nongateway_nodes().count() {
|
||||||
let node_capacity = self.get_node_capacity(&self.node_id_vec[n])?;
|
let node_capacity = self.get_node_capacity(&self.node_id_vec[n])?;
|
||||||
let node_zone = zone_to_id[self.get_node_zone(&self.node_id_vec[n])?];
|
let node_zone = zone_to_id[self.get_node_zone(&self.node_id_vec[n])?];
|
||||||
g.add_edge(Vertex::N(n), Vertex::Sink, node_capacity / partition_size)?;
|
g.add_edge(Vertex::N(n), Vertex::Sink, node_capacity / partition_size)?;
|
||||||
|
@ -600,7 +602,7 @@ impl LayoutVersion {
|
||||||
// previous assignment
|
// previous assignment
|
||||||
let mut exclude_edge = HashSet::<(usize, usize)>::new();
|
let mut exclude_edge = HashSet::<(usize, usize)>::new();
|
||||||
if let Some(prev_assign) = prev_assign_opt {
|
if let Some(prev_assign) = prev_assign_opt {
|
||||||
let nb_nodes = self.nongateway_nodes().len();
|
let nb_nodes = self.nongateway_nodes().count();
|
||||||
for (p, prev_assign_p) in prev_assign.iter().enumerate() {
|
for (p, prev_assign_p) in prev_assign.iter().enumerate() {
|
||||||
for n in 0..nb_nodes {
|
for n in 0..nb_nodes {
|
||||||
exclude_edge.insert((p, n));
|
exclude_edge.insert((p, n));
|
||||||
|
@ -652,7 +654,7 @@ impl LayoutVersion {
|
||||||
// We compute the maximal length of a simple path in gflow. It is used in the
|
// We compute the maximal length of a simple path in gflow. It is used in the
|
||||||
// Bellman-Ford algorithm in optimize_flow_with_cost to set the number
|
// Bellman-Ford algorithm in optimize_flow_with_cost to set the number
|
||||||
// of iterations.
|
// of iterations.
|
||||||
let nb_nodes = self.nongateway_nodes().len();
|
let nb_nodes = self.nongateway_nodes().count();
|
||||||
let path_length = 4 * nb_nodes;
|
let path_length = 4 * nb_nodes;
|
||||||
gflow.optimize_flow_with_cost(&cost, path_length)?;
|
gflow.optimize_flow_with_cost(&cost, path_length)?;
|
||||||
|
|
||||||
|
@ -730,7 +732,7 @@ impl LayoutVersion {
|
||||||
}
|
}
|
||||||
|
|
||||||
// We define and fill in the following tables
|
// We define and fill in the following tables
|
||||||
let storing_nodes = self.nongateway_nodes();
|
let storing_nodes = self.nongateway_nodes().collect::<Vec<_>>();
|
||||||
let mut new_partitions = vec![0; storing_nodes.len()];
|
let mut new_partitions = vec![0; storing_nodes.len()];
|
||||||
let mut stored_partitions = vec![0; storing_nodes.len()];
|
let mut stored_partitions = vec![0; storing_nodes.len()];
|
||||||
|
|
||||||
|
@ -873,9 +875,9 @@ mod tests {
|
||||||
for z in zones.iter() {
|
for z in zones.iter() {
|
||||||
zone_token.insert(z.clone(), 0);
|
zone_token.insert(z.clone(), 0);
|
||||||
}
|
}
|
||||||
for uuid in cl.nongateway_nodes().iter() {
|
for uuid in cl.nongateway_nodes() {
|
||||||
let z = cl.get_node_zone(uuid)?;
|
let z = cl.get_node_zone(&uuid)?;
|
||||||
let c = cl.get_node_capacity(uuid)?;
|
let c = cl.get_node_capacity(&uuid)?;
|
||||||
zone_token.insert(
|
zone_token.insert(
|
||||||
z.clone(),
|
z.clone(),
|
||||||
zone_token[&z] + min(NB_PARTITIONS, (c / over_size) as usize),
|
zone_token[&z] + min(NB_PARTITIONS, (c / over_size) as usize),
|
||||||
|
|
Loading…
Reference in a new issue