Garage v1.0 #683

Merged
lx merged 119 commits from next-0.10 into main 2024-04-10 15:23:13 +00:00
5 changed files with 55 additions and 57 deletions
Showing only changes of commit 01a0bd5410 - Show all commits

View file

@ -78,7 +78,7 @@ pub async fn handle_get_cluster_status(garage: &Arc<Garage>) -> Result<Response<
} }
} }
for ver in layout.versions.iter().rev().skip(1) { for ver in layout.versions().iter().rev().skip(1) {
for (id, _, role) in ver.roles.items().iter() { for (id, _, role) in ver.roles.items().iter() {
if let layout::NodeRoleV(Some(r)) = role { if let layout::NodeRoleV(Some(r)) = role {
if !nodes.contains_key(id) && r.capacity.is_some() { if !nodes.contains_key(id) && r.capacity.is_some() {
@ -156,7 +156,7 @@ pub async fn handle_connect_cluster_nodes(
} }
pub async fn handle_get_cluster_layout(garage: &Arc<Garage>) -> Result<Response<ResBody>, Error> { pub async fn handle_get_cluster_layout(garage: &Arc<Garage>) -> Result<Response<ResBody>, Error> {
let res = format_cluster_layout(&garage.system.cluster_layout()); let res = format_cluster_layout(garage.system.cluster_layout().inner());
Ok(json_ok_response(&res)?) Ok(json_ok_response(&res)?)
} }
@ -295,7 +295,7 @@ pub async fn handle_update_cluster_layout(
) -> Result<Response<ResBody>, Error> { ) -> Result<Response<ResBody>, Error> {
let updates = parse_json_body::<UpdateClusterLayoutRequest, _, Error>(req).await?; let updates = parse_json_body::<UpdateClusterLayoutRequest, _, Error>(req).await?;
let mut layout = garage.system.cluster_layout().clone(); let mut layout = garage.system.cluster_layout().inner().clone();
let mut roles = layout.current().roles.clone(); let mut roles = layout.current().roles.clone();
roles.merge(&layout.staging.get().roles); roles.merge(&layout.staging.get().roles);
@ -341,7 +341,7 @@ pub async fn handle_apply_cluster_layout(
) -> Result<Response<ResBody>, Error> { ) -> Result<Response<ResBody>, Error> {
let param = parse_json_body::<ApplyLayoutRequest, _, Error>(req).await?; let param = parse_json_body::<ApplyLayoutRequest, _, Error>(req).await?;
let layout = garage.system.cluster_layout().clone(); let layout = garage.system.cluster_layout().inner().clone();
let (layout, msg) = layout.apply_staged_changes(Some(param.version))?; let (layout, msg) = layout.apply_staged_changes(Some(param.version))?;
garage garage
@ -360,7 +360,7 @@ pub async fn handle_apply_cluster_layout(
pub async fn handle_revert_cluster_layout( pub async fn handle_revert_cluster_layout(
garage: &Arc<Garage>, garage: &Arc<Garage>,
) -> Result<Response<ResBody>, Error> { ) -> Result<Response<ResBody>, Error> {
let layout = garage.system.cluster_layout().clone(); let layout = garage.system.cluster_layout().inner().clone();
let layout = layout.revert_staged_changes()?; let layout = layout.revert_staged_changes()?;
garage garage
.system .system

View file

@ -1,5 +1,4 @@
use std::collections::HashMap; use std::collections::HashMap;
use std::ops::Deref;
use std::sync::atomic::{AtomicUsize, Ordering}; use std::sync::atomic::{AtomicUsize, Ordering};
use serde::{Deserialize, Serialize}; use serde::{Deserialize, Serialize};
@ -49,13 +48,6 @@ pub struct LayoutHelper {
pub(crate) ack_lock: HashMap<u64, AtomicUsize>, pub(crate) ack_lock: HashMap<u64, AtomicUsize>,
} }
impl Deref for LayoutHelper {
type Target = LayoutHistory;
fn deref(&self) -> &LayoutHistory {
self.layout()
}
}
impl LayoutHelper { impl LayoutHelper {
pub fn new( pub fn new(
replication_factor: ReplicationFactor, replication_factor: ReplicationFactor,
@ -131,10 +123,6 @@ impl LayoutHelper {
// ------------------ single updating function -------------- // ------------------ single updating function --------------
fn layout(&self) -> &LayoutHistory {
self.layout.as_ref().unwrap()
}
pub(crate) fn update<F>(&mut self, f: F) -> bool pub(crate) fn update<F>(&mut self, f: F) -> bool
where where
F: FnOnce(&mut LayoutHistory) -> bool, F: FnOnce(&mut LayoutHistory) -> bool,
@ -153,6 +141,18 @@ impl LayoutHelper {
// ------------------ read helpers --------------- // ------------------ read helpers ---------------
pub fn inner(&self) -> &LayoutHistory {
self.layout.as_ref().unwrap()
}
pub fn current(&self) -> &LayoutVersion {
self.inner().current()
}
pub fn versions(&self) -> &[LayoutVersion] {
&self.inner().versions
}
/// Return all nodes that have a role (gateway or storage) /// Return all nodes that have a role (gateway or storage)
/// in one of the currently active layout versions /// in one of the currently active layout versions
pub fn all_nodes(&self) -> &[Uuid] { pub fn all_nodes(&self) -> &[Uuid] {
@ -175,20 +175,19 @@ impl LayoutHelper {
pub fn sync_digest(&self) -> SyncLayoutDigest { pub fn sync_digest(&self) -> SyncLayoutDigest {
SyncLayoutDigest { SyncLayoutDigest {
current: self.layout().current().version, current: self.current().version,
ack_map_min: self.ack_map_min(), ack_map_min: self.ack_map_min(),
min_stored: self.layout().min_stored(), min_stored: self.inner().min_stored(),
} }
} }
pub fn read_nodes_of(&self, position: &Hash) -> Vec<Uuid> { pub fn read_nodes_of(&self, position: &Hash) -> Vec<Uuid> {
let sync_min = self.sync_map_min; let sync_min = self.sync_map_min;
let version = self let version = self
.layout() .versions()
.versions
.iter() .iter()
.find(|x| x.version == sync_min) .find(|x| x.version == sync_min)
.or(self.layout().versions.last()) .or(self.versions().last())
.unwrap(); .unwrap();
version version
.nodes_of(position, version.replication_factor) .nodes_of(position, version.replication_factor)
@ -196,8 +195,7 @@ impl LayoutHelper {
} }
pub fn storage_sets_of(&self, position: &Hash) -> Vec<Vec<Uuid>> { pub fn storage_sets_of(&self, position: &Hash) -> Vec<Vec<Uuid>> {
self.layout() self.versions()
.versions
.iter() .iter()
.map(|x| x.nodes_of(position, x.replication_factor).collect()) .map(|x| x.nodes_of(position, x.replication_factor).collect())
.collect() .collect()
@ -205,7 +203,7 @@ impl LayoutHelper {
pub fn storage_nodes_of(&self, position: &Hash) -> Vec<Uuid> { pub fn storage_nodes_of(&self, position: &Hash) -> Vec<Uuid> {
let mut ret = vec![]; let mut ret = vec![];
for version in self.layout().versions.iter() { for version in self.versions().iter() {
ret.extend(version.nodes_of(position, version.replication_factor)); ret.extend(version.nodes_of(position, version.replication_factor));
} }
ret.sort(); ret.sort();
@ -224,7 +222,7 @@ impl LayoutHelper {
pub fn digest(&self) -> RpcLayoutDigest { pub fn digest(&self) -> RpcLayoutDigest {
RpcLayoutDigest { RpcLayoutDigest {
current_version: self.current().version, current_version: self.current().version,
active_versions: self.versions.len(), active_versions: self.versions().len(),
trackers_hash: self.trackers_hash, trackers_hash: self.trackers_hash,
staging_hash: self.staging_hash, staging_hash: self.staging_hash,
} }
@ -246,13 +244,16 @@ impl LayoutHelper {
// 3. Acknowledge everyone has synced up to min(self.sync_map) // 3. Acknowledge everyone has synced up to min(self.sync_map)
self.sync_ack(local_node_id); self.sync_ack(local_node_id);
debug!("ack_map: {:?}", self.update_trackers.ack_map); debug!("ack_map: {:?}", self.inner().update_trackers.ack_map);
debug!("sync_map: {:?}", self.update_trackers.sync_map); debug!("sync_map: {:?}", self.inner().update_trackers.sync_map);
debug!("sync_ack_map: {:?}", self.update_trackers.sync_ack_map); debug!(
"sync_ack_map: {:?}",
self.inner().update_trackers.sync_ack_map
);
} }
fn sync_first(&mut self, local_node_id: Uuid) { fn sync_first(&mut self, local_node_id: Uuid) {
let first_version = self.min_stored(); let first_version = self.inner().min_stored();
self.update(|layout| { self.update(|layout| {
layout layout
.update_trackers .update_trackers
@ -286,8 +287,7 @@ impl LayoutHelper {
} }
pub(crate) fn max_free_ack(&self) -> u64 { pub(crate) fn max_free_ack(&self) -> u64 {
self.layout() self.versions()
.versions
.iter() .iter()
.map(|x| x.version) .map(|x| x.version)
.skip_while(|v| { .skip_while(|v| {

View file

@ -109,7 +109,7 @@ impl LayoutManager {
} }
pub fn add_table(&self, table_name: &'static str) { pub fn add_table(&self, table_name: &'static str) {
let first_version = self.layout().versions.first().unwrap().version; let first_version = self.layout().versions().first().unwrap().version;
self.table_sync_version self.table_sync_version
.lock() .lock()
@ -127,7 +127,7 @@ impl LayoutManager {
if layout.update(|l| l.update_trackers.sync_map.set_max(self.node_id, sync_until)) { if layout.update(|l| l.update_trackers.sync_map.set_max(self.node_id, sync_until)) {
info!("sync_until updated to {}", sync_until); info!("sync_until updated to {}", sync_until);
self.broadcast_update(SystemRpc::AdvertiseClusterLayoutTrackers( self.broadcast_update(SystemRpc::AdvertiseClusterLayoutTrackers(
layout.update_trackers.clone(), layout.inner().update_trackers.clone(),
)); ));
} }
} }
@ -136,7 +136,7 @@ impl LayoutManager {
let mut layout = self.layout.write().unwrap(); let mut layout = self.layout.write().unwrap();
if layout.ack_max_free(self.node_id) { if layout.ack_max_free(self.node_id) {
self.broadcast_update(SystemRpc::AdvertiseClusterLayoutTrackers( self.broadcast_update(SystemRpc::AdvertiseClusterLayoutTrackers(
layout.update_trackers.clone(), layout.inner().update_trackers.clone(),
)); ));
} }
} }
@ -160,16 +160,16 @@ impl LayoutManager {
fn merge_layout(&self, adv: &LayoutHistory) -> Option<LayoutHistory> { fn merge_layout(&self, adv: &LayoutHistory) -> Option<LayoutHistory> {
let mut layout = self.layout.write().unwrap(); let mut layout = self.layout.write().unwrap();
let prev_digest = layout.digest(); let prev_digest = layout.digest();
let prev_layout_check = layout.check().is_ok(); let prev_layout_check = layout.inner().check().is_ok();
if !prev_layout_check || adv.check().is_ok() { if !prev_layout_check || adv.check().is_ok() {
if layout.update(|l| l.merge(adv)) { if layout.update(|l| l.merge(adv)) {
layout.update_trackers(self.node_id); layout.update_trackers(self.node_id);
if prev_layout_check && layout.check().is_err() { if prev_layout_check && layout.inner().check().is_err() {
panic!("Merged two correct layouts and got an incorrect layout."); panic!("Merged two correct layouts and got an incorrect layout.");
} }
assert!(layout.digest() != prev_digest); assert!(layout.digest() != prev_digest);
return Some(layout.clone()); return Some(layout.inner().clone());
} }
} }
@ -180,11 +180,11 @@ impl LayoutManager {
let mut layout = self.layout.write().unwrap(); let mut layout = self.layout.write().unwrap();
let prev_digest = layout.digest(); let prev_digest = layout.digest();
if layout.update_trackers != *adv { if layout.inner().update_trackers != *adv {
if layout.update(|l| l.update_trackers.merge(adv)) { if layout.update(|l| l.update_trackers.merge(adv)) {
layout.update_trackers(self.node_id); layout.update_trackers(self.node_id);
assert!(layout.digest() != prev_digest); assert!(layout.digest() != prev_digest);
return Some(layout.update_trackers.clone()); return Some(layout.inner().update_trackers.clone());
} }
} }
@ -230,7 +230,7 @@ impl LayoutManager {
/// Save cluster layout data to disk /// Save cluster layout data to disk
async fn save_cluster_layout(&self) -> Result<(), Error> { async fn save_cluster_layout(&self) -> Result<(), Error> {
let layout = self.layout.read().unwrap().clone(); let layout = self.layout.read().unwrap().inner().clone();
self.persist_cluster_layout self.persist_cluster_layout
.save_async(&layout) .save_async(&layout)
.await .await
@ -278,13 +278,13 @@ impl LayoutManager {
} }
pub(crate) fn handle_pull_cluster_layout(&self) -> SystemRpc { pub(crate) fn handle_pull_cluster_layout(&self) -> SystemRpc {
let layout = self.layout.read().unwrap().clone(); let layout = self.layout.read().unwrap().inner().clone();
SystemRpc::AdvertiseClusterLayout(layout) SystemRpc::AdvertiseClusterLayout(layout)
} }
pub(crate) fn handle_pull_cluster_layout_trackers(&self) -> SystemRpc { pub(crate) fn handle_pull_cluster_layout_trackers(&self) -> SystemRpc {
let layout = self.layout.read().unwrap(); let layout = self.layout.read().unwrap();
SystemRpc::AdvertiseClusterLayoutTrackers(layout.update_trackers.clone()) SystemRpc::AdvertiseClusterLayoutTrackers(layout.inner().update_trackers.clone())
} }
pub(crate) async fn handle_advertise_cluster_layout( pub(crate) async fn handle_advertise_cluster_layout(

View file

@ -26,7 +26,7 @@ use garage_util::data::*;
use garage_util::error::Error; use garage_util::error::Error;
use garage_util::metrics::RecordDuration; use garage_util::metrics::RecordDuration;
use crate::layout::{LayoutHelper, LayoutHistory}; use crate::layout::{LayoutHelper, LayoutVersion};
use crate::metrics::RpcMetrics; use crate::metrics::RpcMetrics;
// Default RPC timeout = 5 minutes // Default RPC timeout = 5 minutes
@ -304,7 +304,8 @@ impl RpcHelper {
// preemptively send an additional request to any remaining nodes. // preemptively send an additional request to any remaining nodes.
// Reorder requests to prioritize closeness / low latency // Reorder requests to prioritize closeness / low latency
let request_order = self.request_order(&self.0.layout.read().unwrap(), to.iter().copied()); let request_order =
self.request_order(&self.0.layout.read().unwrap().current(), to.iter().copied());
let send_all_at_once = strategy.rs_send_all_at_once.unwrap_or(false); let send_all_at_once = strategy.rs_send_all_at_once.unwrap_or(false);
// Build future for each request // Build future for each request
@ -497,16 +498,16 @@ impl RpcHelper {
let mut ret = Vec::with_capacity(12); let mut ret = Vec::with_capacity(12);
let ver_iter = layout let ver_iter = layout
.versions .versions()
.iter() .iter()
.rev() .rev()
.chain(layout.old_versions.iter().rev()); .chain(layout.inner().old_versions.iter().rev());
for ver in ver_iter { for ver in ver_iter {
if ver.version > layout.sync_map_min() { if ver.version > layout.sync_map_min() {
continue; continue;
} }
let nodes = ver.nodes_of(position, ver.replication_factor); let nodes = ver.nodes_of(position, ver.replication_factor);
for node in rpc_helper.request_order(&layout, nodes) { for node in rpc_helper.request_order(layout.current(), nodes) {
if !ret.contains(&node) { if !ret.contains(&node) {
ret.push(node); ret.push(node);
} }
@ -517,15 +518,12 @@ impl RpcHelper {
fn request_order( fn request_order(
&self, &self,
layout: &LayoutHistory, layout: &LayoutVersion,
nodes: impl Iterator<Item = Uuid>, nodes: impl Iterator<Item = Uuid>,
) -> Vec<Uuid> { ) -> Vec<Uuid> {
// Retrieve some status variables that we will use to sort requests // Retrieve some status variables that we will use to sort requests
let peer_list = self.0.peering.get_peer_list(); let peer_list = self.0.peering.get_peer_list();
let our_zone = layout let our_zone = layout.get_node_zone(&self.0.our_node_id).unwrap_or("");
.current()
.get_node_zone(&self.0.our_node_id)
.unwrap_or("");
// Augment requests with some information used to sort them. // Augment requests with some information used to sort them.
// The tuples are as follows: // The tuples are as follows:
@ -535,7 +533,7 @@ impl RpcHelper {
// and within the same zone we prioritize nodes with the lowest latency. // and within the same zone we prioritize nodes with the lowest latency.
let mut nodes = nodes let mut nodes = nodes
.map(|to| { .map(|to| {
let peer_zone = layout.current().get_node_zone(&to).unwrap_or(""); let peer_zone = layout.get_node_zone(&to).unwrap_or("");
let peer_avg_ping = peer_list let peer_avg_ping = peer_list
.iter() .iter()
.find(|x| x.id.as_ref() == to.as_slice()) .find(|x| x.id.as_ref() == to.as_slice())

View file

@ -451,7 +451,7 @@ impl System {
// Obtain information about nodes that have a role as storage nodes // Obtain information about nodes that have a role as storage nodes
// in one of the active layout versions // in one of the active layout versions
let mut storage_nodes = HashSet::<Uuid>::with_capacity(16); let mut storage_nodes = HashSet::<Uuid>::with_capacity(16);
for ver in layout.versions.iter() { for ver in layout.versions().iter() {
storage_nodes.extend( storage_nodes.extend(
ver.roles ver.roles
.items() .items()
@ -470,7 +470,7 @@ impl System {
let mut partitions_all_ok = 0; let mut partitions_all_ok = 0;
for (_, hash) in partitions.iter() { for (_, hash) in partitions.iter() {
let mut write_sets = layout let mut write_sets = layout
.versions .versions()
.iter() .iter()
.map(|x| x.nodes_of(hash, x.replication_factor)); .map(|x| x.nodes_of(hash, x.replication_factor));
let has_quorum = write_sets let has_quorum = write_sets
@ -634,7 +634,7 @@ impl System {
.filter(|p| p.is_up()) .filter(|p| p.is_up())
.count(); .count();
let not_configured = self.cluster_layout().check().is_err(); let not_configured = self.cluster_layout().inner().check().is_err();
let no_peers = n_connected < self.replication_factor.into(); let no_peers = n_connected < self.replication_factor.into();
let expected_n_nodes = self.cluster_layout().all_nodes().len(); let expected_n_nodes = self.cluster_layout().all_nodes().len();
let bad_peers = n_connected != expected_n_nodes; let bad_peers = n_connected != expected_n_nodes;