From 707442f5de416fdbed4681a33b739f0a787b7834 Mon Sep 17 00:00:00 2001 From: Alex Auvolat Date: Thu, 16 Nov 2023 13:51:40 +0100 Subject: [PATCH] layout: refactor digests and add "!=" assertions before epidemic bcast --- src/rpc/layout/helper.rs | 27 +++++++++++++++++++++++++-- src/rpc/layout/history.rs | 1 - src/rpc/layout/manager.rs | 36 ++++++++++-------------------------- src/rpc/layout/mod.rs | 2 +- src/rpc/system.rs | 17 +++++++++-------- 5 files changed, 45 insertions(+), 38 deletions(-) diff --git a/src/rpc/layout/helper.rs b/src/rpc/layout/helper.rs index ed3da498..0d746ea3 100644 --- a/src/rpc/layout/helper.rs +++ b/src/rpc/layout/helper.rs @@ -2,10 +2,24 @@ use std::collections::HashMap; use std::ops::Deref; use std::sync::atomic::{AtomicUsize, Ordering}; +use serde::{Deserialize, Serialize}; + use garage_util::data::*; use super::schema::*; +#[derive(Debug, Clone, Serialize, Deserialize, Default, PartialEq, Eq)] +pub struct LayoutDigest { + /// Cluster layout version + pub current_version: u64, + /// Number of active layout versions + pub active_versions: usize, + /// Hash of cluster layout update trackers + pub trackers_hash: Hash, + /// Hash of cluster layout staging data + pub staging_hash: Hash, +} + pub struct LayoutHelper { layout: Option, @@ -16,8 +30,8 @@ pub struct LayoutHelper { all_nodes: Vec, all_nongateway_nodes: Vec, - pub(crate) trackers_hash: Hash, - pub(crate) staging_hash: Hash, + trackers_hash: Hash, + staging_hash: Hash, // ack lock: counts in-progress write operations for each // layout version ; we don't increase the ack update tracker @@ -152,6 +166,15 @@ impl LayoutHelper { self.staging_hash } + pub fn digest(&self) -> LayoutDigest { + LayoutDigest { + current_version: self.current().version, + active_versions: self.versions.len(), + trackers_hash: self.trackers_hash, + staging_hash: self.staging_hash, + } + } + // ------------------ helpers for update tracking --------------- pub(crate) fn update_trackers(&mut self, local_node_id: Uuid) { diff --git a/src/rpc/layout/history.rs b/src/rpc/layout/history.rs index 0a139549..653d2a48 100644 --- a/src/rpc/layout/history.rs +++ b/src/rpc/layout/history.rs @@ -5,7 +5,6 @@ use garage_util::data::*; use garage_util::encode::nonversioned_encode; use garage_util::error::*; -use super::schema::*; use super::*; impl LayoutHistory { diff --git a/src/rpc/layout/manager.rs b/src/rpc/layout/manager.rs index 85d94ffa..c65831a2 100644 --- a/src/rpc/layout/manager.rs +++ b/src/rpc/layout/manager.rs @@ -2,8 +2,6 @@ use std::collections::HashMap; use std::sync::{atomic::Ordering, Arc, Mutex, RwLock, RwLockReadGuard}; use std::time::Duration; -use serde::{Deserialize, Serialize}; - use tokio::sync::Notify; use netapp::endpoint::Endpoint; @@ -33,16 +31,6 @@ pub struct LayoutManager { system_endpoint: Arc>, } -#[derive(Debug, Clone, Serialize, Deserialize, Default)] -pub struct LayoutStatus { - /// Cluster layout version - pub cluster_layout_version: u64, - /// Hash of cluster layout update trackers - pub cluster_layout_trackers_hash: Hash, - /// Hash of cluster layout staging data - pub cluster_layout_staging_hash: Hash, -} - impl LayoutManager { pub fn new( config: &Config, @@ -105,15 +93,6 @@ impl LayoutManager { self.layout.read().unwrap() } - pub fn status(&self) -> LayoutStatus { - let layout = self.layout(); - LayoutStatus { - cluster_layout_version: layout.current().version, - cluster_layout_trackers_hash: layout.trackers_hash(), - cluster_layout_staging_hash: layout.staging_hash(), - } - } - pub async fn update_cluster_layout( self: &Arc, layout: &LayoutHistory, @@ -173,6 +152,7 @@ impl LayoutManager { fn merge_layout(&self, adv: &LayoutHistory) -> Option { let mut layout = self.layout.write().unwrap(); + let prev_digest = layout.digest(); let prev_layout_check = layout.check().is_ok(); if !prev_layout_check || adv.check().is_ok() { @@ -181,6 +161,7 @@ impl LayoutManager { if prev_layout_check && layout.check().is_err() { panic!("Merged two correct layouts and got an incorrect layout."); } + assert!(layout.digest() != prev_digest); return Some(layout.clone()); } } @@ -190,10 +171,12 @@ impl LayoutManager { fn merge_layout_trackers(&self, adv: &UpdateTrackers) -> Option { let mut layout = self.layout.write().unwrap(); + let prev_digest = layout.digest(); if layout.update_trackers != *adv { if layout.update(|l| l.update_trackers.merge(adv)) { layout.update_trackers(self.node_id); + assert!(layout.digest() != prev_digest); return Some(layout.update_trackers.clone()); } } @@ -269,16 +252,17 @@ impl LayoutManager { // ---- RPC HANDLERS ---- - pub(crate) fn handle_advertise_status(self: &Arc, from: Uuid, remote: &LayoutStatus) { - let local = self.status(); - if remote.cluster_layout_version > local.cluster_layout_version - || remote.cluster_layout_staging_hash != local.cluster_layout_staging_hash + pub(crate) fn handle_advertise_status(self: &Arc, from: Uuid, remote: &LayoutDigest) { + let local = self.layout().digest(); + if remote.current_version > local.current_version + || remote.active_versions != local.active_versions + || remote.staging_hash != local.staging_hash { tokio::spawn({ let this = self.clone(); async move { this.pull_cluster_layout(from).await } }); - } else if remote.cluster_layout_trackers_hash != local.cluster_layout_trackers_hash { + } else if remote.trackers_hash != local.trackers_hash { tokio::spawn({ let this = self.clone(); async move { this.pull_cluster_layout_trackers(from).await } diff --git a/src/rpc/layout/mod.rs b/src/rpc/layout/mod.rs index 91151ab4..eb127fda 100644 --- a/src/rpc/layout/mod.rs +++ b/src/rpc/layout/mod.rs @@ -11,7 +11,7 @@ pub mod manager; // ---- re-exports ---- -pub use helper::LayoutHelper; +pub use helper::{LayoutDigest, LayoutHelper}; pub use manager::WriteLock; pub use schema::*; pub use version::*; diff --git a/src/rpc/system.rs b/src/rpc/system.rs index d74dc2a1..dc127afb 100644 --- a/src/rpc/system.rs +++ b/src/rpc/system.rs @@ -33,8 +33,9 @@ use garage_util::time::*; use crate::consul::ConsulDiscovery; #[cfg(feature = "kubernetes-discovery")] use crate::kubernetes::*; -use crate::layout::manager::{LayoutManager, LayoutStatus}; -use crate::layout::{self, LayoutHelper, LayoutHistory, NodeRoleV}; +use crate::layout::{ + self, manager::LayoutManager, LayoutDigest, LayoutHelper, LayoutHistory, NodeRoleV, +}; use crate::replication_mode::*; use crate::rpc_helper::*; @@ -130,8 +131,8 @@ pub struct NodeStatus { /// Replication factor configured on the node pub replication_factor: usize, - /// Layout status - pub layout_status: LayoutStatus, + /// Cluster layout digest + pub layout_digest: LayoutDigest, /// Disk usage on partition containing metadata directory (tuple: `(avail, total)`) #[serde(default)] @@ -539,7 +540,7 @@ impl System { fn update_local_status(&self) { let mut new_si: NodeStatus = self.local_status.load().as_ref().clone(); - new_si.layout_status = self.layout_manager.status(); + new_si.layout_digest = self.layout_manager.layout().digest(); new_si.update_disk_usage(&self.metadata_dir, &self.data_dir, &self.metrics); @@ -573,7 +574,7 @@ impl System { } self.layout_manager - .handle_advertise_status(from, &info.layout_status); + .handle_advertise_status(from, &info.layout_digest); self.node_status .write() @@ -755,7 +756,7 @@ impl NodeStatus { .into_string() .unwrap_or_else(|_| "".to_string()), replication_factor, - layout_status: layout_manager.status(), + layout_digest: layout_manager.layout().digest(), meta_disk_avail: None, data_disk_avail: None, } @@ -765,7 +766,7 @@ impl NodeStatus { NodeStatus { hostname: "?".to_string(), replication_factor: 0, - layout_status: Default::default(), + layout_digest: Default::default(), meta_disk_avail: None, data_disk_avail: None, }