Garage v1.0 #683

Merged
lx merged 119 commits from next-0.10 into main 2024-04-10 15:23:13 +00:00
4 changed files with 96 additions and 70 deletions
Showing only changes of commit 19ef1ec8e7 - Show all commits

View file

@ -1,3 +1,5 @@
use std::sync::Arc;
use bytesize::ByteSize; use bytesize::ByteSize;
use format_table::format_table; use format_table::format_table;
@ -321,7 +323,7 @@ pub async fn fetch_layout(
.call(&rpc_host, SystemRpc::PullClusterLayout, PRIO_NORMAL) .call(&rpc_host, SystemRpc::PullClusterLayout, PRIO_NORMAL)
.await?? .await??
{ {
SystemRpc::AdvertiseClusterLayout(t) => Ok(t), SystemRpc::AdvertiseClusterLayout(t) => Ok(Arc::try_unwrap(t).unwrap()),
resp => Err(Error::Message(format!("Invalid RPC response: {:?}", resp))), resp => Err(Error::Message(format!("Invalid RPC response: {:?}", resp))),
} }
} }
@ -334,7 +336,7 @@ pub async fn send_layout(
rpc_cli rpc_cli
.call( .call(
&rpc_host, &rpc_host,
SystemRpc::AdvertiseClusterLayout(layout), SystemRpc::AdvertiseClusterLayout(Arc::new(layout)),
PRIO_NORMAL, PRIO_NORMAL,
) )
.await??; .await??;

View file

@ -1,6 +1,8 @@
use std::sync::Arc; use std::sync::Arc;
use std::time::Duration; use std::time::Duration;
use serde::{Deserialize, Serialize};
use tokio::sync::watch; use tokio::sync::watch;
use tokio::sync::Mutex; use tokio::sync::Mutex;
@ -28,6 +30,16 @@ pub struct LayoutManager {
system_endpoint: Arc<Endpoint<SystemRpc, System>>, system_endpoint: Arc<Endpoint<SystemRpc, System>>,
} }
#[derive(Debug, Clone, Serialize, Deserialize, Default)]
pub struct LayoutStatus {
/// Cluster layout version
pub cluster_layout_version: u64,
/// Hash of cluster layout update trackers
// (TODO) pub cluster_layout_trackers_hash: Hash,
/// Hash of cluster layout staging data
pub cluster_layout_staging_hash: Hash,
}
impl LayoutManager { impl LayoutManager {
pub fn new( pub fn new(
config: &Config, config: &Config,
@ -35,7 +47,7 @@ impl LayoutManager {
system_endpoint: Arc<Endpoint<SystemRpc, System>>, system_endpoint: Arc<Endpoint<SystemRpc, System>>,
fullmesh: Arc<FullMeshPeeringStrategy>, fullmesh: Arc<FullMeshPeeringStrategy>,
replication_factor: usize, replication_factor: usize,
) -> Result<Self, Error> { ) -> Result<Arc<Self>, Error> {
let persist_cluster_layout: Persister<LayoutHistory> = let persist_cluster_layout: Persister<LayoutHistory> =
Persister::new(&config.metadata_dir, "cluster_layout"); Persister::new(&config.metadata_dir, "cluster_layout");
@ -68,28 +80,39 @@ impl LayoutManager {
config.rpc_timeout_msec.map(Duration::from_millis), config.rpc_timeout_msec.map(Duration::from_millis),
); );
Ok(Self { Ok(Arc::new(Self {
replication_factor, replication_factor,
persist_cluster_layout, persist_cluster_layout,
layout_watch, layout_watch,
update_layout: Mutex::new(update_layout), update_layout: Mutex::new(update_layout),
system_endpoint, system_endpoint,
rpc_helper, rpc_helper,
}) }))
} }
// ---- PUBLIC INTERFACE ---- // ---- PUBLIC INTERFACE ----
pub async fn update_cluster_layout(&self, layout: &LayoutHistory) -> Result<(), Error> { pub fn status(&self) -> LayoutStatus {
let layout = self.layout();
LayoutStatus {
cluster_layout_version: layout.current().version,
cluster_layout_staging_hash: layout.staging_hash,
}
}
pub async fn update_cluster_layout(
self: &Arc<Self>,
layout: &LayoutHistory,
) -> Result<(), Error> {
self.handle_advertise_cluster_layout(layout).await?; self.handle_advertise_cluster_layout(layout).await?;
Ok(()) Ok(())
} }
pub fn history(&self) -> watch::Ref<Arc<LayoutHistory>> { pub fn layout(&self) -> watch::Ref<Arc<LayoutHistory>> {
self.layout_watch.borrow() self.layout_watch.borrow()
} }
pub(crate) async fn pull_cluster_layout(&self, peer: Uuid) { pub(crate) async fn pull_cluster_layout(self: &Arc<Self>, peer: Uuid) {
let resp = self let resp = self
.rpc_helper .rpc_helper
.call( .call(
@ -118,13 +141,25 @@ impl LayoutManager {
// ---- RPC HANDLERS ---- // ---- RPC HANDLERS ----
pub(crate) fn handle_advertise_status(self: &Arc<Self>, from: Uuid, status: &LayoutStatus) {
let local_status = self.status();
if status.cluster_layout_version > local_status.cluster_layout_version
|| status.cluster_layout_staging_hash != local_status.cluster_layout_staging_hash
{
tokio::spawn({
let this = self.clone();
async move { this.pull_cluster_layout(from).await }
});
}
}
pub(crate) fn handle_pull_cluster_layout(&self) -> SystemRpc { pub(crate) fn handle_pull_cluster_layout(&self) -> SystemRpc {
let layout = self.layout_watch.borrow().as_ref().clone(); let layout = self.layout_watch.borrow().clone();
SystemRpc::AdvertiseClusterLayout(layout) SystemRpc::AdvertiseClusterLayout(layout)
} }
pub(crate) async fn handle_advertise_cluster_layout( pub(crate) async fn handle_advertise_cluster_layout(
&self, self: &Arc<Self>,
adv: &LayoutHistory, adv: &LayoutHistory,
) -> Result<SystemRpc, Error> { ) -> Result<SystemRpc, Error> {
if adv.current().replication_factor != self.replication_factor { if adv.current().replication_factor != self.replication_factor {
@ -137,39 +172,42 @@ impl LayoutManager {
return Err(Error::Message(msg)); return Err(Error::Message(msg));
} }
let update_layout = self.update_layout.lock().await; if *adv != **self.layout_watch.borrow() {
// TODO: don't clone each time an AdvertiseClusterLayout is received let update_layout = self.update_layout.lock().await;
let mut layout: LayoutHistory = self.layout_watch.borrow().as_ref().clone(); let mut layout: LayoutHistory = self.layout_watch.borrow().as_ref().clone();
let prev_layout_check = layout.check().is_ok(); let prev_layout_check = layout.check().is_ok();
if layout.merge(adv) { if layout.merge(adv) {
if prev_layout_check && layout.check().is_err() { if prev_layout_check && layout.check().is_err() {
error!("New cluster layout is invalid, discarding."); error!("New cluster layout is invalid, discarding.");
return Err(Error::Message( return Err(Error::Message(
"New cluster layout is invalid, discarding.".into(), "New cluster layout is invalid, discarding.".into(),
)); ));
}
update_layout.send(Arc::new(layout.clone()))?;
drop(update_layout);
/* TODO
tokio::spawn(async move {
if let Err(e) = system
.rpc_helper()
.broadcast(
&system.system_endpoint,
SystemRpc::AdvertiseClusterLayout(layout),
RequestStrategy::with_priority(PRIO_HIGH),
)
.await
{
warn!("Error while broadcasting new cluster layout: {}", e);
} }
});
*/
self.save_cluster_layout().await?; let layout = Arc::new(layout);
update_layout.send(layout.clone())?;
drop(update_layout); // release mutex
tokio::spawn({
let this = self.clone();
async move {
if let Err(e) = this
.rpc_helper
.broadcast(
&this.system_endpoint,
SystemRpc::AdvertiseClusterLayout(layout),
RequestStrategy::with_priority(PRIO_HIGH),
)
.await
{
warn!("Error while broadcasting new cluster layout: {}", e);
}
}
});
self.save_cluster_layout().await?;
}
} }
Ok(SystemRpc::Ok) Ok(SystemRpc::Ok)

View file

@ -226,7 +226,7 @@ mod v010 {
} }
/// The history of cluster layouts /// The history of cluster layouts
#[derive(Clone, Debug, Serialize, Deserialize)] #[derive(Clone, Debug, Serialize, Deserialize, PartialEq)]
pub struct LayoutHistory { pub struct LayoutHistory {
/// The versions currently in use in the cluster /// The versions currently in use in the cluster
pub versions: Vec<LayoutVersion>, pub versions: Vec<LayoutVersion>,
@ -241,7 +241,7 @@ mod v010 {
} }
/// The tracker of acknowlegments and data syncs around the cluster /// The tracker of acknowlegments and data syncs around the cluster
#[derive(Clone, Debug, Serialize, Deserialize, Default)] #[derive(Clone, Debug, Serialize, Deserialize, Default, PartialEq)]
pub struct UpdateTrackers { pub struct UpdateTrackers {
/// The highest layout version number each node has ack'ed /// The highest layout version number each node has ack'ed
pub ack_map: UpdateTracker, pub ack_map: UpdateTracker,
@ -253,7 +253,7 @@ mod v010 {
} }
/// The history of cluster layouts /// The history of cluster layouts
#[derive(Clone, Debug, Serialize, Deserialize, Default)] #[derive(Clone, Debug, Serialize, Deserialize, Default, PartialEq)]
pub struct UpdateTracker(pub HashMap<Uuid, u64>); pub struct UpdateTracker(pub HashMap<Uuid, u64>);
impl garage_util::migrate::Migrate for LayoutHistory { impl garage_util::migrate::Migrate for LayoutHistory {

View file

@ -33,7 +33,7 @@ use garage_util::time::*;
use crate::consul::ConsulDiscovery; use crate::consul::ConsulDiscovery;
#[cfg(feature = "kubernetes-discovery")] #[cfg(feature = "kubernetes-discovery")]
use crate::kubernetes::*; use crate::kubernetes::*;
use crate::layout::manager::LayoutManager; use crate::layout::manager::{LayoutManager, LayoutStatus};
use crate::layout::*; use crate::layout::*;
use crate::replication_mode::*; use crate::replication_mode::*;
use crate::rpc_helper::*; use crate::rpc_helper::*;
@ -68,7 +68,7 @@ pub enum SystemRpc {
/// Ask other node its cluster layout. Answered with AdvertiseClusterLayout /// Ask other node its cluster layout. Answered with AdvertiseClusterLayout
PullClusterLayout, PullClusterLayout,
/// Advertisement of cluster layout. Sent spontanously or in response to PullClusterLayout /// Advertisement of cluster layout. Sent spontanously or in response to PullClusterLayout
AdvertiseClusterLayout(LayoutHistory), AdvertiseClusterLayout(Arc<LayoutHistory>),
} }
impl Rpc for SystemRpc { impl Rpc for SystemRpc {
@ -104,7 +104,7 @@ pub struct System {
#[cfg(feature = "kubernetes-discovery")] #[cfg(feature = "kubernetes-discovery")]
kubernetes_discovery: Option<KubernetesDiscoveryConfig>, kubernetes_discovery: Option<KubernetesDiscoveryConfig>,
pub layout_manager: LayoutManager, pub layout_manager: Arc<LayoutManager>,
metrics: SystemMetrics, metrics: SystemMetrics,
@ -125,12 +125,8 @@ pub struct NodeStatus {
/// Replication factor configured on the node /// Replication factor configured on the node
pub replication_factor: usize, pub replication_factor: usize,
/// Cluster layout version /// Layout status
pub cluster_layout_version: u64, pub layout_status: LayoutStatus,
/// Hash of cluster layout update trackers
// (TODO) pub cluster_layout_trackers_hash: Hash,
/// Hash of cluster layout staging data
pub cluster_layout_staging_hash: Hash,
/// Disk usage on partition containing metadata directory (tuple: `(avail, total)`) /// Disk usage on partition containing metadata directory (tuple: `(avail, total)`)
#[serde(default)] #[serde(default)]
@ -284,7 +280,7 @@ impl System {
// ---- set up metrics and status exchange ---- // ---- set up metrics and status exchange ----
let metrics = SystemMetrics::new(replication_factor); let metrics = SystemMetrics::new(replication_factor);
let mut local_status = NodeStatus::initial(replication_factor, &layout_manager.history()); let mut local_status = NodeStatus::initial(replication_factor, &layout_manager);
local_status.update_disk_usage(&config.metadata_dir, &config.data_dir, &metrics); local_status.update_disk_usage(&config.metadata_dir, &config.data_dir, &metrics);
// ---- if enabled, set up additionnal peer discovery methods ---- // ---- if enabled, set up additionnal peer discovery methods ----
@ -350,7 +346,7 @@ impl System {
// ---- Public utilities / accessors ---- // ---- Public utilities / accessors ----
pub fn cluster_layout(&self) -> watch::Ref<Arc<LayoutHistory>> { pub fn cluster_layout(&self) -> watch::Ref<Arc<LayoutHistory>> {
self.layout_manager.history() self.layout_manager.layout()
} }
pub fn layout_watch(&self) -> watch::Receiver<Arc<LayoutHistory>> { pub fn layout_watch(&self) -> watch::Receiver<Arc<LayoutHistory>> {
@ -536,9 +532,7 @@ impl System {
fn update_local_status(&self) { fn update_local_status(&self) {
let mut new_si: NodeStatus = self.local_status.load().as_ref().clone(); let mut new_si: NodeStatus = self.local_status.load().as_ref().clone();
let layout = self.cluster_layout(); new_si.layout_status = self.layout_manager.status();
new_si.cluster_layout_version = layout.current().version;
new_si.cluster_layout_staging_hash = layout.staging_hash;
new_si.update_disk_usage(&self.metadata_dir, &self.data_dir, &self.metrics); new_si.update_disk_usage(&self.metadata_dir, &self.data_dir, &self.metrics);
@ -571,14 +565,8 @@ impl System {
std::process::exit(1); std::process::exit(1);
} }
if info.cluster_layout_version > local_info.cluster_layout_version self.layout_manager
|| info.cluster_layout_staging_hash != local_info.cluster_layout_staging_hash .handle_advertise_status(from, &info.layout_status);
{
tokio::spawn({
let system = self.clone();
async move { system.layout_manager.pull_cluster_layout(from).await }
});
}
self.node_status self.node_status
.write() .write()
@ -746,14 +734,13 @@ impl EndpointHandler<SystemRpc> for System {
} }
impl NodeStatus { impl NodeStatus {
fn initial(replication_factor: usize, layout: &LayoutHistory) -> Self { fn initial(replication_factor: usize, layout_manager: &LayoutManager) -> Self {
NodeStatus { NodeStatus {
hostname: gethostname::gethostname() hostname: gethostname::gethostname()
.into_string() .into_string()
.unwrap_or_else(|_| "<invalid utf-8>".to_string()), .unwrap_or_else(|_| "<invalid utf-8>".to_string()),
replication_factor, replication_factor,
cluster_layout_version: layout.current().version, layout_status: layout_manager.status(),
cluster_layout_staging_hash: layout.staging_hash,
meta_disk_avail: None, meta_disk_avail: None,
data_disk_avail: None, data_disk_avail: None,
} }
@ -763,8 +750,7 @@ impl NodeStatus {
NodeStatus { NodeStatus {
hostname: "?".to_string(), hostname: "?".to_string(),
replication_factor: 0, replication_factor: 0,
cluster_layout_version: 0, layout_status: Default::default(),
cluster_layout_staging_hash: Hash::from([0u8; 32]),
meta_disk_avail: None, meta_disk_avail: None,
data_disk_avail: None, data_disk_avail: None,
} }