//! Module containing structs related to membership management use std::io::{Read, Write}; use std::net::SocketAddr; use std::path::{Path, PathBuf}; use std::sync::Arc; use std::time::Duration; use arc_swap::ArcSwap; use async_trait::async_trait; use futures::{join, select}; use futures_util::future::*; use serde::{Deserialize, Serialize}; use sodiumoxide::crypto::sign::ed25519; use tokio::sync::watch; use tokio::sync::Mutex; use netapp::endpoint::{Endpoint, EndpointHandler, Message}; use netapp::peering::fullmesh::FullMeshPeeringStrategy; use netapp::proto::*; use netapp::{NetApp, NetworkKey, NodeID, NodeKey}; use garage_util::background::BackgroundRunner; use garage_util::error::Error; use garage_util::persister::Persister; //use garage_util::time::*; //use crate::consul::get_consul_nodes; use crate::ring::*; use crate::rpc_helper::{RequestStrategy, RpcHelper}; const DISCOVERY_INTERVAL: Duration = Duration::from_secs(60); const PING_TIMEOUT: Duration = Duration::from_secs(2); /// RPC endpoint used for calls related to membership pub const SYSTEM_RPC_PATH: &str = "garage_rpc/membership.rs/SystemRpc"; /// RPC messages related to membership #[derive(Debug, Serialize, Deserialize, Clone)] pub enum SystemRpc { /// Response to successfull advertisements Ok, /// Error response Error(String), /// Ask other node its config. Answered with AdvertiseConfig PullConfig, /// Advertise Garage status. Answered with another AdvertiseStatus. /// Exchanged with every node on a regular basis. AdvertiseStatus(StateInfo), /// Advertisement of nodes config. Sent spontanously or in response to PullConfig AdvertiseConfig(NetworkConfig), /// Get known nodes states GetKnownNodes, /// Return known nodes ReturnKnownNodes(Vec<(NodeID, SocketAddr, bool)>), } impl Message for SystemRpc { type Response = SystemRpc; } /// This node's membership manager pub struct System { /// The id of this node pub id: NodeID, persist_config: Persister, state_info: ArcSwap, pub netapp: Arc, fullmesh: Arc, pub rpc: RpcHelper, system_endpoint: Arc>, rpc_listen_addr: SocketAddr, bootstrap_peers: Vec<(NodeID, SocketAddr)>, consul_host: Option, consul_service_name: Option, replication_factor: usize, /// The ring pub ring: watch::Receiver>, update_ring: Mutex>>, /// The job runner of this node pub background: Arc, } #[derive(Debug, Clone, Serialize, Deserialize)] pub struct StateInfo { /// Hostname of the node pub hostname: String, /// Replication factor configured on the node pub replication_factor: usize, /// Configuration version pub config_version: u64, } fn gen_node_key(metadata_dir: &Path) -> Result { let mut id_file = metadata_dir.to_path_buf(); id_file.push("node_id"); if id_file.as_path().exists() { let mut f = std::fs::File::open(id_file.as_path())?; let mut d = vec![]; f.read_to_end(&mut d)?; if d.len() != 64 { return Err(Error::Message("Corrupt node_id file".to_string())); } let mut key = [0u8; 64]; key.copy_from_slice(&d[..]); Ok(NodeKey::from_slice(&key[..]).unwrap()) } else { let (key, _) = ed25519::gen_keypair(); let mut f = std::fs::File::create(id_file.as_path())?; f.write_all(&key[..])?; Ok(NodeKey::from_slice(&key[..]).unwrap()) } } impl System { /// Create this node's membership manager pub fn new( network_key: NetworkKey, metadata_dir: PathBuf, background: Arc, replication_factor: usize, rpc_listen_addr: SocketAddr, bootstrap_peers: Vec<(NodeID, SocketAddr)>, consul_host: Option, consul_service_name: Option, ) -> Arc { let node_key = gen_node_key(&metadata_dir).expect("Unable to read or generate node ID"); info!("Node public key: {}", hex::encode(&node_key.public_key())); let persist_config = Persister::new(&metadata_dir, "network_config"); let net_config = match persist_config.load() { Ok(x) => x, Err(e) => { match Persister::::new( &metadata_dir, "network_config", ) .load() { Ok(old_config) => NetworkConfig::migrate_from_021(old_config), Err(e2) => { info!( "No valid previous network configuration stored ({}, {}), starting fresh.", e, e2 ); NetworkConfig::new() } } } }; let state_info = StateInfo { hostname: gethostname::gethostname() .into_string() .unwrap_or_else(|_| "".to_string()), replication_factor: replication_factor, config_version: net_config.version, }; let ring = Ring::new(net_config, replication_factor); let (update_ring, ring) = watch::channel(Arc::new(ring)); let netapp = NetApp::new(network_key, node_key); let fullmesh = FullMeshPeeringStrategy::new(netapp.clone(), bootstrap_peers.clone()); let system_endpoint = netapp.endpoint(SYSTEM_RPC_PATH.into()); let sys = Arc::new(System { id: netapp.id.clone(), persist_config, state_info: ArcSwap::new(Arc::new(state_info)), netapp: netapp.clone(), fullmesh: fullmesh.clone(), rpc: RpcHelper { fullmesh: fullmesh.clone(), background: background.clone(), }, system_endpoint, replication_factor, rpc_listen_addr, bootstrap_peers, consul_host, consul_service_name, ring, update_ring: Mutex::new(update_ring), background: background.clone(), }); sys.system_endpoint.set_handler(sys.clone()); sys } /// Perform bootstraping, starting the ping loop pub async fn run(self: Arc, must_exit: watch::Receiver) { join!( self.netapp .clone() .listen(self.rpc_listen_addr, None, must_exit.clone()), self.fullmesh.clone().run(must_exit.clone()), self.discovery_loop(must_exit.clone()), ); } // ---- INTERNALS ---- /// Save network configuration to disc async fn save_network_config(self: Arc) -> Result<(), Error> { let ring: Arc = self.ring.borrow().clone(); self.persist_config .save_async(&ring.config) .await .expect("Cannot save current cluster configuration"); Ok(()) } fn update_state_info(&self) { let mut new_si: StateInfo = self.state_info.load().as_ref().clone(); let ring = self.ring.borrow(); new_si.config_version = ring.config.version; self.state_info.swap(Arc::new(new_si)); } fn handle_pull_config(&self) -> SystemRpc { let ring = self.ring.borrow().clone(); SystemRpc::AdvertiseConfig(ring.config.clone()) } async fn handle_advertise_config( self: Arc, adv: &NetworkConfig, ) -> Result { let update_ring = self.update_ring.lock().await; let ring: Arc = self.ring.borrow().clone(); if adv.version > ring.config.version { let ring = Ring::new(adv.clone(), self.replication_factor); update_ring.send(Arc::new(ring))?; drop(update_ring); let self2 = self.clone(); let adv2 = adv.clone(); self.background.spawn_cancellable(async move { self2 .rpc .broadcast( &self2.system_endpoint, SystemRpc::AdvertiseConfig(adv2), RequestStrategy::with_priority(PRIO_NORMAL), ) .await; Ok(()) }); self.background.spawn(self.clone().save_network_config()); } Ok(SystemRpc::Ok) } async fn discovery_loop(&self, mut stop_signal: watch::Receiver) { /* TODO let consul_config = match (&self.consul_host, &self.consul_service_name) { (Some(ch), Some(csn)) => Some((ch.clone(), csn.clone())), _ => None, }; */ while !*stop_signal.borrow() { let not_configured = self.ring.borrow().config.members.is_empty(); let no_peers = self.fullmesh.get_peer_list().len() < self.replication_factor; let bad_peers = self .fullmesh .get_peer_list() .iter() .filter(|p| p.is_up()) .count() != self.ring.borrow().config.members.len(); if not_configured || no_peers || bad_peers { info!("Doing a bootstrap/discovery step (not_configured: {}, no_peers: {}, bad_peers: {})", not_configured, no_peers, bad_peers); let ping_list = self.bootstrap_peers.clone(); /* *TODO bring this back: persisted list of peers if let Ok(peers) = self.persist_status.load_async().await { ping_list.extend(peers.iter().map(|x| (x.addr, Some(x.id)))); } */ /* * TODO bring this back: get peers from consul if let Some((consul_host, consul_service_name)) = &consul_config { match get_consul_nodes(consul_host, consul_service_name).await { Ok(node_list) => { ping_list.extend(node_list.iter().map(|a| (*a, None))); } Err(e) => { warn!("Could not retrieve node list from Consul: {}", e); } } } */ for (node_id, node_addr) in ping_list { tokio::spawn(self.netapp.clone().try_connect(node_addr, node_id)); } } let restart_at = tokio::time::sleep(DISCOVERY_INTERVAL); select! { _ = restart_at.fuse() => {}, _ = stop_signal.changed().fuse() => {}, } } } async fn pull_config(self: Arc, peer: NodeID) { let resp = self .rpc .call( &self.system_endpoint, peer, SystemRpc::PullConfig, RequestStrategy::with_priority(PRIO_HIGH).with_timeout(PING_TIMEOUT), ) .await; if let Ok(SystemRpc::AdvertiseConfig(config)) = resp { let _: Result<_, _> = self.handle_advertise_config(&config).await; } } } #[async_trait] impl EndpointHandler for System { async fn handle(self: &Arc, msg: &SystemRpc, _from: NodeID) -> SystemRpc { let resp = match msg { SystemRpc::PullConfig => Ok(self.handle_pull_config()), SystemRpc::AdvertiseConfig(adv) => self.clone().handle_advertise_config(&adv).await, SystemRpc::GetKnownNodes => { let known_nodes = self .fullmesh .get_peer_list() .iter() .map(|n| (n.id, n.addr, n.is_up())) .collect::>(); Ok(SystemRpc::ReturnKnownNodes(known_nodes)) } _ => Err(Error::BadRpc("Unexpected RPC message".to_string())), }; match resp { Ok(r) => r, Err(e) => SystemRpc::Error(format!("{}", e)), } } }