From 5225a81dee21603950e7944cd93c40fdb1bd8feb Mon Sep 17 00:00:00 2001 From: Alex Auvolat Date: Thu, 21 Mar 2024 09:47:04 +0100 Subject: [PATCH 1/3] [net-fixes] peering: only count node IDs and not addresses in hash --- src/net/peering.rs | 45 ++++++++++++++++++++++++++++----------------- 1 file changed, 28 insertions(+), 17 deletions(-) diff --git a/src/net/peering.rs b/src/net/peering.rs index 61882a18..f4283683 100644 --- a/src/net/peering.rs +++ b/src/net/peering.rs @@ -164,29 +164,40 @@ struct KnownHosts { impl KnownHosts { fn new() -> Self { let list = HashMap::new(); - let hash = Self::calculate_hash(vec![]); - Self { list, hash } + let mut ret = Self { + list, + hash: hash::Digest::from_slice(&[0u8; 64][..]).unwrap(), + }; + ret.update_hash(); + ret } fn update_hash(&mut self) { - self.hash = Self::calculate_hash(self.connected_peers_vec()); - } - fn connected_peers_vec(&self) -> Vec<(NodeID, SocketAddr)> { - let mut list = Vec::with_capacity(self.list.len()); - for (id, peer) in self.list.iter() { - if peer.state.is_up() { - list.push((*id, peer.addr)); - } - } - list - } - fn calculate_hash(mut list: Vec<(NodeID, SocketAddr)>) -> hash::Digest { + // The hash is a value that is exchanged between nodes when they ping one + // another. Nodes compare their known hosts hash to know if they are connected + // to the same set of nodes. If the hashes differ, they are connected to + // different nodes and they trigger an exchange of the full list of active + // connections. The hash value only represents the set of node IDs and not + // their actual socket addresses, because nodes can be connected via different + // addresses and that shouldn't necessarily trigger a full peer exchange. + let mut list = self + .list + .iter() + .filter(|(_, peer)| peer.state.is_up()) + .map(|(id, _)| *id) + .collect::>(); list.sort(); let mut hash_state = hash::State::new(); - for (id, addr) in list { + for id in list { hash_state.update(&id[..]); - hash_state.update(&format!("{}\n", addr).into_bytes()[..]); } - hash_state.finalize() + self.hash = hash_state.finalize(); + } + fn connected_peers_vec(&self) -> Vec<(NodeID, SocketAddr)> { + self.list + .iter() + .filter(|(_, peer)| peer.state.is_up()) + .map(|(id, peer)| (*id, peer.addr)) + .collect::>() } } From 961b4f9af36a7fb5d3a661ac19e8f2c168bb48ae Mon Sep 17 00:00:00 2001 From: Alex Auvolat Date: Thu, 21 Mar 2024 10:45:34 +0100 Subject: [PATCH 2/3] [net-fixes] fix issues with local peer address (fix #761) --- src/api/admin/cluster.rs | 2 +- src/garage/cli/cmd.rs | 21 +++---- src/net/netapp.rs | 51 ++++++++-------- src/net/peering.rs | 124 +++++++++++++++++---------------------- src/rpc/system.rs | 15 +++-- 5 files changed, 100 insertions(+), 113 deletions(-) diff --git a/src/api/admin/cluster.rs b/src/api/admin/cluster.rs index 8ce6c5ed..8c9cb1e5 100644 --- a/src/api/admin/cluster.rs +++ b/src/api/admin/cluster.rs @@ -27,7 +27,7 @@ pub async fn handle_get_cluster_status(garage: &Arc) -> Result, rpc_host: NodeID) -> vec!["ID\tHostname\tAddress\tTags\tZone\tCapacity\tDataAvail".to_string()]; for adv in status.iter().filter(|adv| adv.is_up) { let host = adv.status.hostname.as_deref().unwrap_or("?"); + let addr = match adv.addr { + Some(addr) => addr.to_string(), + None => "N/A".to_string(), + }; if let Some(NodeRoleV(Some(cfg))) = layout.current().roles.get(&adv.id) { let data_avail = match &adv.status.data_disk_avail { _ if cfg.capacity.is_none() => "N/A".into(), @@ -71,7 +75,7 @@ pub async fn cmd_status(rpc_cli: &Endpoint, rpc_host: NodeID) -> "{id:?}\t{host}\t{addr}\t[{tags}]\t{zone}\t{capacity}\t{data_avail}", id = adv.id, host = host, - addr = adv.addr, + addr = addr, tags = cfg.tags.join(","), zone = cfg.zone, capacity = cfg.capacity_string(), @@ -91,7 +95,7 @@ pub async fn cmd_status(rpc_cli: &Endpoint, rpc_host: NodeID) -> "{id:?}\t{host}\t{addr}\t[{tags}]\t{zone}\tdraining metadata...", id = adv.id, host = host, - addr = adv.addr, + addr = addr, tags = cfg.tags.join(","), zone = cfg.zone, )); @@ -104,7 +108,7 @@ pub async fn cmd_status(rpc_cli: &Endpoint, rpc_host: NodeID) -> "{id:?}\t{h}\t{addr}\t\t\t{new_role}", id = adv.id, h = host, - addr = adv.addr, + addr = addr, new_role = new_role, )); } @@ -120,8 +124,7 @@ pub async fn cmd_status(rpc_cli: &Endpoint, rpc_host: NodeID) -> let tf = timeago::Formatter::new(); let mut drain_msg = false; - let mut failed_nodes = - vec!["ID\tHostname\tAddress\tTags\tZone\tCapacity\tLast seen".to_string()]; + let mut failed_nodes = vec!["ID\tHostname\tTags\tZone\tCapacity\tLast seen".to_string()]; let mut listed = HashSet::new(); for ver in layout.versions.iter().rev() { for (node, _, role) in ver.roles.items().iter() { @@ -142,15 +145,14 @@ pub async fn cmd_status(rpc_cli: &Endpoint, rpc_host: NodeID) -> // Node is in a layout version, is not a gateway node, and is not up: // it is in a failed state, add proper line to the output - let (host, addr, last_seen) = match adv { + let (host, last_seen) = match adv { Some(adv) => ( adv.status.hostname.as_deref().unwrap_or("?"), - adv.addr.to_string(), adv.last_seen_secs_ago .map(|s| tf.convert(Duration::from_secs(s))) .unwrap_or_else(|| "never seen".into()), ), - None => ("??", "??".into(), "never seen".into()), + None => ("??", "never seen".into()), }; let capacity = if ver.version == layout.current().version { cfg.capacity_string() @@ -159,10 +161,9 @@ pub async fn cmd_status(rpc_cli: &Endpoint, rpc_host: NodeID) -> "draining metadata...".to_string() }; failed_nodes.push(format!( - "{id:?}\t{host}\t{addr}\t[{tags}]\t{zone}\t{capacity}\t{last_seen}", + "{id:?}\t{host}\t[{tags}]\t{zone}\t{capacity}\t{last_seen}", id = node, host = host, - addr = addr, tags = cfg.tags.join(","), zone = cfg.zone, capacity = capacity, diff --git a/src/net/netapp.rs b/src/net/netapp.rs index faa51a99..6480a126 100644 --- a/src/net/netapp.rs +++ b/src/net/netapp.rs @@ -292,13 +292,7 @@ impl NetApp { /// the other node with `Netapp::request` pub async fn try_connect(self: Arc, ip: SocketAddr, id: NodeID) -> Result<(), Error> { // Don't connect to ourself, we don't care - // but pretend we did if id == self.id { - tokio::spawn(async move { - if let Some(h) = self.on_connected_handler.load().as_ref() { - h(id, ip, false); - } - }); return Ok(()); } @@ -327,31 +321,32 @@ impl NetApp { /// Close the outgoing connection we have to a node specified by its public key, /// if such a connection is currently open. pub fn disconnect(self: &Arc, id: &NodeID) { + let conn = self.client_conns.write().unwrap().remove(id); + // If id is ourself, we're not supposed to have a connection open - if *id != self.id { - let conn = self.client_conns.write().unwrap().remove(id); - if let Some(c) = conn { - debug!( - "Closing connection to {} ({})", - hex::encode(&c.peer_id[..8]), - c.remote_addr - ); - c.close(); - } else { - return; - } + if *id == self.id { + // sanity check + assert!(conn.is_none(), "had a connection to local node"); + return; } - // call on_disconnected_handler immediately, since the connection - // was removed - // (if id == self.id, we pretend we disconnected) - let id = *id; - let self2 = self.clone(); - tokio::spawn(async move { - if let Some(h) = self2.on_disconnected_handler.load().as_ref() { - h(id, false); - } - }); + if let Some(c) = conn { + debug!( + "Closing connection to {} ({})", + hex::encode(&c.peer_id[..8]), + c.remote_addr + ); + c.close(); + + // call on_disconnected_handler immediately, since the connection was removed + let id = *id; + let self2 = self.clone(); + tokio::spawn(async move { + if let Some(h) = self2.on_disconnected_handler.load().as_ref() { + h(id, false); + } + }); + } } // Called from conn.rs when an incoming connection is successfully established diff --git a/src/net/peering.rs b/src/net/peering.rs index f4283683..0b4fec9e 100644 --- a/src/net/peering.rs +++ b/src/net/peering.rs @@ -43,7 +43,7 @@ impl Message for PingMessage { #[derive(Serialize, Deserialize)] struct PeerListMessage { - pub list: Vec<(NodeID, SocketAddr)>, + pub list: Vec<(NodeID, Vec)>, } impl Message for PeerListMessage { @@ -54,12 +54,8 @@ impl Message for PeerListMessage { #[derive(Debug)] struct PeerInfoInternal { - // addr is the currently connected address, - // or the last address we were connected to, - // or an arbitrary address some other peer gave us - addr: SocketAddr, - // all_addrs contains all of the addresses everyone gave us - all_addrs: Vec, + // known_addrs contains all of the addresses everyone gave us + known_addrs: Vec, state: PeerConnState, last_send_ping: Option, @@ -69,10 +65,9 @@ struct PeerInfoInternal { } impl PeerInfoInternal { - fn new(addr: SocketAddr, state: PeerConnState) -> Self { + fn new(state: PeerConnState, known_addr: Option) -> Self { Self { - addr, - all_addrs: vec![addr], + known_addrs: known_addr.map(|x| vec![x]).unwrap_or_default(), state, last_send_ping: None, last_seen: None, @@ -81,8 +76,8 @@ impl PeerInfoInternal { } } fn add_addr(&mut self, addr: SocketAddr) -> bool { - if !self.all_addrs.contains(&addr) { - self.all_addrs.push(addr); + if !self.known_addrs.contains(&addr) { + self.known_addrs.push(addr); // If we are learning a new address for this node, // we want to retry connecting self.state = match self.state { @@ -90,7 +85,7 @@ impl PeerInfoInternal { PeerConnState::Waiting(_, _) | PeerConnState::Abandonned => { PeerConnState::Waiting(0, Instant::now()) } - x @ (PeerConnState::Ourself | PeerConnState::Connected) => x, + x @ (PeerConnState::Ourself | PeerConnState::Connected { .. }) => x, }; true } else { @@ -104,8 +99,6 @@ impl PeerInfoInternal { pub struct PeerInfo { /// The node's identifier (its public key) pub id: NodeID, - /// The node's network address - pub addr: SocketAddr, /// The current status of our connection to this node pub state: PeerConnState, /// The last time at which the node was seen @@ -136,7 +129,7 @@ pub enum PeerConnState { Ourself, /// We currently have a connection to this peer - Connected, + Connected { addr: SocketAddr }, /// Our next connection tentative (the nth, where n is the first value of the tuple) /// will be at given Instant @@ -152,7 +145,7 @@ pub enum PeerConnState { impl PeerConnState { /// Returns true if we can currently send requests to this peer pub fn is_up(&self) -> bool { - matches!(self, Self::Ourself | Self::Connected) + matches!(self, Self::Ourself | Self::Connected { .. }) } } @@ -192,11 +185,11 @@ impl KnownHosts { } self.hash = hash_state.finalize(); } - fn connected_peers_vec(&self) -> Vec<(NodeID, SocketAddr)> { + fn connected_peers_vec(&self) -> Vec<(NodeID, Vec)> { self.list .iter() .filter(|(_, peer)| peer.state.is_up()) - .map(|(id, peer)| (*id, peer.addr)) + .map(|(id, peer)| (*id, peer.known_addrs.clone())) .collect::>() } } @@ -231,18 +224,16 @@ impl PeeringManager { if id != netapp.id { known_hosts.list.insert( id, - PeerInfoInternal::new(addr, PeerConnState::Waiting(0, Instant::now())), + PeerInfoInternal::new(PeerConnState::Waiting(0, Instant::now()), Some(addr)), ); } } - if let Some(addr) = our_addr { - known_hosts.list.insert( - netapp.id, - PeerInfoInternal::new(addr, PeerConnState::Ourself), - ); - known_hosts.update_hash(); - } + known_hosts.list.insert( + netapp.id, + PeerInfoInternal::new(PeerConnState::Ourself, our_addr), + ); + known_hosts.update_hash(); // TODO for v0.10 / v1.0 : rename the endpoint (it will break compatibility) let strat = Arc::new(Self { @@ -287,7 +278,7 @@ impl PeeringManager { for (id, info) in known_hosts.list.iter() { trace!("{}, {:?}", hex::encode(&id[..8]), info); match info.state { - PeerConnState::Connected => { + PeerConnState::Connected { .. } => { let must_ping = match info.last_send_ping { None => true, Some(t) => Instant::now() - t > PING_INTERVAL, @@ -330,7 +321,7 @@ impl PeeringManager { info!( "Retrying connection to {} at {} ({})", hex::encode(&id[..8]), - h.all_addrs + h.known_addrs .iter() .map(|x| format!("{}", x)) .collect::>() @@ -339,13 +330,8 @@ impl PeeringManager { ); h.state = PeerConnState::Trying(i); - let alternate_addrs = h - .all_addrs - .iter() - .filter(|x| **x != h.addr) - .cloned() - .collect::>(); - tokio::spawn(self.clone().try_connect(id, h.addr, alternate_addrs)); + let addresses = h.known_addrs.clone(); + tokio::spawn(self.clone().try_connect(id, addresses)); } } } @@ -373,27 +359,24 @@ impl PeeringManager { fn update_public_peer_list(&self, known_hosts: &KnownHosts) { let mut pub_peer_list = Vec::with_capacity(known_hosts.list.len()); for (id, info) in known_hosts.list.iter() { + if *id == self.netapp.id { + // sanity check + assert!(matches!(info.state, PeerConnState::Ourself)); + } let mut pings = info.ping.iter().cloned().collect::>(); pings.sort(); if !pings.is_empty() { pub_peer_list.push(PeerInfo { id: *id, - addr: info.addr, state: info.state, last_seen: info.last_seen, - avg_ping: Some( - pings - .iter() - .fold(Duration::from_secs(0), |x, y| x + *y) - .div_f64(pings.len() as f64), - ), + avg_ping: Some(pings.iter().sum::().div_f64(pings.len() as f64)), max_ping: pings.last().cloned(), med_ping: Some(pings[pings.len() / 2]), }); } else { pub_peer_list.push(PeerInfo { id: *id, - addr: info.addr, state: info.state, last_seen: info.last_seen, avg_ping: None, @@ -485,18 +468,20 @@ impl PeeringManager { } } - fn handle_peer_list(&self, list: &[(NodeID, SocketAddr)]) { + fn handle_peer_list(&self, list: &[(NodeID, Vec)]) { let mut known_hosts = self.known_hosts.write().unwrap(); let mut changed = false; - for (id, addr) in list.iter() { - if let Some(kh) = known_hosts.list.get_mut(id) { - if kh.add_addr(*addr) { + for (id, addrs) in list.iter() { + for addr in addrs.iter() { + if let Some(kh) = known_hosts.list.get_mut(id) { + if kh.add_addr(*addr) { + changed = true; + } + } else { + known_hosts.list.insert(*id, self.new_peer(id, *addr)); changed = true; } - } else { - known_hosts.list.insert(*id, self.new_peer(id, *addr)); - changed = true; } } @@ -506,15 +491,10 @@ impl PeeringManager { } } - async fn try_connect( - self: Arc, - id: NodeID, - default_addr: SocketAddr, - alternate_addrs: Vec, - ) { + async fn try_connect(self: Arc, id: NodeID, addresses: Vec) { let conn_addr = { let mut ret = None; - for addr in [default_addr].iter().chain(alternate_addrs.iter()) { + for addr in addresses.iter() { debug!("Trying address {} for peer {}", addr, hex::encode(&id[..8])); match self.netapp.clone().try_connect(*addr, id).await { Ok(()) => { @@ -540,7 +520,7 @@ impl PeeringManager { warn!( "Could not connect to peer {} ({} addresses tried)", hex::encode(&id[..8]), - 1 + alternate_addrs.len() + addresses.len() ); let mut known_hosts = self.known_hosts.write().unwrap(); if let Some(host) = known_hosts.list.get_mut(&id) { @@ -560,6 +540,14 @@ impl PeeringManager { } fn on_connected(self: &Arc, id: NodeID, addr: SocketAddr, is_incoming: bool) { + if id == self.netapp.id { + // sanity check + panic!( + "on_connected from local node, id={:?}, addr={}, incoming={}", + id, addr, is_incoming + ); + } + let mut known_hosts = self.known_hosts.write().unwrap(); if is_incoming { if let Some(host) = known_hosts.list.get_mut(&id) { @@ -574,13 +562,13 @@ impl PeeringManager { addr ); if let Some(host) = known_hosts.list.get_mut(&id) { - host.state = PeerConnState::Connected; - host.addr = addr; + host.state = PeerConnState::Connected { addr }; host.add_addr(addr); } else { - known_hosts - .list - .insert(id, PeerInfoInternal::new(addr, PeerConnState::Connected)); + known_hosts.list.insert( + id, + PeerInfoInternal::new(PeerConnState::Connected { addr }, Some(addr)), + ); } } known_hosts.update_hash(); @@ -600,12 +588,8 @@ impl PeeringManager { } fn new_peer(&self, id: &NodeID, addr: SocketAddr) -> PeerInfoInternal { - let state = if *id == self.netapp.id { - PeerConnState::Ourself - } else { - PeerConnState::Waiting(0, Instant::now()) - }; - PeerInfoInternal::new(addr, state) + assert!(*id != self.netapp.id); + PeerInfoInternal::new(PeerConnState::Waiting(0, Instant::now()), Some(addr)) } } diff --git a/src/rpc/system.rs b/src/rpc/system.rs index 54d589d2..9da1b176 100644 --- a/src/rpc/system.rs +++ b/src/rpc/system.rs @@ -16,7 +16,7 @@ use tokio::sync::{watch, Notify}; use garage_net::endpoint::{Endpoint, EndpointHandler}; use garage_net::message::*; -use garage_net::peering::PeeringManager; +use garage_net::peering::{PeerConnState, PeeringManager}; use garage_net::util::parse_and_resolve_peer_addr_async; use garage_net::{NetApp, NetworkKey, NodeID, NodeKey}; @@ -142,7 +142,7 @@ pub struct NodeStatus { #[derive(Debug, Clone, Serialize, Deserialize)] pub struct KnownNodeInfo { pub id: Uuid, - pub addr: SocketAddr, + pub addr: Option, pub is_up: bool, pub last_seen_secs_ago: Option, pub status: NodeStatus, @@ -381,7 +381,11 @@ impl System { .iter() .map(|n| KnownNodeInfo { id: n.id.into(), - addr: n.addr, + addr: match n.state { + PeerConnState::Ourself => self.rpc_public_addr, + PeerConnState::Connected { addr } => Some(addr), + _ => None, + }, is_up: n.is_up(), last_seen_secs_ago: n .last_seen @@ -722,7 +726,10 @@ impl System { .peering .get_peer_list() .iter() - .map(|n| (n.id.into(), n.addr)) + .filter_map(|n| match n.state { + PeerConnState::Connected { addr } => Some((n.id.into(), addr)), + _ => None, + }) .collect::>(); // Before doing it, we read the current peer list file (if it exists) From 3844110cd03210a1600d57db1aab53e41cf4815f Mon Sep 17 00:00:00 2001 From: Alex Auvolat Date: Thu, 21 Mar 2024 10:50:44 +0100 Subject: [PATCH 3/3] [net-fixes] netapp peer exchange: send only currently connected address --- src/net/peering.rs | 26 +++++++++++++------------- 1 file changed, 13 insertions(+), 13 deletions(-) diff --git a/src/net/peering.rs b/src/net/peering.rs index 0b4fec9e..b4271231 100644 --- a/src/net/peering.rs +++ b/src/net/peering.rs @@ -43,7 +43,7 @@ impl Message for PingMessage { #[derive(Serialize, Deserialize)] struct PeerListMessage { - pub list: Vec<(NodeID, Vec)>, + pub list: Vec<(NodeID, SocketAddr)>, } impl Message for PeerListMessage { @@ -185,11 +185,13 @@ impl KnownHosts { } self.hash = hash_state.finalize(); } - fn connected_peers_vec(&self) -> Vec<(NodeID, Vec)> { + fn connected_peers_vec(&self) -> Vec<(NodeID, SocketAddr)> { self.list .iter() - .filter(|(_, peer)| peer.state.is_up()) - .map(|(id, peer)| (*id, peer.known_addrs.clone())) + .filter_map(|(id, peer)| match peer.state { + PeerConnState::Connected { addr } => Some((*id, addr)), + _ => None, + }) .collect::>() } } @@ -468,20 +470,18 @@ impl PeeringManager { } } - fn handle_peer_list(&self, list: &[(NodeID, Vec)]) { + fn handle_peer_list(&self, list: &[(NodeID, SocketAddr)]) { let mut known_hosts = self.known_hosts.write().unwrap(); let mut changed = false; - for (id, addrs) in list.iter() { - for addr in addrs.iter() { - if let Some(kh) = known_hosts.list.get_mut(id) { - if kh.add_addr(*addr) { - changed = true; - } - } else { - known_hosts.list.insert(*id, self.new_peer(id, *addr)); + for (id, addr) in list.iter() { + if let Some(kh) = known_hosts.list.get_mut(id) { + if kh.add_addr(*addr) { changed = true; } + } else { + known_hosts.list.insert(*id, self.new_peer(id, *addr)); + changed = true; } }