Add logic to handle ping timeouts and other failures
continuous-integration/drone/push Build is passing Details

This commit is contained in:
Alex 2022-03-15 17:01:51 +01:00
parent fa7cdf3747
commit 22eaa0f404
Signed by: lx
GPG Key ID: 0E496D15096376BE
1 changed files with 32 additions and 2 deletions

View File

@ -9,11 +9,13 @@ use async_trait::async_trait;
use log::{debug, info, trace, warn};
use serde::{Deserialize, Serialize};
use tokio::select;
use tokio::sync::watch;
use sodiumoxide::crypto::hash;
use crate::endpoint::*;
use crate::error::*;
use crate::netapp::*;
use crate::proto::*;
use crate::NodeID;
@ -22,6 +24,8 @@ const CONN_RETRY_INTERVAL: Duration = Duration::from_secs(30);
const CONN_MAX_RETRIES: usize = 10;
const PING_INTERVAL: Duration = Duration::from_secs(10);
const LOOP_DELAY: Duration = Duration::from_secs(1);
const PING_TIMEOUT: Duration = Duration::from_secs(5);
const FAILED_PING_THRESHOLD: usize = 3;
// -- Protocol messages --
@ -52,6 +56,7 @@ struct PeerInfoInternal {
state: PeerConnState,
last_seen: Option<Instant>,
ping: VecDeque<Duration>,
failed_pings: usize,
}
#[derive(Copy, Clone, Debug)]
@ -178,6 +183,7 @@ impl FullMeshPeeringStrategy {
state: PeerConnState::Waiting(0, Instant::now()),
last_seen: None,
ping: VecDeque::new(),
failed_pings: 0,
},
);
}
@ -191,6 +197,7 @@ impl FullMeshPeeringStrategy {
state: PeerConnState::Ourself,
last_seen: None,
ping: VecDeque::new(),
failed_pings: 0,
},
);
}
@ -347,8 +354,28 @@ impl FullMeshPeeringStrategy {
hex::encode(&id[..8]),
ping_time
);
match self.ping_endpoint.call(&id, &ping_msg, PRIO_HIGH).await {
Err(e) => warn!("Error pinging {}: {}", hex::encode(&id[..8]), e),
let ping_response = select! {
r = self.ping_endpoint.call(&id, &ping_msg, PRIO_HIGH) => r,
_ = tokio::time::sleep(PING_TIMEOUT) => Err(Error::Message("Ping timeout".into())),
};
match ping_response {
Err(e) => {
warn!("Error pinging {}: {}", hex::encode(&id[..8]), e);
let mut known_hosts = self.known_hosts.write().unwrap();
if let Some(host) = known_hosts.list.get_mut(&id) {
host.failed_pings += 1;
if host.failed_pings > FAILED_PING_THRESHOLD {
warn!(
"Too many failed pings from {}, closing connection.",
hex::encode(&id[..8])
);
// this will later update info in known_hosts
// through the disconnection handler
self.netapp.disconnect(&id);
}
}
}
Ok(ping_resp) => {
let resp_time = Instant::now();
debug!(
@ -359,6 +386,7 @@ impl FullMeshPeeringStrategy {
{
let mut known_hosts = self.known_hosts.write().unwrap();
if let Some(host) = known_hosts.list.get_mut(&id) {
host.failed_pings = 0;
host.last_seen = Some(resp_time);
host.ping.push_back(resp_time - ping_time);
while host.ping.len() > 10 {
@ -455,6 +483,7 @@ impl FullMeshPeeringStrategy {
addr,
last_seen: None,
ping: VecDeque::new(),
failed_pings: 0,
},
);
}
@ -486,6 +515,7 @@ impl FullMeshPeeringStrategy {
state,
last_seen: None,
ping: VecDeque::new(),
failed_pings: 0,
}
}
}