forked from Deuxfleurs/tricot
Take into account unhealthy nodes
This commit is contained in:
parent
b5e8d1fcd8
commit
a3602eac82
3 changed files with 85 additions and 158 deletions
|
@ -233,7 +233,8 @@ async fn select_target_and_proxy(
|
||||||
.entries
|
.entries
|
||||||
.iter()
|
.iter()
|
||||||
.filter(|ent| {
|
.filter(|ent| {
|
||||||
ent.host.matches(host)
|
ent.flags.healthy
|
||||||
|
&& ent.host.matches(host)
|
||||||
&& ent
|
&& ent
|
||||||
.path_prefix
|
.path_prefix
|
||||||
.as_ref()
|
.as_ref()
|
||||||
|
|
|
@ -245,7 +245,13 @@ async fn dump_config_on_change(
|
||||||
for ((host, prefix), ents) in cfg_map.iter_mut() {
|
for ((host, prefix), ents) in cfg_map.iter_mut() {
|
||||||
println!("{}{}:", host, prefix.as_deref().unwrap_or_default());
|
println!("{}{}:", host, prefix.as_deref().unwrap_or_default());
|
||||||
for ent in ents.iter() {
|
for ent in ents.iter() {
|
||||||
println!(" {}", ent);
|
print!(" ");
|
||||||
|
if !ent.flags.healthy {
|
||||||
|
print!("/!\\ ");
|
||||||
|
} else {
|
||||||
|
print!(" ");
|
||||||
|
}
|
||||||
|
println!("{}", ent);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
println!();
|
println!();
|
||||||
|
|
|
@ -1,16 +1,13 @@
|
||||||
use std::collections::HashMap;
|
use std::collections::HashMap;
|
||||||
use std::net::SocketAddr;
|
use std::net::SocketAddr;
|
||||||
use std::sync::{atomic, Arc};
|
use std::sync::{atomic, Arc};
|
||||||
use std::{cmp, time::Duration};
|
use std::time::Duration;
|
||||||
|
|
||||||
use anyhow::Result;
|
use anyhow::Result;
|
||||||
use opentelemetry::{metrics, KeyValue};
|
use opentelemetry::{metrics, KeyValue};
|
||||||
|
|
||||||
use futures::future::BoxFuture;
|
|
||||||
use futures::stream::{FuturesUnordered, StreamExt};
|
|
||||||
|
|
||||||
use log::*;
|
use log::*;
|
||||||
use tokio::{select, sync::watch, time::sleep};
|
use tokio::{select, sync::watch};
|
||||||
|
|
||||||
use crate::consul;
|
use crate::consul;
|
||||||
|
|
||||||
|
@ -93,6 +90,9 @@ impl Eq for ProxyEntry {}
|
||||||
|
|
||||||
#[derive(Debug, Clone, Copy, Eq, PartialEq)]
|
#[derive(Debug, Clone, Copy, Eq, PartialEq)]
|
||||||
pub struct ProxyEntryFlags {
|
pub struct ProxyEntryFlags {
|
||||||
|
/// Is the target healthy?
|
||||||
|
pub healthy: bool,
|
||||||
|
|
||||||
/// Is the target the same node as we are running on?
|
/// Is the target the same node as we are running on?
|
||||||
/// (if yes priorize it over other matching targets)
|
/// (if yes priorize it over other matching targets)
|
||||||
pub same_node: bool,
|
pub same_node: bool,
|
||||||
|
@ -119,6 +119,9 @@ impl std::fmt::Display for ProxyEntry {
|
||||||
self.path_prefix.as_deref().unwrap_or_default(),
|
self.path_prefix.as_deref().unwrap_or_default(),
|
||||||
self.priority
|
self.priority
|
||||||
)?;
|
)?;
|
||||||
|
if !self.flags.healthy {
|
||||||
|
write!(f, " UNHEALTHY")?;
|
||||||
|
}
|
||||||
if self.flags.same_node {
|
if self.flags.same_node {
|
||||||
write!(f, " OURSELF")?;
|
write!(f, " OURSELF")?;
|
||||||
} else if self.flags.same_site {
|
} else if self.flags.same_site {
|
||||||
|
@ -141,16 +144,6 @@ pub struct ProxyConfig {
|
||||||
pub entries: Vec<ProxyEntry>,
|
pub entries: Vec<ProxyEntry>,
|
||||||
}
|
}
|
||||||
|
|
||||||
fn retry_to_time(retries: u32, max_time: Duration) -> Duration {
|
|
||||||
// 1.2^x seems to be a good value to exponentially increase time at a good pace
|
|
||||||
// eg. 1.2^32 = 341 seconds ~= 5 minutes - ie. after 32 retries we wait 5
|
|
||||||
// minutes
|
|
||||||
Duration::from_secs(cmp::min(
|
|
||||||
max_time.as_secs(),
|
|
||||||
1.2f64.powf(retries as f64) as u64,
|
|
||||||
))
|
|
||||||
}
|
|
||||||
|
|
||||||
fn parse_tricot_tag(
|
fn parse_tricot_tag(
|
||||||
service_name: String,
|
service_name: String,
|
||||||
tag: &str,
|
tag: &str,
|
||||||
|
@ -209,63 +202,55 @@ fn parse_tricot_add_header_tag(tag: &str) -> Option<(String, String)> {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
fn parse_consul_catalog(
|
fn parse_consul_service(
|
||||||
catalog: &consul::catalog::CatalogNode,
|
s: &consul::catalog::HealthServiceNode,
|
||||||
same_node: bool,
|
mut flags: ProxyEntryFlags,
|
||||||
same_site: bool,
|
|
||||||
) -> Vec<ProxyEntry> {
|
) -> Vec<ProxyEntry> {
|
||||||
trace!("Parsing node catalog: {:#?}", catalog);
|
trace!("Parsing service: {:#?}", s);
|
||||||
|
|
||||||
let mut entries = vec![];
|
let mut entries = vec![];
|
||||||
|
|
||||||
for (_, svc) in catalog.services.iter() {
|
let ip_addr = match s.service.address.parse() {
|
||||||
let ip_addr = match svc.address.parse() {
|
|
||||||
Ok(ip) => ip,
|
Ok(ip) => ip,
|
||||||
_ => match catalog.node.address.parse() {
|
_ => match s.node.address.parse() {
|
||||||
Ok(ip) => ip,
|
Ok(ip) => ip,
|
||||||
_ => {
|
_ => {
|
||||||
warn!(
|
warn!(
|
||||||
"Could not get address for service {} at node {}",
|
"Could not get address for service {} at node {}",
|
||||||
svc.service, catalog.node.node
|
s.service.service, s.node.node
|
||||||
);
|
);
|
||||||
continue;
|
return vec![];
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
};
|
};
|
||||||
let addr = SocketAddr::new(ip_addr, svc.port);
|
let addr = SocketAddr::new(ip_addr, s.service.port);
|
||||||
|
|
||||||
let (site_lb, global_lb) = if svc.tags.contains(&"tricot-global-lb".into()) {
|
if s.service.tags.contains(&"tricot-global-lb".into()) {
|
||||||
(false, true)
|
flags.global_lb = true;
|
||||||
} else if svc.tags.contains(&"tricot-site-lb".into()) {
|
} else if s.service.tags.contains(&"tricot-site-lb".into()) {
|
||||||
(true, false)
|
flags.site_lb = true;
|
||||||
} else {
|
|
||||||
(false, false)
|
|
||||||
};
|
|
||||||
|
|
||||||
let flags = ProxyEntryFlags {
|
|
||||||
same_node,
|
|
||||||
same_site,
|
|
||||||
site_lb,
|
|
||||||
global_lb,
|
|
||||||
};
|
};
|
||||||
|
|
||||||
let mut add_headers = vec![];
|
let mut add_headers = vec![];
|
||||||
for tag in svc.tags.iter() {
|
for tag in s.service.tags.iter() {
|
||||||
if let Some(pair) = parse_tricot_add_header_tag(tag) {
|
if let Some(pair) = parse_tricot_add_header_tag(tag) {
|
||||||
add_headers.push(pair);
|
add_headers.push(pair);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
for tag in svc.tags.iter() {
|
for tag in s.service.tags.iter() {
|
||||||
if let Some(ent) =
|
if let Some(ent) = parse_tricot_tag(
|
||||||
parse_tricot_tag(svc.service.clone(), tag, addr, &add_headers[..], flags)
|
s.service.service.clone(),
|
||||||
{
|
tag,
|
||||||
|
addr,
|
||||||
|
&add_headers[..],
|
||||||
|
flags,
|
||||||
|
) {
|
||||||
entries.push(ent);
|
entries.push(ent);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
|
||||||
|
|
||||||
trace!("Result of parsing catalog:");
|
trace!("Result of parsing service:");
|
||||||
for ent in entries.iter() {
|
for ent in entries.iter() {
|
||||||
trace!(" {}", ent);
|
trace!(" {}", ent);
|
||||||
}
|
}
|
||||||
|
@ -273,13 +258,6 @@ fn parse_consul_catalog(
|
||||||
entries
|
entries
|
||||||
}
|
}
|
||||||
|
|
||||||
#[derive(Default)]
|
|
||||||
struct NodeWatchState {
|
|
||||||
last_idx: Option<usize>,
|
|
||||||
last_catalog: Option<consul::catalog::CatalogNode>,
|
|
||||||
retries: u32,
|
|
||||||
}
|
|
||||||
|
|
||||||
pub fn spawn_proxy_config_task(
|
pub fn spawn_proxy_config_task(
|
||||||
consul: consul::Consul,
|
consul: consul::Consul,
|
||||||
local_node: String,
|
local_node: String,
|
||||||
|
@ -293,108 +271,50 @@ pub fn spawn_proxy_config_task(
|
||||||
let consul = Arc::new(consul);
|
let consul = Arc::new(consul);
|
||||||
|
|
||||||
tokio::spawn(async move {
|
tokio::spawn(async move {
|
||||||
let mut nodes = HashMap::new();
|
let mut catalog_rx = consul.watch_all_service_health(Duration::from_secs(300));
|
||||||
let mut watches = FuturesUnordered::<BoxFuture<'static, (String, Result<_>)>>::new();
|
let mut local_node_site = None;
|
||||||
|
|
||||||
let mut node_site = HashMap::new();
|
|
||||||
|
|
||||||
while !*must_exit.borrow() {
|
while !*must_exit.borrow() {
|
||||||
let list_nodes = select! {
|
select! {
|
||||||
ln = consul.catalog_node_list(None) => ln,
|
_ = catalog_rx.changed() => (),
|
||||||
_ = must_exit.changed() => continue,
|
_ = must_exit.changed() => continue,
|
||||||
};
|
};
|
||||||
|
|
||||||
match list_nodes {
|
let services = catalog_rx.borrow_and_update().clone();
|
||||||
Ok(consul_nodes) => {
|
if local_node_site.is_none() {
|
||||||
info!("Watched consul nodes: {:?}", consul_nodes);
|
for (_, svcnodes) in services.iter() {
|
||||||
for consul_node in consul_nodes.into_inner() {
|
for svcnode in svcnodes.iter() {
|
||||||
let node = &consul_node.node;
|
if svcnode.node.node == local_node {
|
||||||
if !nodes.contains_key(node) {
|
if let Some(site) = svcnode.node.meta.get("site") {
|
||||||
nodes.insert(node.clone(), NodeWatchState::default());
|
local_node_site = Some(site.to_string());
|
||||||
|
|
||||||
let node = node.to_string();
|
|
||||||
let consul = consul.clone();
|
|
||||||
|
|
||||||
watches.push(Box::pin(async move {
|
|
||||||
let res = consul.catalog_node(&node, None).await;
|
|
||||||
(node, res)
|
|
||||||
}));
|
|
||||||
}
|
|
||||||
if let Some(site) = consul_node.meta.get("site") {
|
|
||||||
node_site.insert(node.clone(), site.clone());
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
Err(e) => {
|
|
||||||
warn!("Could not get Consul node list: {}", e);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
let next_watch = select! {
|
|
||||||
nw = watches.next() => nw,
|
|
||||||
_ = must_exit.changed() => continue,
|
|
||||||
};
|
|
||||||
|
|
||||||
let (node, res): (String, Result<_>) = match next_watch {
|
|
||||||
Some(v) => v,
|
|
||||||
None => {
|
|
||||||
warn!("No nodes currently watched in proxy_config.rs");
|
|
||||||
sleep(Duration::from_secs(10)).await;
|
|
||||||
continue;
|
|
||||||
}
|
|
||||||
};
|
|
||||||
|
|
||||||
match res {
|
|
||||||
Ok(res) => {
|
|
||||||
let new_idx = res.index();
|
|
||||||
let catalog = res.into_inner();
|
|
||||||
|
|
||||||
let mut watch_state = nodes.get_mut(&node).unwrap();
|
|
||||||
watch_state.last_idx = Some(new_idx);
|
|
||||||
watch_state.last_catalog = catalog;
|
|
||||||
watch_state.retries = 0;
|
|
||||||
|
|
||||||
let idx = watch_state.last_idx;
|
|
||||||
let consul = consul.clone();
|
|
||||||
watches.push(Box::pin(async move {
|
|
||||||
let res = consul.catalog_node(&node, idx).await;
|
|
||||||
(node, res)
|
|
||||||
}));
|
|
||||||
}
|
|
||||||
Err(e) => {
|
|
||||||
let mut watch_state = nodes.get_mut(&node).unwrap();
|
|
||||||
watch_state.retries += 1;
|
|
||||||
watch_state.last_idx = None;
|
|
||||||
|
|
||||||
let will_retry_in =
|
|
||||||
retry_to_time(watch_state.retries, Duration::from_secs(600));
|
|
||||||
error!(
|
|
||||||
"Failed to query consul for node {}. Will retry in {}s. {}",
|
|
||||||
node,
|
|
||||||
will_retry_in.as_secs(),
|
|
||||||
e
|
|
||||||
);
|
|
||||||
|
|
||||||
let consul = consul.clone();
|
|
||||||
watches.push(Box::pin(async move {
|
|
||||||
sleep(will_retry_in).await;
|
|
||||||
let res = consul.catalog_node(&node, None).await;
|
|
||||||
(node, res)
|
|
||||||
}));
|
|
||||||
continue;
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
let mut entries = vec![];
|
let mut entries = vec![];
|
||||||
for (node_name, watch_state) in nodes.iter() {
|
|
||||||
if let Some(catalog) = &watch_state.last_catalog {
|
for (_service, svcnodes) in services.iter() {
|
||||||
let same_node = *node_name == local_node;
|
for svcnode in svcnodes.iter() {
|
||||||
let same_site = match (node_site.get(node_name), node_site.get(&local_node)) {
|
let healthy = !svcnode.checks.iter().any(|x| x.status == "critical");
|
||||||
|
|
||||||
|
let same_node = svcnode.node.node == local_node;
|
||||||
|
let same_site = match (svcnode.node.meta.get("site"), local_node_site.as_ref())
|
||||||
|
{
|
||||||
(Some(s1), Some(s2)) => s1 == s2,
|
(Some(s1), Some(s2)) => s1 == s2,
|
||||||
_ => false,
|
_ => false,
|
||||||
};
|
};
|
||||||
|
|
||||||
entries.extend(parse_consul_catalog(catalog, same_node, same_site));
|
let flags = ProxyEntryFlags {
|
||||||
|
healthy,
|
||||||
|
same_node,
|
||||||
|
same_site,
|
||||||
|
site_lb: false,
|
||||||
|
global_lb: false,
|
||||||
|
};
|
||||||
|
|
||||||
|
entries.extend(parse_consul_service(&svcnode, flags));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
Loading…
Reference in a new issue