From 9a578b3c0425c2cef4c43136d7caec0670c9af25 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Arma=C3=ABl=20Gu=C3=A9neau?= Date: Sun, 24 Nov 2024 15:41:48 +0100 Subject: [PATCH] add tag "tricot-block-user-agent" to block clients with a matching user agent --- README.md | 1 + src/https.rs | 94 +++++++++++++++++++++++++++++++++++---------- src/proxy_config.rs | 14 ++++++- 3 files changed, 88 insertions(+), 21 deletions(-) diff --git a/README.md b/README.md index 4238594..21e32a0 100644 --- a/README.md +++ b/README.md @@ -43,6 +43,7 @@ Backends are configured by adding tags of the following form to the services in - `tricot-https myapp.example.com`: same, but indicates that the backend service handling the request expects an HTTPS request and not an HTTP request. In this case, Tricot will do everything in its power to NOT verify the backend's TLS certificate (ignore self-signed certificate, ignore TLS hostname, etc). - `tricot-add-header Access-Control-Allow-Origin *`: add the `Access-Control-Allow-Origin: *` header to all of the HTTP responses when they are proxied back to the client - `tricot-add-redirect old.example.com/maybe_subpath new.example.com/new/subpath 301`: redirects paths that match the first pattern to the second pattern with the given HTTP status code. More info in [PR#10](https://git.deuxfleurs.fr/Deuxfleurs/tricot/pulls/10). +- `tricot-block-user-agent AnnoyingRobot`: block requests from clients with a user agent containing `AnnoyingRobot` (they will get a 403 response) - `tricot-global-lb`: load-balance incoming requests to all matching backends - `tricot-site-lb`: load-balance incoming requests to all matching backends that are in the same site (geographical location); when site information about nodes is not available, this is equivalent to `tricot-global-lb` diff --git a/src/https.rs b/src/https.rs index 9d92470..676404f 100644 --- a/src/https.rs +++ b/src/https.rs @@ -24,7 +24,7 @@ use tokio_util::io::{ReaderStream, StreamReader}; use opentelemetry::{metrics, KeyValue}; use crate::cert_store::{CertStore, StoreResolver}; -use crate::proxy_config::{HostDescription, ProxyConfig, ProxyEntry}; +use crate::proxy_config::{HostDescription, ProxyConfig, ProxyEntry, UrlPrefix}; use crate::reverse_proxy; const MAX_CONNECTION_LIFETIME: Duration = Duration::from_secs(24 * 3600); @@ -257,6 +257,11 @@ async fn select_target_and_proxy( ) }); + let user_agent = + req.headers().get("User-Agent") + .and_then(|v| v.to_str().ok()) + .map(|s| s.to_string()); + if let Some(proxy_to) = best_match { tags.push(KeyValue::new("service", proxy_to.service_name.clone())); tags.push(KeyValue::new( @@ -265,6 +270,9 @@ async fn select_target_and_proxy( )); tags.push(KeyValue::new("same_node", proxy_to.flags.same_node)); tags.push(KeyValue::new("same_site", proxy_to.flags.same_site)); + if let Some(user_agent) = &user_agent { + tags.push(KeyValue::new("user_agent", user_agent.clone())); + } proxy_to.last_call.fetch_max( (received_time - https_config.time_origin).as_millis() as i64, @@ -276,17 +284,23 @@ async fn select_target_and_proxy( debug!("{}{} -> {}", host, path, proxy_to); trace!("Request: {:?}", req); - let response = if let Some(http_res) = try_redirect(host, path, proxy_to) { - // redirection middleware - http_res - } else { - // proxying to backend - match do_proxy(https_config, remote_addr, req, proxy_to).await { + let response = { + let res = match request_proxy_action(host, path, user_agent.as_deref(), proxy_to) { + ProxyAction::Redirect(src_prefix, dst_prefix, code) => + // redirection + do_redirect(host, path, src_prefix, dst_prefix, code), + ProxyAction::Block => + do_block(), + ProxyAction::Proxy => + // proxying to backend + do_proxy(https_config, remote_addr, req, proxy_to).await, + }; + match res { Ok(resp) => resp, Err(e) => Response::builder() .status(StatusCode::BAD_GATEWAY) .body(Body::from(format!("Proxy error: {}", e))) - .unwrap(), + .unwrap() } }; @@ -309,7 +323,18 @@ async fn select_target_and_proxy( } } -fn try_redirect(req_host: &str, req_path: &str, proxy_to: &ProxyEntry) -> Option> { +enum ProxyAction<'a> { + Redirect(&'a UrlPrefix, &'a UrlPrefix, u16), + Block, + Proxy, +} + +fn request_proxy_action<'a>( + req_host: &str, + req_path: &str, + req_user_agent: Option<&str>, + proxy_to: &'a ProxyEntry +) -> ProxyAction<'a> { let maybe_redirect = proxy_to.redirects.iter().find(|(src, _, _)| { let mut matched: bool = src.host.matches(req_host); @@ -320,11 +345,33 @@ fn try_redirect(req_host: &str, req_path: &str, proxy_to: &ProxyEntry) -> Option matched }); - let (src_prefix, dst_prefix, code) = match maybe_redirect { - None => return None, - Some(redirect) => redirect, - }; + if let Some((src_prefix, dst_prefix, code)) = maybe_redirect { + return ProxyAction::Redirect(src_prefix, dst_prefix, *code) + } + let is_block = + if let Some(user_agent) = req_user_agent { + proxy_to.block_user_agents.iter().any(|blocked| { + user_agent.contains(blocked) + }) + } else { + false + }; + + if is_block { + return ProxyAction::Block + } + + return ProxyAction::Proxy +} + +fn do_redirect( + req_host: &str, + req_path: &str, + src_prefix: &UrlPrefix, + dst_prefix: &UrlPrefix, + code: u16, +) -> Result> { let new_host = match &dst_prefix.host { HostDescription::Hostname(h) => h, _ => unreachable!(), // checked when ProxyEntry is created @@ -336,22 +383,29 @@ fn try_redirect(req_host: &str, req_path: &str, proxy_to: &ProxyEntry) -> Option let uri = format!("https://{}{}{}", new_host, new_prefix, suffix); - let status = match StatusCode::from_u16(*code) { + let status = match StatusCode::from_u16(code) { Err(e) => { warn!( "Couldn't redirect {}{} to {} as code {} in invalid: {}", req_host, req_path, uri, code, e ); - return None; + return Err(e)? } Ok(sc) => sc, }; - Response::builder() - .header("Location", uri.clone()) - .status(status) - .body(Body::from(uri)) - .ok() + Ok(Response::builder() + .header("Location", uri.clone()) + .status(status) + .body(Body::from(uri)) + .unwrap()) +} + +fn do_block() -> Result> { + Ok(Response::builder() + .status(StatusCode::FORBIDDEN) + .body(Body::empty()) + .unwrap()) } async fn do_proxy( diff --git a/src/proxy_config.rs b/src/proxy_config.rs index 7690f8a..338e400 100644 --- a/src/proxy_config.rs +++ b/src/proxy_config.rs @@ -108,10 +108,15 @@ pub struct ProxyEntry { /// when matching this rule pub redirects: Vec<(UrlPrefix, UrlPrefix, u16)>, - /// Wether or not the domain must be validated before asking a certificate + /// Whether or not the domain must be validated before asking a certificate /// to let's encrypt (only for Glob patterns) pub on_demand_tls_ask: Option, + /// User-agents to block. + /// A client request is blocked if its user-agent contains any of the + /// strings. + pub block_user_agents: Vec, + /// Number of calls in progress, used to deprioritize slow back-ends pub calls_in_progress: atomic::AtomicI64, /// Time of last call, used for round-robin selection @@ -147,12 +152,14 @@ impl ProxyEntry { let mut add_headers = vec![]; let mut redirects = vec![]; let mut on_demand_tls_ask: Option = None; + let mut block_user_agents = vec![]; for mid in middleware.into_iter() { // LocalLb and GlobalLb are handled in the parent function match mid { ConfigTag::AddHeader(k, v) => add_headers.push((k.to_string(), v.clone())), ConfigTag::AddRedirect(m, r, c) => redirects.push(((*m).clone(), (*r).clone(), *c)), ConfigTag::OnDemandTlsAsk(url) => on_demand_tls_ask = Some(url.to_string()), + ConfigTag::BlockUserAgent(s) => block_user_agents.push(s.clone()), ConfigTag::LocalLb | ConfigTag::GlobalLb => (), }; } @@ -171,6 +178,7 @@ impl ProxyEntry { add_headers, redirects, on_demand_tls_ask, + block_user_agents, // internal last_call: atomic::AtomicI64::from(0), calls_in_progress: atomic::AtomicI64::from(0), @@ -253,6 +261,7 @@ enum ConfigTag<'a> { AddHeader(&'a str, String), AddRedirect(UrlPrefix, UrlPrefix, u16), OnDemandTlsAsk(&'a str), + BlockUserAgent(String), GlobalLb, LocalLb, } @@ -330,6 +339,9 @@ fn parse_tricot_tags(tag: &str) -> Option { ["tricot-on-demand-tls-ask", url, ..] => { Some(ParsedTag::Middleware(ConfigTag::OnDemandTlsAsk(url))) } + ["tricot-block-user-agent", elts @ ..] => { + Some(ParsedTag::Middleware(ConfigTag::BlockUserAgent(elts.join(" ")))) + } ["tricot-global-lb", ..] => Some(ParsedTag::Middleware(ConfigTag::GlobalLb)), ["tricot-local-lb", ..] => Some(ParsedTag::Middleware(ConfigTag::LocalLb)), _ => None,