add tag "tricot-block-user-agent" to block clients with a matching user agent #16

Open
Armael wants to merge 1 commit from Armael/tricot:block_user_agents into main
3 changed files with 88 additions and 21 deletions
Showing only changes of commit 9a578b3c04 - Show all commits

View file

@ -43,6 +43,7 @@ Backends are configured by adding tags of the following form to the services in
- `tricot-https myapp.example.com`: same, but indicates that the backend service handling the request expects an HTTPS request and not an HTTP request. In this case, Tricot will do everything in its power to NOT verify the backend's TLS certificate (ignore self-signed certificate, ignore TLS hostname, etc).
- `tricot-add-header Access-Control-Allow-Origin *`: add the `Access-Control-Allow-Origin: *` header to all of the HTTP responses when they are proxied back to the client
- `tricot-add-redirect old.example.com/maybe_subpath new.example.com/new/subpath 301`: redirects paths that match the first pattern to the second pattern with the given HTTP status code. More info in [PR#10](https://git.deuxfleurs.fr/Deuxfleurs/tricot/pulls/10).
- `tricot-block-user-agent AnnoyingRobot`: block requests from clients with a user agent containing `AnnoyingRobot` (they will get a 403 response)
- `tricot-global-lb`: load-balance incoming requests to all matching backends
- `tricot-local-lb`: load-balance incoming requests to all matching backends that are in the same site (geographical location); when site information about nodes is not available, this is equivalent to `tricot-global-lb`

View file

@ -24,7 +24,7 @@ use tokio_util::io::{ReaderStream, StreamReader};
use opentelemetry::{metrics, KeyValue};
use crate::cert_store::{CertStore, StoreResolver};
use crate::proxy_config::{HostDescription, ProxyConfig, ProxyEntry};
use crate::proxy_config::{HostDescription, ProxyConfig, ProxyEntry, UrlPrefix};
use crate::reverse_proxy;
const MAX_CONNECTION_LIFETIME: Duration = Duration::from_secs(24 * 3600);
@ -257,6 +257,11 @@ async fn select_target_and_proxy(
)
});
let user_agent =
req.headers().get("User-Agent")
.and_then(|v| v.to_str().ok())
.map(|s| s.to_string());
if let Some(proxy_to) = best_match {
tags.push(KeyValue::new("service", proxy_to.service_name.clone()));
tags.push(KeyValue::new(
@ -265,6 +270,9 @@ async fn select_target_and_proxy(
));
tags.push(KeyValue::new("same_node", proxy_to.flags.same_node));
tags.push(KeyValue::new("same_site", proxy_to.flags.same_site));
if let Some(user_agent) = &user_agent {
tags.push(KeyValue::new("user_agent", user_agent.clone()));
}
proxy_to.last_call.fetch_max(
(received_time - https_config.time_origin).as_millis() as i64,
@ -276,17 +284,23 @@ async fn select_target_and_proxy(
debug!("{}{} -> {}", host, path, proxy_to);
trace!("Request: {:?}", req);
let response = if let Some(http_res) = try_redirect(host, path, proxy_to) {
// redirection middleware
http_res
} else {
// proxying to backend
match do_proxy(https_config, remote_addr, req, proxy_to).await {
let response = {
let res = match request_proxy_action(host, path, user_agent.as_deref(), proxy_to) {
ProxyAction::Redirect(src_prefix, dst_prefix, code) =>
// redirection
do_redirect(host, path, src_prefix, dst_prefix, code),
ProxyAction::Block =>
do_block(),
ProxyAction::Proxy =>
// proxying to backend
do_proxy(https_config, remote_addr, req, proxy_to).await,
};
match res {
Ok(resp) => resp,
Err(e) => Response::builder()
.status(StatusCode::BAD_GATEWAY)
.body(Body::from(format!("Proxy error: {}", e)))
.unwrap(),
.unwrap()
}
};
@ -309,7 +323,18 @@ async fn select_target_and_proxy(
}
}
fn try_redirect(req_host: &str, req_path: &str, proxy_to: &ProxyEntry) -> Option<Response<Body>> {
/// What to do with an incoming request once a matching `ProxyEntry` has
/// been selected. Produced by `request_proxy_action`.
enum ProxyAction<'a> {
/// Answer with an HTTP redirect. Fields are, in order: the source prefix
/// of the redirect rule that matched, the destination prefix, and the
/// HTTP status code to respond with.
Redirect(&'a UrlPrefix, &'a UrlPrefix, u16),
/// Refuse the request (handled by `do_block`).
Block,
/// Forward the request to the backend (handled by `do_proxy`).
Proxy,
}
/// Decide how a request that was matched to `proxy_to` should be handled.
///
/// Returns `ProxyAction::Redirect` when an entry of `proxy_to.redirects` is
/// selected by the `find` below, `ProxyAction::Block` when `req_user_agent`
/// is `Some` and contains one of `proxy_to.block_user_agents`, and
/// `ProxyAction::Proxy` otherwise. Redirects are checked first, so a
/// matching redirect takes precedence over a user-agent block.
fn request_proxy_action<'a>(
req_host: &str,
req_path: &str,
req_user_agent: Option<&str>,
proxy_to: &'a ProxyEntry
) -> ProxyAction<'a> {
let maybe_redirect = proxy_to.redirects.iter().find(|(src, _, _)| {
let mut matched: bool = src.host.matches(req_host);
@ -320,11 +345,33 @@ fn try_redirect(req_host: &str, req_path: &str, proxy_to: &ProxyEntry) -> Option
matched
});
let (src_prefix, dst_prefix, code) = match maybe_redirect {
None => return None,
Some(redirect) => redirect,
};
if let Some((src_prefix, dst_prefix, code)) = maybe_redirect {
return ProxyAction::Redirect(src_prefix, dst_prefix, *code)
}
// Plain, case-sensitive substring test. When `req_user_agent` is `None`
// the request is never blocked.
// NOTE(review): `str::contains("")` is always true, so an empty entry in
// `block_user_agents` would block every request that carries a user
// agent — confirm that empty patterns are rejected where tags are parsed.
let is_block =
if let Some(user_agent) = req_user_agent {
proxy_to.block_user_agents.iter().any(|blocked| {
user_agent.contains(blocked)
})
} else {
false
};
if is_block {
return ProxyAction::Block
}
return ProxyAction::Proxy
}
/// Build the HTTP redirect response for a request that matched a redirect
/// rule.
///
/// The target URI is sent both as the `Location` header and as the response
/// body, with `code` as the status. Returns an error (after logging a
/// warning) when `code` is rejected by `StatusCode::from_u16`.
fn do_redirect(
req_host: &str,
req_path: &str,
src_prefix: &UrlPrefix,
dst_prefix: &UrlPrefix,
code: u16,
) -> Result<Response<Body>> {
let new_host = match &dst_prefix.host {
HostDescription::Hostname(h) => h,
_ => unreachable!(), // checked when ProxyEntry is created
@ -336,22 +383,29 @@ fn try_redirect(req_host: &str, req_path: &str, proxy_to: &ProxyEntry) -> Option
let uri = format!("https://{}{}{}", new_host, new_prefix, suffix);
let status = match StatusCode::from_u16(*code) {
let status = match StatusCode::from_u16(code) {
Err(e) => {
// NOTE(review): the message below reads "in invalid"; presumably
// "is invalid" was intended.
warn!(
"Couldn't redirect {}{} to {} as code {} in invalid: {}",
req_host, req_path, uri, code, e
);
return None;
// NOTE(review): `return Err(e)?` only relies on `?` for the error
// conversion; `return Err(e.into())` would state that directly.
return Err(e)?
}
Ok(sc) => sc,
};
Response::builder()
.header("Location", uri.clone())
.status(status)
.body(Body::from(uri))
.ok()
// NOTE(review): unlike the earlier `.ok()`, this `.unwrap()` panics if the
// builder rejects the `Location` value — confirm `uri` can never contain
// bytes that are invalid in a header value.
Ok(Response::builder()
.header("Location", uri.clone())
.status(status)
.body(Body::from(uri))
.unwrap())
}
/// Build the response sent to blocked clients: `403 Forbidden` with an
/// empty body. Always returns `Ok`.
fn do_block() -> Result<Response<Body>> {
    // Start from a default response around an empty body, then set the
    // status in place.
    let mut forbidden = Response::new(Body::empty());
    *forbidden.status_mut() = StatusCode::FORBIDDEN;
    Ok(forbidden)
}
async fn do_proxy(

View file

@ -108,10 +108,15 @@ pub struct ProxyEntry {
/// when matching this rule
pub redirects: Vec<(UrlPrefix, UrlPrefix, u16)>,
/// Wether or not the domain must be validated before asking a certificate
/// Whether or not the domain must be validated before requesting a certificate
/// from Let's Encrypt (only for Glob patterns)
pub on_demand_tls_ask: Option<String>,
/// User-agents to block.
/// A client request is blocked if its user-agent contains any of the
/// strings.
pub block_user_agents: Vec<String>,
/// Number of calls in progress, used to deprioritize slow back-ends
pub calls_in_progress: atomic::AtomicI64,
/// Time of last call, used for round-robin selection
@ -147,12 +152,14 @@ impl ProxyEntry {
let mut add_headers = vec![];
let mut redirects = vec![];
let mut on_demand_tls_ask: Option<String> = None;
let mut block_user_agents = vec![];
for mid in middleware.into_iter() {
// LocalLb and GlobalLb are handled in the parent function
match mid {
ConfigTag::AddHeader(k, v) => add_headers.push((k.to_string(), v.clone())),
ConfigTag::AddRedirect(m, r, c) => redirects.push(((*m).clone(), (*r).clone(), *c)),
ConfigTag::OnDemandTlsAsk(url) => on_demand_tls_ask = Some(url.to_string()),
ConfigTag::BlockUserAgent(s) => block_user_agents.push(s.clone()),
ConfigTag::LocalLb | ConfigTag::GlobalLb => (),
};
}
@ -171,6 +178,7 @@ impl ProxyEntry {
add_headers,
redirects,
on_demand_tls_ask,
block_user_agents,
// internal
last_call: atomic::AtomicI64::from(0),
calls_in_progress: atomic::AtomicI64::from(0),
@ -253,6 +261,7 @@ enum ConfigTag<'a> {
AddHeader(&'a str, String),
AddRedirect(UrlPrefix, UrlPrefix, u16),
OnDemandTlsAsk(&'a str),
BlockUserAgent(String),
GlobalLb,
LocalLb,
}
@ -330,6 +339,9 @@ fn parse_tricot_tags(tag: &str) -> Option<ParsedTag> {
["tricot-on-demand-tls-ask", url, ..] => {
Some(ParsedTag::Middleware(ConfigTag::OnDemandTlsAsk(url)))
}
["tricot-block-user-agent", elts @ ..] => {
Some(ParsedTag::Middleware(ConfigTag::BlockUserAgent(elts.join(" "))))
}
["tricot-global-lb", ..] => Some(ParsedTag::Middleware(ConfigTag::GlobalLb)),
["tricot-local-lb", ..] => Some(ParsedTag::Middleware(ConfigTag::LocalLb)),
_ => None,