From 9c58ec28d3b23accf782c6eb005b7c3966ec6314 Mon Sep 17 00:00:00 2001 From: Trinity Pointard Date: Thu, 11 Nov 2021 11:26:02 +0100 Subject: [PATCH] add support for vhost-style s3 bucket --- Cargo.lock | 2 +- src/api/Cargo.toml | 1 + src/api/api_server.rs | 110 ++++++++++++++++++++++++++++++++++++---- src/api/helpers.rs | 114 ++++++++++++++++++++++++++++++++++++++++++ src/api/lib.rs | 1 + src/util/config.rs | 3 ++ src/web/Cargo.toml | 1 - src/web/web_server.rs | 114 ++---------------------------------------- 8 files changed, 224 insertions(+), 122 deletions(-) create mode 100644 src/api/helpers.rs diff --git a/Cargo.lock b/Cargo.lock index 66cf79c2..58a28ab3 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -426,6 +426,7 @@ dependencies = [ "http-range", "httpdate 0.3.2", "hyper", + "idna", "log", "md-5", "percent-encoding", @@ -539,7 +540,6 @@ dependencies = [ "garage_util", "http", "hyper", - "idna", "log", "percent-encoding", ] diff --git a/src/api/Cargo.toml b/src/api/Cargo.toml index ebbe7c0d..f06c67e4 100644 --- a/src/api/Cargo.toml +++ b/src/api/Cargo.toml @@ -24,6 +24,7 @@ crypto-mac = "0.10" err-derive = "0.3" hex = "0.4" hmac = "0.10" +idna = "0.2" log = "0.4" md-5 = "0.9" sha2 = "0.9" diff --git a/src/api/api_server.rs b/src/api/api_server.rs index d51b5a28..2217be1a 100644 --- a/src/api/api_server.rs +++ b/src/api/api_server.rs @@ -3,6 +3,7 @@ use std::net::SocketAddr; use std::sync::Arc; use futures::future::Future; +use hyper::header; use hyper::server::conn::AddrStream; use hyper::service::{make_service_fn, service_fn}; use hyper::{Body, Method, Request, Response, Server}; @@ -14,6 +15,7 @@ use garage_model::garage::Garage; use crate::error::*; use crate::signature::check_signature; +use crate::helpers::*; use crate::s3_bucket::*; use crate::s3_copy::*; use crate::s3_delete::*; @@ -86,7 +88,20 @@ async fn handler_inner(garage: Arc, req: Request) -> Result api_key.allow_read(bucket), _ => api_key.allow_write(bucket), @@ -137,7 +152,7 @@ async fn handler_inner(garage: Arc, req: Request) -> Result, req: Request) -> Result Result<(&str, Option<&str>), Error> { +fn parse_bucket_key<'a>( + path: &'a str, + host: Option<&'a str>, + root: Option<&str>, +) -> Result<(&'a str, Option<&'a str>), Error> { let path = path.trim_start_matches('/'); + if host.and(root).is_some() { + if let Some(bucket) = host_to_bucket(host.unwrap(), root.unwrap()) { + if !path.is_empty() { + return Ok((bucket, Some(path))); + } else { + return Ok((bucket, None)); + } + } + } + let (bucket, key) = match path.find('/') { Some(i) => { let key = &path[i + 1..]; @@ -275,7 +304,7 @@ mod tests { #[test] fn parse_bucket_containing_a_key() -> Result<(), Error> { - let (bucket, key) = parse_bucket_key("/my_bucket/a/super/file.jpg")?; + let (bucket, key) = parse_bucket_key("/my_bucket/a/super/file.jpg", None, None)?; assert_eq!(bucket, "my_bucket"); assert_eq!(key.expect("key must be set"), "a/super/file.jpg"); Ok(()) @@ -283,10 +312,10 @@ mod tests { #[test] fn parse_bucket_containing_no_key() -> Result<(), Error> { - let (bucket, key) = parse_bucket_key("/my_bucket/")?; + let (bucket, key) = parse_bucket_key("/my_bucket/", None, None)?; assert_eq!(bucket, "my_bucket"); assert!(key.is_none()); - let (bucket, key) = parse_bucket_key("/my_bucket")?; + let (bucket, key) = parse_bucket_key("/my_bucket", None, None)?; assert_eq!(bucket, "my_bucket"); assert!(key.is_none()); Ok(()) @@ -294,11 +323,74 @@ mod tests { #[test] fn parse_bucket_containing_no_bucket() { - let parsed = parse_bucket_key(""); + let parsed = parse_bucket_key("", None, None); assert!(parsed.is_err()); - let parsed = parse_bucket_key("/"); + let parsed = parse_bucket_key("/", None, None); assert!(parsed.is_err()); - let parsed = parse_bucket_key("////"); + let parsed = parse_bucket_key("////", None, None); assert!(parsed.is_err()); } + + #[test] + fn parse_bucket_with_vhost_and_key() -> Result<(), Error> { + let (bucket, key) = parse_bucket_key( + "/a/super/file.jpg", + Some("my-bucket.garage.tld"), + Some("garage.tld"), + )?; + assert_eq!(bucket, "my-bucket"); + assert_eq!(key.expect("key must be set"), "a/super/file.jpg"); + + let (bucket, key) = parse_bucket_key( + "/my_bucket/a/super/file.jpg", + Some("not-garage.tld"), + Some("garage.tld"), + )?; + assert_eq!(bucket, "my_bucket"); + assert_eq!(key.expect("key must be set"), "a/super/file.jpg"); + Ok(()) + } + + #[test] + fn parse_bucket_with_vhost_no_key() -> Result<(), Error> { + let (bucket, key) = parse_bucket_key("", Some("my-bucket.garage.tld"), Some("garage.tld"))?; + assert_eq!(bucket, "my-bucket"); + assert!(key.is_none()); + let (bucket, key) = + parse_bucket_key("/", Some("my-bucket.garage.tld"), Some("garage.tld"))?; + assert_eq!(bucket, "my-bucket"); + assert!(key.is_none()); + Ok(()) + } + + #[test] + fn parse_bucket_missmatch_vhost() { + let test_vec = [ + "/my_bucket/a/super/file.jpg", + "/my_bucket/", + "/my_bucket", + "", + "/", + "////", + ]; + let eq = |l, r| match (l, r) { + (Ok(l), Ok(r)) => l == r, + (Err(_), Err(_)) => true, + _ => false, + }; + for test in test_vec { + assert!(eq( + parse_bucket_key(test, None, None), + parse_bucket_key(test, Some("bucket.garage.tld"), None) + )); + assert!(eq( + parse_bucket_key(test, None, None), + parse_bucket_key(test, None, Some("garage.tld")) + )); + assert!(eq( + parse_bucket_key(test, None, None), + parse_bucket_key(test, Some("not-garage.tld"), Some("garage.tld")) + )); + } + } } diff --git a/src/api/helpers.rs b/src/api/helpers.rs new file mode 100644 index 00000000..9ba32537 --- /dev/null +++ b/src/api/helpers.rs @@ -0,0 +1,114 @@ +use crate::Error; +use idna::domain_to_unicode; + +/// Host to bucket +/// +/// Convert a host, like "bucket.garage-site.tld" or "john.doe.com" +/// to the corresponding bucket, resp. "bucket" and "john.doe.com" +/// considering that ".garage-site.tld" is the "root domain". +/// This behavior has been chosen to follow AWS S3 semantic. +pub fn host_to_bucket<'a>(host: &'a str, root: &str) -> Option<&'a str> { + let root = root.trim_start_matches('.'); + let label_root = root.chars().filter(|c| c == &'.').count() + 1; + let root = root.rsplit('.'); + let mut host = host.rsplitn(label_root + 1, '.'); + for root_part in root { + let host_part = host.next()?; + if root_part != host_part { + return None; + } + } + host.next() +} + +/// Extract host from the authority section given by the HTTP host header +/// +/// The HTTP host contains both a host and a port. +/// Extracting the port is more complex than just finding the colon (:) symbol due to IPv6 +/// We do not use the collect pattern as there is no way in std rust to collect over a stack allocated value +/// check here: https://docs.rs/collect_slice/1.2.0/collect_slice/ +pub fn authority_to_host(authority: &str) -> Result { + let mut iter = authority.chars().enumerate(); + let (_, first_char) = iter + .next() + .ok_or_else(|| Error::BadRequest("Authority is empty".to_string()))?; + + let split = match first_char { + '[' => { + let mut iter = iter.skip_while(|(_, c)| c != &']'); + match iter.next() { + Some((_, ']')) => iter.next(), + _ => { + return Err(Error::BadRequest(format!( + "Authority {} has an illegal format", + authority + ))) + } + } + } + _ => iter.find(|(_, c)| *c == ':'), + }; + + let authority = match split { + Some((i, ':')) => Ok(&authority[..i]), + None => Ok(authority), + Some((_, _)) => Err(Error::BadRequest(format!( + "Authority {} has an illegal format", + authority + ))), + }; + authority.map(|h| domain_to_unicode(h).0) +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn authority_to_host_with_port() -> Result<(), Error> { + let domain = authority_to_host("[::1]:3902")?; + assert_eq!(domain, "[::1]"); + let domain2 = authority_to_host("garage.tld:65200")?; + assert_eq!(domain2, "garage.tld"); + let domain3 = authority_to_host("127.0.0.1:80")?; + assert_eq!(domain3, "127.0.0.1"); + Ok(()) + } + + #[test] + fn authority_to_host_without_port() -> Result<(), Error> { + let domain = authority_to_host("[::1]")?; + assert_eq!(domain, "[::1]"); + let domain2 = authority_to_host("garage.tld")?; + assert_eq!(domain2, "garage.tld"); + let domain3 = authority_to_host("127.0.0.1")?; + assert_eq!(domain3, "127.0.0.1"); + assert!(authority_to_host("[").is_err()); + assert!(authority_to_host("[hello").is_err()); + Ok(()) + } + + #[test] + fn host_to_bucket_test() { + assert_eq!( + host_to_bucket("john.doe.garage.tld", ".garage.tld").unwrap(), + "john.doe" + ); + + assert_eq!( + host_to_bucket("john.doe.garage.tld", "garage.tld").unwrap(), + "john.doe" + ); + + assert_eq!(host_to_bucket("john.doe.com", "garage.tld"), None); + + assert_eq!(host_to_bucket("john.doe.com", ".garage.tld"), None); + + assert_eq!(host_to_bucket("garage.tld", "garage.tld"), None); + + assert_eq!(host_to_bucket("garage.tld", ".garage.tld"), None); + + assert_eq!(host_to_bucket("not-garage.tld", "garage.tld"), None); + assert_eq!(host_to_bucket("not-garage.tld", ".garage.tld"), None); + } +} diff --git a/src/api/lib.rs b/src/api/lib.rs index 9a23c231..ab8e3dd1 100644 --- a/src/api/lib.rs +++ b/src/api/lib.rs @@ -12,6 +12,7 @@ pub use api_server::run_api_server; mod signature; +pub mod helpers; mod s3_bucket; mod s3_copy; mod s3_delete; diff --git a/src/util/config.rs b/src/util/config.rs index 0e4c76ae..33802012 100644 --- a/src/util/config.rs +++ b/src/util/config.rs @@ -68,6 +68,9 @@ pub struct ApiConfig { pub api_bind_addr: SocketAddr, /// S3 region to use pub s3_region: String, + /// Suffix to remove from domain name to find bucket. If None, + /// vhost-style S3 request are disabled + pub root_domain: Option, } /// Configuration for serving files as normal web server diff --git a/src/web/Cargo.toml b/src/web/Cargo.toml index f5b40370..634ce282 100644 --- a/src/web/Cargo.toml +++ b/src/web/Cargo.toml @@ -19,7 +19,6 @@ garage_util = { version = "0.4.0", path = "../util" } garage_table = { version = "0.4.0", path = "../table" } err-derive = "0.3" -idna = "0.2" log = "0.4" percent-encoding = "2.1.0" diff --git a/src/web/web_server.rs b/src/web/web_server.rs index bff9e71c..e9c5039d 100644 --- a/src/web/web_server.rs +++ b/src/web/web_server.rs @@ -9,9 +9,8 @@ use hyper::{ Body, Method, Request, Response, Server, }; -use idna::domain_to_unicode; - use crate::error::*; +use garage_api::helpers::{authority_to_host, host_to_bucket}; use garage_api::s3_get::{handle_get, handle_head}; use garage_model::bucket_table::*; use garage_model::garage::Garage; @@ -75,9 +74,9 @@ async fn serve_file(garage: Arc, req: Request) -> Result, req: Request) -> Result Result<&str, Error> { - let mut iter = authority.chars().enumerate(); - let (_, first_char) = iter - .next() - .ok_or_else(|| Error::BadRequest("Authority is empty".to_string()))?; - - let split = match first_char { - '[' => { - let mut iter = iter.skip_while(|(_, c)| c != &']'); - match iter.next() { - Some((_, ']')) => iter.next(), - _ => { - return Err(Error::BadRequest(format!( - "Authority {} has an illegal format", - authority - ))) - } - } - } - _ => iter.find(|(_, c)| *c == ':'), - }; - - match split { - Some((i, ':')) => Ok(&authority[..i]), - None => Ok(authority), - Some((_, _)) => Err(Error::BadRequest(format!( - "Authority {} has an illegal format", - authority - ))), - } -} - -/// Host to bucket -/// -/// Convert a host, like "bucket.garage-site.tld" or "john.doe.com" -/// to the corresponding bucket, resp. "bucket" and "john.doe.com" -/// considering that ".garage-site.tld" is the "root domain". -/// This behavior has been chosen to follow AWS S3 semantic. -fn host_to_bucket<'a>(host: &'a str, root: &str) -> &'a str { - if root.len() >= host.len() || !host.ends_with(root) { - return host; - } - - let len_diff = host.len() - root.len(); - let missing_starting_dot = !root.starts_with('.'); - let cursor = if missing_starting_dot { - len_diff - 1 - } else { - len_diff - }; - &host[..cursor] -} - /// Path to key /// /// Convert the provided path to the internal key @@ -200,54 +140,6 @@ fn path_to_key<'a>(path: &'a str, index: &str) -> Result, Error> { mod tests { use super::*; - #[test] - fn authority_to_host_with_port() -> Result<(), Error> { - let domain = authority_to_host("[::1]:3902")?; - assert_eq!(domain, "[::1]"); - let domain2 = authority_to_host("garage.tld:65200")?; - assert_eq!(domain2, "garage.tld"); - let domain3 = authority_to_host("127.0.0.1:80")?; - assert_eq!(domain3, "127.0.0.1"); - Ok(()) - } - - #[test] - fn authority_to_host_without_port() -> Result<(), Error> { - let domain = authority_to_host("[::1]")?; - assert_eq!(domain, "[::1]"); - let domain2 = authority_to_host("garage.tld")?; - assert_eq!(domain2, "garage.tld"); - let domain3 = authority_to_host("127.0.0.1")?; - assert_eq!(domain3, "127.0.0.1"); - assert!(authority_to_host("[").is_err()); - assert!(authority_to_host("[hello").is_err()); - Ok(()) - } - - #[test] - fn host_to_bucket_test() { - assert_eq!( - host_to_bucket("john.doe.garage.tld", ".garage.tld"), - "john.doe" - ); - - assert_eq!( - host_to_bucket("john.doe.garage.tld", "garage.tld"), - "john.doe" - ); - - assert_eq!(host_to_bucket("john.doe.com", "garage.tld"), "john.doe.com"); - - assert_eq!( - host_to_bucket("john.doe.com", ".garage.tld"), - "john.doe.com" - ); - - assert_eq!(host_to_bucket("garage.tld", "garage.tld"), "garage.tld"); - - assert_eq!(host_to_bucket("garage.tld", ".garage.tld"), "garage.tld"); - } - #[test] fn path_to_key_test() -> Result<(), Error> { assert_eq!(path_to_key("/file%20.jpg", "index.html")?, "file .jpg");