diff --git a/Cargo.lock b/Cargo.lock index 66cf79c2..58a28ab3 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -426,6 +426,7 @@ dependencies = [ "http-range", "httpdate 0.3.2", "hyper", + "idna", "log", "md-5", "percent-encoding", @@ -539,7 +540,6 @@ dependencies = [ "garage_util", "http", "hyper", - "idna", "log", "percent-encoding", ] diff --git a/Cargo.nix b/Cargo.nix index f8b2c276..5307d1e2 100644 --- a/Cargo.nix +++ b/Cargo.nix @@ -660,6 +660,7 @@ in http_range = rustPackages."registry+https://github.com/rust-lang/crates.io-index".http-range."0.1.4" { inherit profileName; }; httpdate = rustPackages."registry+https://github.com/rust-lang/crates.io-index".httpdate."0.3.2" { inherit profileName; }; hyper = rustPackages."registry+https://github.com/rust-lang/crates.io-index".hyper."0.14.13" { inherit profileName; }; + idna = rustPackages."registry+https://github.com/rust-lang/crates.io-index".idna."0.2.3" { inherit profileName; }; log = rustPackages."registry+https://github.com/rust-lang/crates.io-index".log."0.4.14" { inherit profileName; }; md5 = rustPackages."registry+https://github.com/rust-lang/crates.io-index".md-5."0.9.1" { inherit profileName; }; percent_encoding = rustPackages."registry+https://github.com/rust-lang/crates.io-index".percent-encoding."2.1.0" { inherit profileName; }; @@ -788,7 +789,6 @@ in garage_util = rustPackages."unknown".garage_util."0.4.0" { inherit profileName; }; http = rustPackages."registry+https://github.com/rust-lang/crates.io-index".http."0.2.5" { inherit profileName; }; hyper = rustPackages."registry+https://github.com/rust-lang/crates.io-index".hyper."0.14.13" { inherit profileName; }; - idna = rustPackages."registry+https://github.com/rust-lang/crates.io-index".idna."0.2.3" { inherit profileName; }; log = rustPackages."registry+https://github.com/rust-lang/crates.io-index".log."0.4.14" { inherit profileName; }; percent_encoding = rustPackages."registry+https://github.com/rust-lang/crates.io-index".percent-encoding."2.1.0" { 
inherit profileName; }; }; diff --git a/doc/book/src/cookbook/real_world.md b/doc/book/src/cookbook/real_world.md index 70262cc3..7864274c 100644 --- a/doc/book/src/cookbook/real_world.md +++ b/doc/book/src/cookbook/real_world.md @@ -83,6 +83,7 @@ bootstrap_peers = [] [s3_api] s3_region = "garage" api_bind_addr = "[::]:3900" +root_domain = ".s3.garage" [s3_web] bind_addr = "[::]:3902" diff --git a/doc/book/src/quick_start/index.md b/doc/book/src/quick_start/index.md index 9ff8be7f..8de3fd8b 100644 --- a/doc/book/src/quick_start/index.md +++ b/doc/book/src/quick_start/index.md @@ -48,6 +48,7 @@ bootstrap_peers = [] [s3_api] s3_region = "garage" api_bind_addr = "[::]:3900" +root_domain = ".s3.garage" [s3_web] bind_addr = "[::]:3902" diff --git a/doc/book/src/reference_manual/configuration.md b/doc/book/src/reference_manual/configuration.md index e165eb87..61f7bcee 100644 --- a/doc/book/src/reference_manual/configuration.md +++ b/doc/book/src/reference_manual/configuration.md @@ -30,6 +30,7 @@ sled_flush_every_ms = 2000 [s3_api] api_bind_addr = "[::]:3900" s3_region = "garage" +root_domain = ".s3.garage" [s3_web] bind_addr = "[::]:3902" @@ -176,6 +177,15 @@ Garage will accept S3 API calls that are targetted to the S3 region defined here API calls targetted to other regions will fail with a AuthorizationHeaderMalformed error message that redirects the client to the correct region. +#### `root_domain` + +The optional suffix used to access buckets with vhost-style requests, in addition to path-style requests. +Note that path-style requests are always enabled, whether or not vhost-style is configured. +Configuring vhost-style S3 requires a wildcard DNS entry, and possibly a wildcard TLS certificate, +but might be required by software not supporting path-style requests. + +If `root_domain` is `s3.garage.eu`, a bucket called `my-bucket` can be interacted with +using the hostname `my-bucket.s3.garage.eu`. 
## The `[s3_web]` section diff --git a/src/api/Cargo.toml b/src/api/Cargo.toml index ebbe7c0d..f06c67e4 100644 --- a/src/api/Cargo.toml +++ b/src/api/Cargo.toml @@ -24,6 +24,7 @@ crypto-mac = "0.10" err-derive = "0.3" hex = "0.4" hmac = "0.10" +idna = "0.2" log = "0.4" md-5 = "0.9" sha2 = "0.9" diff --git a/src/api/api_server.rs b/src/api/api_server.rs index d51b5a28..74142453 100644 --- a/src/api/api_server.rs +++ b/src/api/api_server.rs @@ -3,6 +3,7 @@ use std::net::SocketAddr; use std::sync::Arc; use futures::future::Future; +use hyper::header; use hyper::server::conn::AddrStream; use hyper::service::{make_service_fn, service_fn}; use hyper::{Body, Method, Request, Response, Server}; @@ -14,6 +15,7 @@ use garage_model::garage::Garage; use crate::error::*; use crate::signature::check_signature; +use crate::helpers::*; use crate::s3_bucket::*; use crate::s3_copy::*; use crate::s3_delete::*; @@ -82,11 +84,27 @@ async fn handler_inner(garage: Arc, req: Request) -> Result api_key.allow_read(bucket), _ => api_key.allow_write(bucket), @@ -137,7 +155,7 @@ async fn handler_inner(garage: Arc, req: Request) -> Result, req: Request) -> Result Result<(&str, Option<&str>), Error> { +fn parse_bucket_key<'a>( + path: &'a str, + host_bucket: Option<&'a str>, +) -> Result<(&'a str, Option<&'a str>), Error> { let path = path.trim_start_matches('/'); + if let Some(bucket) = host_bucket { + if !path.is_empty() { + return Ok((bucket, Some(path))); + } else { + return Ok((bucket, None)); + } + } + let (bucket, key) = match path.find('/') { Some(i) => { let key = &path[i + 1..]; @@ -275,7 +305,7 @@ mod tests { #[test] fn parse_bucket_containing_a_key() -> Result<(), Error> { - let (bucket, key) = parse_bucket_key("/my_bucket/a/super/file.jpg")?; + let (bucket, key) = parse_bucket_key("/my_bucket/a/super/file.jpg", None)?; assert_eq!(bucket, "my_bucket"); assert_eq!(key.expect("key must be set"), "a/super/file.jpg"); Ok(()) @@ -283,10 +313,10 @@ mod tests { #[test] fn 
parse_bucket_containing_no_key() -> Result<(), Error> { - let (bucket, key) = parse_bucket_key("/my_bucket/")?; + let (bucket, key) = parse_bucket_key("/my_bucket/", None)?; assert_eq!(bucket, "my_bucket"); assert!(key.is_none()); - let (bucket, key) = parse_bucket_key("/my_bucket")?; + let (bucket, key) = parse_bucket_key("/my_bucket", None)?; assert_eq!(bucket, "my_bucket"); assert!(key.is_none()); Ok(()) @@ -294,11 +324,30 @@ mod tests { #[test] fn parse_bucket_containing_no_bucket() { - let parsed = parse_bucket_key(""); + let parsed = parse_bucket_key("", None); assert!(parsed.is_err()); - let parsed = parse_bucket_key("/"); + let parsed = parse_bucket_key("/", None); assert!(parsed.is_err()); - let parsed = parse_bucket_key("////"); + let parsed = parse_bucket_key("////", None); assert!(parsed.is_err()); } + + #[test] + fn parse_bucket_with_vhost_and_key() -> Result<(), Error> { + let (bucket, key) = parse_bucket_key("/a/super/file.jpg", Some("my-bucket"))?; + assert_eq!(bucket, "my-bucket"); + assert_eq!(key.expect("key must be set"), "a/super/file.jpg"); + Ok(()) + } + + #[test] + fn parse_bucket_with_vhost_no_key() -> Result<(), Error> { + let (bucket, key) = parse_bucket_key("", Some("my-bucket"))?; + assert_eq!(bucket, "my-bucket"); + assert!(key.is_none()); + let (bucket, key) = parse_bucket_key("/", Some("my-bucket"))?; + assert_eq!(bucket, "my-bucket"); + assert!(key.is_none()); + Ok(()) + } } diff --git a/src/api/helpers.rs b/src/api/helpers.rs new file mode 100644 index 00000000..2375d35d --- /dev/null +++ b/src/api/helpers.rs @@ -0,0 +1,114 @@ +use crate::Error; +use idna::domain_to_unicode; + +/// Host to bucket +/// +/// Convert a host, like "bucket.garage-site.tld" to the corresponding bucket "bucket", +/// considering that ".garage-site.tld" is the "root domain". For domains not matching +/// the provided root domain, no bucket is returned +/// This behavior has been chosen to follow AWS S3 semantic. 
+pub fn host_to_bucket<'a>(host: &'a str, root: &str) -> Option<&'a str> { + let root = root.trim_start_matches('.'); + let label_root = root.chars().filter(|c| c == &'.').count() + 1; + let root = root.rsplit('.'); + let mut host = host.rsplitn(label_root + 1, '.'); + for root_part in root { + let host_part = host.next()?; + if root_part != host_part { + return None; + } + } + host.next() +} + +/// Extract host from the authority section given by the HTTP host header +/// +/// The HTTP host contains both a host and a port. +/// Extracting the port is more complex than just finding the colon (:) symbol due to IPv6 +/// We do not use the collect pattern as there is no way in std rust to collect over a stack allocated value +/// check here: https://docs.rs/collect_slice/1.2.0/collect_slice/ +pub fn authority_to_host(authority: &str) -> Result { + let mut iter = authority.chars().enumerate(); + let (_, first_char) = iter + .next() + .ok_or_else(|| Error::BadRequest("Authority is empty".to_string()))?; + + let split = match first_char { + '[' => { + let mut iter = iter.skip_while(|(_, c)| c != &']'); + match iter.next() { + Some((_, ']')) => iter.next(), + _ => { + return Err(Error::BadRequest(format!( + "Authority {} has an illegal format", + authority + ))) + } + } + } + _ => iter.find(|(_, c)| *c == ':'), + }; + + let authority = match split { + Some((i, ':')) => Ok(&authority[..i]), + None => Ok(authority), + Some((_, _)) => Err(Error::BadRequest(format!( + "Authority {} has an illegal format", + authority + ))), + }; + authority.map(|h| domain_to_unicode(h).0) +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn authority_to_host_with_port() -> Result<(), Error> { + let domain = authority_to_host("[::1]:3902")?; + assert_eq!(domain, "[::1]"); + let domain2 = authority_to_host("garage.tld:65200")?; + assert_eq!(domain2, "garage.tld"); + let domain3 = authority_to_host("127.0.0.1:80")?; + assert_eq!(domain3, "127.0.0.1"); + Ok(()) + } + + #[test] + fn 
authority_to_host_without_port() -> Result<(), Error> { + let domain = authority_to_host("[::1]")?; + assert_eq!(domain, "[::1]"); + let domain2 = authority_to_host("garage.tld")?; + assert_eq!(domain2, "garage.tld"); + let domain3 = authority_to_host("127.0.0.1")?; + assert_eq!(domain3, "127.0.0.1"); + assert!(authority_to_host("[").is_err()); + assert!(authority_to_host("[hello").is_err()); + Ok(()) + } + + #[test] + fn host_to_bucket_test() { + assert_eq!( + host_to_bucket("john.doe.garage.tld", ".garage.tld").unwrap(), + "john.doe" + ); + + assert_eq!( + host_to_bucket("john.doe.garage.tld", "garage.tld").unwrap(), + "john.doe" + ); + + assert_eq!(host_to_bucket("john.doe.com", "garage.tld"), None); + + assert_eq!(host_to_bucket("john.doe.com", ".garage.tld"), None); + + assert_eq!(host_to_bucket("garage.tld", "garage.tld"), None); + + assert_eq!(host_to_bucket("garage.tld", ".garage.tld"), None); + + assert_eq!(host_to_bucket("not-garage.tld", "garage.tld"), None); + assert_eq!(host_to_bucket("not-garage.tld", ".garage.tld"), None); + } +} diff --git a/src/api/lib.rs b/src/api/lib.rs index 9a23c231..ab8e3dd1 100644 --- a/src/api/lib.rs +++ b/src/api/lib.rs @@ -12,6 +12,7 @@ pub use api_server::run_api_server; mod signature; +pub mod helpers; mod s3_bucket; mod s3_copy; mod s3_delete; diff --git a/src/util/config.rs b/src/util/config.rs index 0e4c76ae..33802012 100644 --- a/src/util/config.rs +++ b/src/util/config.rs @@ -68,6 +68,9 @@ pub struct ApiConfig { pub api_bind_addr: SocketAddr, /// S3 region to use pub s3_region: String, + /// Suffix to remove from domain name to find bucket. 
If None, + /// vhost-style S3 request are disabled + pub root_domain: Option, } /// Configuration for serving files as normal web server diff --git a/src/web/Cargo.toml b/src/web/Cargo.toml index f5b40370..634ce282 100644 --- a/src/web/Cargo.toml +++ b/src/web/Cargo.toml @@ -19,7 +19,6 @@ garage_util = { version = "0.4.0", path = "../util" } garage_table = { version = "0.4.0", path = "../table" } err-derive = "0.3" -idna = "0.2" log = "0.4" percent-encoding = "2.1.0" diff --git a/src/web/web_server.rs b/src/web/web_server.rs index bff9e71c..e9c5039d 100644 --- a/src/web/web_server.rs +++ b/src/web/web_server.rs @@ -9,9 +9,8 @@ use hyper::{ Body, Method, Request, Response, Server, }; -use idna::domain_to_unicode; - use crate::error::*; +use garage_api::helpers::{authority_to_host, host_to_bucket}; use garage_api::s3_get::{handle_get, handle_head}; use garage_model::bucket_table::*; use garage_model::garage::Garage; @@ -75,9 +74,9 @@ async fn serve_file(garage: Arc, req: Request) -> Result, req: Request) -> Result Result<&str, Error> { - let mut iter = authority.chars().enumerate(); - let (_, first_char) = iter - .next() - .ok_or_else(|| Error::BadRequest("Authority is empty".to_string()))?; - - let split = match first_char { - '[' => { - let mut iter = iter.skip_while(|(_, c)| c != &']'); - match iter.next() { - Some((_, ']')) => iter.next(), - _ => { - return Err(Error::BadRequest(format!( - "Authority {} has an illegal format", - authority - ))) - } - } - } - _ => iter.find(|(_, c)| *c == ':'), - }; - - match split { - Some((i, ':')) => Ok(&authority[..i]), - None => Ok(authority), - Some((_, _)) => Err(Error::BadRequest(format!( - "Authority {} has an illegal format", - authority - ))), - } -} - -/// Host to bucket -/// -/// Convert a host, like "bucket.garage-site.tld" or "john.doe.com" -/// to the corresponding bucket, resp. "bucket" and "john.doe.com" -/// considering that ".garage-site.tld" is the "root domain". 
-/// This behavior has been chosen to follow AWS S3 semantic. -fn host_to_bucket<'a>(host: &'a str, root: &str) -> &'a str { - if root.len() >= host.len() || !host.ends_with(root) { - return host; - } - - let len_diff = host.len() - root.len(); - let missing_starting_dot = !root.starts_with('.'); - let cursor = if missing_starting_dot { - len_diff - 1 - } else { - len_diff - }; - &host[..cursor] -} - /// Path to key /// /// Convert the provided path to the internal key @@ -200,54 +140,6 @@ fn path_to_key<'a>(path: &'a str, index: &str) -> Result, Error> { mod tests { use super::*; - #[test] - fn authority_to_host_with_port() -> Result<(), Error> { - let domain = authority_to_host("[::1]:3902")?; - assert_eq!(domain, "[::1]"); - let domain2 = authority_to_host("garage.tld:65200")?; - assert_eq!(domain2, "garage.tld"); - let domain3 = authority_to_host("127.0.0.1:80")?; - assert_eq!(domain3, "127.0.0.1"); - Ok(()) - } - - #[test] - fn authority_to_host_without_port() -> Result<(), Error> { - let domain = authority_to_host("[::1]")?; - assert_eq!(domain, "[::1]"); - let domain2 = authority_to_host("garage.tld")?; - assert_eq!(domain2, "garage.tld"); - let domain3 = authority_to_host("127.0.0.1")?; - assert_eq!(domain3, "127.0.0.1"); - assert!(authority_to_host("[").is_err()); - assert!(authority_to_host("[hello").is_err()); - Ok(()) - } - - #[test] - fn host_to_bucket_test() { - assert_eq!( - host_to_bucket("john.doe.garage.tld", ".garage.tld"), - "john.doe" - ); - - assert_eq!( - host_to_bucket("john.doe.garage.tld", "garage.tld"), - "john.doe" - ); - - assert_eq!(host_to_bucket("john.doe.com", "garage.tld"), "john.doe.com"); - - assert_eq!( - host_to_bucket("john.doe.com", ".garage.tld"), - "john.doe.com" - ); - - assert_eq!(host_to_bucket("garage.tld", "garage.tld"), "garage.tld"); - - assert_eq!(host_to_bucket("garage.tld", ".garage.tld"), "garage.tld"); - } - #[test] fn path_to_key_test() -> Result<(), Error> { assert_eq!(path_to_key("/file%20.jpg", 
"index.html")?, "file .jpg");