add support for vhost/dns style bucket access #154

Merged
lx merged 6 commits from trinity-1686a/garage:vhost-style into main 2021-11-16 14:41:41 +00:00
12 changed files with 196 additions and 125 deletions

2
Cargo.lock generated
View File

@ -426,6 +426,7 @@ dependencies = [
"http-range",
"httpdate 0.3.2",
"hyper",
"idna",
"log",
"md-5",
"percent-encoding",
@ -539,7 +540,6 @@ dependencies = [
"garage_util",
"http",
"hyper",
"idna",
"log",
"percent-encoding",
]

View File

@ -660,6 +660,7 @@ in
http_range = rustPackages."registry+https://github.com/rust-lang/crates.io-index".http-range."0.1.4" { inherit profileName; };
httpdate = rustPackages."registry+https://github.com/rust-lang/crates.io-index".httpdate."0.3.2" { inherit profileName; };
hyper = rustPackages."registry+https://github.com/rust-lang/crates.io-index".hyper."0.14.13" { inherit profileName; };
idna = rustPackages."registry+https://github.com/rust-lang/crates.io-index".idna."0.2.3" { inherit profileName; };
log = rustPackages."registry+https://github.com/rust-lang/crates.io-index".log."0.4.14" { inherit profileName; };
md5 = rustPackages."registry+https://github.com/rust-lang/crates.io-index".md-5."0.9.1" { inherit profileName; };
percent_encoding = rustPackages."registry+https://github.com/rust-lang/crates.io-index".percent-encoding."2.1.0" { inherit profileName; };
@ -788,7 +789,6 @@ in
garage_util = rustPackages."unknown".garage_util."0.4.0" { inherit profileName; };
http = rustPackages."registry+https://github.com/rust-lang/crates.io-index".http."0.2.5" { inherit profileName; };
hyper = rustPackages."registry+https://github.com/rust-lang/crates.io-index".hyper."0.14.13" { inherit profileName; };
idna = rustPackages."registry+https://github.com/rust-lang/crates.io-index".idna."0.2.3" { inherit profileName; };
log = rustPackages."registry+https://github.com/rust-lang/crates.io-index".log."0.4.14" { inherit profileName; };
percent_encoding = rustPackages."registry+https://github.com/rust-lang/crates.io-index".percent-encoding."2.1.0" { inherit profileName; };
};

View File

@ -83,6 +83,7 @@ bootstrap_peers = []
[s3_api]
s3_region = "garage"
api_bind_addr = "[::]:3900"
root_domain = ".s3.garage"
[s3_web]
bind_addr = "[::]:3902"

View File

@ -48,6 +48,7 @@ bootstrap_peers = []
[s3_api]
s3_region = "garage"
api_bind_addr = "[::]:3900"
root_domain = ".s3.garage"
[s3_web]
bind_addr = "[::]:3902"

View File

@ -30,6 +30,7 @@ sled_flush_every_ms = 2000
[s3_api]
api_bind_addr = "[::]:3900"
s3_region = "garage"
root_domain = ".s3.garage"
[s3_web]
bind_addr = "[::]:3902"
@ -176,6 +177,15 @@ Garage will accept S3 API calls that are targetted to the S3 region defined here
API calls targetted to other regions will fail with a AuthorizationHeaderMalformed error
message that redirects the client to the correct region.
#### `root_domain`
The optionnal suffix to access bucket using vhost-style in addition to path-style request.
Note path-style requests are always enabled, whether or not vhost-style is configured.
Configuring vhost-style S3 required a wildcard DNS entry, and possibly a wildcard TLS certificate,
but might be required by softwares not supporting path-style requests.
If `root_domain` is `s3.garage.eu`, a bucket called `my-bucket` can be interacted with
using the hostname `my-bucket.s3.garage.eu`.
## The `[s3_web]` section

View File

@ -24,6 +24,7 @@ crypto-mac = "0.10"
err-derive = "0.3"
hex = "0.4"
hmac = "0.10"
idna = "0.2"
log = "0.4"
md-5 = "0.9"
sha2 = "0.9"

View File

@ -3,6 +3,7 @@ use std::net::SocketAddr;
use std::sync::Arc;
use futures::future::Future;
use hyper::header;
use hyper::server::conn::AddrStream;
use hyper::service::{make_service_fn, service_fn};
use hyper::{Body, Method, Request, Response, Server};
@ -14,6 +15,7 @@ use garage_model::garage::Garage;
use crate::error::*;
use crate::signature::check_signature;
use crate::helpers::*;
use crate::s3_bucket::*;
use crate::s3_copy::*;
use crate::s3_delete::*;
@ -82,11 +84,27 @@ async fn handler_inner(garage: Arc<Garage>, req: Request<Body>) -> Result<Respon
let path = req.uri().path().to_string();
let path = percent_encoding::percent_decode_str(&path).decode_utf8()?;
let (api_key, content_sha256) = check_signature(&garage, &req).await?;
if path == "/" {
let authority = req
.headers()
.get(header::HOST)
.ok_or_else(|| Error::BadRequest("HOST header required".to_owned()))?
.to_str()?;
let host = authority_to_host(authority)?;
let bucket = garage
.config
.s3_api
.root_domain
.as_ref()
.and_then(|root_domain| host_to_bucket(&host, root_domain));
if path == "/" && bucket.is_none() {
return handle_list_buckets(&api_key);
}
let (bucket, key) = parse_bucket_key(&path)?;
let (bucket, key) = parse_bucket_key(&path, bucket)?;
let allowed = match req.method() {
&Method::HEAD | &Method::GET => api_key.allow_read(bucket),
_ => api_key.allow_write(bucket),
@ -137,7 +155,7 @@ async fn handler_inner(garage: Arc<Garage>, req: Request<Body>) -> Result<Respon
let copy_source = req.headers().get("x-amz-copy-source").unwrap().to_str()?;
let copy_source =
percent_encoding::percent_decode_str(copy_source).decode_utf8()?;
let (source_bucket, source_key) = parse_bucket_key(&copy_source)?;
let (source_bucket, source_key) = parse_bucket_key(&copy_source, None)?;
if !api_key.allow_read(source_bucket) {
return Err(Error::Forbidden(format!(
"Reading from bucket {} not allowed for this key",
@ -245,13 +263,25 @@ async fn handler_inner(garage: Arc<Garage>, req: Request<Body>) -> Result<Respon
}
}
/// Extract the bucket name and the key name from an HTTP path
/// Extract the bucket name and the key name from an HTTP path and possibly a bucket provided in
/// the host header of the request
///
/// S3 internally manages only buckets and keys. This function splits
/// an HTTP path to get the corresponding bucket name and key.
fn parse_bucket_key(path: &str) -> Result<(&str, Option<&str>), Error> {
fn parse_bucket_key<'a>(
path: &'a str,
host_bucket: Option<&'a str>,
) -> Result<(&'a str, Option<&'a str>), Error> {
let path = path.trim_start_matches('/');
if let Some(bucket) = host_bucket {
if !path.is_empty() {
return Ok((bucket, Some(path)));
} else {
return Ok((bucket, None));
}
}
let (bucket, key) = match path.find('/') {
Some(i) => {
let key = &path[i + 1..];
@ -275,7 +305,7 @@ mod tests {
#[test]
fn parse_bucket_containing_a_key() -> Result<(), Error> {
let (bucket, key) = parse_bucket_key("/my_bucket/a/super/file.jpg")?;
let (bucket, key) = parse_bucket_key("/my_bucket/a/super/file.jpg", None)?;
assert_eq!(bucket, "my_bucket");
assert_eq!(key.expect("key must be set"), "a/super/file.jpg");
Ok(())
@ -283,10 +313,10 @@ mod tests {
#[test]
fn parse_bucket_containing_no_key() -> Result<(), Error> {
let (bucket, key) = parse_bucket_key("/my_bucket/")?;
let (bucket, key) = parse_bucket_key("/my_bucket/", None)?;
assert_eq!(bucket, "my_bucket");
assert!(key.is_none());
let (bucket, key) = parse_bucket_key("/my_bucket")?;
let (bucket, key) = parse_bucket_key("/my_bucket", None)?;
assert_eq!(bucket, "my_bucket");
assert!(key.is_none());
Ok(())
@ -294,11 +324,30 @@ mod tests {
#[test]
fn parse_bucket_containing_no_bucket() {
let parsed = parse_bucket_key("");
let parsed = parse_bucket_key("", None);
assert!(parsed.is_err());
let parsed = parse_bucket_key("/");
let parsed = parse_bucket_key("/", None);
assert!(parsed.is_err());
let parsed = parse_bucket_key("////");
let parsed = parse_bucket_key("////", None);
assert!(parsed.is_err());
}
#[test]
fn parse_bucket_with_vhost_and_key() -> Result<(), Error> {
let (bucket, key) = parse_bucket_key("/a/super/file.jpg", Some("my-bucket"))?;
assert_eq!(bucket, "my-bucket");
assert_eq!(key.expect("key must be set"), "a/super/file.jpg");
Ok(())
}
#[test]
fn parse_bucket_with_vhost_no_key() -> Result<(), Error> {
let (bucket, key) = parse_bucket_key("", Some("my-bucket"))?;
assert_eq!(bucket, "my-bucket");
assert!(key.is_none());
let (bucket, key) = parse_bucket_key("/", Some("my-bucket"))?;
assert_eq!(bucket, "my-bucket");
assert!(key.is_none());
Ok(())
}
}

114
src/api/helpers.rs Normal file
View File

@ -0,0 +1,114 @@
use crate::Error;
use idna::domain_to_unicode;
/// Host to bucket
///
/// Convert a host, like "bucket.garage-site.tld" to the corresponding bucket "bucket",
/// considering that ".garage-site.tld" is the "root domain". For domains not matching
/// the provided root domain, no bucket is returned
/// This behavior has been chosen to follow AWS S3 semantic.
pub fn host_to_bucket<'a>(host: &'a str, root: &str) -> Option<&'a str> {
let root = root.trim_start_matches('.');
let label_root = root.chars().filter(|c| c == &'.').count() + 1;
let root = root.rsplit('.');
let mut host = host.rsplitn(label_root + 1, '.');
for root_part in root {
let host_part = host.next()?;
if root_part != host_part {
return None;
}
}
host.next()
}
/// Extract host from the authority section given by the HTTP host header
///
/// The HTTP host contains both a host and a port.
/// Extracting the port is more complex than just finding the colon (:) symbol due to IPv6
/// We do not use the collect pattern as there is no way in std rust to collect over a stack allocated value
/// check here: https://docs.rs/collect_slice/1.2.0/collect_slice/
pub fn authority_to_host(authority: &str) -> Result<String, Error> {
let mut iter = authority.chars().enumerate();
let (_, first_char) = iter
.next()
.ok_or_else(|| Error::BadRequest("Authority is empty".to_string()))?;
let split = match first_char {
'[' => {
let mut iter = iter.skip_while(|(_, c)| c != &']');
match iter.next() {
Some((_, ']')) => iter.next(),
_ => {
return Err(Error::BadRequest(format!(
"Authority {} has an illegal format",
authority
)))
}
}
}
_ => iter.find(|(_, c)| *c == ':'),
};
let authority = match split {
Some((i, ':')) => Ok(&authority[..i]),
None => Ok(authority),
Some((_, _)) => Err(Error::BadRequest(format!(
"Authority {} has an illegal format",
authority
))),
};
authority.map(|h| domain_to_unicode(h).0)
}
#[cfg(test)]
mod tests {
use super::*;
#[test]
fn authority_to_host_with_port() -> Result<(), Error> {
let domain = authority_to_host("[::1]:3902")?;
assert_eq!(domain, "[::1]");
let domain2 = authority_to_host("garage.tld:65200")?;
assert_eq!(domain2, "garage.tld");
let domain3 = authority_to_host("127.0.0.1:80")?;
assert_eq!(domain3, "127.0.0.1");
Ok(())
}
#[test]
fn authority_to_host_without_port() -> Result<(), Error> {
let domain = authority_to_host("[::1]")?;
assert_eq!(domain, "[::1]");
let domain2 = authority_to_host("garage.tld")?;
assert_eq!(domain2, "garage.tld");
let domain3 = authority_to_host("127.0.0.1")?;
assert_eq!(domain3, "127.0.0.1");
assert!(authority_to_host("[").is_err());
assert!(authority_to_host("[hello").is_err());
Ok(())
}
#[test]
fn host_to_bucket_test() {
assert_eq!(
host_to_bucket("john.doe.garage.tld", ".garage.tld").unwrap(),
"john.doe"
);
assert_eq!(
host_to_bucket("john.doe.garage.tld", "garage.tld").unwrap(),
"john.doe"
);
assert_eq!(host_to_bucket("john.doe.com", "garage.tld"), None);
assert_eq!(host_to_bucket("john.doe.com", ".garage.tld"), None);
assert_eq!(host_to_bucket("garage.tld", "garage.tld"), None);
assert_eq!(host_to_bucket("garage.tld", ".garage.tld"), None);
assert_eq!(host_to_bucket("not-garage.tld", "garage.tld"), None);
assert_eq!(host_to_bucket("not-garage.tld", ".garage.tld"), None);
}
}

View File

@ -12,6 +12,7 @@ pub use api_server::run_api_server;
mod signature;
pub mod helpers;
mod s3_bucket;
mod s3_copy;
mod s3_delete;

View File

@ -68,6 +68,9 @@ pub struct ApiConfig {
pub api_bind_addr: SocketAddr,
/// S3 region to use
pub s3_region: String,
/// Suffix to remove from domain name to find bucket. If None,
/// vhost-style S3 request are disabled
pub root_domain: Option<String>,
}
/// Configuration for serving files as normal web server

View File

@ -19,7 +19,6 @@ garage_util = { version = "0.4.0", path = "../util" }
garage_table = { version = "0.4.0", path = "../table" }
err-derive = "0.3"
idna = "0.2"
log = "0.4"
percent-encoding = "2.1.0"

View File

@ -9,9 +9,8 @@ use hyper::{
Body, Method, Request, Response, Server,
};
use idna::domain_to_unicode;
use crate::error::*;
use garage_api::helpers::{authority_to_host, host_to_bucket};
use garage_api::s3_get::{handle_get, handle_head};
use garage_model::bucket_table::*;
use garage_model::garage::Garage;
@ -75,9 +74,9 @@ async fn serve_file(garage: Arc<Garage>, req: Request<Body>) -> Result<Response<
.to_str()?;
// Get bucket
let (host, _) = domain_to_unicode(authority_to_host(authority)?);
let host = authority_to_host(authority)?;
let root = &garage.config.s3_web.root_domain;
let bucket = host_to_bucket(&host, root);
let bucket = host_to_bucket(&host, root).unwrap_or(&host);
// Check bucket is exposed as a website
let bucket_desc = garage
@ -108,65 +107,6 @@ async fn serve_file(garage: Arc<Garage>, req: Request<Body>) -> Result<Response<
Ok(res)
}
/// Extract host from the authority section given by the HTTP host header
///
/// The HTTP host contains both a host and a port.
/// Extracting the port is more complex than just finding the colon (:) symbol due to IPv6
/// We do not use the collect pattern as there is no way in std rust to collect over a stack allocated value
/// check here: https://docs.rs/collect_slice/1.2.0/collect_slice/
fn authority_to_host(authority: &str) -> Result<&str, Error> {
let mut iter = authority.chars().enumerate();
let (_, first_char) = iter
.next()
.ok_or_else(|| Error::BadRequest("Authority is empty".to_string()))?;
let split = match first_char {
'[' => {
let mut iter = iter.skip_while(|(_, c)| c != &']');
match iter.next() {
Some((_, ']')) => iter.next(),
_ => {
return Err(Error::BadRequest(format!(
"Authority {} has an illegal format",
authority
)))
}
}
}
_ => iter.find(|(_, c)| *c == ':'),
};
match split {
Some((i, ':')) => Ok(&authority[..i]),
None => Ok(authority),
Some((_, _)) => Err(Error::BadRequest(format!(
"Authority {} has an illegal format",
authority
))),
}
}
/// Host to bucket
///
/// Convert a host, like "bucket.garage-site.tld" or "john.doe.com"
/// to the corresponding bucket, resp. "bucket" and "john.doe.com"
/// considering that ".garage-site.tld" is the "root domain".
/// This behavior has been chosen to follow AWS S3 semantic.
fn host_to_bucket<'a>(host: &'a str, root: &str) -> &'a str {
if root.len() >= host.len() || !host.ends_with(root) {
return host;
}
let len_diff = host.len() - root.len();
let missing_starting_dot = !root.starts_with('.');
let cursor = if missing_starting_dot {
len_diff - 1
} else {
len_diff
};
&host[..cursor]
}
/// Path to key
///
/// Convert the provided path to the internal key
@ -200,54 +140,6 @@ fn path_to_key<'a>(path: &'a str, index: &str) -> Result<Cow<'a, str>, Error> {
mod tests {
use super::*;
#[test]
fn authority_to_host_with_port() -> Result<(), Error> {
let domain = authority_to_host("[::1]:3902")?;
assert_eq!(domain, "[::1]");
let domain2 = authority_to_host("garage.tld:65200")?;
assert_eq!(domain2, "garage.tld");
let domain3 = authority_to_host("127.0.0.1:80")?;
assert_eq!(domain3, "127.0.0.1");
Ok(())
}
#[test]
fn authority_to_host_without_port() -> Result<(), Error> {
let domain = authority_to_host("[::1]")?;
assert_eq!(domain, "[::1]");
let domain2 = authority_to_host("garage.tld")?;
assert_eq!(domain2, "garage.tld");
let domain3 = authority_to_host("127.0.0.1")?;
assert_eq!(domain3, "127.0.0.1");
assert!(authority_to_host("[").is_err());
assert!(authority_to_host("[hello").is_err());
Ok(())
}
#[test]
fn host_to_bucket_test() {
assert_eq!(
host_to_bucket("john.doe.garage.tld", ".garage.tld"),
"john.doe"
);
assert_eq!(
host_to_bucket("john.doe.garage.tld", "garage.tld"),
"john.doe"
);
assert_eq!(host_to_bucket("john.doe.com", "garage.tld"), "john.doe.com");
assert_eq!(
host_to_bucket("john.doe.com", ".garage.tld"),
"john.doe.com"
);
assert_eq!(host_to_bucket("garage.tld", "garage.tld"), "garage.tld");
assert_eq!(host_to_bucket("garage.tld", ".garage.tld"), "garage.tld");
}
#[test]
fn path_to_key_test() -> Result<(), Error> {
assert_eq!(path_to_key("/file%20.jpg", "index.html")?, "file .jpg");