add support for vhost-style s3 bucket

This commit is contained in:
Trinity Pointard 2021-11-11 11:26:02 +01:00
parent 100aad8bf4
commit 4bc8b41cd7
8 changed files with 224 additions and 122 deletions

2
Cargo.lock generated
View file

@ -426,6 +426,7 @@ dependencies = [
"http-range", "http-range",
"httpdate 0.3.2", "httpdate 0.3.2",
"hyper", "hyper",
"idna",
"log", "log",
"md-5", "md-5",
"percent-encoding", "percent-encoding",
@ -539,7 +540,6 @@ dependencies = [
"garage_util", "garage_util",
"http", "http",
"hyper", "hyper",
"idna",
"log", "log",
"percent-encoding", "percent-encoding",
] ]

View file

@ -24,6 +24,7 @@ crypto-mac = "0.10"
err-derive = "0.3" err-derive = "0.3"
hex = "0.4" hex = "0.4"
hmac = "0.10" hmac = "0.10"
idna = "0.2"
log = "0.4" log = "0.4"
md-5 = "0.9" md-5 = "0.9"
sha2 = "0.9" sha2 = "0.9"

View file

@ -3,6 +3,7 @@ use std::net::SocketAddr;
use std::sync::Arc; use std::sync::Arc;
use futures::future::Future; use futures::future::Future;
use hyper::header;
use hyper::server::conn::AddrStream; use hyper::server::conn::AddrStream;
use hyper::service::{make_service_fn, service_fn}; use hyper::service::{make_service_fn, service_fn};
use hyper::{Body, Method, Request, Response, Server}; use hyper::{Body, Method, Request, Response, Server};
@ -14,6 +15,7 @@ use garage_model::garage::Garage;
use crate::error::*; use crate::error::*;
use crate::signature::check_signature; use crate::signature::check_signature;
use crate::helpers::*;
use crate::s3_bucket::*; use crate::s3_bucket::*;
use crate::s3_copy::*; use crate::s3_copy::*;
use crate::s3_delete::*; use crate::s3_delete::*;
@ -86,7 +88,20 @@ async fn handler_inner(garage: Arc<Garage>, req: Request<Body>) -> Result<Respon
return handle_list_buckets(&api_key); return handle_list_buckets(&api_key);
} }
let (bucket, key) = parse_bucket_key(&path)?; let authority = req
.headers()
.get(header::HOST)
.ok_or_else(|| Error::BadRequest("HOST header required".to_owned()))?
.to_str()?;
// Get bucket
let host = authority_to_host(authority)?;
let (bucket, key) = parse_bucket_key(
&path,
Some(&host),
garage.config.s3_api.root_domain.as_deref(),
)?;
let allowed = match req.method() { let allowed = match req.method() {
&Method::HEAD | &Method::GET => api_key.allow_read(bucket), &Method::HEAD | &Method::GET => api_key.allow_read(bucket),
_ => api_key.allow_write(bucket), _ => api_key.allow_write(bucket),
@ -137,7 +152,7 @@ async fn handler_inner(garage: Arc<Garage>, req: Request<Body>) -> Result<Respon
let copy_source = req.headers().get("x-amz-copy-source").unwrap().to_str()?; let copy_source = req.headers().get("x-amz-copy-source").unwrap().to_str()?;
let copy_source = let copy_source =
percent_encoding::percent_decode_str(copy_source).decode_utf8()?; percent_encoding::percent_decode_str(copy_source).decode_utf8()?;
let (source_bucket, source_key) = parse_bucket_key(&copy_source)?; let (source_bucket, source_key) = parse_bucket_key(&copy_source, None, None)?;
if !api_key.allow_read(source_bucket) { if !api_key.allow_read(source_bucket) {
return Err(Error::Forbidden(format!( return Err(Error::Forbidden(format!(
"Reading from bucket {} not allowed for this key", "Reading from bucket {} not allowed for this key",
@ -249,9 +264,23 @@ async fn handler_inner(garage: Arc<Garage>, req: Request<Body>) -> Result<Respon
/// ///
/// S3 internally manages only buckets and keys. This function splits /// S3 internally manages only buckets and keys. This function splits
/// an HTTP path to get the corresponding bucket name and key. /// an HTTP path to get the corresponding bucket name and key.
fn parse_bucket_key(path: &str) -> Result<(&str, Option<&str>), Error> { fn parse_bucket_key<'a>(
path: &'a str,
host: Option<&'a str>,
root: Option<&str>,
) -> Result<(&'a str, Option<&'a str>), Error> {
let path = path.trim_start_matches('/'); let path = path.trim_start_matches('/');
if host.and(root).is_some() {
if let Some(bucket) = host_to_bucket(host.unwrap(), root.unwrap()) {
if !path.is_empty() {
return Ok((bucket, Some(path)));
} else {
return Ok((bucket, None));
}
}
}
let (bucket, key) = match path.find('/') { let (bucket, key) = match path.find('/') {
Some(i) => { Some(i) => {
let key = &path[i + 1..]; let key = &path[i + 1..];
@ -275,7 +304,7 @@ mod tests {
#[test] #[test]
fn parse_bucket_containing_a_key() -> Result<(), Error> { fn parse_bucket_containing_a_key() -> Result<(), Error> {
let (bucket, key) = parse_bucket_key("/my_bucket/a/super/file.jpg")?; let (bucket, key) = parse_bucket_key("/my_bucket/a/super/file.jpg", None, None)?;
assert_eq!(bucket, "my_bucket"); assert_eq!(bucket, "my_bucket");
assert_eq!(key.expect("key must be set"), "a/super/file.jpg"); assert_eq!(key.expect("key must be set"), "a/super/file.jpg");
Ok(()) Ok(())
@ -283,10 +312,10 @@ mod tests {
#[test] #[test]
fn parse_bucket_containing_no_key() -> Result<(), Error> { fn parse_bucket_containing_no_key() -> Result<(), Error> {
let (bucket, key) = parse_bucket_key("/my_bucket/")?; let (bucket, key) = parse_bucket_key("/my_bucket/", None, None)?;
assert_eq!(bucket, "my_bucket"); assert_eq!(bucket, "my_bucket");
assert!(key.is_none()); assert!(key.is_none());
let (bucket, key) = parse_bucket_key("/my_bucket")?; let (bucket, key) = parse_bucket_key("/my_bucket", None, None)?;
assert_eq!(bucket, "my_bucket"); assert_eq!(bucket, "my_bucket");
assert!(key.is_none()); assert!(key.is_none());
Ok(()) Ok(())
@ -294,11 +323,74 @@ mod tests {
#[test] #[test]
fn parse_bucket_containing_no_bucket() { fn parse_bucket_containing_no_bucket() {
let parsed = parse_bucket_key(""); let parsed = parse_bucket_key("", None, None);
assert!(parsed.is_err()); assert!(parsed.is_err());
let parsed = parse_bucket_key("/"); let parsed = parse_bucket_key("/", None, None);
assert!(parsed.is_err()); assert!(parsed.is_err());
let parsed = parse_bucket_key("////"); let parsed = parse_bucket_key("////", None, None);
assert!(parsed.is_err()); assert!(parsed.is_err());
} }
#[test]
fn parse_bucket_with_vhost_and_key() -> Result<(), Error> {
    // The host is a sub-domain of the root domain: the bucket is taken from
    // the host and the whole path is the key.
    let vhost_style = parse_bucket_key(
        "/a/super/file.jpg",
        Some("my-bucket.garage.tld"),
        Some("garage.tld"),
    )?;
    assert_eq!(vhost_style, ("my-bucket", Some("a/super/file.jpg")));

    // The host is unrelated to the root domain: the first path segment is
    // the bucket and the remainder is the key.
    let path_style = parse_bucket_key(
        "/my_bucket/a/super/file.jpg",
        Some("not-garage.tld"),
        Some("garage.tld"),
    )?;
    assert_eq!(path_style, ("my_bucket", Some("a/super/file.jpg")));

    Ok(())
}
#[test]
fn parse_bucket_with_vhost_no_key() -> Result<(), Error> {
    // With a vhost-style host, an empty path and a bare "/" both designate
    // the bucket itself, without any key.
    for path in ["", "/"].iter().copied() {
        let (bucket, key) =
            parse_bucket_key(path, Some("my-bucket.garage.tld"), Some("garage.tld"))?;
        assert_eq!(bucket, "my-bucket");
        assert!(key.is_none());
    }
    Ok(())
}
#[test]
fn parse_bucket_missmatch_vhost() {
    let paths = [
        "/my_bucket/a/super/file.jpg",
        "/my_bucket/",
        "/my_bucket",
        "",
        "/",
        "////",
    ];
    // (host, root) combinations for which vhost-style resolution must not
    // apply: root missing, host missing, or host outside of the root domain.
    let vhost_args = [
        (Some("bucket.garage.tld"), None),
        (None, Some("garage.tld")),
        (Some("not-garage.tld"), Some("garage.tld")),
    ];

    for path in paths.iter().copied() {
        // Errors are collapsed to `None`: two failures count as equal, a
        // failure never equals a success, and successes compare by value.
        let expected = parse_bucket_key(path, None, None).ok();
        for (host, root) in vhost_args.iter().copied() {
            assert_eq!(parse_bucket_key(path, host, root).ok(), expected);
        }
    }
}
} }

114
src/api/helpers.rs Normal file
View file

@ -0,0 +1,114 @@
use crate::Error;
use idna::domain_to_unicode;
/// Host to bucket
///
/// Extract the bucket name from a vhost-style `host`, considering that
/// `root` is the "root domain". A leading dot in `root` is ignored, so
/// "garage-site.tld" and ".garage-site.tld" are equivalent.
///
/// Returns `Some(bucket)` when `host` is exactly `<bucket>.<root>`: with
/// ".garage-site.tld" as root, "bucket.garage-site.tld" gives "bucket". The
/// bucket part may itself contain dots ("john.doe.garage-site.tld" gives
/// "john.doe").
///
/// Returns `None` when `host` is not a sub-domain of `root` (for example
/// "john.doe.com" or "not-garage-site.tld"), when `host` is the root domain
/// itself, or when the bucket label would be empty (".garage-site.tld").
/// The caller decides what to fall back to in that case.
pub fn host_to_bucket<'a>(host: &'a str, root: &str) -> Option<&'a str> {
    let root = root.trim_start_matches('.');
    host.strip_suffix(root)?
        // Require a label boundary right before the root, so that
        // "not-garage.tld" is not treated as a sub-domain of "garage.tld".
        .strip_suffix('.')
        // An empty name can never designate a bucket.
        .filter(|bucket| !bucket.is_empty())
}
/// Extract host from the authority section given by the HTTP host header
///
/// The HTTP host contains both a host and an optional port.
/// Extracting the port is more complex than just finding the colon (:) symbol due to IPv6:
/// a bracketed literal such as "[::1]:3902" contains colons itself, so when the authority
/// starts with '[' the port separator is only looked for right after the closing ']'.
/// We do not use the collect pattern as there is no way in std rust to collect over a stack allocated value
/// check here: https://docs.rs/collect_slice/1.2.0/collect_slice/
///
/// The host part is passed through `domain_to_unicode` and returned as an owned `String`;
/// the second element returned by `domain_to_unicode` is discarded.
///
/// # Errors
///
/// Returns `Error::BadRequest` when `authority` is empty, when an opening '[' is never
/// closed, or when the closing ']' is followed by anything other than ':'.
pub fn authority_to_host(authority: &str) -> Result<String, Error> {
    // `char_indices` yields byte offsets, which is what slicing `authority`
    // below requires; a plain character count would be wrong (and could
    // panic) as soon as a multi-byte character precedes the separator.
    let mut iter = authority.char_indices();
    let (_, first_char) = iter
        .next()
        .ok_or_else(|| Error::BadRequest("Authority is empty".to_string()))?;

    // NOTE: the first character has been consumed above, so the search for
    // the port separator starts at the second character.
    let split = match first_char {
        '[' => {
            // Skip the content of the bracketed literal up to the closing ']'.
            let mut iter = iter.skip_while(|(_, c)| c != &']');
            match iter.next() {
                // Whatever follows ']' (if anything) must be the port separator.
                Some((_, ']')) => iter.next(),
                _ => {
                    return Err(Error::BadRequest(format!(
                        "Authority {} has an illegal format",
                        authority
                    )))
                }
            }
        }
        _ => iter.find(|(_, c)| *c == ':'),
    };

    let host = match split {
        Some((i, ':')) => &authority[..i],
        // No separator found: the whole authority is the host.
        None => authority,
        Some((_, _)) => {
            return Err(Error::BadRequest(format!(
                "Authority {} has an illegal format",
                authority
            )))
        }
    };

    Ok(domain_to_unicode(host).0)
}
#[cfg(test)]
mod tests {
    use super::*;

    /// The `:port` suffix is dropped for an IPv6 literal, a domain name and
    /// an IPv4 address.
    #[test]
    fn authority_to_host_with_port() -> Result<(), Error> {
        let cases = [
            ("[::1]:3902", "[::1]"),
            ("garage.tld:65200", "garage.tld"),
            ("127.0.0.1:80", "127.0.0.1"),
        ];
        for (authority, expected) in cases.iter().copied() {
            assert_eq!(authority_to_host(authority)?, expected);
        }
        Ok(())
    }

    /// Without a port the authority is returned as is; an unterminated
    /// bracket is rejected.
    #[test]
    fn authority_to_host_without_port() -> Result<(), Error> {
        for authority in ["[::1]", "garage.tld", "127.0.0.1"].iter().copied() {
            assert_eq!(authority_to_host(authority)?, authority);
        }
        for malformed in ["[", "[hello"].iter().copied() {
            assert!(authority_to_host(malformed).is_err());
        }
        Ok(())
    }

    /// The root domain is accepted with or without its leading dot.
    #[test]
    fn host_to_bucket_test() {
        let roots = ["garage.tld", ".garage.tld"];

        for root in roots.iter().copied() {
            assert_eq!(
                host_to_bucket("john.doe.garage.tld", root).unwrap(),
                "john.doe"
            );
        }

        // Hosts that are not a strict sub-domain of the root give no bucket.
        let unrelated_hosts = ["john.doe.com", "garage.tld", "not-garage.tld"];
        for host in unrelated_hosts.iter().copied() {
            for root in roots.iter().copied() {
                assert_eq!(host_to_bucket(host, root), None);
            }
        }
    }
}

View file

@ -12,6 +12,7 @@ pub use api_server::run_api_server;
mod signature; mod signature;
pub mod helpers;
mod s3_bucket; mod s3_bucket;
mod s3_copy; mod s3_copy;
mod s3_delete; mod s3_delete;

View file

@ -68,6 +68,9 @@ pub struct ApiConfig {
pub api_bind_addr: SocketAddr, pub api_bind_addr: SocketAddr,
/// S3 region to use /// S3 region to use
pub s3_region: String, pub s3_region: String,
/// Suffix to remove from domain name to find bucket. If None,
/// vhost-style S3 requests are disabled
pub root_domain: Option<String>,
} }
/// Configuration for serving files as normal web server /// Configuration for serving files as normal web server

View file

@ -19,7 +19,6 @@ garage_util = { version = "0.4.0", path = "../util" }
garage_table = { version = "0.4.0", path = "../table" } garage_table = { version = "0.4.0", path = "../table" }
err-derive = "0.3" err-derive = "0.3"
idna = "0.2"
log = "0.4" log = "0.4"
percent-encoding = "2.1.0" percent-encoding = "2.1.0"

View file

@ -9,9 +9,8 @@ use hyper::{
Body, Method, Request, Response, Server, Body, Method, Request, Response, Server,
}; };
use idna::domain_to_unicode;
use crate::error::*; use crate::error::*;
use garage_api::helpers::{authority_to_host, host_to_bucket};
use garage_api::s3_get::{handle_get, handle_head}; use garage_api::s3_get::{handle_get, handle_head};
use garage_model::bucket_table::*; use garage_model::bucket_table::*;
use garage_model::garage::Garage; use garage_model::garage::Garage;
@ -75,9 +74,9 @@ async fn serve_file(garage: Arc<Garage>, req: Request<Body>) -> Result<Response<
.to_str()?; .to_str()?;
// Get bucket // Get bucket
let (host, _) = domain_to_unicode(authority_to_host(authority)?); let host = authority_to_host(authority)?;
let root = &garage.config.s3_web.root_domain; let root = &garage.config.s3_web.root_domain;
let bucket = host_to_bucket(&host, root); let bucket = host_to_bucket(&host, root).unwrap_or(&host);
// Check bucket is exposed as a website // Check bucket is exposed as a website
let bucket_desc = garage let bucket_desc = garage
@ -108,65 +107,6 @@ async fn serve_file(garage: Arc<Garage>, req: Request<Body>) -> Result<Response<
Ok(res) Ok(res)
} }
/// Extract host from the authority section given by the HTTP host header
///
/// The HTTP host contains both a host and an optional port.
/// Extracting the port is more complex than just finding the colon (:) symbol due to IPv6:
/// a bracketed literal such as "[::1]:3902" contains colons itself, so when the authority
/// starts with '[' the port separator is only looked for right after the closing ']'.
/// We do not use the collect pattern as there is no way in std rust to collect over a stack allocated value
/// check here: https://docs.rs/collect_slice/1.2.0/collect_slice/
///
/// The returned host borrows from `authority`.
///
/// # Errors
///
/// Returns `Error::BadRequest` when `authority` is empty, when an opening '[' is never
/// closed, or when the closing ']' is followed by anything other than ':'.
fn authority_to_host(authority: &str) -> Result<&str, Error> {
    // `char_indices` yields byte offsets, which is what slicing `authority`
    // below requires; a plain character count would be wrong (and could
    // panic) as soon as a multi-byte character precedes the separator.
    let mut iter = authority.char_indices();
    let (_, first_char) = iter
        .next()
        .ok_or_else(|| Error::BadRequest("Authority is empty".to_string()))?;

    // NOTE: the first character has been consumed above, so the search for
    // the port separator starts at the second character.
    let split = match first_char {
        '[' => {
            // Skip the content of the bracketed literal up to the closing ']'.
            let mut iter = iter.skip_while(|(_, c)| c != &']');
            match iter.next() {
                // Whatever follows ']' (if anything) must be the port separator.
                Some((_, ']')) => iter.next(),
                _ => {
                    return Err(Error::BadRequest(format!(
                        "Authority {} has an illegal format",
                        authority
                    )))
                }
            }
        }
        _ => iter.find(|(_, c)| *c == ':'),
    };

    match split {
        Some((i, ':')) => Ok(&authority[..i]),
        // No separator found: the whole authority is the host.
        None => Ok(authority),
        Some((_, _)) => Err(Error::BadRequest(format!(
            "Authority {} has an illegal format",
            authority
        ))),
    }
}
/// Host to bucket
///
/// Convert a host, like "bucket.garage-site.tld" or "john.doe.com"
/// to the corresponding bucket, resp. "bucket" and "john.doe.com"
/// considering that ".garage-site.tld" is the "root domain".
/// This behavior has been chosen to follow AWS S3 semantic.
///
/// The root domain is accepted with or without its leading dot. The host is
/// returned unchanged when it is not a strict sub-domain of the root: this
/// covers unrelated domains, the root domain itself, hosts that merely end
/// with the same characters ("not-garage-site.tld"), and hosts whose bucket
/// label would be empty.
fn host_to_bucket<'a>(host: &'a str, root: &str) -> &'a str {
    let root = root.trim_start_matches('.');
    host.strip_suffix(root)
        // Require a label boundary right before the root domain.
        .and_then(|prefix| prefix.strip_suffix('.'))
        // An empty name can never designate a bucket.
        .filter(|bucket| !bucket.is_empty())
        .unwrap_or(host)
}
/// Path to key /// Path to key
/// ///
/// Convert the provided path to the internal key /// Convert the provided path to the internal key
@ -200,54 +140,6 @@ fn path_to_key<'a>(path: &'a str, index: &str) -> Result<Cow<'a, str>, Error> {
mod tests { mod tests {
use super::*; use super::*;
#[test]
fn authority_to_host_with_port() -> Result<(), Error> {
    // (authority, expected host) for an IPv6 literal, a domain name and an
    // IPv4 address, each followed by a port.
    let cases = [
        ("[::1]:3902", "[::1]"),
        ("garage.tld:65200", "garage.tld"),
        ("127.0.0.1:80", "127.0.0.1"),
    ];
    for (authority, expected) in cases.iter().copied() {
        assert_eq!(authority_to_host(authority)?, expected);
    }
    Ok(())
}
#[test]
fn authority_to_host_without_port() -> Result<(), Error> {
    // Without a port the authority is returned as is.
    for authority in ["[::1]", "garage.tld", "127.0.0.1"].iter().copied() {
        assert_eq!(authority_to_host(authority)?, authority);
    }
    // An opening bracket that is never closed is rejected.
    for malformed in ["[", "[hello"].iter().copied() {
        assert!(authority_to_host(malformed).is_err());
    }
    Ok(())
}
#[test]
fn host_to_bucket_test() {
    // (host, root domain, expected bucket)
    let cases = [
        ("john.doe.garage.tld", ".garage.tld", "john.doe"),
        ("john.doe.garage.tld", "garage.tld", "john.doe"),
        ("john.doe.com", "garage.tld", "john.doe.com"),
        ("john.doe.com", ".garage.tld", "john.doe.com"),
        ("garage.tld", "garage.tld", "garage.tld"),
        ("garage.tld", ".garage.tld", "garage.tld"),
    ];
    for (host, root, expected) in cases.iter().copied() {
        assert_eq!(host_to_bucket(host, root), expected);
    }
}
#[test] #[test]
fn path_to_key_test() -> Result<(), Error> { fn path_to_key_test() -> Result<(), Error> {
assert_eq!(path_to_key("/file%20.jpg", "index.html")?, "file .jpg"); assert_eq!(path_to_key("/file%20.jpg", "index.html")?, "file .jpg");