From c9b733a4a667c82c665d84352624902dcba093a7 Mon Sep 17 00:00:00 2001 From: trinity-1686a Date: Sat, 14 Dec 2024 17:46:27 +0100 Subject: [PATCH 1/9] support redirection on s3 endpoint --- src/api/admin/bucket.rs | 1 + src/api/s3/get.rs | 44 ++++-- src/api/s3/website.rs | 131 ++++++++++++---- src/garage/admin/bucket.rs | 1 + src/model/bucket_table.rs | 54 +++++++ src/web/web_server.rs | 312 +++++++++++++++++++++++++++++-------- 6 files changed, 445 insertions(+), 98 deletions(-) diff --git a/src/api/admin/bucket.rs b/src/api/admin/bucket.rs index ac3cba00..d5fd0e6b 100644 --- a/src/api/admin/bucket.rs +++ b/src/api/admin/bucket.rs @@ -423,6 +423,7 @@ pub async fn handle_update_bucket( "Please specify indexDocument when enabling website access.", )?, error_document: wa.error_document, + routing_rules: Vec::new(), })); } else { if wa.index_document.is_some() || wa.error_document.is_some() { diff --git a/src/api/s3/get.rs b/src/api/s3/get.rs index f5d3cf11..eea3434e 100644 --- a/src/api/s3/get.rs +++ b/src/api/s3/get.rs @@ -163,7 +163,15 @@ pub async fn handle_head( key: &str, part_number: Option, ) -> Result, Error> { - handle_head_without_ctx(ctx.garage, req, ctx.bucket_id, key, part_number).await + handle_head_without_ctx( + ctx.garage, + req, + ctx.bucket_id, + key, + StatusCode::OK, + part_number, + ) + .await } /// Handle HEAD request for website @@ -172,6 +180,7 @@ pub async fn handle_head_without_ctx( req: &Request, bucket_id: Uuid, key: &str, + status_code: StatusCode, part_number: Option, ) -> Result, Error> { let object = garage @@ -272,7 +281,7 @@ pub async fn handle_head_without_ctx( checksum_mode, ) .header(CONTENT_LENGTH, format!("{}", version_meta.size)) - .status(StatusCode::OK) + .status(status_code) .body(empty_body())?) } } @@ -285,7 +294,16 @@ pub async fn handle_get( part_number: Option, overrides: GetObjectOverrides, ) -> Result, Error> { - handle_get_without_ctx(ctx.garage, req, ctx.bucket_id, key, part_number, overrides).await + handle_get_without_ctx( + ctx.garage, + req, + ctx.bucket_id, + key, + StatusCode::OK, + part_number, + overrides, + ) + .await } /// Handle GET request @@ -294,6 +312,7 @@ pub async fn handle_get_without_ctx( req: &Request, bucket_id: Uuid, key: &str, + status_code: StatusCode, part_number: Option, overrides: GetObjectOverrides, ) -> Result, Error> { @@ -329,11 +348,15 @@ pub async fn handle_get_without_ctx( let checksum_mode = checksum_mode(&req); - match (part_number, parse_range_header(req, last_v_meta.size)?) { - (Some(_), Some(_)) => Err(Error::bad_request( + match ( + part_number, + parse_range_header(req, last_v_meta.size)?, + status_code == StatusCode::OK, + ) { + (Some(_), Some(_), _) => Err(Error::bad_request( "Cannot specify both partNumber and Range header", )), - (Some(pn), None) => { + (Some(pn), None, true) => { handle_get_part( garage, last_v, @@ -346,7 +369,7 @@ pub async fn handle_get_without_ctx( ) .await } - (None, Some(range)) => { + (None, Some(range), true) => { handle_get_range( garage, last_v, @@ -360,7 +383,8 @@ pub async fn handle_get_without_ctx( ) .await } - (None, None) => { + _ => { + // either not a range, or an error request: always return the full doc handle_get_full( garage, last_v, @@ -370,6 +394,7 @@ pub async fn handle_get_without_ctx( &headers, overrides, checksum_mode, + status_code, ) .await } @@ -385,6 +410,7 @@ async fn handle_get_full( meta_inner: &ObjectVersionMetaInner, overrides: GetObjectOverrides, checksum_mode: ChecksumMode, + status_code: StatusCode, ) -> Result, Error> { let mut resp_builder = object_headers( version, @@ -394,7 +420,7 @@ async fn handle_get_full( checksum_mode, ) .header(CONTENT_LENGTH, format!("{}", version_meta.size)) - .status(StatusCode::OK); + .status(status_code); getobject_override_headers(overrides, &mut resp_builder)?; let stream = full_object_byte_stream(garage, version, version_data, encryption); diff --git a/src/api/s3/website.rs b/src/api/s3/website.rs index 6af55677..934a20ff 100644 --- a/src/api/s3/website.rs +++ b/src/api/s3/website.rs @@ -10,7 +10,7 @@ use crate::s3::error::*; use crate::s3::xml::{to_xml_with_header, xmlns_tag, IntValue, Value}; use crate::signature::verify_signed_content; -use garage_model::bucket_table::*; +use garage_model::bucket_table::{self, *}; use garage_util::data::*; pub async fn handle_get_website(ctx: ReqCtx) -> Result, Error> { @@ -25,7 +25,8 @@ pub async fn handle_get_website(ctx: ReqCtx) -> Result, Error> suffix: Value(website.index_document.to_string()), }), redirect_all_requests_to: None, - routing_rules: None, + // TODO put the correct config here + routing_rules: Vec::new(), }; let xml = to_xml_with_header(&wc)?; Ok(Response::builder() @@ -101,8 +102,12 @@ pub struct WebsiteConfiguration { pub index_document: Option, #[serde(rename = "RedirectAllRequestsTo")] pub redirect_all_requests_to: Option, - #[serde(rename = "RoutingRules")] - pub routing_rules: Option>, + #[serde( + rename = "RoutingRules", + default, + skip_serializing_if = "Vec::is_empty" + )] + pub routing_rules: Vec, } #[derive(Debug, Serialize, Deserialize, PartialEq, Eq, PartialOrd, Ord)] @@ -166,7 +171,7 @@ impl WebsiteConfiguration { if self.redirect_all_requests_to.is_some() && (self.error_document.is_some() || self.index_document.is_some() - || self.routing_rules.is_some()) + || !self.routing_rules.is_empty()) { return Err(Error::bad_request( "Bad XML: can't have RedirectAllRequestsTo and other fields", @@ -181,10 +186,15 @@ impl WebsiteConfiguration { if let Some(ref rart) = self.redirect_all_requests_to { rart.validate()?; } - if let Some(ref rrs) = self.routing_rules { - for rr in rrs { - rr.inner.validate()?; - } + for rr in &self.routing_rules { + rr.inner.validate()?; + } + if self.routing_rules.len() > 1000 { + // we will do linear scans, best to avoid overly long configuration. The + // limit was choosen arbitrarily + return Err(Error::bad_request( + "Bad XML: RoutingRules can't have more than 1000 child elements", + )); } Ok(()) @@ -195,10 +205,12 @@ impl WebsiteConfiguration { Err(Error::NotImplemented( "S3 website redirects are not currently implemented in Garage.".into(), )) - } else if self.routing_rules.map(|x| !x.is_empty()).unwrap_or(false) { - Err(Error::NotImplemented( - "S3 routing rules are not currently implemented in Garage.".into(), - )) + /* + } else if self.routing_rules.map(|x| !x.is_empty()).unwrap_or(false) { + Err(Error::NotImplemented( + "S3 routing rules are not currently implemented in Garage.".into(), + )) + */ } else { Ok(WebsiteConfig { index_document: self @@ -206,6 +218,35 @@ impl WebsiteConfiguration { .map(|x| x.suffix.0) .unwrap_or_else(|| "index.html".to_string()), error_document: self.error_document.map(|x| x.key.0), + routing_rules: self + .routing_rules + .into_iter() + .map(|rule| { + bucket_table::RoutingRule { + condition: rule.inner.condition.map(|condition| { + bucket_table::Condition { + http_error_code: condition.http_error_code.map(|c| c.0 as u16), + prefix: condition.prefix.map(|p| p.0), + } + }), + redirect: bucket_table::Redirect { + hostname: rule.inner.redirect.hostname.map(|h| h.0), + protocol: rule.inner.redirect.protocol.map(|p| p.0), + // aws default to 301, which i find punitive in case of + // missconfiguration (can be permanently cached on the + // user agent) + http_redirect_code: rule + .inner + .redirect + .http_redirect_code + .map(|c| c.0 as u16) + .unwrap_or(302), + replace_key_prefix: rule.inner.redirect.replace_prefix.map(|k| k.0), + replace_key: rule.inner.redirect.replace_full.map(|k| k.0), + }, + } + }) + .collect(), }) } } @@ -248,35 +289,69 @@ impl Target { impl RoutingRuleInner { pub fn validate(&self) -> Result<(), Error> { - let has_prefix = self - .condition - .as_ref() - .and_then(|c| c.prefix.as_ref()) - .is_some(); - self.redirect.validate(has_prefix) + if let Some(condition) = &self.condition { + condition.validate()?; + } + self.redirect.validate() + } +} + +impl Condition { + pub fn validate(&self) -> Result { + if let Some(ref error_code) = self.http_error_code { + // TODO do other error codes make sense? Aws only allows 4xx and 5xx + if error_code.0 != 404 { + return Err(Error::bad_request( + "Bad XML: HttpErrorCodeReturnedEquals must be 404 or absent", + )); + } + } + Ok(self.prefix.is_some()) } } impl Redirect { - pub fn validate(&self, has_prefix: bool) -> Result<(), Error> { + pub fn validate(&self) -> Result<(), Error> { if self.replace_prefix.is_some() { if self.replace_full.is_some() { return Err(Error::bad_request( "Bad XML: both ReplaceKeyPrefixWith and ReplaceKeyWith are set", )); } - if !has_prefix { - return Err(Error::bad_request( - "Bad XML: ReplaceKeyPrefixWith is set, but KeyPrefixEquals isn't", - )); - } } if let Some(ref protocol) = self.protocol { if protocol.0 != "http" && protocol.0 != "https" { return Err(Error::bad_request("Bad XML: invalid protocol")); } } - // TODO there are probably more invalide cases, but which ones? + if let Some(ref http_redirect_code) = self.http_redirect_code { + match http_redirect_code.0 { + // aws allows all 3xx except 300, but some are non-sensical (not modified, + // use proxy...) + 301 | 302 | 303 | 307 | 308 => { + if self.hostname.is_none() && self.protocol.is_some() { + return Err(Error::bad_request( + "Bad XML: HostName must be set if Protocol is set", + )); + } + } + // aws doesn't allow these codes, but netlify does, and it seems like a + // cool feature (change the page seen without changing the url shown by the + // user agent) + 200 | 404 => { + if self.hostname.is_some() || self.protocol.is_some() { + // hostname would mean different bucket, protocol doesn't make + // sense + return Err(Error::bad_request( + "Bad XML: an HttpRedirectCode of 200 is not acceptable alongside HostName or Protocol", + )); + } + } + _ => { + return Err(Error::bad_request("Bad XML: invalid HttpRedirectCode")); + } + } + } Ok(()) } } @@ -330,7 +405,7 @@ mod tests { hostname: Value("garage.tld".to_owned()), protocol: Some(Value("https".to_owned())), }), - routing_rules: Some(vec![RoutingRule { + routing_rules: vec![RoutingRule { inner: RoutingRuleInner { condition: Some(Condition { http_error_code: Some(IntValue(404)), @@ -344,7 +419,7 @@ mod tests { replace_full: Some(Value("fullkey".to_owned())), }, }, - }]), + }], }; assert_eq! { ref_value, diff --git a/src/garage/admin/bucket.rs b/src/garage/admin/bucket.rs index 1bdc6086..a9b4cc50 100644 --- a/src/garage/admin/bucket.rs +++ b/src/garage/admin/bucket.rs @@ -393,6 +393,7 @@ impl AdminRpcHandler { Some(WebsiteConfig { index_document: query.index_document.clone(), error_document: query.error_document.clone(), + routing_rules: Vec::new(), }) } else { None diff --git a/src/model/bucket_table.rs b/src/model/bucket_table.rs index 1dbdfac2..625c177d 100644 --- a/src/model/bucket_table.rs +++ b/src/model/bucket_table.rs @@ -60,6 +60,60 @@ mod v08 { pub struct WebsiteConfig { pub index_document: String, pub error_document: Option, + #[serde(default, skip_serializing_if = "Vec::is_empty")] + pub routing_rules: Vec, + } + + #[derive(PartialEq, Eq, Clone, Debug, Serialize, Deserialize)] + pub struct RoutingRule { + pub condition: Option, + pub redirect: Redirect, + } + + #[derive(PartialEq, Eq, Clone, Debug, Serialize, Deserialize)] + pub struct Condition { + pub http_error_code: Option, + pub prefix: Option, + } + + #[derive(PartialEq, Eq, Clone, Debug, Serialize, Deserialize)] + pub struct Redirect { + pub hostname: Option, + pub http_redirect_code: u16, + pub protocol: Option, + pub replace_key_prefix: Option, + pub replace_key: Option, + } + + impl Redirect { + pub fn compute_target(&self, suffix: Option<&str>) -> String { + let mut res = String::new(); + if let Some(hostname) = &self.hostname { + if let Some(protocol) = &self.protocol { + res.push_str(&protocol); + res.push_str("://"); + } else { + res.push_str("//"); + } + res.push_str(&hostname); + } + res.push('/'); + if let Some(replace_key_prefix) = &self.replace_key_prefix { + res.push_str(&replace_key_prefix); + if let Some(suffix) = suffix { + res.push_str(suffix) + } + } else if let Some(replace_key) = &self.replace_key { + res.push_str(&replace_key) + } + res + } + } + + #[derive(PartialEq, Eq, Clone, Debug, Serialize, Deserialize)] + pub struct RedirectAll { + pub hostname: String, + pub protoco: String, } #[derive(PartialEq, Eq, Clone, Debug, Serialize, Deserialize)] diff --git a/src/web/web_server.rs b/src/web/web_server.rs index 69939f65..d8ce0460 100644 --- a/src/web/web_server.rs +++ b/src/web/web_server.rs @@ -28,6 +28,7 @@ use garage_api::s3::error::{ }; use garage_api::s3::get::{handle_get_without_ctx, handle_head_without_ctx}; +use garage_model::bucket_table::RoutingRule; use garage_model::garage::Garage; use garage_table::*; @@ -234,26 +235,32 @@ impl WebServer { // Get path let path = req.uri().path().to_string(); let index = &website_config.index_document; - let (key, may_redirect) = path_to_keys(&path, index)?; + let routing_result = path_to_keys(&path, index, &[])?; debug!( - "Selected bucket: \"{}\" {:?}, target key: \"{}\", may redirect to: {:?}", - bucket_name, bucket_id, key, may_redirect + "Selected bucket: \"{}\" {:?}, routing to {:?}", + bucket_name, bucket_id, routing_result, ); - let ret_doc = match *req.method() { - Method::OPTIONS => handle_options_for_bucket(req, &bucket_params) + let ret_doc = match (req.method(), routing_result.main_target()) { + (&Method::OPTIONS, _) => handle_options_for_bucket(req, &bucket_params) .map_err(ApiError::from) .map(|res| res.map(|_empty_body: EmptyBody| empty_body())), - Method::HEAD => { - handle_head_without_ctx(self.garage.clone(), req, bucket_id, &key, None).await + (_, Err((url, code))) => Ok(Response::builder() + .status(code) + .header("Location", url) + .body(empty_body()) + .unwrap()), + (&Method::HEAD, Ok((key, code))) => { + handle_head_without_ctx(self.garage.clone(), req, bucket_id, key, code, None).await } - Method::GET => { + (&Method::GET, Ok((key, code))) => { handle_get_without_ctx( self.garage.clone(), req, bucket_id, - &key, + key, + code, None, Default::default(), ) @@ -262,16 +269,68 @@ impl WebServer { _ => Err(ApiError::bad_request("HTTP method not supported")), }; - // Try implicit redirect on error - let ret_doc_with_redir = match (&ret_doc, may_redirect) { - (Err(ApiError::NoSuchKey), ImplicitRedirect::To { key, url }) - if self.check_key_exists(bucket_id, key.as_str()).await? => - { - Ok(Response::builder() - .status(StatusCode::FOUND) - .header("Location", url) - .body(empty_body()) - .unwrap()) + // Try handling errors if bucket configuration provided fallbacks + let ret_doc_with_redir = match (&ret_doc, &routing_result) { + ( + Err(ApiError::NoSuchKey), + RoutingResult::LoadOrRedirect { + redirect_if_exists, + redirect_url, + redirect_code, + .. + }, + ) => { + let redirect = if let Some(redirect_key) = redirect_if_exists { + self.check_key_exists(bucket_id, redirect_key.as_str()) + .await? + } else { + true + }; + if redirect { + Ok(Response::builder() + .status(redirect_code) + .header("Location", redirect_url) + .body(empty_body()) + .unwrap()) + } else { + ret_doc + } + } + ( + Err(ApiError::NoSuchKey), + RoutingResult::LoadOrAlternativeError { + redirect_key, + redirect_code, + .. + }, + ) => { + match *req.method() { + Method::HEAD => { + handle_head_without_ctx( + self.garage.clone(), + req, + bucket_id, + redirect_key, + *redirect_code, + None, + ) + .await + } + Method::GET => { + handle_get_without_ctx( + self.garage.clone(), + req, + bucket_id, + redirect_key, + *redirect_code, + None, + Default::default(), + ) + .await + } + // we shouldn't ever reach here + _ => Err(ApiError::bad_request("HTTP method not supported")), + } } _ => ret_doc, }; @@ -307,6 +366,7 @@ impl WebServer { &req2, bucket_id, &error_document, + error.http_status_code(), None, Default::default(), ) @@ -323,8 +383,6 @@ impl WebServer { error ); - *error_doc.status_mut() = error.http_status_code(); - // Preserve error message in a special header for error_line in error.to_string().split('\n') { if let Ok(v) = HeaderValue::from_bytes(error_line.as_bytes()) { @@ -371,9 +429,44 @@ fn error_to_res(e: Error) -> Response> { } #[derive(Debug, PartialEq)] -enum ImplicitRedirect { - No, - To { key: String, url: String }, +enum RoutingResult { + // Load a key and use `code` as status, or fallback to normal 404 handler if not found + LoadKey { + key: String, + code: StatusCode, + }, + // Load a key and use `200` as status, or fallback with a redirection using `redirect_code` + // as status + LoadOrRedirect { + key: String, + redirect_if_exists: Option, + redirect_url: String, + redirect_code: StatusCode, + }, + // Load a key and use `200` as status, or fallback by loading a different key and use + // `redirect_code` as status + LoadOrAlternativeError { + key: String, + redirect_key: String, + redirect_code: StatusCode, + }, + // Send an http redirect with `code` as status + Redirect { + url: String, + code: StatusCode, + }, +} + +impl RoutingResult { + // return Ok((key_to_deref, status_code)) or Err((redirect_target, status_code)) + fn main_target(&self) -> Result<(&str, StatusCode), (&str, StatusCode)> { + match self { + RoutingResult::LoadKey { key, code } => Ok((key, *code)), + RoutingResult::LoadOrRedirect { key, .. } => Ok((key, StatusCode::OK)), + RoutingResult::LoadOrAlternativeError { key, .. } => Ok((key, StatusCode::OK)), + RoutingResult::Redirect { url, code } => Err((url, *code)), + } + } } /// Path to key @@ -383,35 +476,128 @@ enum ImplicitRedirect { /// which is also AWS S3 behavior. /// /// Check: https://docs.aws.amazon.com/AmazonS3/latest/userguide/IndexDocumentSupport.html -fn path_to_keys<'a>(path: &'a str, index: &str) -> Result<(String, ImplicitRedirect), Error> { +fn path_to_keys<'a>( + path: &'a str, + index: &str, + routing_rules: &[RoutingRule], +) -> Result { let path_utf8 = percent_encoding::percent_decode_str(path).decode_utf8()?; let base_key = match path_utf8.strip_prefix("/") { Some(bk) => bk, None => return Err(Error::BadRequest("Path must start with a / (slash)".into())), }; + let is_bucket_root = base_key.len() == 0; let is_trailing_slash = path_utf8.ends_with("/"); - match (is_bucket_root, is_trailing_slash) { - // It is not possible to store something at the root of the bucket (ie. empty key), - // the only option is to fetch the index - (true, _) => Ok((index.to_string(), ImplicitRedirect::No)), + let key = if is_bucket_root || is_trailing_slash { + // we can't store anything at the root, so we need to query the index + // if the key end with a slash, we always query the index + format!("{base_key}{index}") + } else { + // if the key doesn't end with `/`, leave it unmodified + base_key.to_string() + }; - // "If you create a folder structure in your bucket, you must have an index document at each level. In each folder, the index document must have the same name, for example, index.html. When a user specifies a URL that resembles a folder lookup, the presence or absence of a trailing slash determines the behavior of the website. For example, the following URL, with a trailing slash, returns the photos/index.html index document." - (false, true) => Ok((format!("{base_key}{index}"), ImplicitRedirect::No)), + let mut routing_rules_iter = routing_rules.iter(); + let key = loop { + let Some(routing_rule) = routing_rules_iter.next() else { + break key; + }; - // "However, if you exclude the trailing slash from the preceding URL, Amazon S3 first looks for an object photos in the bucket. If the photos object is not found, it searches for an index document, photos/index.html. If that document is found, Amazon S3 returns a 302 Found message and points to the photos/ key. For subsequent requests to photos/, Amazon S3 returns photos/index.html. If the index document is not found, Amazon S3 returns an error." - (false, false) => Ok(( - base_key.to_string(), - ImplicitRedirect::To { - key: format!("{base_key}/{index}"), - url: format!("{path}/"), - }, - )), + let Ok(status_code) = StatusCode::from_u16(routing_rule.redirect.http_redirect_code) else { + continue; + }; + if let Some(condition) = &routing_rule.condition { + let suffix = if let Some(prefix) = &condition.prefix { + let Some(suffix) = key.strip_prefix(prefix) else { + continue; + }; + Some(suffix) + } else { + None + }; + let target = routing_rule.redirect.compute_target(suffix); + let query_alternative_key = + status_code == StatusCode::OK || status_code == StatusCode::NOT_FOUND; + let redirect_on_error = + condition.http_error_code == Some(StatusCode::NOT_FOUND.as_u16()); + match (query_alternative_key, redirect_on_error) { + (false, false) => { + return Ok(RoutingResult::Redirect { + url: target, + code: status_code, + }) + } + (true, false) => { + if status_code == StatusCode::OK { + break target; + } else { + return Ok(RoutingResult::LoadKey { + key: target, + code: status_code, + }); + } + } + (false, true) => { + return Ok(RoutingResult::LoadOrRedirect { + key, + redirect_if_exists: None, + redirect_url: target, + redirect_code: status_code, + }); + } + (true, true) => { + return Ok(RoutingResult::LoadOrAlternativeError { + key, + redirect_key: target, + redirect_code: status_code, + }); + } + } + } else { + let target = routing_rule.redirect.compute_target(None); + return Ok(RoutingResult::Redirect { + url: target, + code: status_code, + }); + } + }; + + if is_bucket_root || is_trailing_slash { + Ok(RoutingResult::LoadKey { + key, + code: StatusCode::OK, + }) + } else { + Ok(RoutingResult::LoadOrRedirect { + redirect_if_exists: Some(format!("{key}/{index}")), + key, + // we can't use `path` because key might have changed substentially in case of + // routing rules + redirect_url: percent_encoding::percent_encode( + format!("{path}/").as_bytes(), + PATH_ENCODING_SET, + ) + .to_string(), + redirect_code: StatusCode::FOUND, + }) } } +// per https://url.spec.whatwg.org/#path-percent-encode-set +const PATH_ENCODING_SET: &percent_encoding::AsciiSet = &percent_encoding::CONTROLS + .add(b' ') + .add(b'"') + .add(b'#') + .add(b'<') + .add(b'>') + .add(b'?') + .add(b'`') + .add(b'{') + .add(b'}'); + #[cfg(test)] mod tests { use super::*; @@ -419,35 +605,39 @@ mod tests { #[test] fn path_to_keys_test() -> Result<(), Error> { assert_eq!( - path_to_keys("/file%20.jpg", "index.html")?, - ( - "file .jpg".to_string(), - ImplicitRedirect::To { - key: "file .jpg/index.html".to_string(), - url: "/file%20.jpg/".to_string() - } - ) + path_to_keys("/file%20.jpg", "index.html", &[])?, + RoutingResult::LoadOrRedirect { + key: "file .jpg".to_string(), + redirect_url: "/file%20.jpg/".to_string(), + redirect_if_exists: Some("file .jpg/index.html".to_string()), + redirect_code: StatusCode::FOUND, + } ); assert_eq!( - path_to_keys("/%20t/", "index.html")?, - (" t/index.html".to_string(), ImplicitRedirect::No) + path_to_keys("/%20t/", "index.html", &[])?, + RoutingResult::LoadKey { + key: " t/index.html".to_string(), + code: StatusCode::OK + } ); assert_eq!( - path_to_keys("/", "index.html")?, - ("index.html".to_string(), ImplicitRedirect::No) + path_to_keys("/", "index.html", &[])?, + RoutingResult::LoadKey { + key: "index.html".to_string(), + code: StatusCode::OK + } ); assert_eq!( - path_to_keys("/hello", "index.html")?, - ( - "hello".to_string(), - ImplicitRedirect::To { - key: "hello/index.html".to_string(), - url: "/hello/".to_string() - } - ) + path_to_keys("/hello", "index.html", &[])?, + RoutingResult::LoadOrRedirect { + key: "hello".to_string(), + redirect_url: "/hello/".to_string(), + redirect_if_exists: Some("hello/index.html".to_string()), + redirect_code: StatusCode::FOUND, + } ); - assert!(path_to_keys("", "index.html").is_err()); - assert!(path_to_keys("i/am/relative", "index.html").is_err()); + assert!(path_to_keys("", "index.html", &[]).is_err()); + assert!(path_to_keys("i/am/relative", "index.html", &[]).is_err()); Ok(()) } } -- 2.45.2 From 65e9dde8c99a4c1406e58da0741c9e566d18b1ff Mon Sep 17 00:00:00 2001 From: trinity-1686a Date: Sun, 22 Dec 2024 15:20:09 +0100 Subject: [PATCH 2/9] add tests --- src/api/s3/website.rs | 130 ++++++---- src/garage/tests/s3/website.rs | 446 ++++++++++++++++++++++++++++++++- src/web/web_server.rs | 11 +- 3 files changed, 538 insertions(+), 49 deletions(-) diff --git a/src/api/s3/website.rs b/src/api/s3/website.rs index 934a20ff..ad592260 100644 --- a/src/api/s3/website.rs +++ b/src/api/s3/website.rs @@ -25,8 +25,28 @@ pub async fn handle_get_website(ctx: ReqCtx) -> Result, Error> suffix: Value(website.index_document.to_string()), }), redirect_all_requests_to: None, - // TODO put the correct config here - routing_rules: Vec::new(), + routing_rules: RoutingRules { + rules: website + .routing_rules + .clone() + .into_iter() + .map(|rule| RoutingRule { + condition: rule.condition.map(|cond| Condition { + http_error_code: cond.http_error_code.map(|c| IntValue(c as i64)), + prefix: cond.prefix.map(Value), + }), + redirect: Redirect { + hostname: rule.redirect.hostname.map(Value), + http_redirect_code: Some(IntValue( + rule.redirect.http_redirect_code as i64, + )), + protocol: rule.redirect.protocol.map(Value), + replace_full: rule.redirect.replace_key.map(Value), + replace_prefix: rule.redirect.replace_key_prefix.map(Value), + }, + }) + .collect(), + }, }; let xml = to_xml_with_header(&wc)?; Ok(Response::builder() @@ -105,19 +125,25 @@ pub struct WebsiteConfiguration { #[serde( rename = "RoutingRules", default, - skip_serializing_if = "Vec::is_empty" + skip_serializing_if = "RoutingRules::is_empty" )] - pub routing_rules: Vec, + pub routing_rules: RoutingRules, +} + +#[derive(Debug, Serialize, Deserialize, PartialEq, Eq, PartialOrd, Ord, Default)] +pub struct RoutingRules { + #[serde(rename = "RoutingRule")] + pub rules: Vec, +} + +impl RoutingRules { + fn is_empty(&self) -> bool { + self.rules.is_empty() + } } #[derive(Debug, Serialize, Deserialize, PartialEq, Eq, PartialOrd, Ord)] pub struct RoutingRule { - #[serde(rename = "RoutingRule")] - pub inner: RoutingRuleInner, -} - -#[derive(Debug, Serialize, Deserialize, PartialEq, Eq, PartialOrd, Ord)] -pub struct RoutingRuleInner { #[serde(rename = "Condition")] pub condition: Option, #[serde(rename = "Redirect")] @@ -186,10 +212,10 @@ impl WebsiteConfiguration { if let Some(ref rart) = self.redirect_all_requests_to { rart.validate()?; } - for rr in &self.routing_rules { - rr.inner.validate()?; + for rr in &self.routing_rules.rules { + rr.validate()?; } - if self.routing_rules.len() > 1000 { + if self.routing_rules.rules.len() > 1000 { // we will do linear scans, best to avoid overly long configuration. The // limit was choosen arbitrarily return Err(Error::bad_request( @@ -205,12 +231,6 @@ impl WebsiteConfiguration { Err(Error::NotImplemented( "S3 website redirects are not currently implemented in Garage.".into(), )) - /* - } else if self.routing_rules.map(|x| !x.is_empty()).unwrap_or(false) { - Err(Error::NotImplemented( - "S3 routing rules are not currently implemented in Garage.".into(), - )) - */ } else { Ok(WebsiteConfig { index_document: self @@ -220,29 +240,27 @@ impl WebsiteConfiguration { error_document: self.error_document.map(|x| x.key.0), routing_rules: self .routing_rules + .rules .into_iter() .map(|rule| { bucket_table::RoutingRule { - condition: rule.inner.condition.map(|condition| { - bucket_table::Condition { - http_error_code: condition.http_error_code.map(|c| c.0 as u16), - prefix: condition.prefix.map(|p| p.0), - } + condition: rule.condition.map(|condition| bucket_table::Condition { + http_error_code: condition.http_error_code.map(|c| c.0 as u16), + prefix: condition.prefix.map(|p| p.0), }), redirect: bucket_table::Redirect { - hostname: rule.inner.redirect.hostname.map(|h| h.0), - protocol: rule.inner.redirect.protocol.map(|p| p.0), + hostname: rule.redirect.hostname.map(|h| h.0), + protocol: rule.redirect.protocol.map(|p| p.0), // aws default to 301, which i find punitive in case of // missconfiguration (can be permanently cached on the // user agent) http_redirect_code: rule - .inner .redirect .http_redirect_code .map(|c| c.0 as u16) .unwrap_or(302), - replace_key_prefix: rule.inner.redirect.replace_prefix.map(|k| k.0), - replace_key: rule.inner.redirect.replace_full.map(|k| k.0), + replace_key_prefix: rule.redirect.replace_prefix.map(|k| k.0), + replace_key: rule.redirect.replace_full.map(|k| k.0), }, } }) @@ -287,7 +305,7 @@ impl Target { } } -impl RoutingRuleInner { +impl RoutingRule { pub fn validate(&self) -> Result<(), Error> { if let Some(condition) = &self.condition { condition.validate()?; @@ -390,6 +408,15 @@ mod tests { fullkey + + + + + + 404 + missing + + "#; let conf: WebsiteConfiguration = from_str(message).unwrap(); @@ -405,21 +432,36 @@ mod tests { hostname: Value("garage.tld".to_owned()), protocol: Some(Value("https".to_owned())), }), - routing_rules: vec![RoutingRule { - inner: RoutingRuleInner { - condition: Some(Condition { - http_error_code: Some(IntValue(404)), - prefix: Some(Value("prefix1".to_owned())), - }), - redirect: Redirect { - hostname: Some(Value("gara.ge".to_owned())), - protocol: Some(Value("http".to_owned())), - http_redirect_code: Some(IntValue(303)), - replace_prefix: Some(Value("prefix2".to_owned())), - replace_full: Some(Value("fullkey".to_owned())), + routing_rules: RoutingRules { + rules: vec![ + RoutingRule { + condition: Some(Condition { + http_error_code: Some(IntValue(404)), + prefix: Some(Value("prefix1".to_owned())), + }), + redirect: Redirect { + hostname: Some(Value("gara.ge".to_owned())), + protocol: Some(Value("http".to_owned())), + http_redirect_code: Some(IntValue(303)), + replace_prefix: Some(Value("prefix2".to_owned())), + replace_full: Some(Value("fullkey".to_owned())), + }, }, - }, - }], + RoutingRule { + condition: Some(Condition { + http_error_code: None, + prefix: Some(Value("".to_owned())), + }), + redirect: Redirect { + hostname: None, + protocol: None, + http_redirect_code: Some(IntValue(404)), + replace_prefix: None, + replace_full: Some(Value("missing".to_owned())), + }, + }, + ], + }, }; assert_eq! { ref_value, diff --git a/src/garage/tests/s3/website.rs b/src/garage/tests/s3/website.rs index 0cadc388..12d4973b 100644 --- a/src/garage/tests/s3/website.rs +++ b/src/garage/tests/s3/website.rs @@ -5,7 +5,10 @@ use crate::json_body; use assert_json_diff::assert_json_eq; use aws_sdk_s3::{ primitives::ByteStream, - types::{CorsConfiguration, CorsRule, ErrorDocument, IndexDocument, WebsiteConfiguration}, + types::{ + Condition, CorsConfiguration, CorsRule, ErrorDocument, IndexDocument, Protocol, Redirect, + RoutingRule, WebsiteConfiguration, + }, }; use http::{Request, StatusCode}; use http_body_util::BodyExt; @@ -505,3 +508,444 @@ async fn test_website_check_domain() { }) ); } + +#[tokio::test] +async fn test_website_redirect_full_bucket() { + const BCKT_NAME: &str = "my-redirect-full"; + let ctx = common::context(); + let bucket = ctx.create_bucket(BCKT_NAME); + + let conf = WebsiteConfiguration::builder() + .routing_rules( + RoutingRule::builder() + .condition(Condition::builder().key_prefix_equals("").build()) + .redirect( + Redirect::builder() + .protocol(Protocol::Https) + .host_name("other.tld") + .replace_key_prefix_with("") + .build(), + ) + .build(), + ) + .build(); + + ctx.client + .put_bucket_website() + .bucket(&bucket) + .website_configuration(conf) + .send() + .await + .unwrap(); + + let req = Request::builder() + .method("GET") + .uri(format!("http://127.0.0.1:{}/my-path", ctx.garage.web_port)) + .header("Host", format!("{}.web.garage", BCKT_NAME)) + .body(Body::new(Bytes::new())) + .unwrap(); + + let client = Client::builder(TokioExecutor::new()).build_http(); + let resp = client.request(req).await.unwrap(); + assert_eq!(resp.status(), StatusCode::FOUND); + assert_eq!( + resp.headers() + .get(hyper::header::LOCATION) + .unwrap() + .to_str() + .unwrap(), + "https://other.tld/my-path" + ); +} + +#[tokio::test] +async fn test_website_redirect() { + const BCKT_NAME: &str = "my-redirect"; + let ctx = common::context(); + let bucket = ctx.create_bucket(BCKT_NAME); + + ctx.client + .put_object() + .bucket(&bucket) + .key("index.html") + .body(ByteStream::from_static(b"index")) + .send() + .await + .unwrap(); + ctx.client + .put_object() + .bucket(&bucket) + .key("404.html") + .body(ByteStream::from_static(b"main 404")) + .send() + .await + .unwrap(); + ctx.client + .put_object() + .bucket(&bucket) + .key("static-file") + .body(ByteStream::from_static(b"static file")) + .send() + .await + .unwrap(); + + let mut conf = WebsiteConfiguration::builder() + .index_document( + IndexDocument::builder() + .suffix("home.html") + .build() + .unwrap(), + ) + .error_document(ErrorDocument::builder().key("404.html").build().unwrap()); + + for (prefix, condition) in [("unconditional", false), ("conditional", true)] { + let code = condition.then(|| "404".to_string()); + conf = conf + // simple redirect + .routing_rules( + RoutingRule::builder() + .condition( + Condition::builder() + .set_http_error_code_returned_equals(code.clone()) + .key_prefix_equals(format!("{prefix}/redirect-prefix/")) + .build(), + ) + .redirect( + Redirect::builder() + .http_redirect_code("302") + .replace_key_prefix_with("other-prefix/") + .build(), + ) + .build(), + ) + .routing_rules( + RoutingRule::builder() + .condition( + Condition::builder() + .set_http_error_code_returned_equals(code.clone()) + .key_prefix_equals(format!("{prefix}/redirect-prefix-307/")) + .build(), + ) + .redirect( + Redirect::builder() + .http_redirect_code("307") + .replace_key_prefix_with("other-prefix/") + .build(), + ) + .build(), + ) + // simple redirect + .routing_rules( + RoutingRule::builder() + .condition( + Condition::builder() + .set_http_error_code_returned_equals(code.clone()) + .key_prefix_equals(format!("{prefix}/redirect-fixed/")) + .build(), + ) + .redirect( + Redirect::builder() + .http_redirect_code("302") + .replace_key_with("fixed_key") + .build(), + ) + .build(), + ) + // stream other file + .routing_rules( + RoutingRule::builder() + .condition( + Condition::builder() + .set_http_error_code_returned_equals(code.clone()) + .key_prefix_equals(format!("{prefix}/stream-fixed/")) + .build(), + ) + .redirect( + Redirect::builder() + .http_redirect_code("200") + .replace_key_with("static-file") + .build(), + ) + .build(), + ) + // stream other file as error + .routing_rules( + RoutingRule::builder() + .condition( + Condition::builder() + .set_http_error_code_returned_equals(code.clone()) + .key_prefix_equals(format!("{prefix}/stream-404/")) + .build(), + ) + .redirect( + Redirect::builder() + .http_redirect_code("404") + .replace_key_with("static-file") + .build(), + ) + .build(), + ) + // fail to stream other file + .routing_rules( + RoutingRule::builder() + .condition( + Condition::builder() + .set_http_error_code_returned_equals(code.clone()) + .key_prefix_equals(format!("{prefix}/stream-missing/")) + .build(), + ) + .redirect( + Redirect::builder() + .http_redirect_code("200") + .replace_key_with("missing-file") + .build(), + ) + .build(), + ); + } + let conf = conf.build(); + + ctx.client + .put_bucket_website() + .bucket(&bucket) + .website_configuration(conf.clone()) + .send() + .await + .unwrap(); + + let stored_cfg = ctx + .client + .get_bucket_website() + .bucket(&bucket) + .send() + .await + .unwrap(); + assert_eq!(stored_cfg.index_document, conf.index_document); + assert_eq!(stored_cfg.error_document, conf.error_document); + assert_eq!(stored_cfg.routing_rules, conf.routing_rules); + + let req = |path| { + Request::builder() + .method("GET") + .uri(format!( + "http://127.0.0.1:{}/{}/path", + ctx.garage.web_port, path + )) + .header("Host", format!("{}.web.garage", BCKT_NAME)) + .body(Body::new(Bytes::new())) + .unwrap() + }; + + test_redirect_helper("unconditional", true, &req).await; + test_redirect_helper("conditional", true, &req).await; + for prefix in ["unconditional", "conditional"] { + for rule_path in [ + "redirect-prefix", + "redirect-prefix-307", + "redirect-fixed", + "stream-fixed", + "stream-404", + "stream-missing", + ] { + ctx.client + .put_object() + .bucket(&bucket) + .key(format!("{prefix}/{rule_path}/path")) + .body(ByteStream::from_static(b"i exist")) + .send() + .await + .unwrap(); + } + } + test_redirect_helper("unconditional", true, &req).await; + test_redirect_helper("conditional", false, &req).await; +} + +async fn test_redirect_helper( + prefix: &str, + should_see_redirect: bool, + req: impl Fn(String) -> Request>, +) { + use http::header; + let client = Client::builder(TokioExecutor::new()).build_http(); + let expected_body = b"i exist".as_ref(); + + let resp = client + .request(req(format!("{prefix}/redirect-prefix"))) + .await + .unwrap(); + if should_see_redirect { + assert_eq!(resp.status(), StatusCode::FOUND); + assert_eq!( + resp.headers() + .get(header::LOCATION) + .unwrap() + .to_str() + .unwrap(), + "/other-prefix/path" + ); + assert!(resp + .into_body() + .collect() + .await + .unwrap() + .to_bytes() + .is_empty()); + } else { + assert_eq!(resp.status(), StatusCode::OK); + assert!(resp.headers().get(header::LOCATION).is_none()); + assert_eq!( + resp.into_body().collect().await.unwrap().to_bytes(), + expected_body, + ); + } + + let resp = client + .request(req(format!("{prefix}/redirect-prefix-307"))) + .await + .unwrap(); + if should_see_redirect { + assert_eq!(resp.status(), StatusCode::TEMPORARY_REDIRECT); + assert_eq!( + resp.headers() + .get(header::LOCATION) + .unwrap() + .to_str() + .unwrap(), + "/other-prefix/path" + ); + assert!(resp + .into_body() + .collect() + .await + .unwrap() + .to_bytes() + .is_empty()); + } else { + assert_eq!(resp.status(), StatusCode::OK); + assert!(resp.headers().get(header::LOCATION).is_none()); + assert_eq!( + resp.into_body().collect().await.unwrap().to_bytes(), + expected_body, + ); + } + + let resp = client + .request(req(format!("{prefix}/redirect-fixed"))) + .await + .unwrap(); + if should_see_redirect { + assert_eq!(resp.status(), StatusCode::FOUND); + assert_eq!( + resp.headers() + .get(header::LOCATION) + .unwrap() + .to_str() + .unwrap(), + "/fixed_key" + ); + assert!(resp + .into_body() + .collect() + .await + .unwrap() + .to_bytes() + .is_empty()); + } else { + assert_eq!(resp.status(), StatusCode::OK); + assert!(resp.headers().get(header::LOCATION).is_none()); + assert_eq!( + resp.into_body().collect().await.unwrap().to_bytes(), + expected_body, + ); + } + let resp = client + .request(req(format!("{prefix}/stream-fixed"))) + .await + .unwrap(); + if should_see_redirect { + assert_eq!(resp.status(), StatusCode::OK); + assert!(resp.headers().get(header::LOCATION).is_none()); + assert_eq!( + resp.into_body().collect().await.unwrap().to_bytes(), + b"static file".as_ref(), + ); + } else { + assert_eq!(resp.status(), StatusCode::OK); + assert!(resp.headers().get(header::LOCATION).is_none()); + assert_eq!( + resp.into_body().collect().await.unwrap().to_bytes(), + expected_body, + ); + } + let resp = client + .request(req(format!("{prefix}/stream-404"))) + .await + .unwrap(); + if should_see_redirect { + assert_eq!(resp.status(), StatusCode::NOT_FOUND); + assert!(resp.headers().get(header::LOCATION).is_none()); + assert_eq!( + resp.into_body().collect().await.unwrap().to_bytes(), + b"static file".as_ref(), + ); + } else { + assert_eq!(resp.status(), StatusCode::OK); + assert!(resp.headers().get(header::LOCATION).is_none()); + assert_eq!( + resp.into_body().collect().await.unwrap().to_bytes(), + expected_body, + ); + } + let resp = client + .request(req(format!("{prefix}/stream-404"))) + .await + .unwrap(); + if should_see_redirect { + assert_eq!(resp.status(), StatusCode::NOT_FOUND); + assert!(resp.headers().get(header::LOCATION).is_none()); + assert_eq!( + resp.into_body().collect().await.unwrap().to_bytes(), + b"static file".as_ref(), + ); + } else { + assert_eq!(resp.status(), StatusCode::OK); + assert!(resp.headers().get(header::LOCATION).is_none()); + assert_eq!( + resp.into_body().collect().await.unwrap().to_bytes(), + expected_body, + ); + } +} + +#[tokio::test] +async fn test_website_invalid_redirect() { + const BCKT_NAME: &str = "my-invalid-redirect"; + let ctx = common::context(); + let bucket = ctx.create_bucket(BCKT_NAME); + + let conf = WebsiteConfiguration::builder() + .routing_rules( + RoutingRule::builder() + .condition(Condition::builder().key_prefix_equals("").build()) + .redirect( + Redirect::builder() + .protocol(Protocol::Https) + .host_name("other.tld") + .replace_key_prefix_with("") + // we don't allow 200 with hostname + .http_redirect_code("200") + .build(), + ) + .build(), + ) + .build(); + + ctx.client + .put_bucket_website() + .bucket(&bucket) + .website_configuration(conf) + .send() + .await + .unwrap_err(); +} diff --git a/src/web/web_server.rs b/src/web/web_server.rs index d8ce0460..dfdbf8a2 100644 --- a/src/web/web_server.rs +++ b/src/web/web_server.rs @@ -235,7 +235,7 @@ impl WebServer { // Get path let path = req.uri().path().to_string(); let index = &website_config.index_document; - let routing_result = path_to_keys(&path, index, &[])?; + let routing_result = path_to_keys(&path, index, &website_config.routing_rules)?; debug!( "Selected bucket: \"{}\" {:?}, routing to {:?}", @@ -518,7 +518,7 @@ fn path_to_keys<'a>( } else { None }; - let target = routing_rule.redirect.compute_target(suffix); + let mut target = routing_rule.redirect.compute_target(suffix); let query_alternative_key = status_code == StatusCode::OK || status_code == StatusCode::NOT_FOUND; let redirect_on_error = @@ -531,6 +531,8 @@ fn path_to_keys<'a>( }) } (true, false) => { + // we need to remove the leading / + target.remove(0); if status_code == StatusCode::OK { break target; } else { @@ -549,6 +551,7 @@ fn path_to_keys<'a>( }); } (true, true) => { + target.remove(0); return Ok(RoutingResult::LoadOrAlternativeError { key, redirect_key: target, @@ -573,14 +576,14 @@ fn path_to_keys<'a>( } else { Ok(RoutingResult::LoadOrRedirect { redirect_if_exists: Some(format!("{key}/{index}")), - key, // we can't use `path` because key might have changed substentially in case of // routing rules redirect_url: percent_encoding::percent_encode( - format!("{path}/").as_bytes(), + format!("/{key}/").as_bytes(), PATH_ENCODING_SET, ) .to_string(), + key, redirect_code: StatusCode::FOUND, }) } -- 2.45.2 From c939d2a936e60e8cb67ecae7a35706e7b7d73ab6 Mon Sep 17 00:00:00 2001 From: trinity-1686a Date: Sun, 22 Dec 2024 15:26:06 +0100 Subject: [PATCH 3/9] clippy --- src/api/s3/website.rs | 10 ++++------ src/model/bucket_table.rs | 8 ++++---- src/web/web_server.rs | 6 +++--- 3 files changed, 11 insertions(+), 13 deletions(-) diff --git a/src/api/s3/website.rs b/src/api/s3/website.rs index ad592260..6ec786a0 100644 --- a/src/api/s3/website.rs +++ b/src/api/s3/website.rs @@ -330,12 +330,10 @@ impl Condition { impl Redirect { pub fn validate(&self) -> Result<(), Error> { - if self.replace_prefix.is_some() { - if self.replace_full.is_some() { - return Err(Error::bad_request( - "Bad XML: both ReplaceKeyPrefixWith and ReplaceKeyWith are set", - )); - } + if self.replace_prefix.is_some() && self.replace_full.is_some() { + return Err(Error::bad_request( + "Bad XML: both ReplaceKeyPrefixWith and ReplaceKeyWith are set", + )); } if let Some(ref protocol) = self.protocol { if protocol.0 != "http" && protocol.0 != "https" { diff --git a/src/model/bucket_table.rs b/src/model/bucket_table.rs index 625c177d..644189b2 100644 --- a/src/model/bucket_table.rs +++ b/src/model/bucket_table.rs @@ -90,21 +90,21 @@ mod v08 { let mut res = String::new(); if let Some(hostname) = &self.hostname { if let Some(protocol) = &self.protocol { - res.push_str(&protocol); + res.push_str(protocol); res.push_str("://"); } else { res.push_str("//"); } - res.push_str(&hostname); + res.push_str(hostname); } res.push('/'); if let Some(replace_key_prefix) = &self.replace_key_prefix { - res.push_str(&replace_key_prefix); + res.push_str(replace_key_prefix); if let Some(suffix) = suffix { res.push_str(suffix) } } else if let Some(replace_key) = &self.replace_key { - res.push_str(&replace_key) + res.push_str(replace_key) } res } diff --git a/src/web/web_server.rs b/src/web/web_server.rs index dfdbf8a2..c497270a 100644 --- a/src/web/web_server.rs +++ b/src/web/web_server.rs @@ -476,8 +476,8 @@ impl RoutingResult { /// which is also AWS S3 behavior. /// /// Check: https://docs.aws.amazon.com/AmazonS3/latest/userguide/IndexDocumentSupport.html -fn path_to_keys<'a>( - path: &'a str, +fn path_to_keys( + path: &str, index: &str, routing_rules: &[RoutingRule], ) -> Result { @@ -488,7 +488,7 @@ fn path_to_keys<'a>( None => return Err(Error::BadRequest("Path must start with a / (slash)".into())), }; - let is_bucket_root = base_key.len() == 0; + let is_bucket_root = base_key.is_empty(); let is_trailing_slash = path_utf8.ends_with("/"); let key = if is_bucket_root || is_trailing_slash { -- 2.45.2 From 6ccfbb298631e0176d587fe17f736dd2100b38b2 Mon Sep 17 00:00:00 2001 From: Alex Auvolat Date: Sat, 4 Jan 2025 17:04:17 +0100 Subject: [PATCH 4/9] remove obsolete RedirectAll struct --- src/model/bucket_table.rs | 6 ------ 1 file changed, 6 deletions(-) diff --git a/src/model/bucket_table.rs b/src/model/bucket_table.rs index 644189b2..eba89e9b 100644 --- a/src/model/bucket_table.rs +++ b/src/model/bucket_table.rs @@ -110,12 +110,6 @@ mod v08 { } } - #[derive(PartialEq, Eq, Clone, Debug, Serialize, Deserialize)] - pub struct RedirectAll { - pub hostname: String, - pub protoco: String, - } - #[derive(PartialEq, Eq, Clone, Debug, Serialize, Deserialize)] pub struct CorsRule { pub id: Option, -- 2.45.2 From 22487ceddfd3b8d0641601874e0f19143da38699 Mon Sep 17 00:00:00 2001 From: Alex Auvolat Date: Sat, 4 Jan 2025 18:22:42 +0100 Subject: [PATCH 5/9] move Redirect::compute_target to standalone function in web_server.rs --- src/model/bucket_table.rs | 25 ------------------------- src/web/web_server.rs | 29 ++++++++++++++++++++++++++--- 2 files changed, 26 insertions(+), 28 deletions(-) diff --git a/src/model/bucket_table.rs b/src/model/bucket_table.rs index eba89e9b..b6daab75 100644 --- a/src/model/bucket_table.rs +++ b/src/model/bucket_table.rs @@ -85,31 +85,6 @@ mod v08 { pub replace_key: Option, } - impl Redirect { - pub fn compute_target(&self, suffix: Option<&str>) -> String { - let mut res = String::new(); - if let Some(hostname) = &self.hostname { - if let Some(protocol) = &self.protocol { - res.push_str(protocol); - res.push_str("://"); - } else { - res.push_str("//"); - } - res.push_str(hostname); - } - res.push('/'); - if let Some(replace_key_prefix) = &self.replace_key_prefix { - res.push_str(replace_key_prefix); - if let Some(suffix) = suffix { - res.push_str(suffix) - } - } else if let Some(replace_key) = &self.replace_key { - res.push_str(replace_key) - } - res - } - } - #[derive(PartialEq, Eq, Clone, Debug, Serialize, Deserialize)] pub struct CorsRule { pub id: Option, diff --git a/src/web/web_server.rs b/src/web/web_server.rs index c497270a..bd543bb0 100644 --- a/src/web/web_server.rs +++ b/src/web/web_server.rs @@ -28,7 +28,7 @@ use garage_api::s3::error::{ }; use garage_api::s3::get::{handle_get_without_ctx, handle_head_without_ctx}; -use garage_model::bucket_table::RoutingRule; +use garage_model::bucket_table::{self, RoutingRule}; use garage_model::garage::Garage; use garage_table::*; @@ -518,7 +518,7 @@ fn path_to_keys( } else { None }; - let mut target = routing_rule.redirect.compute_target(suffix); + let mut target = compute_redirect_target(&routing_rule.redirect, suffix); let query_alternative_key = status_code == StatusCode::OK || status_code == StatusCode::NOT_FOUND; let redirect_on_error = @@ -560,7 +560,7 @@ fn path_to_keys( } } } else { - let target = routing_rule.redirect.compute_target(None); + let target = compute_redirect_target(&routing_rule.redirect, None); return Ok(RoutingResult::Redirect { url: target, code: status_code, @@ -601,6 +601,29 @@ const PATH_ENCODING_SET: &percent_encoding::AsciiSet = &percent_encoding::CONTRO .add(b'{') .add(b'}'); +fn compute_redirect_target(redirect: &bucket_table::Redirect, suffix: Option<&str>) -> String { + let mut res = String::new(); + if let Some(hostname) = &redirect.hostname { + if let Some(protocol) = &redirect.protocol { + res.push_str(protocol); + res.push_str("://"); + } else { + res.push_str("//"); + } + res.push_str(hostname); + } + res.push('/'); + if let Some(replace_key_prefix) = &redirect.replace_key_prefix { + res.push_str(replace_key_prefix); + if let Some(suffix) = suffix { + res.push_str(suffix) + } + } else if let Some(replace_key) = &redirect.replace_key { + res.push_str(replace_key) + } + res +} + #[cfg(test)] mod tests { use super::*; -- 2.45.2 From 44ce6ae5b4c5b30a144d37aab1d11e7237a954c1 Mon Sep 17 00:00:00 2001 From: Alex Auvolat Date: Sat, 4 Jan 2025 18:50:49 +0100 Subject: [PATCH 6/9] properly implement new bucket model using a migration --- src/model/bucket_table.rs | 130 ++++++++++++++++++++++++++++++------- src/util/crdt/deletable.rs | 10 +++ src/util/crdt/lww.rs | 10 +++ 3 files changed, 126 insertions(+), 24 deletions(-) diff --git a/src/model/bucket_table.rs b/src/model/bucket_table.rs index b6daab75..b2679323 100644 --- a/src/model/bucket_table.rs +++ b/src/model/bucket_table.rs @@ -60,29 +60,6 @@ mod v08 { pub struct WebsiteConfig { pub index_document: String, pub error_document: Option, - #[serde(default, skip_serializing_if = "Vec::is_empty")] - pub routing_rules: Vec, - } - - #[derive(PartialEq, Eq, Clone, Debug, Serialize, Deserialize)] - pub struct RoutingRule { - pub condition: Option, - pub redirect: Redirect, - } - - #[derive(PartialEq, Eq, Clone, Debug, Serialize, Deserialize)] - pub struct Condition { - pub http_error_code: Option, - pub prefix: Option, - } - - #[derive(PartialEq, Eq, Clone, Debug, Serialize, Deserialize)] - pub struct Redirect { - pub hostname: Option, - pub http_redirect_code: u16, - pub protocol: Option, - pub replace_key_prefix: Option, - pub replace_key: Option, } #[derive(PartialEq, Eq, Clone, Debug, Serialize, Deserialize)] @@ -142,7 +119,112 @@ mod v08 { impl garage_util::migrate::InitialFormat for Bucket {} } -pub use v08::*; +mod v2 { + use crate::permission::BucketKeyPerm; + use garage_util::crdt; + use garage_util::data::Uuid; + use serde::{Deserialize, Serialize}; + + use super::v08; + + pub use v08::{BucketQuotas, CorsRule, LifecycleExpiration, LifecycleFilter, LifecycleRule}; + + #[derive(PartialEq, Eq, Clone, Debug, Serialize, Deserialize)] + pub struct Bucket { + /// ID of the bucket + pub id: Uuid, + /// State, and configuration if not deleted, of the bucket + pub state: crdt::Deletable, + } + + /// Configuration for a bucket + #[derive(PartialEq, Eq, Clone, Debug, Serialize, Deserialize)] + pub struct BucketParams { + /// Bucket's creation date + pub creation_date: u64, + /// Map of key with access to the bucket, and what kind of access they give + pub authorized_keys: crdt::Map, + + /// Map of aliases that are or have been given to this bucket + /// in the global namespace + /// (not authoritative: this is just used as an indication to + /// map back to aliases when doing ListBuckets) + pub aliases: crdt::LwwMap, + /// Map of aliases that are or have been given to this bucket + /// in namespaces local to keys + /// key = (access key id, alias name) + pub local_aliases: crdt::LwwMap<(String, String), bool>, + + /// Whether this bucket is allowed for website access + /// (under all of its global alias names), + /// and if so, the website configuration XML document + pub website_config: crdt::Lww>, + /// CORS rules + pub cors_config: crdt::Lww>>, + /// Lifecycle configuration + pub lifecycle_config: crdt::Lww>>, + /// Bucket quotas + pub quotas: crdt::Lww, + } + + #[derive(PartialEq, Eq, Clone, Debug, Serialize, Deserialize)] + pub struct WebsiteConfig { + pub index_document: String, + pub error_document: Option, + pub routing_rules: Vec, + } + + #[derive(PartialEq, Eq, Clone, Debug, Serialize, Deserialize)] + pub struct RoutingRule { + pub condition: Option, + pub redirect: Redirect, + } + + #[derive(PartialEq, Eq, Clone, Debug, Serialize, Deserialize)] + pub struct Condition { + pub http_error_code: Option, + pub prefix: Option, + } + + #[derive(PartialEq, Eq, Clone, Debug, Serialize, Deserialize)] + pub struct Redirect { + pub hostname: Option, + pub http_redirect_code: u16, + pub protocol: Option, + pub replace_key_prefix: Option, + pub replace_key: Option, + } + + impl garage_util::migrate::Migrate for Bucket { + const VERSION_MARKER: &'static [u8] = b"G2bkt"; + + type Previous = v08::Bucket; + + fn migrate(old: v08::Bucket) -> Bucket { + Bucket { + id: old.id, + state: old.state.map(|x| BucketParams { + creation_date: x.creation_date, + authorized_keys: x.authorized_keys, + aliases: x.aliases, + local_aliases: x.local_aliases, + website_config: x.website_config.map(|wc_opt| { + wc_opt.map(|wc| WebsiteConfig { + index_document: wc.index_document, + error_document: wc.error_document, + routing_rules: vec![], + }) + }), + cors_config: x.cors_config, + lifecycle_config: x.lifecycle_config, + quotas: x.quotas, + }), + } + } + } +} + +pub use v2::*; impl AutoCrdt for BucketQuotas { const WARN_IF_DIFFERENT: bool = true; diff --git a/src/util/crdt/deletable.rs b/src/util/crdt/deletable.rs index e771aceb..0594d850 100644 --- a/src/util/crdt/deletable.rs +++ b/src/util/crdt/deletable.rs @@ -9,6 +9,16 @@ pub enum Deletable { Deleted, } +impl Deletable { + /// Map value, used for migrations + pub fn map U>(self, f: F) -> Deletable { + match self { + Self::Present(x) => Deletable::::Present(f(x)), + Self::Deleted => Deletable::::Deleted, + } + } +} + impl Deletable { /// Create a new deletable object that isn't deleted pub fn present(v: T) -> Self { diff --git a/src/util/crdt/lww.rs b/src/util/crdt/lww.rs index 958844c9..2e5875ea 100644 --- a/src/util/crdt/lww.rs +++ b/src/util/crdt/lww.rs @@ -43,6 +43,16 @@ pub struct Lww { v: T, } +impl Lww { + /// Map value, used for migrations + pub fn map U>(self, f: F) -> Lww { + Lww:: { + ts: self.ts, + v: f(self.v), + } + } +} + impl Lww where T: Crdt, -- 2.45.2 From 9b7fea4cb0acabee5e6a4272e00e4947090630d4 Mon Sep 17 00:00:00 2001 From: Alex Auvolat Date: Sat, 4 Jan 2025 19:16:24 +0100 Subject: [PATCH 7/9] put bucket website: improve error message for redirectallrequests --- src/api/s3/website.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/api/s3/website.rs b/src/api/s3/website.rs index 6ec786a0..9c1422b5 100644 --- a/src/api/s3/website.rs +++ b/src/api/s3/website.rs @@ -229,7 +229,7 @@ impl WebsiteConfiguration { pub fn into_garage_website_config(self) -> Result { if self.redirect_all_requests_to.is_some() { Err(Error::NotImplemented( - "S3 website redirects are not currently implemented in Garage.".into(), + "RedirectAllRequestsTo is not currently implemented in Garage, however its effect can be emulated using a single inconditional RoutingRule.".into(), )) } else { Ok(WebsiteConfig { -- 2.45.2 From 47467df83e7be107d92ec9fefb23542f760e9039 Mon Sep 17 00:00:00 2001 From: Alex Auvolat Date: Sat, 4 Jan 2025 19:50:57 +0100 Subject: [PATCH 8/9] avoid handling status_code-related logic in api/s3/get.rs --- src/api/s3/get.rs | 44 +++++------------------ src/web/web_server.rs | 83 +++++++++++++++++++++++++++++++++---------- 2 files changed, 73 insertions(+), 54 deletions(-) diff --git a/src/api/s3/get.rs b/src/api/s3/get.rs index eea3434e..f5d3cf11 100644 --- a/src/api/s3/get.rs +++ b/src/api/s3/get.rs @@ -163,15 +163,7 @@ pub async fn handle_head( key: &str, part_number: Option, ) -> Result, Error> { - handle_head_without_ctx( - ctx.garage, - req, - ctx.bucket_id, - key, - StatusCode::OK, - part_number, - ) - .await + handle_head_without_ctx(ctx.garage, req, ctx.bucket_id, key, part_number).await } /// Handle HEAD request for website @@ -180,7 +172,6 @@ pub async fn handle_head_without_ctx( req: &Request, bucket_id: Uuid, key: &str, - status_code: StatusCode, part_number: Option, ) -> Result, Error> { let object = garage @@ -281,7 +272,7 @@ pub async fn handle_head_without_ctx( checksum_mode, ) .header(CONTENT_LENGTH, format!("{}", version_meta.size)) - .status(status_code) + .status(StatusCode::OK) .body(empty_body())?) } } @@ -294,16 +285,7 @@ pub async fn handle_get( part_number: Option, overrides: GetObjectOverrides, ) -> Result, Error> { - handle_get_without_ctx( - ctx.garage, - req, - ctx.bucket_id, - key, - StatusCode::OK, - part_number, - overrides, - ) - .await + handle_get_without_ctx(ctx.garage, req, ctx.bucket_id, key, part_number, overrides).await } /// Handle GET request @@ -312,7 +294,6 @@ pub async fn handle_get_without_ctx( req: &Request, bucket_id: Uuid, key: &str, - status_code: StatusCode, part_number: Option, overrides: GetObjectOverrides, ) -> Result, Error> { @@ -348,15 +329,11 @@ pub async fn handle_get_without_ctx( let checksum_mode = checksum_mode(&req); - match ( - part_number, - parse_range_header(req, last_v_meta.size)?, - status_code == StatusCode::OK, - ) { - (Some(_), Some(_), _) => Err(Error::bad_request( + match (part_number, parse_range_header(req, last_v_meta.size)?) { + (Some(_), Some(_)) => Err(Error::bad_request( "Cannot specify both partNumber and Range header", )), - (Some(pn), None, true) => { + (Some(pn), None) => { handle_get_part( garage, last_v, @@ -369,7 +346,7 @@ pub async fn handle_get_without_ctx( ) .await } - (None, Some(range), true) => { + (None, Some(range)) => { handle_get_range( garage, last_v, @@ -383,8 +360,7 @@ pub async fn handle_get_without_ctx( ) .await } - _ => { - // either not a range, or an error request: always return the full doc + (None, None) => { handle_get_full( garage, last_v, @@ -394,7 +370,6 @@ pub async fn handle_get_without_ctx( &headers, overrides, checksum_mode, - status_code, ) .await } @@ -410,7 +385,6 @@ async fn handle_get_full( meta_inner: &ObjectVersionMetaInner, overrides: GetObjectOverrides, checksum_mode: ChecksumMode, - status_code: StatusCode, ) -> Result, Error> { let mut resp_builder = object_headers( version, @@ -420,7 +394,7 @@ async fn handle_get_full( checksum_mode, ) .header(CONTENT_LENGTH, format!("{}", version_meta.size)) - .status(status_code); + .status(StatusCode::OK); getobject_override_headers(overrides, &mut resp_builder)?; let stream = full_object_byte_stream(garage, version, version_data, encryption); diff --git a/src/web/web_server.rs b/src/web/web_server.rs index bd543bb0..23a21614 100644 --- a/src/web/web_server.rs +++ b/src/web/web_server.rs @@ -6,6 +6,7 @@ use tokio::net::{TcpListener, UnixListener}; use tokio::sync::watch; use hyper::{ + body::Body, body::Incoming as IncomingBody, header::{HeaderValue, HOST}, Method, Request, Response, StatusCode, @@ -22,6 +23,7 @@ use crate::error::*; use garage_api::generic_server::{server_loop, UnixListenerOn}; use garage_api::helpers::*; +use garage_api::s3::api_server::ResBody; use garage_api::s3::cors::{add_cors_headers, find_matching_cors_rule, handle_options_for_bucket}; use garage_api::s3::error::{ CommonErrorDerivative, Error as ApiError, OkOrBadRequest, OkOrInternalError, @@ -252,19 +254,10 @@ impl WebServer { .body(empty_body()) .unwrap()), (&Method::HEAD, Ok((key, code))) => { - handle_head_without_ctx(self.garage.clone(), req, bucket_id, key, code, None).await + handle_head(self.garage.clone(), req, bucket_id, key, code).await } (&Method::GET, Ok((key, code))) => { - handle_get_without_ctx( - self.garage.clone(), - req, - bucket_id, - key, - code, - None, - Default::default(), - ) - .await + handle_get(self.garage.clone(), req, bucket_id, key, code).await } _ => Err(ApiError::bad_request("HTTP method not supported")), }; @@ -306,25 +299,22 @@ impl WebServer { ) => { match *req.method() { Method::HEAD => { - handle_head_without_ctx( + handle_head( self.garage.clone(), req, bucket_id, redirect_key, *redirect_code, - None, ) .await } Method::GET => { - handle_get_without_ctx( + handle_get( self.garage.clone(), req, bucket_id, redirect_key, *redirect_code, - None, - Default::default(), ) .await } @@ -361,14 +351,12 @@ impl WebServer { .body(empty_body::()) .unwrap(); - match handle_get_without_ctx( + match handle_get( self.garage.clone(), &req2, bucket_id, &error_document, error.http_status_code(), - None, - Default::default(), ) .await { @@ -413,6 +401,63 @@ impl WebServer { } } +async fn handle_head( + garage: Arc, + req: &Request, + bucket_id: Uuid, + key: &str, + status_code: StatusCode, +) -> Result, ApiError> { + if status_code != StatusCode::OK { + // See comment in handle_get + let cleaned_req = Request::builder() + .uri(req.uri()) + .body(empty_body::()) + .unwrap(); + + let mut ret = handle_head_without_ctx(garage, &cleaned_req, bucket_id, key, None).await?; + *ret.status_mut() = status_code; + Ok(ret) + } else { + handle_head_without_ctx(garage, req, bucket_id, key, None).await + } +} + +pub async fn handle_get( + garage: Arc, + req: &Request, + bucket_id: Uuid, + key: &str, + status_code: StatusCode, +) -> Result, ApiError> { + if status_code != StatusCode::OK { + // If we are returning an error document, discard all headers from + // the original GET request that would have influenced the result: + // - Range header, we don't want to return a subrange of the error document + // - Caching directives such as If-None-Match, etc, which are not relevant + let cleaned_req = Request::builder() + .uri(req.uri()) + .body(empty_body::()) + .unwrap(); + + let mut ret = handle_get_without_ctx( + garage, + &cleaned_req, + bucket_id, + key, + None, + Default::default(), + ) + .await?; + + *ret.status_mut() = status_code; + + Ok(ret) + } else { + handle_get_without_ctx(garage, req, bucket_id, key, None, Default::default()).await + } +} + fn error_to_res(e: Error) -> Response> { // If we are here, it is either that: // - there was an error before trying to get the requested URL -- 2.45.2 From 2aaba39ddc41df99ffc488a1377a82e8482be495 Mon Sep 17 00:00:00 2001 From: Alex Auvolat Date: Sat, 4 Jan 2025 20:11:54 +0100 Subject: [PATCH 9/9] refactor web_server.rs --- src/web/web_server.rs | 103 ++++++++++++++++-------------------------- 1 file changed, 38 insertions(+), 65 deletions(-) diff --git a/src/web/web_server.rs b/src/web/web_server.rs index 23a21614..7a9eb1b4 100644 --- a/src/web/web_server.rs +++ b/src/web/web_server.rs @@ -253,13 +253,9 @@ impl WebServer { .header("Location", url) .body(empty_body()) .unwrap()), - (&Method::HEAD, Ok((key, code))) => { - handle_head(self.garage.clone(), req, bucket_id, key, code).await + (_, Ok((key, code))) => { + handle_inner(self.garage.clone(), req, bucket_id, key, code).await } - (&Method::GET, Ok((key, code))) => { - handle_get(self.garage.clone(), req, bucket_id, key, code).await - } - _ => Err(ApiError::bad_request("HTTP method not supported")), }; // Try handling errors if bucket configuration provided fallbacks @@ -297,30 +293,14 @@ impl WebServer { .. }, ) => { - match *req.method() { - Method::HEAD => { - handle_head( - self.garage.clone(), - req, - bucket_id, - redirect_key, - *redirect_code, - ) - .await - } - Method::GET => { - handle_get( - self.garage.clone(), - req, - bucket_id, - redirect_key, - *redirect_code, - ) - .await - } - // we shouldn't ever reach here - _ => Err(ApiError::bad_request("HTTP method not supported")), - } + handle_inner( + self.garage.clone(), + req, + bucket_id, + redirect_key, + *redirect_code, + ) + .await } _ => ret_doc, }; @@ -347,11 +327,12 @@ impl WebServer { // We want to return the error document // Create a fake HTTP request with path = the error document let req2 = Request::builder() + .method("GET") .uri(format!("http://{}/{}", host, &error_document)) .body(empty_body::()) .unwrap(); - match handle_get( + match handle_inner( self.garage.clone(), &req2, bucket_id, @@ -401,29 +382,7 @@ impl WebServer { } } -async fn handle_head( - garage: Arc, - req: &Request, - bucket_id: Uuid, - key: &str, - status_code: StatusCode, -) -> Result, ApiError> { - if status_code != StatusCode::OK { - // See comment in handle_get - let cleaned_req = Request::builder() - .uri(req.uri()) - .body(empty_body::()) - .unwrap(); - - let mut ret = handle_head_without_ctx(garage, &cleaned_req, bucket_id, key, None).await?; - *ret.status_mut() = status_code; - Ok(ret) - } else { - handle_head_without_ctx(garage, req, bucket_id, key, None).await - } -} - -pub async fn handle_get( +async fn handle_inner( garage: Arc, req: &Request, bucket_id: Uuid, @@ -432,7 +391,7 @@ pub async fn handle_get( ) -> Result, ApiError> { if status_code != StatusCode::OK { // If we are returning an error document, discard all headers from - // the original GET request that would have influenced the result: + // the original request that would have influenced the result: // - Range header, we don't want to return a subrange of the error document // - Caching directives such as If-None-Match, etc, which are not relevant let cleaned_req = Request::builder() @@ -440,21 +399,35 @@ pub async fn handle_get( .body(empty_body::()) .unwrap(); - let mut ret = handle_get_without_ctx( - garage, - &cleaned_req, - bucket_id, - key, - None, - Default::default(), - ) - .await?; + let mut ret = match req.method() { + &Method::HEAD => { + handle_head_without_ctx(garage, &cleaned_req, bucket_id, key, None).await? + } + &Method::GET => { + handle_get_without_ctx( + garage, + &cleaned_req, + bucket_id, + key, + None, + Default::default(), + ) + .await? + } + _ => return Err(ApiError::bad_request("HTTP method not supported")), + }; *ret.status_mut() = status_code; Ok(ret) } else { - handle_get_without_ctx(garage, req, bucket_id, key, None, Default::default()).await + match req.method() { + &Method::HEAD => handle_head_without_ctx(garage, req, bucket_id, key, None).await, + &Method::GET => { + handle_get_without_ctx(garage, req, bucket_id, key, None, Default::default()).await + } + _ => Err(ApiError::bad_request("HTTP method not supported")), + } } } -- 2.45.2