garage/src/web/web_server.rs

454 lines
13 KiB
Rust
Raw Normal View History

use std::fs::{self, Permissions};
use std::os::unix::prelude::PermissionsExt;
use std::{convert::Infallible, sync::Arc};
2020-11-02 15:48:39 +01:00
use tokio::net::{TcpListener, UnixListener};
use tokio::sync::watch;
2020-11-02 15:48:39 +01:00
2020-11-21 15:15:25 +01:00
use hyper::{
body::Incoming as IncomingBody,
header::{HeaderValue, HOST},
Method, Request, Response, StatusCode,
2020-11-21 17:50:19 +01:00
};
2020-11-02 15:48:39 +01:00
2022-02-22 15:21:06 +01:00
use opentelemetry::{
global,
metrics::{Counter, ValueRecorder},
trace::{FutureExt, TraceContextExt, Tracer},
Context, KeyValue,
};
2020-11-21 17:50:19 +01:00
use crate::error::*;
2021-12-16 11:47:58 +01:00
use garage_api::generic_server::{server_loop, UnixListenerOn};
use garage_api::helpers::*;
First implementation of K2V (#293) **Specification:** View spec at [this URL](https://git.deuxfleurs.fr/Deuxfleurs/garage/src/branch/k2v/doc/drafts/k2v-spec.md) - [x] Specify the structure of K2V triples - [x] Specify the DVVS format used for causality detection - [x] Specify the K2V index (just a counter of number of values per partition key) - [x] Specify single-item endpoints: ReadItem, InsertItem, DeleteItem - [x] Specify index endpoint: ReadIndex - [x] Specify multi-item endpoints: InsertBatch, ReadBatch, DeleteBatch - [x] Move to JSON objects instead of tuples - [x] Specify endpoints for polling for updates on single values (PollItem) **Implementation:** - [x] Table for K2V items, causal contexts - [x] Indexing mechanism and table for K2V index - [x] Make API handlers a bit more generic - [x] K2V API endpoint - [x] K2V API router - [x] ReadItem - [x] InsertItem - [x] DeleteItem - [x] PollItem - [x] ReadIndex - [x] InsertBatch - [x] ReadBatch - [x] DeleteBatch **Testing:** - [x] Just a simple Python script that does some requests to check visually that things are going right (does not contain parsing of results or assertions on returned values) - [x] Actual tests: - [x] Adapt testing framework - [x] Simple test with InsertItem + ReadItem - [x] Test with several Insert/Read/DeleteItem + ReadIndex - [x] Test all combinations of return formats for ReadItem - [x] Test with ReadBatch, InsertBatch, DeleteBatch - [x] Test with PollItem - [x] Test error codes - [ ] Fix most broken stuff - [x] test PollItem broken randomly - [x] when invalid causality tokens are given, errors should be 4xx not 5xx **Improvements:** - [x] Descending range queries - [x] Specify - [x] Implement - [x] Add test - [x] Batch updates to index counter - [x] Put K2V behind `k2v` feature flag Co-authored-by: Alex Auvolat <alex@adnab.me> Reviewed-on: https://git.deuxfleurs.fr/Deuxfleurs/garage/pulls/293 Co-authored-by: Alex <alex@adnab.me> Co-committed-by: Alex <alex@adnab.me>
2022-05-10 13:16:57 +02:00
use garage_api::s3::cors::{add_cors_headers, find_matching_cors_rule, handle_options_for_bucket};
First version of admin API (#298) **Spec:** - [x] Start writing - [x] Specify all layout endpoints - [x] Specify all endpoints for operations on keys - [x] Specify all endpoints for operations on key/bucket permissions - [x] Specify all endpoints for operations on buckets - [x] Specify all endpoints for operations on bucket aliases View rendered spec at <https://git.deuxfleurs.fr/Deuxfleurs/garage/src/branch/admin-api/doc/drafts/admin-api.md> **Code:** - [x] Refactor code for admin api to use common api code that was created for K2V **General endpoints:** - [x] Metrics - [x] GetClusterStatus - [x] ConnectClusterNodes - [x] GetClusterLayout - [x] UpdateClusterLayout - [x] ApplyClusterLayout - [x] RevertClusterLayout **Key-related endpoints:** - [x] ListKeys - [x] CreateKey - [x] ImportKey - [x] GetKeyInfo - [x] UpdateKey - [x] DeleteKey **Bucket-related endpoints:** - [x] ListBuckets - [x] CreateBucket - [x] GetBucketInfo - [x] DeleteBucket - [x] PutBucketWebsite - [x] DeleteBucketWebsite **Operations on key/bucket permissions:** - [x] BucketAllowKey - [x] BucketDenyKey **Operations on bucket aliases:** - [x] GlobalAliasBucket - [x] GlobalUnaliasBucket - [x] LocalAliasBucket - [x] LocalUnaliasBucket **And also:** - [x] Separate error type for the admin API (this PR includes a quite big refactoring of error handling) - [x] Add management of website access - [ ] Check that nothing is missing wrt what can be done using the CLI - [ ] Improve formatting of the spec - [x] Make sure everyone is cool with the API design Fix #231 Fix #295 Co-authored-by: Alex Auvolat <alex@adnab.me> Reviewed-on: https://git.deuxfleurs.fr/Deuxfleurs/garage/pulls/298 Co-authored-by: Alex <alex@adnab.me> Co-committed-by: Alex <alex@adnab.me>
2022-05-24 12:16:39 +02:00
use garage_api::s3::error::{
CommonErrorDerivative, Error as ApiError, OkOrBadRequest, OkOrInternalError,
};
2024-03-03 14:56:52 +01:00
use garage_api::s3::get::{handle_get_without_ctx, handle_head_without_ctx};
2021-12-16 11:47:58 +01:00
2020-11-02 15:48:39 +01:00
use garage_model::garage::Garage;
2021-12-16 11:47:58 +01:00
2020-12-17 22:51:44 +01:00
use garage_table::*;
use garage_util::data::Uuid;
2020-11-19 14:56:00 +01:00
use garage_util::error::Error as GarageError;
use garage_util::forwarded_headers;
2022-02-22 15:21:06 +01:00
use garage_util::metrics::{gen_trace_id, RecordDuration};
use garage_util::socket_address::UnixOrTCPSocketAddress;
2022-02-22 15:21:06 +01:00
/// OpenTelemetry instruments recorded by the web endpoint.
struct WebMetrics {
    /// Incremented once for every request processed by `handle_request`.
    request_counter: Counter<u64>,
    /// Incremented for every request whose handling ended with an error.
    error_counter: Counter<u64>,
    /// Records how long the call to `serve_file` took for each request.
    request_duration: ValueRecorder<f64>,
}
impl WebMetrics {
    /// Create the web endpoint's instruments on the global `garage/web` meter.
    fn new() -> Self {
        let meter = global::meter("garage/web");

        let request_counter = meter
            .u64_counter("web.request_counter")
            .with_description("Number of requests to the web endpoint")
            .init();

        let error_counter = meter
            .u64_counter("web.error_counter")
            .with_description("Number of requests to the web endpoint resulting in errors")
            .init();

        let request_duration = meter
            .f64_value_recorder("web.request_duration")
            .with_description("Duration of requests to the web endpoint")
            .init();

        Self {
            request_counter,
            error_counter,
            request_duration,
        }
    }
}
2020-11-02 15:48:39 +01:00
2022-09-07 17:54:16 +02:00
pub struct WebServer {
2020-11-02 15:48:39 +01:00
garage: Arc<Garage>,
2022-09-07 17:54:16 +02:00
metrics: Arc<WebMetrics>,
root_domain: String,
}
2020-11-02 15:48:39 +01:00
2022-09-07 17:54:16 +02:00
impl WebServer {
/// Run a web server
pub fn new(garage: Arc<Garage>, root_domain: String) -> Arc<Self> {
2022-09-07 17:54:16 +02:00
let metrics = Arc::new(WebMetrics::new());
Arc::new(WebServer {
2022-09-07 17:54:16 +02:00
garage,
metrics,
root_domain,
})
}
2022-09-07 17:54:16 +02:00
/// Bind `bind_addr` and serve web requests on it through `server_loop`.
///
/// Both TCP and Unix-domain socket addresses are supported. For a Unix
/// socket, any pre-existing file at the path is removed before binding.
///
/// `must_exit` is handed to `server_loop` unchanged.
/// NOTE(review): presumably it is the shutdown signal; confirm in `server_loop`.
///
/// # Errors
///
/// Returns an error if binding the listener, removing a stale socket file
/// or setting the socket permissions fails, or if `server_loop` fails.
pub async fn run(
    self: Arc<Self>,
    bind_addr: UnixOrTCPSocketAddress,
    must_exit: watch::Receiver<bool>,
) -> Result<(), GarageError> {
    let server_name = "Web".into();
    info!("Web server listening on {}", bind_addr);

    match bind_addr {
        UnixOrTCPSocketAddress::TCPSocket(addr) => {
            let listener = TcpListener::bind(addr).await?;

            let handler =
                move |stream, socketaddr| self.clone().handle_request(stream, socketaddr);
            server_loop(server_name, listener, handler, must_exit).await
        }
        UnixOrTCPSocketAddress::UnixSocket(ref path) => {
            // A leftover socket file would make `bind` fail, so remove it first.
            if path.exists() {
                fs::remove_file(path)?
            }

            let listener = UnixListener::bind(path)?;
            let listener = UnixListenerOn(listener, path.display().to_string());

            // 0o222 = write-only for owner, group and others: on Linux, write
            // permission on the socket file is what `connect()` requires.
            fs::set_permissions(path, Permissions::from_mode(0o222))?;

            let handler =
                move |stream, socketaddr| self.clone().handle_request(stream, socketaddr);
            server_loop(server_name, listener, handler, must_exit).await
        }
    }
}
2022-09-07 17:54:16 +02:00
async fn handle_request(
self: Arc<Self>,
req: Request<IncomingBody>,
addr: String,
) -> Result<Response<BoxBody<Error>>, http::Error> {
if let Ok(forwarded_for_ip_addr) =
2023-05-09 20:49:34 +01:00
forwarded_headers::handle_forwarded_for_headers(req.headers())
{
info!(
"{} (via {}) {} {}",
forwarded_for_ip_addr,
addr,
req.method(),
req.uri()
);
} else {
info!("{} {} {}", addr, req.method(), req.uri());
}
2022-09-07 17:54:16 +02:00
// Lots of instrumentation
let tracer = opentelemetry::global::tracer("garage");
let span = tracer
.span_builder(format!("Web {} request", req.method()))
.with_trace_id(gen_trace_id())
.with_attributes(vec![
KeyValue::new("method", format!("{}", req.method())),
KeyValue::new("uri", req.uri().to_string()),
])
.start(&tracer);
let metrics_tags = &[KeyValue::new("method", req.method().to_string())];
// The actual handler
let res = self
.serve_file(&req)
.with_context(Context::current_with_span(span))
.record_duration(&self.metrics.request_duration, &metrics_tags[..])
.await;
// More instrumentation
self.metrics.request_counter.add(1, &metrics_tags[..]);
// Returning the result
match res {
Ok(res) => {
debug!("{} {} {}", req.method(), res.status(), req.uri());
Ok(res
.map(|body| BoxBody::new(http_body_util::BodyExt::map_err(body, Error::from))))
2022-09-07 17:54:16 +02:00
}
Err(error) => {
info!(
"{} {} {} {}",
req.method(),
error.http_status_code(),
req.uri(),
error
);
self.metrics.error_counter.add(
1,
&[
metrics_tags[0].clone(),
KeyValue::new("status_code", error.http_status_code().to_string()),
],
);
Ok(error_to_res(error))
}
2020-11-02 15:48:39 +01:00
}
2022-09-07 17:54:16 +02:00
}
2020-11-02 15:48:39 +01:00
/// Tell whether `key` currently holds data in bucket `bucket_id`.
///
/// Returns `true` only if the object table has an entry for the key and at
/// least one of its versions reports `is_data()`; a missing entry yields
/// `false`. Errors from the table lookup are propagated.
async fn check_key_exists(self: &Arc<Self>, bucket_id: Uuid, key: &str) -> Result<bool, Error> {
    let lookup = self
        .garage
        .object_table
        .get(&bucket_id, &key.to_string())
        .await?;

    let has_data = match lookup {
        Some(object) => object.versions().iter().any(|v| v.is_data()),
        None => false,
    };
    Ok(has_data)
}
async fn serve_file(
self: &Arc<Self>,
req: &Request<IncomingBody>,
) -> Result<Response<BoxBody<ApiError>>, Error> {
2022-09-07 17:54:16 +02:00
// Get http authority string (eg. [::1]:3902 or garage.tld:80)
let authority = req
.headers()
.get(HOST)
.ok_or_bad_request("HOST header required")?
.to_str()?;
// Get bucket
let host = authority_to_host(authority)?;
let bucket_name = host_to_bucket(&host, &self.root_domain).unwrap_or(&host);
let bucket_id = self
.garage
.bucket_alias_table
.get(&EmptyKey, &bucket_name.to_string())
.await?
.and_then(|x| x.state.take())
.ok_or(Error::NotFound)?;
// Check bucket isn't deleted and has website access enabled
let bucket = self
.garage
2024-03-03 14:56:52 +01:00
.bucket_helper()
.get_existing_bucket(bucket_id)
.await
.map_err(|_| Error::NotFound)?;
let bucket_params = bucket.state.into_option().unwrap();
2022-09-07 17:54:16 +02:00
2024-03-03 14:56:52 +01:00
let website_config = bucket_params
2022-09-07 17:54:16 +02:00
.website_config
.get()
.as_ref()
.ok_or(Error::NotFound)?;
// Get path
let path = req.uri().path().to_string();
let index = &website_config.index_document;
let (key, may_redirect) = path_to_keys(&path, index)?;
2022-09-07 17:54:16 +02:00
debug!(
"Selected bucket: \"{}\" {:?}, target key: \"{}\", may redirect to: {:?}",
bucket_name, bucket_id, key, may_redirect
2022-09-07 17:54:16 +02:00
);
let ret_doc = match *req.method() {
2024-03-03 14:56:52 +01:00
Method::OPTIONS => handle_options_for_bucket(req, &bucket_params)
.map_err(ApiError::from)
.map(|res| res.map(|_empty_body: EmptyBody| empty_body())),
2024-03-03 14:56:52 +01:00
Method::HEAD => {
handle_head_without_ctx(self.garage.clone(), req, bucket_id, &key, None).await
}
Method::GET => {
2024-03-03 14:56:52 +01:00
handle_get_without_ctx(
self.garage.clone(),
2024-03-03 14:56:52 +01:00
req,
bucket_id,
&key,
None,
Default::default(),
)
.await
}
2022-09-07 17:54:16 +02:00
_ => Err(ApiError::bad_request("HTTP method not supported")),
};
// Try implicit redirect on error
let ret_doc_with_redir = match (&ret_doc, may_redirect) {
(Err(ApiError::NoSuchKey), ImplicitRedirect::To { key, url })
if self.check_key_exists(bucket_id, key.as_str()).await? =>
{
Ok(Response::builder()
.status(StatusCode::FOUND)
.header("Location", url)
.body(empty_body())
.unwrap())
}
_ => ret_doc,
};
2022-09-07 17:54:16 +02:00
match ret_doc_with_redir.map_err(Error::from) {
2022-09-07 17:54:16 +02:00
Err(error) => {
// For a HEAD or OPTIONS method, and for non-4xx errors,
// we don't return the error document as content,
// we return above and just return the error message
// by relying on err_to_res that is called when we return an Err.
if *req.method() == Method::HEAD
|| *req.method() == Method::OPTIONS
|| !error.http_status_code().is_client_error()
{
return Err(error);
}
2020-11-02 15:48:39 +01:00
2022-09-07 17:54:16 +02:00
// If no error document is set: just return the error directly
let error_document = match &website_config.error_document {
Some(ed) => ed.trim_start_matches('/').to_owned(),
None => return Err(error),
};
// We want to return the error document
// Create a fake HTTP request with path = the error document
let req2 = Request::builder()
.uri(format!("http://{}/{}", host, &error_document))
.body(empty_body::<Infallible>())
2022-09-07 17:54:16 +02:00
.unwrap();
2024-03-03 14:56:52 +01:00
match handle_get_without_ctx(
self.garage.clone(),
&req2,
bucket_id,
&error_document,
None,
Default::default(),
)
.await
2022-09-07 17:54:16 +02:00
{
Ok(mut error_doc) => {
// The error won't be logged back in handle_request,
// so log it here
info!(
"{} {} {} {}",
req.method(),
req.uri(),
error.http_status_code(),
error
);
*error_doc.status_mut() = error.http_status_code();
// Preserve error message in a special header
for error_line in error.to_string().split('\n') {
if let Ok(v) = HeaderValue::from_bytes(error_line.as_bytes()) {
error_doc.headers_mut().append("X-Garage-Error", v);
}
}
2020-11-08 15:47:25 +01:00
2022-09-07 17:54:16 +02:00
Ok(error_doc)
}
Err(error_doc_error) => {
warn!(
"Couldn't get error document {} for bucket {:?}: {}",
error_document, bucket_id, error_doc_error
);
Err(error)
}
}
}
Ok(mut resp) => {
// Maybe add CORS headers
2024-03-03 14:56:52 +01:00
if let Some(rule) = find_matching_cors_rule(&bucket_params, req)? {
2022-09-07 17:54:16 +02:00
add_cors_headers(&mut resp, rule)
.ok_or_internal_error("Invalid bucket CORS configuration")?;
}
Ok(resp)
}
}
2020-11-11 21:17:34 +01:00
}
}
fn error_to_res(e: Error) -> Response<BoxBody<Error>> {
// If we are here, it is either that:
// - there was an error before trying to get the requested URL
// from the bucket (e.g. bucket not found)
// - there was an error processing the request and (the request
// was a HEAD request or we couldn't get the error document)
// We do NOT enter this code path when returning the bucket's
// error document (this is handled in serve_file)
let body = string_body(format!("{}\n", e));
2020-11-11 21:17:34 +01:00
let mut http_error = Response::new(body);
*http_error.status_mut() = e.http_status_code();
e.add_headers(http_error.headers_mut());
2020-11-11 21:17:34 +01:00
http_error
}
/// Whether a request for a missing key may be answered with a redirect to
/// the corresponding "directory" URL.
#[derive(Debug, PartialEq)]
enum ImplicitRedirect {
    /// No redirect is possible for this path.
    No,
    /// If `key` exists in the bucket, redirect the client to `url`.
    To {
        /// Object key of the index document to look for.
        key: String,
        /// Value of the `Location` header to send.
        url: String,
    },
}
2020-11-11 19:48:01 +01:00
/// Path to key
///
/// Convert the provided path to the internal key
/// When a path ends with "/", we append the index name to match traditional web server behavior
/// which is also AWS S3 behavior.
///
/// Check: https://docs.aws.amazon.com/AmazonS3/latest/userguide/IndexDocumentSupport.html
fn path_to_keys<'a>(path: &'a str, index: &str) -> Result<(String, ImplicitRedirect), Error> {
2021-10-26 10:20:05 +02:00
let path_utf8 = percent_encoding::percent_decode_str(path).decode_utf8()?;
2020-11-21 17:50:19 +01:00
let base_key = match path_utf8.strip_prefix("/") {
Some(bk) => bk,
None => return Err(Error::BadRequest("Path must start with a / (slash)".into())),
};
let is_bucket_root = base_key.len() == 0;
let is_trailing_slash = path_utf8.ends_with("/");
match (is_bucket_root, is_trailing_slash) {
// It is not possible to store something at the root of the bucket (ie. empty key),
// the only option is to fetch the index
(true, _) => Ok((index.to_string(), ImplicitRedirect::No)),
// "If you create a folder structure in your bucket, you must have an index document at each level. In each folder, the index document must have the same name, for example, index.html. When a user specifies a URL that resembles a folder lookup, the presence or absence of a trailing slash determines the behavior of the website. For example, the following URL, with a trailing slash, returns the photos/index.html index document."
(false, true) => Ok((format!("{base_key}{index}"), ImplicitRedirect::No)),
// "However, if you exclude the trailing slash from the preceding URL, Amazon S3 first looks for an object photos in the bucket. If the photos object is not found, it searches for an index document, photos/index.html. If that document is found, Amazon S3 returns a 302 Found message and points to the photos/ key. For subsequent requests to photos/, Amazon S3 returns photos/index.html. If the index document is not found, Amazon S3 returns an error."
(false, false) => Ok((
base_key.to_string(),
ImplicitRedirect::To {
key: format!("{base_key}/{index}"),
url: format!("{path}/"),
},
)),
2020-11-11 19:48:01 +01:00
}
}
2020-11-08 15:47:25 +01:00
#[cfg(test)]
mod tests {
    use super::*;

    /// Checks the path-to-key mapping: percent-decoding, index document
    /// resolution, implicit redirects and rejection of relative paths.
    #[test]
    fn path_to_keys_test() -> Result<(), Error> {
        // The key is percent-decoded, the redirect URL keeps the raw path.
        assert_eq!(
            path_to_keys("/file%20.jpg", "index.html")?,
            (
                "file .jpg".to_string(),
                ImplicitRedirect::To {
                    key: "file .jpg/index.html".to_string(),
                    url: "/file%20.jpg/".to_string()
                }
            )
        );
        // A trailing slash selects the index document of that "directory".
        assert_eq!(
            path_to_keys("/%20t/", "index.html")?,
            (" t/index.html".to_string(), ImplicitRedirect::No)
        );
        assert_eq!(
            path_to_keys("/a/b/", "index.html")?,
            ("a/b/index.html".to_string(), ImplicitRedirect::No)
        );
        // The bucket root always maps to the index document.
        assert_eq!(
            path_to_keys("/", "index.html")?,
            ("index.html".to_string(), ImplicitRedirect::No)
        );
        assert_eq!(
            path_to_keys("/hello", "index.html")?,
            (
                "hello".to_string(),
                ImplicitRedirect::To {
                    key: "hello/index.html".to_string(),
                    url: "/hello/".to_string()
                }
            )
        );
        // Paths that do not start with a slash are rejected.
        assert!(path_to_keys("", "index.html").is_err());
        assert!(path_to_keys("i/am/relative", "index.html").is_err());
        Ok(())
    }
}