garage/src/web/web_server.rs

328 lines
9 KiB
Rust
Raw Normal View History

2020-11-21 16:50:19 +00:00
use std::{borrow::Cow, convert::Infallible, net::SocketAddr, sync::Arc};
2020-11-02 14:48:39 +00:00
use futures::future::Future;
2020-11-21 14:15:25 +00:00
use hyper::{
header::{HeaderValue, HOST},
2020-11-21 14:15:25 +00:00
server::conn::AddrStream,
service::{make_service_fn, service_fn},
2020-11-21 16:58:14 +00:00
Body, Method, Request, Response, Server,
2020-11-21 16:50:19 +00:00
};
2020-11-02 14:48:39 +00:00
2022-02-22 14:21:06 +00:00
use opentelemetry::{
global,
metrics::{Counter, ValueRecorder},
trace::{FutureExt, TraceContextExt, Tracer},
Context, KeyValue,
};
2020-11-21 16:50:19 +00:00
use crate::error::*;
2021-12-16 10:47:58 +00:00
use garage_api::error::{Error as ApiError, OkOrBadRequest, OkOrInternalError};
2021-11-11 10:26:02 +00:00
use garage_api::helpers::{authority_to_host, host_to_bucket};
First implementation of K2V (#293) **Specification:** View spec at [this URL](https://git.deuxfleurs.fr/Deuxfleurs/garage/src/branch/k2v/doc/drafts/k2v-spec.md) - [x] Specify the structure of K2V triples - [x] Specify the DVVS format used for causality detection - [x] Specify the K2V index (just a counter of number of values per partition key) - [x] Specify single-item endpoints: ReadItem, InsertItem, DeleteItem - [x] Specify index endpoint: ReadIndex - [x] Specify multi-item endpoints: InsertBatch, ReadBatch, DeleteBatch - [x] Move to JSON objects instead of tuples - [x] Specify endpoints for polling for updates on single values (PollItem) **Implementation:** - [x] Table for K2V items, causal contexts - [x] Indexing mechanism and table for K2V index - [x] Make API handlers a bit more generic - [x] K2V API endpoint - [x] K2V API router - [x] ReadItem - [x] InsertItem - [x] DeleteItem - [x] PollItem - [x] ReadIndex - [x] InsertBatch - [x] ReadBatch - [x] DeleteBatch **Testing:** - [x] Just a simple Python script that does some requests to check visually that things are going right (does not contain parsing of results or assertions on returned values) - [x] Actual tests: - [x] Adapt testing framework - [x] Simple test with InsertItem + ReadItem - [x] Test with several Insert/Read/DeleteItem + ReadIndex - [x] Test all combinations of return formats for ReadItem - [x] Test with ReadBatch, InsertBatch, DeleteBatch - [x] Test with PollItem - [x] Test error codes - [ ] Fix most broken stuff - [x] test PollItem broken randomly - [x] when invalid causality tokens are given, errors should be 4xx not 5xx **Improvements:** - [x] Descending range queries - [x] Specify - [x] Implement - [x] Add test - [x] Batch updates to index counter - [x] Put K2V behind `k2v` feature flag Co-authored-by: Alex Auvolat <alex@adnab.me> Reviewed-on: https://git.deuxfleurs.fr/Deuxfleurs/garage/pulls/293 Co-authored-by: Alex <alex@adnab.me> Co-committed-by: Alex <alex@adnab.me>
2022-05-10 11:16:57 +00:00
use garage_api::s3::cors::{add_cors_headers, find_matching_cors_rule, handle_options_for_bucket};
use garage_api::s3::get::{handle_get, handle_head};
2021-12-16 10:47:58 +00:00
2020-11-02 14:48:39 +00:00
use garage_model::garage::Garage;
2021-12-16 10:47:58 +00:00
2020-12-17 21:51:44 +00:00
use garage_table::*;
2020-11-19 13:56:00 +00:00
use garage_util::error::Error as GarageError;
2022-02-22 14:21:06 +00:00
use garage_util::metrics::{gen_trace_id, RecordDuration};
/// OpenTelemetry instruments recorded by the web endpoint.
///
/// A single instance is created in `run_web_server` and shared (behind an
/// `Arc`) with every call to `handle_request`.
struct WebMetrics {
/// Registered as `web.request_counter`; incremented once for every request
/// handled, whether it succeeded or not.
request_counter: Counter<u64>,
/// Registered as `web.error_counter`; incremented when a request ends with an
/// error that is turned into a response by `error_to_res`.
error_counter: Counter<u64>,
/// Registered as `web.request_duration`; records the duration of the
/// `serve_file` call made for each request.
request_duration: ValueRecorder<f64>,
}
impl WebMetrics {
    /// Register the web endpoint instruments on the global `garage/web` meter.
    fn new() -> Self {
        let meter = global::meter("garage/web");

        let request_counter = meter
            .u64_counter("web.request_counter")
            .with_description("Number of requests to the web endpoint")
            .init();

        let error_counter = meter
            .u64_counter("web.error_counter")
            .with_description("Number of requests to the web endpoint resulting in errors")
            .init();

        let request_duration = meter
            .f64_value_recorder("web.request_duration")
            .with_description("Duration of requests to the web endpoint")
            .init();

        Self {
            request_counter,
            error_counter,
            request_duration,
        }
    }
}
2020-11-02 14:48:39 +00:00
2021-03-26 21:05:16 +00:00
/// Run a web server
2020-11-02 14:48:39 +00:00
pub async fn run_web_server(
garage: Arc<Garage>,
shutdown_signal: impl Future<Output = ()>,
2020-11-19 13:56:00 +00:00
) -> Result<(), GarageError> {
2020-11-10 08:57:07 +00:00
let addr = &garage.config.s3_web.bind_addr;
2020-11-02 14:48:39 +00:00
2022-02-22 14:21:06 +00:00
let metrics = Arc::new(WebMetrics::new());
2020-11-02 14:48:39 +00:00
let service = make_service_fn(|conn: &AddrStream| {
let garage = garage.clone();
2022-02-22 14:21:06 +00:00
let metrics = metrics.clone();
2020-11-02 14:48:39 +00:00
let client_addr = conn.remote_addr();
2020-11-10 08:59:52 +00:00
async move {
2020-11-02 14:48:39 +00:00
Ok::<_, Error>(service_fn(move |req: Request<Body>| {
let garage = garage.clone();
2022-02-22 14:21:06 +00:00
let metrics = metrics.clone();
handle_request(garage, metrics, req, client_addr)
2020-11-02 14:48:39 +00:00
}))
}
});
2021-10-26 08:20:05 +00:00
let server = Server::bind(addr).serve(service);
2020-11-02 14:48:39 +00:00
let graceful = server.with_graceful_shutdown(shutdown_signal);
info!("Web server listening on http://{}", addr);
graceful.await?;
Ok(())
}
2020-11-08 14:47:25 +00:00
2020-11-11 20:17:34 +00:00
async fn handle_request(
2020-11-08 14:47:25 +00:00
garage: Arc<Garage>,
2022-02-22 14:21:06 +00:00
metrics: Arc<WebMetrics>,
2020-11-08 14:47:25 +00:00
req: Request<Body>,
addr: SocketAddr,
2020-11-11 20:17:34 +00:00
) -> Result<Response<Body>, Infallible> {
info!("{} {} {}", addr, req.method(), req.uri());
2022-02-22 14:21:06 +00:00
// Lots of instrumentation
let tracer = opentelemetry::global::tracer("garage");
let span = tracer
.span_builder(format!("Web {} request", req.method()))
.with_trace_id(gen_trace_id())
.with_attributes(vec![
KeyValue::new("method", format!("{}", req.method())),
KeyValue::new("uri", req.uri().to_string()),
])
.start(&tracer);
let metrics_tags = &[KeyValue::new("method", req.method().to_string())];
// The actual handler
let res = serve_file(garage, &req)
.with_context(Context::current_with_span(span))
.record_duration(&metrics.request_duration, &metrics_tags[..])
.await;
// More instrumentation
metrics.request_counter.add(1, &metrics_tags[..]);
// Returning the result
match res {
Ok(res) => {
2022-02-22 14:21:06 +00:00
debug!("{} {} {}", req.method(), res.status(), req.uri());
Ok(res)
}
Err(error) => {
info!(
"{} {} {} {}",
req.method(),
error.http_status_code(),
2022-02-22 14:21:06 +00:00
req.uri(),
error
);
2022-02-22 14:21:06 +00:00
metrics.error_counter.add(
1,
&[
metrics_tags[0].clone(),
KeyValue::new("status_code", error.http_status_code().to_string()),
],
);
Ok(error_to_res(error))
}
2020-11-11 20:17:34 +00:00
}
}
fn error_to_res(e: Error) -> Response<Body> {
// If we are here, it is either that:
// - there was an error before trying to get the requested URL
// from the bucket (e.g. bucket not found)
// - there was an error processing the request and (the request
// was a HEAD request or we couldn't get the error document)
// We do NOT enter this code path when returning the bucket's
// error document (this is handled in serve_file)
let body = Body::from(format!("{}\n", e));
2020-11-11 20:17:34 +00:00
let mut http_error = Response::new(body);
*http_error.status_mut() = e.http_status_code();
e.add_headers(http_error.headers_mut());
2020-11-11 20:17:34 +00:00
http_error
}
async fn serve_file(garage: Arc<Garage>, req: &Request<Body>) -> Result<Response<Body>, Error> {
2020-11-10 08:57:07 +00:00
// Get http authority string (eg. [::1]:3902 or garage.tld:80)
2020-11-08 14:47:25 +00:00
let authority = req
.headers()
.get(HOST)
.ok_or_bad_request("HOST header required")?
2020-11-08 14:47:25 +00:00
.to_str()?;
2020-11-10 08:57:07 +00:00
// Get bucket
2021-11-11 10:26:02 +00:00
let host = authority_to_host(authority)?;
2020-11-10 08:59:52 +00:00
let root = &garage.config.s3_web.root_domain;
2020-11-10 08:57:07 +00:00
2021-12-14 12:55:11 +00:00
let bucket_name = host_to_bucket(&host, root).unwrap_or(&host);
let bucket_id = garage
.bucket_alias_table
.get(&EmptyKey, &bucket_name.to_string())
.await?
2022-03-14 11:00:23 +00:00
.and_then(|x| x.state.take())
2021-12-14 12:55:11 +00:00
.ok_or(Error::NotFound)?;
2021-12-16 10:47:58 +00:00
// Check bucket isn't deleted and has website access enabled
let bucket = garage
2020-12-17 21:51:44 +00:00
.bucket_table
.get(&EmptyKey, &bucket_id)
2020-12-17 21:51:44 +00:00
.await?
.ok_or(Error::NotFound)?;
let website_config = bucket
.params()
.ok_or(Error::NotFound)?
.website_config
.get()
.as_ref()
2020-12-17 21:51:44 +00:00
.ok_or(Error::NotFound)?;
2020-11-10 08:57:07 +00:00
// Get path
let path = req.uri().path().to_string();
let index = &website_config.index_document;
2021-10-26 08:20:05 +00:00
let key = path_to_key(&path, index)?;
2020-11-10 08:59:52 +00:00
debug!(
2021-12-14 12:55:11 +00:00
"Selected bucket: \"{}\" {:?}, selected key: \"{}\"",
bucket_name, bucket_id, key
);
2020-11-11 18:48:01 +00:00
let ret_doc = match *req.method() {
2022-03-01 10:15:16 +00:00
Method::OPTIONS => handle_options_for_bucket(req, &bucket),
Method::HEAD => handle_head(garage.clone(), req, bucket_id, &key, None).await,
Method::GET => handle_get(garage.clone(), req, bucket_id, &key, None).await,
_ => Err(ApiError::BadRequest("HTTP method not supported".into())),
}
.map_err(Error::from);
match ret_doc {
Err(error) => {
2022-01-24 11:03:57 +00:00
// For a HEAD or OPTIONS method, and for non-4xx errors,
// we don't return the error document as content,
// we return above and just return the error message
// by relying on err_to_res that is called when we return an Err.
2022-01-24 11:03:57 +00:00
if *req.method() == Method::HEAD
|| *req.method() == Method::OPTIONS
|| !error.http_status_code().is_client_error()
{
return Err(error);
}
2020-11-21 14:15:25 +00:00
// If no error document is set: just return the error directly
let error_document = match &website_config.error_document {
Some(ed) => ed.trim_start_matches('/').to_owned(),
None => return Err(error),
};
// We want to return the error document
// Create a fake HTTP request with path = the error document
let req2 = Request::builder()
.uri(format!("http://{}/{}", host, &error_document))
.body(Body::empty())
.unwrap();
match handle_get(garage, &req2, bucket_id, &error_document, None).await {
Ok(mut error_doc) => {
// The error won't be logged back in handle_request,
// so log it here
info!(
"{} {} {} {}",
req.method(),
req.uri(),
error.http_status_code(),
error
);
*error_doc.status_mut() = error.http_status_code();
error.add_headers(error_doc.headers_mut());
// Preserve error message in a special header
for error_line in error.to_string().split('\n') {
if let Ok(v) = HeaderValue::from_bytes(error_line.as_bytes()) {
error_doc.headers_mut().append("X-Garage-Error", v);
}
}
Ok(error_doc)
}
Err(error_doc_error) => {
warn!(
"Couldn't get error document {} for bucket {:?}: {}",
error_document, bucket_id, error_doc_error
);
Err(error)
}
}
}
Ok(mut resp) => {
// Maybe add CORS headers
if let Some(rule) = find_matching_cors_rule(&bucket, req)? {
add_cors_headers(&mut resp, rule)
.ok_or_internal_error("Invalid bucket CORS configuration")?;
}
Ok(resp)
}
}
2020-11-08 14:47:25 +00:00
}
2020-11-11 18:48:01 +00:00
/// Path to key
///
/// Convert the provided path to the internal key
/// When a path ends with "/", we append the index name to match traditional web server behavior
/// which is also AWS S3 behavior.
fn path_to_key<'a>(path: &'a str, index: &str) -> Result<Cow<'a, str>, Error> {
2021-10-26 08:20:05 +00:00
let path_utf8 = percent_encoding::percent_decode_str(path).decode_utf8()?;
2020-11-21 16:50:19 +00:00
2021-04-23 20:26:27 +00:00
if !path_utf8.starts_with('/') {
return Err(Error::BadRequest(
"Path must start with a / (slash)".to_string(),
));
2020-11-21 11:01:02 +00:00
}
2020-11-11 18:48:01 +00:00
match path_utf8.chars().last() {
2021-01-15 16:11:15 +00:00
None => unreachable!(),
2020-11-11 18:48:01 +00:00
Some('/') => {
let mut key = String::with_capacity(path_utf8.len() + index.len());
2020-11-21 11:01:02 +00:00
key.push_str(&path_utf8[1..]);
2020-11-11 18:48:01 +00:00
key.push_str(index);
Ok(key.into())
}
2020-11-21 16:50:19 +00:00
Some(_) => match path_utf8 {
Cow::Borrowed(pu8) => Ok((&pu8[1..]).into()),
Cow::Owned(pu8) => Ok((&pu8[1..]).to_string().into()),
},
2020-11-11 18:48:01 +00:00
}
}
2020-11-08 14:47:25 +00:00
#[cfg(test)]
mod tests {
    use super::*;

    /// Checks key derivation for absolute paths and rejection of paths that
    /// do not start with a slash.
    #[test]
    fn path_to_key_test() -> Result<(), Error> {
        // (request path, expected key)
        let accepted = [
            ("/file%20.jpg", "file .jpg"),
            ("/%20t/", " t/index.html"),
            ("/", "index.html"),
            ("/hello", "hello"),
        ];
        for &(path, expected) in accepted.iter() {
            assert_eq!(path_to_key(path, "index.html")?, expected);
        }

        let rejected = ["", "i/am/relative"];
        for &path in rejected.iter() {
            assert!(path_to_key(path, "index.html").is_err());
        }

        Ok(())
    }
}