diff --git a/Cargo.lock b/Cargo.lock
index a7cf8b56..1d9525be 100644
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -461,6 +461,7 @@ dependencies = [
 "log",
 "md-5",
 "percent-encoding",
+ "rand",
 "roxmltree",
 "sha2",
 "tokio",
diff --git a/README.md b/README.md
index 10dcc0cd..8a6363ff 100644
--- a/README.md
+++ b/README.md
@@ -20,17 +20,24 @@ Our main use case is to provide a distributed storage layer for small-scale self
 
 We propose the following quickstart to set up a full dev environment as quickly as possible:
 
- 1. Setup a rust/cargo environment and install s3cmd. eg. `dnf install rust cargo s3cmd`
- 2. Run `cargo build` to build the project
- 3. Run `./script/dev-cluster.sh` to launch a test cluster (feel free to read the script)
- 4. Run `./script/dev-configure.sh` to configure your test cluster with default values (same datacenter, 100 tokens)
- 5. Run `./script/dev-bucket.sh` to create a bucket named `éprouvette` and an API key that will be stored in `/tmp/garage.s3`
- 6. Run `source ./script/dev-env.sh` to configure your CLI environment
- 7. You can use `garage` to manage the cluster. Try `garage --help`.
- 8. You can use `s3grg` to add, remove, and delete files. Try `s3grg --help`, `s3grg put /proc/cpuinfo s3://éprouvette/cpuinfo.txt`, `s3grg ls s3://éprouvette`. `s3grg` is a wrapper on `s3cmd` configured with the previously generated API key (the one in `/tmp/garage.s3`).
+ 1. Set up a Rust/Cargo environment, e.g. `dnf install rust cargo`
+ 2. Install awscli v2 by following [this guide](https://docs.aws.amazon.com/cli/latest/userguide/install-cliv2.html).
+ 3. Run `cargo build` to build the project
+ 4. Run `./script/dev-cluster.sh` to launch a test cluster (feel free to read the script)
+ 5. Run `./script/dev-configure.sh` to configure your test cluster with default values (same datacenter, 100 tokens)
+ 6. Run `./script/dev-bucket.sh` to create a bucket named `eprouvette` and an API key that will be stored in `/tmp/garage.s3`
+ 7. Run `source ./script/dev-env-aws.sh` to configure your CLI environment
+ 8. You can use `garage` to manage the cluster. Try `garage --help`.
+ 9. You can use the `awsgrg` alias to add, list, and remove files. Try `awsgrg help`, `awsgrg cp /proc/cpuinfo s3://eprouvette/cpuinfo.txt`, or `awsgrg ls s3://eprouvette`. `awsgrg` is a wrapper around the `aws s3` command, pre-configured with the previously generated API key (the one in `/tmp/garage.s3`) and with localhost as the endpoint.
 
 Now you should be ready to start hacking on garage!
 
+## S3 compatibility
+
+Only a subset of the S3 API is supported: adding, listing, getting and deleting files in a bucket.
+Bucket management, ACLs and other advanced features are not (yet?) handled through the S3 API but through the `garage` CLI.
+We primarily test `garage` against the `awscli` tool and `nextcloud`.
+
 ## Setting up Garage
 
 Use the `genkeys.sh` script to generate TLS keys for encrypting communications between Garage nodes.
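In concrete terms, the `awsgrg` alias defined in `script/dev-env-aws.sh` later in this changeset makes `awsgrg ls s3://eprouvette` expand to `aws s3 --endpoint-url http://127.0.0.1:3911 ls s3://eprouvette`, with credentials taken from the exported `AWS_ACCESS_KEY_ID`/`AWS_SECRET_ACCESS_KEY` variables. The supported subset described above corresponds roughly to the S3 `PutObject`, `ListObjects`, `GetObject` and `DeleteObject` calls, plus the multipart-upload calls touched later in this diff; that mapping is inferred from the changeset itself, not taken from an official compatibility list.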
diff --git a/doc/20201202_talk/.gitignore b/doc/20201202_talk/.gitignore
new file mode 100644
index 00000000..a5e8d18b
--- /dev/null
+++ b/doc/20201202_talk/.gitignore
@@ -0,0 +1,12 @@
+*
+
+!img
+
+!.gitignore
+!*.svg
+!*.png
+!*.jpg
+!*.tex
+!Makefile
+!.gitignore
+!talk.pdf
diff --git a/doc/20201202_talk/Makefile b/doc/20201202_talk/Makefile
new file mode 100644
index 00000000..27dbf788
--- /dev/null
+++ b/doc/20201202_talk/Makefile
@@ -0,0 +1,6 @@
+talk.pdf: talk.tex img/garage_distributed.pdf img/consistent_hashing_1.pdf img/consistent_hashing_2.pdf img/consistent_hashing_3.pdf img/consistent_hashing_4.pdf img/garage_tables.pdf
+	pdflatex talk.tex
+
+img/%.pdf: img/%.svg
+	inkscape -D -z --file=$^ --export-pdf=$@
+
diff --git a/doc/20201202_talk/img/Amazon-S3.jpg b/doc/20201202_talk/img/Amazon-S3.jpg
new file mode 100644
index 00000000..a9501973
Binary files /dev/null and b/doc/20201202_talk/img/Amazon-S3.jpg differ
diff --git a/doc/20201202_talk/img/cloud.png b/doc/20201202_talk/img/cloud.png
new file mode 100644
index 00000000..d95b2ae3
Binary files /dev/null and b/doc/20201202_talk/img/cloud.png differ
diff --git a/doc/20201202_talk/img/consistent_hashing_1.svg b/doc/20201202_talk/img/consistent_hashing_1.svg
new file mode 100644
index 00000000..f8d24fd8
--- /dev/null
+++ b/doc/20201202_talk/img/consistent_hashing_1.svg
@@ -0,0 +1,301 @@
+[SVG markup lost in extraction; recoverable text: a hash ring with six nodes labelled 1-6]
diff --git a/doc/20201202_talk/img/consistent_hashing_2.svg b/doc/20201202_talk/img/consistent_hashing_2.svg
new file mode 100644
index 00000000..5ac8faf6
--- /dev/null
+++ b/doc/20201202_talk/img/consistent_hashing_2.svg
@@ -0,0 +1,334 @@
+[SVG markup lost in extraction; the same six-node ring with additional markers]
diff --git a/doc/20201202_talk/img/consistent_hashing_3.svg b/doc/20201202_talk/img/consistent_hashing_3.svg
new file mode 100644
index 00000000..fdfd3efc
--- /dev/null
+++ b/doc/20201202_talk/img/consistent_hashing_3.svg
@@ -0,0 +1,358 @@
+[SVG markup lost in extraction; the same ring, one further construction step]
diff --git a/doc/20201202_talk/img/consistent_hashing_4.svg b/doc/20201202_talk/img/consistent_hashing_4.svg
new file mode 100644
index 00000000..95ed0e02
--- /dev/null
+++ b/doc/20201202_talk/img/consistent_hashing_4.svg
@@ -0,0 +1,377 @@
+[SVG markup lost in extraction; the same ring, final construction step]
diff --git a/doc/20201202_talk/img/dc.jpg b/doc/20201202_talk/img/dc.jpg
new file mode 100644
index 00000000..52cb1e70
Binary files /dev/null and b/doc/20201202_talk/img/dc.jpg differ
diff --git a/doc/20201202_talk/img/death.jpg b/doc/20201202_talk/img/death.jpg
new file mode 100644
index 00000000..2ffa9180
Binary files /dev/null and b/doc/20201202_talk/img/death.jpg differ
diff --git a/doc/20201202_talk/img/garage_distributed.svg b/doc/20201202_talk/img/garage_distributed.svg
new file mode 100644
index 00000000..2db03626
--- /dev/null
+++ b/doc/20201202_talk/img/garage_distributed.svg
@@ -0,0 +1,404 @@
+[SVG markup lost in extraction; no text labels recoverable]
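The four `consistent_hashing_*.svg` figures built by the Makefile above step through placing objects on a hash ring with six nodes. As a rough, self-contained sketch of the idea they illustrate — this is generic textbook consistent hashing, not Garage's actual placement code, and all names below are invented:

```rust
use std::collections::hash_map::DefaultHasher;
use std::hash::{Hash, Hasher};

// Map anything hashable to a position on the ring [0, 2^64).
fn ring_position<T: Hash>(item: &T) -> u64 {
    let mut h = DefaultHasher::new();
    item.hash(&mut h);
    h.finish()
}

// Walk the ring clockwise from the key's position and take the first
// `n` nodes: those hold the key's replicas. Adding or removing a node
// only moves the keys adjacent to it, which is the point of the scheme.
fn replicas_for(key: &str, nodes: &[&str], n: usize) -> Vec<String> {
    let mut ring: Vec<(u64, &str)> = nodes.iter().map(|&nd| (ring_position(&nd), nd)).collect();
    ring.sort();
    let start = ring
        .iter()
        .position(|(pos, _)| *pos >= ring_position(&key))
        .unwrap_or(0); // past the last node, wrap around to the first
    (0..n.min(ring.len()))
        .map(|i| ring[(start + i) % ring.len()].1.to_string())
        .collect()
}

fn main() {
    let nodes = ["node1", "node2", "node3", "node4", "node5", "node6"];
    println!("{:?}", replicas_for("eprouvette/cpuinfo.txt", &nodes, 3));
}
```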
diff --git a/doc/20201202_talk/img/garage_tables.svg b/doc/20201202_talk/img/garage_tables.svg
new file mode 100644
index 00000000..fc3d8fc5
--- /dev/null
+++ b/doc/20201202_talk/img/garage_tables.svg
@@ -0,0 +1,502 @@
+[SVG markup lost in extraction; recoverable text labels: "Objects table" (Object: bucket, file path; Version 1: deleted; Version 2: id, size, MIME type, ...), "Versions table" (Version: id, h(block 1), h(block 2), ...), "Blocks table" (Data block: hash, data)]
diff --git a/doc/20201202_talk/img/rustacean-flat-happy.png b/doc/20201202_talk/img/rustacean-flat-happy.png
new file mode 100644
index 00000000..ebce1a14
Binary files /dev/null and b/doc/20201202_talk/img/rustacean-flat-happy.png differ
diff --git a/doc/20201202_talk/img/shh.jpg b/doc/20201202_talk/img/shh.jpg
new file mode 100644
index 00000000..ad4689ce
Binary files /dev/null and b/doc/20201202_talk/img/shh.jpg differ
diff --git a/doc/20201202_talk/img/sync.png b/doc/20201202_talk/img/sync.png
new file mode 100644
index 00000000..d0404996
Binary files /dev/null and b/doc/20201202_talk/img/sync.png differ
diff --git a/doc/20201202_talk/talk.pdf b/doc/20201202_talk/talk.pdf
new file mode 100644
index 00000000..b27eca29
Binary files /dev/null and b/doc/20201202_talk/talk.pdf differ
diff --git a/doc/20201202_talk/talk.tex b/doc/20201202_talk/talk.tex
new file mode 100644
index 00000000..1acda842
--- /dev/null
+++ b/doc/20201202_talk/talk.tex
@@ -0,0 +1,247 @@
+%\nonstopmode
+\documentclass[aspectratio=169]{beamer}
+\usepackage[utf8]{inputenc}
+% \usepackage[frenchb]{babel}
+\usepackage{amsmath}
+\usepackage{mathtools}
+\usepackage{breqn}
+\usepackage{multirow}
+\usetheme{Luebeck}
+\usepackage{graphicx}
+%\useoutertheme[footline=authortitle,subsection=false]{miniframes}
+
+\beamertemplatenavigationsymbolsempty
+\setbeamertemplate{footline}
+{%
+  \leavevmode%
+  \hbox{\begin{beamercolorbox}[wd=.15\paperwidth,ht=2.5ex,dp=1.125ex,leftskip=.3cm,rightskip=.3cm plus1fill]{author in head/foot}%
+    \usebeamerfont{author in head/foot} \insertframenumber{} / \inserttotalframenumber
+  \end{beamercolorbox}%
+  \begin{beamercolorbox}[wd=.2\paperwidth,ht=2.5ex,dp=1.125ex,leftskip=.3cm plus1fill,rightskip=.3cm]{author in head/foot}%
+    \usebeamerfont{author in head/foot}\insertshortauthor
+  \end{beamercolorbox}%
+  \begin{beamercolorbox}[wd=.65\paperwidth,ht=2.5ex,dp=1.125ex,leftskip=.3cm,rightskip=.3cm plus1fil]{title in head/foot}%
+    \usebeamerfont{title in head/foot}\insertshorttitle~--~\insertshortdate
+  \end{beamercolorbox}}%
+  \vskip0pt%
+}
+
+\usepackage{tabu}
+\usepackage{multicol}
+\usepackage{vwcol}
+\usepackage{stmaryrd}
+\usepackage{graphicx}
+
+\usepackage[normalem]{ulem}
+
+\title[Garage: playing in the big leagues as a non-profit hoster]{Garage: playing in the big leagues \\when you are a non-profit hosting association}
+\subtitle{(or: why we decided to reinvent the wheel)}
+\author[Q. Dufour \& A. Auvolat]{Quentin Dufour \& Alex Auvolat}
+\date[02/12/2020]{Wednesday, December 2nd, 2020}
+
+\begin{document}
+
+\begin{frame}
+  \titlepage
+\end{frame}
+
+\begin{frame}
+  \frametitle{The killer question}
+
+  \begin{center}
+  \includegraphics[scale=3]{img/sync.png} \\
+  \Huge Why don't you host your files at home? \\
+  \end{center}
+
+\end{frame}
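Stepping back to the `garage_tables.svg` figure added above (it is the one shown on the internals slide further down): the three tables it lists can be paraphrased by the following type sketch. Field and type names here are invented for illustration; the real definitions live in Garage's model code and differ in detail.

```rust
// Paraphrase of the figure's "Objects table": one entry per (bucket, path),
// carrying the list of versions of that object.
struct Object {
    bucket: String,
    file_path: String,
    versions: Vec<ObjectVersionSummary>,
}

enum ObjectVersionSummary {
    Deleted, // "Version 1: deleted"
    Complete {
        // "Version 2: id, size, MIME type, ..."
        id: u64,
        size: u64,
        mime_type: String,
    },
}

// "Versions table": a version maps to the hashes of its data blocks.
struct Version {
    id: u64,
    block_hashes: Vec<[u8; 32]>, // h(block 1), h(block 2), ...
}

// "Blocks table": content-addressed storage of the actual bytes.
struct DataBlock {
    hash: [u8; 32],
    data: Vec<u8>,
}
```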
+\begin{frame}[t]
+  \frametitle{The big leagues}
+
+  \begin{columns}[t]
+    \begin{column}{0.5\textwidth}
+      {\huge The cloud model...}
+
+      \begin{center}
+      \includegraphics[scale=0.08]{img/cloud.png}
+      \end{center}
+
+      + \underline{integrity}: no more data loss
+
+      + \underline{availability}: accessible at all times
+
+      + \underline{service}: nothing to manage
+
+      \vspace{0.15cm}
+      \textbf{a change in user habits}
+    \end{column}
+    \pause
+    \begin{column}{0.5\textwidth}
+      {\huge ...and its price}
+
+      \begin{center}
+      \includegraphics[scale=0.07]{img/dc.jpg}
+      \end{center}
+
+      - expensive, polluting hardware
+
+      - secret software
+
+      - opaque management
+
+      \vspace{0.2cm}
+      \textbf{locked into the ecosystem}
+    \end{column}
+  \end{columns}
+\end{frame}
+
+\begin{frame}[t]
+  \frametitle{Garage the impostor}
+
+  \begin{columns}[t]
+    \begin{column}{0.5\textwidth}
+      {\huge Looks like the cloud...}
+
+      \begin{center}
+      \includegraphics[scale=0.5]{img/shh.jpg}
+      \end{center}
+
+      + \underline{compatible} with existing apps
+
+      + \underline{works} on mobile
+
+      + \underline{adapts} to acquired habits
+
+    \end{column}
+
+    \pause
+    \begin{column}{0.5\textwidth}
+      {\huge ...but does P2P}
+
+      \begin{center}
+      \includegraphics[scale=1]{img/death.jpg}
+      \end{center}
+
+      \vspace{0.4cm}
+
+      + \underline{control} over the infrastructure
+
+      + \underline{transparent}: free software
+
+      + \underline{frugal}: runs on old machines at home
+    \end{column}
+  \end{columns}
+
+\end{frame}
+
+
+\graphicspath{{img/}}
+
+\begin{frame}
+  \frametitle{So what exactly is Garage?}
+
+  \begin{columns}[t]
+    \begin{column}{0.5\textwidth}
+      \centering
+      \textbf{A distributed storage system}
+      \vspace{1em}
+
+      \includegraphics[width=.7\columnwidth]{img/garage_distributed.pdf}
+    \end{column}
+    \pause
+
+    \begin{column}{0.5\textwidth}
+      \centering
+      \textbf{that implements the S3 API}
+      \vspace{2em}
+
+      \includegraphics[width=.7\columnwidth]{img/Amazon-S3.jpg}
+    \end{column}
+  \end{columns}
+\end{frame}
+
+\begin{frame}
+  \frametitle{Consistent Hashing (DynamoDB)}
+  \textbf{How do we distribute files across the different machines?}
+  \vspace{1em}
+
+  \centering
+
+  \only<1>{\includegraphics[width=.55\columnwidth]{img/consistent_hashing_1.pdf}}%
+  \only<2>{\includegraphics[width=.55\columnwidth]{img/consistent_hashing_2.pdf}}%
+  \only<3>{\includegraphics[width=.55\columnwidth]{img/consistent_hashing_3.pdf}}%
+  \only<4>{\includegraphics[width=.55\columnwidth]{img/consistent_hashing_4.pdf}}%
+\end{frame}
+
+\begin{frame}
+  \frametitle{Garage internals: 3 levels of consistent hashing}
+  \centering
+  \includegraphics[width=.85\columnwidth]{img/garage_tables.pdf}
+\end{frame}
+
+\begin{frame}
+  \frametitle{Consistency models}
+  Garage uses a relatively weak consistency model:
+  \vspace{1em}
+
+  \begin{itemize}
+    \item Objects are replicated 3 times, with a quorum of 2 for reads and writes\\
+      $\to$ \textbf{``read your writes''} consistency
+      \vspace{1em}
+    \item<2-> CRDT data types + an anti-entropy mechanism\\
+      $\to$ \textbf{eventual} consistency
+      \vspace{1em}
+    \item<3-> This holds for each file individually:\\
+      no linearizability or causal consistency between operations\\
+      on different files
+      \vspace{1em}
+    \item<4-> \textbf{Advantage:} well suited to geo-distributed (multi-datacenter) deployments
+  \end{itemize}
+\end{frame}
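On the first bullet of the consistency slide above: with N = 3 replicas and read/write quorums R = W = 2, any read quorum necessarily overlaps any write quorum, since R + W = 4 > 3 = N. At least one of the two nodes answering a read therefore holds the last acknowledged write, which is where the ``read your writes'' guarantee comes from; the anti-entropy mechanism of the second bullet then repairs the third, possibly stale replica in the background.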
+\begin{frame}
+  \frametitle{Rust: lessons learned}
+
+  \begin{columns}
+    \begin{column}{0.55\textwidth}
+      Garage is written entirely in Rust!
+      \vspace{2em}
+
+      \textbf{Strong points:}
+      \vspace{.5em}
+      \begin{itemize}
+        \item Compiled language, very fast
+        \vspace{.5em}
+        \item Strong typing, many safety guarantees
+        \vspace{.5em}
+        \item The best of several paradigms:
+          functional, object-oriented, imperative
+        \vspace{.5em}
+        \item A very complete library ecosystem:
+          serialization, async/await, HTTP, ...
+      \end{itemize}
+    \end{column}
+
+    \begin{column}{0.45\textwidth}
+      \begin{centering}
+        \hspace{2em}\includegraphics[width=0.55\columnwidth]{img/rustacean-flat-happy.png}
+      \end{centering}
+
+      \vspace{2em}
+      \textbf{Weak points:}
+      \vspace{.5em}
+      \begin{itemize}
+        \item Compile times...
+        \vspace{.5em}
+        \item Hard to learn
+      \end{itemize}
+      \vspace{2em}
+    \end{column}
+  \end{columns}
+
+\end{frame}
+
+\end{document}
+
+%% vim: set ts=4 sw=4 tw=0 noet spelllang=en :
diff --git a/script/dev-bucket.sh b/script/dev-bucket.sh
index f07263f5..8c0ef4e4 100755
--- a/script/dev-bucket.sh
+++ b/script/dev-bucket.sh
@@ -6,11 +6,11 @@ GARAGE_DEBUG="${REPO_FOLDER}/target/debug/"
 GARAGE_RELEASE="${REPO_FOLDER}/target/release/"
 PATH="${GARAGE_DEBUG}:${GARAGE_RELEASE}:$PATH"
 
-garage bucket create éprouvette
+garage bucket create eprouvette
 KEY_INFO=`garage key new --name opérateur`
 ACCESS_KEY=`echo $KEY_INFO|grep -Po 'GK[a-f0-9]+'`
 SECRET_KEY=`echo $KEY_INFO|grep -Po 'secret_key: "[a-f0-9]+'|grep -Po '[a-f0-9]+$'`
-garage bucket allow éprouvette --read --write --key $ACCESS_KEY
+garage bucket allow eprouvette --read --write --key $ACCESS_KEY
 echo "$ACCESS_KEY $SECRET_KEY" > /tmp/garage.s3
-echo "Bucket s3://éprouvette created. Credentials stored in /tmp/garage.s3."
+echo "Bucket s3://eprouvette created. Credentials stored in /tmp/garage.s3."
diff --git a/script/dev-clean.sh b/script/dev-clean.sh
new file mode 100755
index 00000000..151c5547
--- /dev/null
+++ b/script/dev-clean.sh
@@ -0,0 +1,7 @@
+#!/bin/bash
+
+set -ex
+
+killall -9 garage || echo "garage is not running"
+rm -rf /tmp/garage*
+rm -rf /tmp/config.*.toml
diff --git a/script/dev-cluster.sh b/script/dev-cluster.sh
index cfe9be0d..101a18bd 100755
--- a/script/dev-cluster.sh
+++ b/script/dev-cluster.sh
@@ -24,11 +24,11 @@ cat > $CONF_PATH <<EOF
[hunk lost in extraction: the node-configuration heredoc generated by dev-cluster.sh, together with what is most likely the header of a following diff against script/dev-configure.sh, was swallowed; only this added wait loop survives:]
+until garage status 2>&1|grep -q Healthy ; do
+  echo "cluster starting..."
+  sleep 1
+done
+
 garage status \
   | grep UNCONFIGURED \
   | grep -Po '^[0-9a-f]+' \
diff --git a/script/dev-env-aws.sh b/script/dev-env-aws.sh
new file mode 100644
index 00000000..c9a57660
--- /dev/null
+++ b/script/dev-env-aws.sh
@@ -0,0 +1,14 @@
+#!/bin/bash
+
+SCRIPT_FOLDER="`dirname \"${BASH_SOURCE[0]}\"`"
+REPO_FOLDER="${SCRIPT_FOLDER}/../"
+GARAGE_DEBUG="${REPO_FOLDER}/target/debug/"
+GARAGE_RELEASE="${REPO_FOLDER}/target/release/"
+PATH="${GARAGE_DEBUG}:${GARAGE_RELEASE}:$PATH"
+
+export AWS_ACCESS_KEY_ID=`cat /tmp/garage.s3 |cut -d' ' -f1`
+export AWS_SECRET_ACCESS_KEY=`cat /tmp/garage.s3 |cut -d' ' -f2`
+export AWS_DEFAULT_REGION='garage'
+
+alias awsgrg="aws s3 \
+  --endpoint-url http://127.0.0.1:3911"
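Both environment scripts assume `/tmp/garage.s3` contains the access key and the secret key on a single space-separated line, exactly as written by `dev-bucket.sh` above — something like `GK8bfb6a51286071c6c9cd8bc3 b835036fb9c6f75b4a8a7f34ffdbafab` (made-up values, shown only to illustrate the format).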
diff --git a/script/dev-env.sh b/script/dev-env-s3cmd.sh
old mode 100755
new mode 100644
similarity index 88%
rename from script/dev-env.sh
rename to script/dev-env-s3cmd.sh
index 7e8ffc50..88d2941f
--- a/script/dev-env.sh
+++ b/script/dev-env-s3cmd.sh
@@ -10,7 +10,8 @@ ACCESS_KEY=`cat /tmp/garage.s3 |cut -d' ' -f1`
 SECRET_KEY=`cat /tmp/garage.s3 |cut -d' ' -f2`
 
 alias s3grg="s3cmd \
-  --host 127.0.0.1:3900 \
+  --host 127.0.0.1:3911 \
+  --host-bucket 127.0.0.1:3911 \
   --access_key=$ACCESS_KEY \
   --secret_key=$SECRET_KEY \
   --region=garage \
diff --git a/script/test-smoke.sh b/script/test-smoke.sh
new file mode 100755
index 00000000..111afac9
--- /dev/null
+++ b/script/test-smoke.sh
@@ -0,0 +1,62 @@
+#!/bin/bash
+
+set -ex
+shopt -s expand_aliases
+
+SCRIPT_FOLDER="`dirname \"$0\"`"
+REPO_FOLDER="${SCRIPT_FOLDER}/../"
+
+cargo build
+${SCRIPT_FOLDER}/dev-clean.sh
+${SCRIPT_FOLDER}/dev-cluster.sh > /tmp/garage.log 2>&1 &
+${SCRIPT_FOLDER}/dev-configure.sh
+${SCRIPT_FOLDER}/dev-bucket.sh
+source ${SCRIPT_FOLDER}/dev-env-aws.sh
+source ${SCRIPT_FOLDER}/dev-env-s3cmd.sh
+
+garage status
+garage key list
+garage bucket list
+
+dd if=/dev/urandom of=/tmp/garage.1.rnd bs=1k count=2 # < INLINE_THRESHOLD = 3072 bytes
+dd if=/dev/urandom of=/tmp/garage.2.rnd bs=1M count=5
+dd if=/dev/urandom of=/tmp/garage.3.rnd bs=1M count=10
+
+for idx in $(seq 1 3); do
+  # AWS sends
+  awsgrg cp /tmp/garage.$idx.rnd s3://eprouvette/garage.$idx.aws
+
+  awsgrg ls s3://eprouvette
+
+  awsgrg cp s3://eprouvette/garage.$idx.aws /tmp/garage.$idx.dl
+  diff /tmp/garage.$idx.rnd /tmp/garage.$idx.dl
+  rm /tmp/garage.$idx.dl
+
+  s3grg get s3://eprouvette/garage.$idx.aws /tmp/garage.$idx.dl
+  diff /tmp/garage.$idx.rnd /tmp/garage.$idx.dl
+  rm /tmp/garage.$idx.dl
+
+  awsgrg rm s3://eprouvette/garage.$idx.aws
+
+  # S3CMD sends
+  s3grg put /tmp/garage.$idx.rnd s3://eprouvette/garage.$idx.s3cmd
+
+  s3grg ls s3://eprouvette
+
+  s3grg get s3://eprouvette/garage.$idx.s3cmd /tmp/garage.$idx.dl
+  diff /tmp/garage.$idx.rnd /tmp/garage.$idx.dl
+  rm /tmp/garage.$idx.dl
+
+  awsgrg cp s3://eprouvette/garage.$idx.s3cmd /tmp/garage.$idx.dl
+  diff /tmp/garage.$idx.rnd /tmp/garage.$idx.dl
+  rm /tmp/garage.$idx.dl
+
+  s3grg rm s3://eprouvette/garage.$idx.s3cmd
+done
+rm /tmp/garage.{1,2,3}.rnd
+
+garage bucket deny --read --write eprouvette --key $AWS_ACCESS_KEY_ID
+garage bucket delete --yes eprouvette
+garage key delete --yes $AWS_ACCESS_KEY_ID
+
+echo "success"
diff --git a/src/api/Cargo.toml b/src/api/Cargo.toml
index a366f9b8..079993c3 100644
--- a/src/api/Cargo.toml
+++ b/src/api/Cargo.toml
@@ -27,6 +27,7 @@ md-5 = "0.9.1"
 sha2 = "0.8"
 hmac = "0.7"
 crypto-mac = "0.7"
+rand = "0.7"
 
 futures = "0.3"
 futures-util = "0.3"
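The new `rand` dependency has a single consumer in this changeset: the opaque multipart-upload ETag generated in `s3_put.rs` further down. Extracted into a standalone sketch (the function name is invented; the body matches the expression added in `handle_complete_multipart_upload`):

```rust
// 16 random bytes in hex, plus the part count: same "<hex>-<N>" shape as
// a real S3 multipart ETag, but without its MD5-based derivation.
fn opaque_multipart_etag(num_parts: u64) -> String {
    format!("{}-{}", hex::encode(rand::random::<[u8; 16]>()), num_parts)
}
```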
diff --git a/src/api/s3_get.rs b/src/api/s3_get.rs
index a68c485b..1a23f476 100644
--- a/src/api/s3_get.rs
+++ b/src/api/s3_get.rs
@@ -24,11 +24,13 @@ fn object_headers(
 		"Content-Type",
 		version_meta.headers.content_type.to_string(),
 	)
-	.header("Content-Length", format!("{}", version_meta.size))
-	.header("ETag", version_meta.etag.to_string())
 	.header("Last-Modified", date_str)
 	.header("Accept-Ranges", format!("bytes"));
 
+	if !version_meta.etag.is_empty() {
+		resp = resp.header("ETag", format!("\"{}\"", version_meta.etag));
+	}
+
 	for (k, v) in version_meta.headers.other.iter() {
 		resp = resp.header(k, v.to_string());
 	}
@@ -63,6 +65,7 @@ pub async fn handle_head(
 	let body: Body = Body::from(vec![]);
 	let response = object_headers(&version, version_meta)
+		.header("Content-Length", format!("{}", version_meta.size))
 		.status(StatusCode::OK)
 		.body(body)
 		.unwrap();
@@ -123,7 +126,9 @@ pub async fn handle_get(
 			.await;
 	}
 
-	let resp_builder = object_headers(&last_v, last_v_meta).status(StatusCode::OK);
+	let resp_builder = object_headers(&last_v, last_v_meta)
+		.header("Content-Length", format!("{}", last_v_meta.size))
+		.status(StatusCode::OK);
 
 	match &last_v_data {
 		ObjectVersionData::DeleteMarker => unreachable!(),
@@ -161,7 +166,7 @@ pub async fn handle_get(
 				}
 			})
 			.buffered(2);
-		//let body: Body = Box::new(StreamBody::new(Box::pin(body_stream)));
+
 		let body = hyper::body::Body::wrap_stream(body_stream);
 		Ok(resp_builder.body(body)?)
 	}
@@ -181,9 +186,10 @@ pub async fn handle_get_range(
 	}
 
 	let resp_builder = object_headers(version, version_meta)
+		.header("Content-Length", format!("{}", end - begin))
 		.header(
 			"Content-Range",
-			format!("bytes {}-{}/{}", begin, end, version_meta.size),
+			format!("bytes {}-{}/{}", begin, end - 1, version_meta.size),
 		)
 		.status(StatusCode::PARTIAL_CONTENT);
 
@@ -206,35 +212,49 @@ pub async fn handle_get_range(
 		None => return Err(Error::NotFound),
 	};
 
-	let blocks = version
-		.blocks()
-		.iter()
-		.cloned()
-		.filter(|block| block.offset + block.size > begin && block.offset < end)
-		.collect::<Vec<_>>();
+	// We will store here the list of blocks that have an intersection with the requested
+	// range, as well as their "true offset", which is their actual offset in the complete
+	// file (whereas block.offset designates the offset of the block WITHIN THE PART
+	// block.part_number, which is not the same in the case of a multipart upload)
+	let mut blocks = Vec::with_capacity(std::cmp::min(
+		version.blocks().len(),
+		4 + ((end - begin) / std::cmp::max(version.blocks()[0].size as u64, 1024)) as usize,
+	));
+	let mut true_offset = 0;
+	for b in version.blocks().iter() {
+		if true_offset >= end {
+			break;
+		}
+		// Keep only blocks that have an intersection with the requested range
+		if true_offset < end && true_offset + b.size > begin {
+			blocks.push((b.clone(), true_offset));
+		}
+		true_offset += b.size;
+	}
 
 	let body_stream = futures::stream::iter(blocks)
-		.map(move |block| {
+		.map(move |(block, true_offset)| {
 			let garage = garage.clone();
 			async move {
 				let data = garage.block_manager.rpc_get_block(&block.hash).await?;
-				let start_in_block = if block.offset > begin {
+				let data = Bytes::from(data);
+				let start_in_block = if true_offset > begin {
 					0
 				} else {
-					begin - block.offset
+					begin - true_offset
 				};
-				let end_in_block = if block.offset + block.size < end {
+				let end_in_block = if true_offset + block.size < end {
 					block.size
 				} else {
-					end - block.offset
+					end - true_offset
 				};
 				Result::<Bytes, Error>::Ok(Bytes::from(
-					data[start_in_block as usize..end_in_block as usize].to_vec(),
+					data.slice(start_in_block as usize..end_in_block as usize),
 				))
 			}
 		})
 		.buffered(2);
-	//let body: Body = Box::new(StreamBody::new(Box::pin(body_stream)));
+
+	let body = hyper::body::Body::wrap_stream(body_stream);
 	Ok(resp_builder.body(body)?)
 }
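A quick worked example of the new range logic — a standalone sketch that mirrors the arithmetic of the hunk above rather than calling into Garage. For a 15-byte object stored as three 5-byte blocks and a request for the half-open byte range `begin = 3`, `end = 12`, the response advertises `Content-Length: 9` and `Content-Range: bytes 3-11/15` — hence the `end - 1` fix, since Content-Range bounds are inclusive:

```rust
fn main() {
    let block_sizes = [5u64, 5, 5]; // three stored blocks of 5 bytes each
    let (begin, end, total) = (3u64, 12, 15); // requested half-open range [3, 12)

    let mut true_offset = 0; // offset of the current block within the whole object
    for (i, size) in block_sizes.iter().enumerate() {
        if true_offset < end && true_offset + size > begin {
            // Same intersection arithmetic as in handle_get_range:
            let start_in_block = begin.saturating_sub(true_offset);
            let end_in_block = (end - true_offset).min(*size);
            println!("block {}: bytes {}..{}", i, start_in_block, end_in_block);
        }
        true_offset += size;
    }
    // Prints blocks 0, 1, 2 contributing 2 + 5 + 2 = 9 bytes in total.
    println!("Content-Length: {}", end - begin);
    println!("Content-Range: bytes {}-{}/{}", begin, end - 1, total);
}
```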
diff --git a/src/api/s3_list.rs b/src/api/s3_list.rs
index 3b739a8a..599d0d11 100644
--- a/src/api/s3_list.rs
+++ b/src/api/s3_list.rs
@@ -18,6 +18,7 @@ use crate::encoding::*;
 struct ListResultInfo {
 	last_modified: u64,
 	size: u64,
+	etag: String,
 }
 
 pub async fn handle_list(
@@ -56,12 +57,12 @@
 	for object in objects.iter() {
 		if !object.key.starts_with(prefix) {
-			truncated = false;
+			truncated = None;
 			break 'query_loop;
 		}
 		if let Some(version) = object.versions().iter().find(|x| x.is_data()) {
 			if result_keys.len() + result_common_prefixes.len() >= max_keys {
-				truncated = true;
+				truncated = Some(object.key.to_string());
 				break 'query_loop;
 			}
 			let common_prefix = if delimiter.len() > 0 {
@@ -75,19 +76,18 @@
 			if let Some(pfx) = common_prefix {
 				result_common_prefixes.insert(pfx.to_string());
 			} else {
-				let size = match &version.state {
-					ObjectVersionState::Complete(ObjectVersionData::Inline(meta, _)) => {
-						meta.size
-					}
+				let meta = match &version.state {
+					ObjectVersionState::Complete(ObjectVersionData::Inline(meta, _)) => meta,
 					ObjectVersionState::Complete(ObjectVersionData::FirstBlock(meta, _)) => {
-						meta.size
+						meta
 					}
 					_ => unreachable!(),
 				};
 				let info = match result_keys.get(&object.key) {
 					None => ListResultInfo {
 						last_modified: version.timestamp,
-						size,
+						size: meta.size,
+						etag: meta.etag.to_string(),
 					},
 					Some(_lri) => {
 						return Err(Error::Message(format!("Duplicate key?? {}", object.key)))
@@ -98,7 +98,7 @@
 		}
 	}
 	if objects.len() < max_keys + 1 {
-		truncated = false;
+		truncated = None;
 		break 'query_loop;
 	}
 	if objects.len() > 0 {
@@ -113,11 +113,22 @@
 		r#"<ListBucketResult xmlns="http://s3.amazonaws.com/doc/2006-03-01/">"#
 	)
 	.unwrap();
-	writeln!(&mut xml, "\t<Bucket>{}</Bucket>", bucket).unwrap();
+	writeln!(&mut xml, "\t<Name>{}</Name>", bucket).unwrap();
 	writeln!(&mut xml, "\t<Prefix>{}</Prefix>", prefix).unwrap();
+	if let Some(mkr) = marker {
+		writeln!(&mut xml, "\t<Marker>{}</Marker>", mkr).unwrap();
+	}
 	writeln!(&mut xml, "\t<KeyCount>{}</KeyCount>", result_keys.len()).unwrap();
 	writeln!(&mut xml, "\t<MaxKeys>{}</MaxKeys>", max_keys).unwrap();
-	writeln!(&mut xml, "\t<IsTruncated>{}</IsTruncated>", truncated).unwrap();
+	writeln!(
+		&mut xml,
+		"\t<IsTruncated>{}</IsTruncated>",
+		truncated.is_some()
+	)
+	.unwrap();
+	if let Some(next_marker) = truncated {
+		writeln!(&mut xml, "\t<NextMarker>{}</NextMarker>", next_marker).unwrap();
+	}
 	for (key, info) in result_keys.iter() {
 		let last_modif = NaiveDateTime::from_timestamp(info.last_modified as i64 / 1000, 0);
 		let last_modif = DateTime::<Utc>::from_utc(last_modif, Utc);
@@ -132,6 +143,9 @@
 		.unwrap();
 		writeln!(&mut xml, "\t\t<LastModified>{}</LastModified>", last_modif).unwrap();
 		writeln!(&mut xml, "\t\t<Size>{}</Size>", info.size).unwrap();
+		if !info.etag.is_empty() {
+			writeln!(&mut xml, "\t\t<ETag>\"{}\"</ETag>", info.etag).unwrap();
+		}
 		writeln!(&mut xml, "\t\t<StorageClass>STANDARD</StorageClass>").unwrap();
 		writeln!(&mut xml, "\t</Contents>").unwrap();
 	}
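To visualize what the list handler now emits, here is a minimal standalone reconstruction of the response shape. It is an illustration with invented values, following the standard S3 ListObjects tag names, and mirrors the `writeln!` calls above rather than exercising the real handler:

```rust
use std::fmt::Write;

fn main() {
    let mut xml = String::new();
    writeln!(&mut xml, r#"<ListBucketResult xmlns="http://s3.amazonaws.com/doc/2006-03-01/">"#).unwrap();
    writeln!(&mut xml, "\t<Name>eprouvette</Name>").unwrap();
    writeln!(&mut xml, "\t<KeyCount>1</KeyCount>").unwrap();
    writeln!(&mut xml, "\t<MaxKeys>1000</MaxKeys>").unwrap();
    writeln!(&mut xml, "\t<IsTruncated>false</IsTruncated>").unwrap();
    writeln!(&mut xml, "\t<Contents>").unwrap();
    writeln!(&mut xml, "\t\t<Key>garage.1.aws</Key>").unwrap();
    writeln!(&mut xml, "\t\t<Size>2048</Size>").unwrap();
    // Note the quotes: S3 ETags are wrapped in double quotes on the wire.
    writeln!(&mut xml, "\t\t<ETag>\"0123456789abcdef0123456789abcdef\"</ETag>").unwrap();
    writeln!(&mut xml, "\t\t<StorageClass>STANDARD</StorageClass>").unwrap();
    writeln!(&mut xml, "\t</Contents>").unwrap();
    writeln!(&mut xml, "</ListBucketResult>").unwrap();
    print!("{}", xml);
}
```

When the listing is truncated, `IsTruncated` becomes `true` and a `NextMarker` element carries the key at which a client should restart, passing it back as `marker` on the next request.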
diff --git a/src/api/s3_put.rs b/src/api/s3_put.rs
index a528720d..c42309b2 100644
--- a/src/api/s3_put.rs
+++ b/src/api/s3_put.rs
@@ -51,12 +51,7 @@ pub async fn handle_put(
 	let md5sum_arr = md5sum.finalize();
 	let md5sum_hex = hex::encode(md5sum_arr);
 
-	let mut sha256sum = Sha256::new();
-	sha256sum.input(&first_block[..]);
-	let sha256sum_arr = sha256sum.result();
-	let mut hash = [0u8; 32];
-	hash.copy_from_slice(&sha256sum_arr[..]);
-	let sha256sum_hash = Hash::from(hash);
+	let sha256sum_hash = hash(&first_block[..]);
 
 	ensure_checksum_matches(
 		md5sum_arr.as_slice(),
@@ -253,7 +248,7 @@ impl BodyChunker {
 			body,
 			read_all: false,
 			block_size,
-			buf: VecDeque::new(),
+			buf: VecDeque::with_capacity(2 * block_size),
 		}
 	}
 	async fn next(&mut self) -> Result<Option<Vec<u8>>, GarageError> {
@@ -278,11 +273,10 @@
 	}
 }
 
-pub fn put_response(version_uuid: UUID, etag: String) -> Response<Body> {
+pub fn put_response(version_uuid: UUID, md5sum_hex: String) -> Response<Body> {
 	Response::builder()
 		.header("x-amz-version-id", hex::encode(version_uuid))
-		.header("ETag", etag)
-		// TODO ETag
+		.header("ETag", format!("\"{}\"", md5sum_hex))
 		.body(Body::from(vec![]))
 		.unwrap()
 }
@@ -369,7 +363,7 @@ pub async fn handle_put_part(
 	}
 
 	// Copy block to store
-	let version = Version::new(version_uuid, bucket.into(), key.into(), false, vec![]);
+	let version = Version::new(version_uuid, bucket, key, false, vec![]);
 	let first_block_hash = hash(&first_block[..]);
 	let (_, md5sum_arr, sha256sum) = read_and_put_blocks(
 		&garage,
@@ -388,7 +382,11 @@
 		content_sha256,
 	)?;
 
-	Ok(Response::new(Body::from(vec![])))
+	let response = Response::builder()
+		.header("ETag", format!("\"{}\"", hex::encode(md5sum_arr)))
+		.body(Body::from(vec![]))
+		.unwrap();
+	Ok(response)
 }
 
 pub async fn handle_complete_multipart_upload(
@@ -430,6 +428,21 @@
 		_ => unreachable!(),
 	};
 
+	// ETag calculation: we produce ETags that have the same form as
+	// those of S3 multipart uploads, but we don't use their actual
+	// calculation for the first component of the tag (we use random
+	// bytes). This shouldn't impact compatibility as the S3 docs
+	// specify that the ETag is an opaque value in case of a multipart
+	// upload. See also: https://teppen.io/2018/06/23/aws_s3_etags/
+	let num_parts = version.blocks().last().unwrap().part_number
+		- version.blocks().first().unwrap().part_number
+		+ 1;
+	let etag = format!(
+		"{}-{}",
+		hex::encode(&rand::random::<[u8; 16]>()[..]),
+		num_parts
+	);
+
 	// TODO: check that all the parts that they pretend they gave us are indeed there
 	// TODO: when we read the XML from _req, remember to check the sha256 sum of the payload
 	// against the signed x-amz-content-sha256
@@ -444,7 +457,7 @@
 		ObjectVersionMeta {
 			headers,
 			size: total_size,
-			etag: "".to_string(), // TODO
+			etag: etag,
 		},
 		version.blocks()[0].hash,
 	));
diff --git a/src/table/table.rs b/src/table/table.rs
index 5dfee3c8..acb46325 100644
--- a/src/table/table.rs
+++ b/src/table/table.rs
@@ -391,7 +391,8 @@ where
 		let (old_entry, new_entry) = self.store.transaction(|db| {
 			let (old_entry, new_entry) = match db.get(&tree_key)? {
 				Some(prev_bytes) => {
-					let old_entry = self.decode_entry(&prev_bytes)
+					let old_entry = self
+						.decode_entry(&prev_bytes)
 						.map_err(sled::ConflictableTransactionError::Abort)?;
 					let mut new_entry = old_entry.clone();
 					new_entry.merge(&update);
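For contrast with the random-bytes shortcut above: real S3 computes the multipart ETag as the MD5 of the concatenated binary MD5s of the parts, suffixed with the part count — this is the calculation described at the teppen.io link cited in the new comment. A sketch of that standard derivation, using the `md-5` and `hex` crates already among the project's dependencies (the function and argument names are invented):

```rust
use md5::{Digest, Md5};

/// "<md5-of-part-md5s>-<number-of-parts>", the conventional S3 form.
fn standard_multipart_etag(part_md5s: &[[u8; 16]]) -> String {
    let mut hasher = Md5::new();
    for part_md5 in part_md5s {
        hasher.update(part_md5); // concatenate the *binary* MD5 of each part
    }
    format!("\"{}-{}\"", hex::encode(hasher.finalize()), part_md5s.len())
}
```

Since clients are told to treat multipart ETags as opaque, substituting random bytes for the hash-of-hashes changes nothing for well-behaved clients, while sparing Garage from keeping every part's MD5 around until completion.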