diff --git a/doc/20201202_talk/.gitignore b/doc/20201202_talk/.gitignore new file mode 100644 index 00000000..a5e8d18b --- /dev/null +++ b/doc/20201202_talk/.gitignore @@ -0,0 +1,12 @@ +* + +!img + +!.gitignore +!*.svg +!*.png +!*.jpg +!*.tex +!Makefile +!.gitignore +!talk.pdf diff --git a/doc/20201202_talk/Makefile b/doc/20201202_talk/Makefile new file mode 100644 index 00000000..27dbf788 --- /dev/null +++ b/doc/20201202_talk/Makefile @@ -0,0 +1,6 @@ +talk.pdf: talk.tex img/garage_distributed.pdf img/consistent_hashing_1.pdf img/consistent_hashing_2.pdf img/consistent_hashing_3.pdf img/consistent_hashing_4.pdf img/garage_tables.pdf + pdflatex talk.tex + +img/%.pdf: img/%.svg + inkscape -D -z --file=$^ --export-pdf=$@ + diff --git a/doc/20201202_talk/img/Amazon-S3.jpg b/doc/20201202_talk/img/Amazon-S3.jpg new file mode 100644 index 00000000..a9501973 Binary files /dev/null and b/doc/20201202_talk/img/Amazon-S3.jpg differ diff --git a/doc/20201202_talk/img/cloud.png b/doc/20201202_talk/img/cloud.png new file mode 100644 index 00000000..d95b2ae3 Binary files /dev/null and b/doc/20201202_talk/img/cloud.png differ diff --git a/doc/20201202_talk/img/consistent_hashing_1.svg b/doc/20201202_talk/img/consistent_hashing_1.svg new file mode 100644 index 00000000..f8d24fd8 --- /dev/null +++ b/doc/20201202_talk/img/consistent_hashing_1.svg @@ -0,0 +1,301 @@ + + + + + + + + + + + + + + + + + + + + + image/svg+xml + + + + + + + + + + + + + + + + + + 1 + + + + 2 + + + + 3 + + + + 4 + + + + 5 + + + + 6 + + + + diff --git a/doc/20201202_talk/img/consistent_hashing_2.svg b/doc/20201202_talk/img/consistent_hashing_2.svg new file mode 100644 index 00000000..5ac8faf6 --- /dev/null +++ b/doc/20201202_talk/img/consistent_hashing_2.svg @@ -0,0 +1,334 @@ + + + + + + + + + + + + + + + + + + + + + image/svg+xml + + + + + + + + + + + + + + + + + + 1 + + + + 2 + + + + 3 + + + + 4 + + + + 5 + + + + 6 + + + + + + + + + + + + diff --git a/doc/20201202_talk/img/consistent_hashing_3.svg 
b/doc/20201202_talk/img/consistent_hashing_3.svg new file mode 100644 index 00000000..fdfd3efc --- /dev/null +++ b/doc/20201202_talk/img/consistent_hashing_3.svg @@ -0,0 +1,358 @@ + + + + + + + + + + + + + + + + + + + + + + + + image/svg+xml + + + + + + + + + + + + + + + + + + 1 + + + + 2 + + + + 3 + + + + 4 + + + + 5 + + + + 6 + + + + + + + + + + + + + + diff --git a/doc/20201202_talk/img/consistent_hashing_4.svg b/doc/20201202_talk/img/consistent_hashing_4.svg new file mode 100644 index 00000000..95ed0e02 --- /dev/null +++ b/doc/20201202_talk/img/consistent_hashing_4.svg @@ -0,0 +1,377 @@ + + + + + + + + + + + + + + + + + + + + + + + + image/svg+xml + + + + + + + + + + + + + + + + + + 1 + + + + 2 + + + + 3 + + + + 4 + + + + 5 + + + + 6 + + + + + + + + + + + + + + + + + + + diff --git a/doc/20201202_talk/img/dc.jpg b/doc/20201202_talk/img/dc.jpg new file mode 100644 index 00000000..52cb1e70 Binary files /dev/null and b/doc/20201202_talk/img/dc.jpg differ diff --git a/doc/20201202_talk/img/death.jpg b/doc/20201202_talk/img/death.jpg new file mode 100644 index 00000000..2ffa9180 Binary files /dev/null and b/doc/20201202_talk/img/death.jpg differ diff --git a/doc/20201202_talk/img/garage_distributed.svg b/doc/20201202_talk/img/garage_distributed.svg new file mode 100644 index 00000000..2db03626 --- /dev/null +++ b/doc/20201202_talk/img/garage_distributed.svg @@ -0,0 +1,404 @@ + + + + + + + + image/svg+xml + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/doc/20201202_talk/img/garage_tables.svg b/doc/20201202_talk/img/garage_tables.svg new file mode 100644 index 00000000..fc3d8fc5 --- /dev/null +++ b/doc/20201202_talk/img/garage_tables.svg @@ -0,0 +1,502 @@ + + + + + + + + + + + + + + + + + + image/svg+xml + + + + + + + + + + + + + Object + + + + + + bucket + + + + + + file path + + + + + + Version 1 + deleted + 
+ + + + + + Version 2 + id + + size + MIME type + ... + + + + + + Version + + id + h(block 1) + h(block 2) + ... + + + + + Data block + + hash + data + + + + Objects table + Versions table + Blocks table + + diff --git a/doc/20201202_talk/img/rustacean-flat-happy.png b/doc/20201202_talk/img/rustacean-flat-happy.png new file mode 100644 index 00000000..ebce1a14 Binary files /dev/null and b/doc/20201202_talk/img/rustacean-flat-happy.png differ diff --git a/doc/20201202_talk/img/shh.jpg b/doc/20201202_talk/img/shh.jpg new file mode 100644 index 00000000..ad4689ce Binary files /dev/null and b/doc/20201202_talk/img/shh.jpg differ diff --git a/doc/20201202_talk/img/sync.png b/doc/20201202_talk/img/sync.png new file mode 100644 index 00000000..d0404996 Binary files /dev/null and b/doc/20201202_talk/img/sync.png differ diff --git a/doc/20201202_talk/talk.pdf b/doc/20201202_talk/talk.pdf new file mode 100644 index 00000000..b27eca29 Binary files /dev/null and b/doc/20201202_talk/talk.pdf differ diff --git a/doc/20201202_talk/talk.tex b/doc/20201202_talk/talk.tex new file mode 100644 index 00000000..1acda842 --- /dev/null +++ b/doc/20201202_talk/talk.tex @@ -0,0 +1,247 @@ +%\nonstopmode +\documentclass[aspectratio=169]{beamer} +\usepackage[utf8]{inputenc} +% \usepackage[frenchb]{babel} +\usepackage{amsmath} +\usepackage{mathtools} +\usepackage{breqn} +\usepackage{multirow} +\usetheme{Luebeck} +\usepackage{graphicx} +%\useoutertheme[footline=authortitle,subsection=false]{miniframes} + +\beamertemplatenavigationsymbolsempty +\setbeamertemplate{footline} +{% + \leavevmode% + \hbox{\begin{beamercolorbox}[wd=.15\paperwidth,ht=2.5ex,dp=1.125ex,leftskip=.3cm,rightskip=.3cm plus1fill]{author in head/foot}% + \usebeamerfont{author in head/foot} \insertframenumber{} / \inserttotalframenumber + \end{beamercolorbox}% + \begin{beamercolorbox}[wd=.2\paperwidth,ht=2.5ex,dp=1.125ex,leftskip=.3cm plus1fill,rightskip=.3cm]{author in head/foot}% + \usebeamerfont{author in 
head/foot}\insertshortauthor + \end{beamercolorbox}% + \begin{beamercolorbox}[wd=.65\paperwidth,ht=2.5ex,dp=1.125ex,leftskip=.3cm,rightskip=.3cm plus1fil]{title in head/foot}% + \usebeamerfont{title in head/foot}\insertshorttitle~--~\insertshortdate + \end{beamercolorbox}}% + \vskip0pt% +} + +\usepackage{tabu} +\usepackage{multicol} +\usepackage{vwcol} +\usepackage{stmaryrd} +\usepackage{graphicx} + +\usepackage[normalem]{ulem} + +\title[Garage : jouer dans la cour des grands quand on est un hébergeur associatif]{Garage : jouer dans la cour des grands \\quand on est un hébergeur associatif} +\subtitle{(ou pourquoi on a décidé de réinventer la roue)} +\author[Q. Dufour \& A. Auvolat]{Quentin Dufour \& Alex Auvolat} +\date[02/12/2020]{Mercredi 2 décembre 2020} + +\begin{document} + +\begin{frame} + \titlepage +\end{frame} + +\begin{frame} + \frametitle{La question qui tue} + + \begin{center} + \includegraphics[scale=3]{img/sync.png} \\ + \Huge Pourquoi vous n'hébergez pas vos fichiers chez vous ? 
\\ + \end{center} + +\end{frame} + +\begin{frame}[t] + \frametitle{La cour des grands} + + \begin{columns}[t] + \begin{column}{0.5\textwidth} + {\huge Le modèle du cloud...} + + \begin{center} + \includegraphics[scale=0.08]{img/cloud.png} + \end{center} + + + \underline{intégrité} : plus de perte de données + + + \underline{disponibilité} : tout le temps accessible + + + \underline{service} : rien à gérer + + \vspace{0.15cm} + \textbf{changement des comportements} + \end{column} + \pause + \begin{column}{0.5\textwidth} + {\huge ...et son prix} + + \begin{center} + \includegraphics[scale=0.07]{img/dc.jpg} + \end{center} + + - matériel couteux et polluant + + - logiciels secrets + + - gestion opaque + + \vspace{0.2cm} + \textbf{prisonnier de l'écosystème} + \end{column} + \end{columns} +\end{frame} + +\begin{frame}[t] + \frametitle{Garage l'imposteur} + + \begin{columns}[t] + \begin{column}{0.5\textwidth} + {\huge Ressemble à du cloud...} + + \begin{center} + \includegraphics[scale=0.5]{img/shh.jpg} + \end{center} + + + \underline{compatible} avec les apps existantes + + + \underline{fonctionne} avec le mobile + + + \underline{s'adapte} aux habitudes prises + + + \end{column} + + \pause + \begin{column}{0.5\textwidth} + {\huge ...fait du P2P} + + \begin{center} + \includegraphics[scale=1]{img/death.jpg} + \end{center} + + \vspace{0.4cm} + + + \underline{contrôle} de l'infrastructure + + + \underline{transparent} code libre + + + \underline{sobre} fonctionne avec de vieilles machines à la maison + \end{column} + \end{columns} + +\end{frame} + + +\graphicspath{{img/}} + +\begin{frame} + \frametitle{Mais donc, c'est quoi Garage ?} + + \begin{columns}[t] + \begin{column}{0.5\textwidth} + \centering + \textbf{Un système de stockage distribué} + \vspace{1em} + + \includegraphics[width=.7\columnwidth]{img/garage_distributed.pdf} + \end{column} + \pause + + \begin{column}{0.5\textwidth} + \centering + \textbf{qui implémente l'API S3} + \vspace{2em} + + 
\includegraphics[width=.7\columnwidth]{img/Amazon-S3.jpg} + \end{column} + \end{columns} +\end{frame} + +\begin{frame} + \frametitle{Consistent Hashing (DynamoDB)} + \textbf{Comment répartir les fichiers sur les différentes machines ?} + \vspace{1em} + + \centering + + \only<1>{\includegraphics[width=.55\columnwidth]{img/consistent_hashing_1.pdf}}% + \only<2>{\includegraphics[width=.55\columnwidth]{img/consistent_hashing_2.pdf}}% + \only<3>{\includegraphics[width=.55\columnwidth]{img/consistent_hashing_3.pdf}}% + \only<4>{\includegraphics[width=.55\columnwidth]{img/consistent_hashing_4.pdf}}% +\end{frame} + +\begin{frame} + \frametitle{Garage Internals : 3 niveaux de consistent hashing} + \centering + \includegraphics[width=.85\columnwidth]{img/garage_tables.pdf} +\end{frame} + +\begin{frame} + \frametitle{Modèles de cohérence} + Garage utilise un modèle de cohérence relativement faible : + \vspace{1em} + + \begin{itemize} + \item Objets répliqués 3 fois, quorum de 2 pour les lectures et les écritures\\ + $\to$ cohérence \textbf{``read your writes''} + \vspace{1em} + \item<2-> Types de données CRDT + mécanisme d'anti-entropie\\ + $\to$ cohérence \textbf{à terme} (eventual consistency) + \vspace{1em} + \item<3-> Cela s'applique pour chaque fichier individuellement :\\ + pas de linéarisabilité ou de cohérence causale entre les opérations\\ + sur des fichiers différents + \vspace{1em} + \item<4-> \textbf{Avantage :} convient bien à un déploiement géodistribué (multi-datacenter) + \end{itemize} +\end{frame} + +\begin{frame} + \frametitle{Rust : retour d'expérience} + + \begin{columns} + \begin{column}{0.55\textwidth} + Garage est entièrement écrit en Rust ! 
+ \vspace{2em} + + \textbf{Points forts :} + \vspace{.5em} + \begin{itemize} + \item Langage compilé, très rapide + \vspace{.5em} + \item Typage fort, beaucoup de sécurités + \vspace{.5em} + \item Le meilleur de plusieurs paradigmes: + fonctionnel, orienté objet, impératif + \vspace{.5em} + \item Un écosystème de librairies très complet: + sérialisation, async/await, http, ... + \end{itemize} + \end{column} + + \begin{column}{0.45\textwidth} + \begin{centering} + \hspace{2em}\includegraphics[width=0.55\columnwidth]{img/rustacean-flat-happy.png} + \end{centering} + + \vspace{2em} + \textbf{Points faibles :} + \vspace{.5em} + \begin{itemize} + \item Les temps de compilation... + \vspace{.5em} + \item Compliqué à apprendre + \end{itemize} + \vspace{2em} + \end{column} + \end{columns} + +\end{frame} + +\end{document} + +%% vim: set ts=4 sw=4 tw=0 noet spelllang=fr : diff --git a/src/api/s3_get.rs b/src/api/s3_get.rs index a68c485b..43215923 100644 --- a/src/api/s3_get.rs +++ b/src/api/s3_get.rs @@ -24,7 +24,6 @@ fn object_headers( "Content-Type", version_meta.headers.content_type.to_string(), ) - .header("Content-Length", format!("{}", version_meta.size)) .header("ETag", version_meta.etag.to_string()) .header("Last-Modified", date_str) .header("Accept-Ranges", format!("bytes")); @@ -63,6 +62,7 @@ pub async fn handle_head( let body: Body = Body::from(vec![]); let response = object_headers(&version, version_meta) + .header("Content-Length", format!("{}", version_meta.size)) .status(StatusCode::OK) .body(body) .unwrap(); @@ -123,7 +123,9 @@ pub async fn handle_get( .await; } - let resp_builder = object_headers(&last_v, last_v_meta).status(StatusCode::OK); + let resp_builder = object_headers(&last_v, last_v_meta) + .header("Content-Length", format!("{}", last_v_meta.size)) + .status(StatusCode::OK); match &last_v_data { ObjectVersionData::DeleteMarker => unreachable!(), @@ -161,7 +163,7 @@ pub async fn handle_get( } }) .buffered(2); - //let body: Body = 
Box::new(StreamBody::new(Box::pin(body_stream))); + let body = hyper::body::Body::wrap_stream(body_stream); Ok(resp_builder.body(body)?) } @@ -181,9 +183,10 @@ pub async fn handle_get_range( } let resp_builder = object_headers(version, version_meta) + .header("Content-Length", format!("{}", end - begin)) .header( "Content-Range", - format!("bytes {}-{}/{}", begin, end, version_meta.size), + format!("bytes {}-{}/{}", begin, end - 1, version_meta.size), ) .status(StatusCode::PARTIAL_CONTENT); @@ -206,35 +209,49 @@ pub async fn handle_get_range( None => return Err(Error::NotFound), }; - let blocks = version - .blocks() - .iter() - .cloned() - .filter(|block| block.offset + block.size > begin && block.offset < end) - .collect::>(); + // We will store here the list of blocks that have an intersection with the requested + // range, as well as their "true offset", which is their actual offset in the complete + // file (whereas block.offset designates the offset of the block WITHIN THE PART + // block.part_number, which is not the same in the case of a multipart upload) + let mut blocks = Vec::with_capacity(std::cmp::min( + version.blocks().len(), + 4 + ((end - begin) / std::cmp::max(version.blocks()[0].size as u64, 1024)) as usize, + )); + let mut true_offset = 0; + for b in version.blocks().iter() { + if true_offset >= end { + break; + } + // Keep only blocks that have an intersection with the requested range + if true_offset < end && true_offset + b.size > begin { + blocks.push((b.clone(), true_offset)); + } + true_offset += b.size; + } let body_stream = futures::stream::iter(blocks) - .map(move |block| { + .map(move |(block, true_offset)| { let garage = garage.clone(); async move { let data = garage.block_manager.rpc_get_block(&block.hash).await?; - let start_in_block = if block.offset > begin { + let data = Bytes::from(data); + let start_in_block = if true_offset > begin { 0 } else { - begin - block.offset + begin - true_offset }; - let end_in_block = if block.offset 
+ block.size < end { + let end_in_block = if true_offset + block.size < end { block.size } else { - end - block.offset + end - true_offset }; Result::::Ok(Bytes::from( - data[start_in_block as usize..end_in_block as usize].to_vec(), + data.slice(start_in_block as usize..end_in_block as usize), )) } }) .buffered(2); - //let body: Body = Box::new(StreamBody::new(Box::pin(body_stream))); + let body = hyper::body::Body::wrap_stream(body_stream); Ok(resp_builder.body(body)?) }