Merge remote-tracking branch 'origin/main' into optimal-layout

This commit is contained in:
Mendes 2022-10-04 18:14:49 +02:00
commit 829f815a89
260 changed files with 30550 additions and 5813 deletions

View file

@ -2,68 +2,26 @@
kind: pipeline
name: default
workspace:
base: /drone/garage
volumes:
- name: nix_store
host:
path: /var/lib/drone/nix
- name: nix_config
temp: {}
environment:
HOME: /drone/garage
node:
nix-daemon: 1
steps:
- name: setup nix
image: nixpkgs/nix:nixos-21.05
volumes:
- name: nix_store
path: /nix
- name: nix_config
path: /etc/nix
- name: check formatting
image: nixpkgs/nix:nixos-22.05
commands:
- cp nix/nix.conf /etc/nix/nix.conf
- nix-build --no-build-output --no-out-link shell.nix --arg release false -A inputDerivation
- name: code quality
image: nixpkgs/nix:nixos-21.05
volumes:
- name: nix_store
path: /nix
- name: nix_config
path: /etc/nix
commands:
- nix-shell --arg release false --run "cargo fmt -- --check"
- nix-shell --arg release false --run "cargo clippy -- --deny warnings"
- nix-shell --attr rust --run "cargo fmt -- --check"
- name: build
image: nixpkgs/nix:nixos-21.05
volumes:
- name: nix_store
path: /nix
- name: nix_config
path: /etc/nix
image: nixpkgs/nix:nixos-22.05
commands:
- nix-build --no-build-output --option log-lines 100 --argstr target x86_64-unknown-linux-musl --arg release false --argstr git_version $DRONE_COMMIT
- nix-build --no-build-output --attr clippy.amd64 --argstr git_version ${DRONE_TAG:-$DRONE_COMMIT}
- name: unit + func tests
image: nixpkgs/nix:nixos-21.05
image: nixpkgs/nix:nixos-22.05
environment:
GARAGE_TEST_INTEGRATION_EXE: result/bin/garage
volumes:
- name: nix_store
path: /nix
- name: nix_config
path: /etc/nix
commands:
- |
nix-build \
--no-build-output \
--option log-lines 100 \
--argstr target x86_64-unknown-linux-musl \
--argstr compileMode test
- nix-build --no-build-output --attr test.amd64
- ./result/bin/garage_api-*
- ./result/bin/garage_model-*
- ./result/bin/garage_rpc-*
@ -73,16 +31,11 @@ steps:
- ./result/bin/garage-*
- ./result/bin/integration-*
- name: smoke-test
image: nixpkgs/nix:nixos-21.05
volumes:
- name: nix_store
path: /nix
- name: nix_config
path: /etc/nix
- name: integration tests
image: nixpkgs/nix:nixos-22.05
commands:
- nix-build --no-build-output --argstr target x86_64-unknown-linux-musl --arg release false --argstr git_version $DRONE_COMMIT
- nix-shell --arg release false --run ./script/test-smoke.sh || (cat /tmp/garage.log; false)
- nix-build --no-build-output --attr clippy.amd64 --argstr git_version ${DRONE_TAG:-$DRONE_COMMIT}
- nix-shell --attr integration --run ./script/test-smoke.sh || (cat /tmp/garage.log; false)
trigger:
event:
@ -92,78 +45,39 @@ trigger:
- tag
- cron
node:
nix: 1
---
kind: pipeline
type: docker
name: release-linux-x86_64
name: release-linux-amd64
volumes:
- name: nix_store
host:
path: /var/lib/drone/nix
- name: nix_config
temp: {}
environment:
TARGET: x86_64-unknown-linux-musl
node:
nix-daemon: 1
steps:
- name: setup nix
image: nixpkgs/nix:nixos-21.05
volumes:
- name: nix_store
path: /nix
- name: nix_config
path: /etc/nix
commands:
- cp nix/nix.conf /etc/nix/nix.conf
- nix-build --no-build-output --no-out-link shell.nix -A inputDerivation
- name: build
image: nixpkgs/nix:nixos-21.05
volumes:
- name: nix_store
path: /nix
- name: nix_config
path: /etc/nix
image: nixpkgs/nix:nixos-22.05
commands:
- nix-build --no-build-output --argstr target $TARGET --arg release true --argstr git_version $DRONE_COMMIT
- nix-build --no-build-output --attr pkgs.amd64.release --argstr git_version ${DRONE_TAG:-$DRONE_COMMIT}
- nix-shell --attr rust --run "./script/not-dynamic.sh result/bin/garage"
- name: integration
image: nixpkgs/nix:nixos-21.05
volumes:
- name: nix_store
path: /nix
- name: nix_config
path: /etc/nix
image: nixpkgs/nix:nixos-22.05
commands:
- nix-shell --run ./script/test-smoke.sh || (cat /tmp/garage.log; false)
- nix-shell --attr integration --run ./script/test-smoke.sh || (cat /tmp/garage.log; false)
- name: push static binary
image: nixpkgs/nix:nixos-21.05
volumes:
- name: nix_store
path: /nix
- name: nix_config
path: /etc/nix
image: nixpkgs/nix:nixos-22.05
environment:
AWS_ACCESS_KEY_ID:
from_secret: garagehq_aws_access_key_id
AWS_SECRET_ACCESS_KEY:
from_secret: garagehq_aws_secret_access_key
TARGET: "x86_64-unknown-linux-musl"
commands:
- nix-shell --arg rust false --arg integration false --run "to_s3"
- nix-shell --attr release --run "to_s3"
- name: docker build and publish
image: nixpkgs/nix:nixos-21.05
volumes:
- name: nix_store
path: /nix
- name: nix_config
path: /etc/nix
image: nixpkgs/nix:nixos-22.05
environment:
DOCKER_AUTH:
from_secret: docker_auth
@ -174,7 +88,7 @@ steps:
- mkdir -p /kaniko/.docker
- echo $DOCKER_AUTH > /kaniko/.docker/config.json
- export CONTAINER_TAG=${DRONE_TAG:-$DRONE_COMMIT}
- nix-shell --arg rust false --arg integration false --run "to_docker"
- nix-shell --attr release --run "to_docker"
trigger:
@ -182,78 +96,39 @@ trigger:
- promote
- cron
node:
nix: 1
---
kind: pipeline
type: docker
name: release-linux-i686
name: release-linux-i386
volumes:
- name: nix_store
host:
path: /var/lib/drone/nix
- name: nix_config
temp: {}
environment:
TARGET: i686-unknown-linux-musl
node:
nix-daemon: 1
steps:
- name: setup nix
image: nixpkgs/nix:nixos-21.05
volumes:
- name: nix_store
path: /nix
- name: nix_config
path: /etc/nix
commands:
- cp nix/nix.conf /etc/nix/nix.conf
- nix-build --no-build-output --no-out-link shell.nix -A inputDerivation
- name: build
image: nixpkgs/nix:nixos-21.05
volumes:
- name: nix_store
path: /nix
- name: nix_config
path: /etc/nix
image: nixpkgs/nix:nixos-22.05
commands:
- nix-build --no-build-output --argstr target $TARGET --arg release true --argstr git_version $DRONE_COMMIT
- nix-build --no-build-output --attr pkgs.i386.release --argstr git_version ${DRONE_TAG:-$DRONE_COMMIT}
- nix-shell --attr rust --run "./script/not-dynamic.sh result/bin/garage"
- name: integration
image: nixpkgs/nix:nixos-21.05
volumes:
- name: nix_store
path: /nix
- name: nix_config
path: /etc/nix
image: nixpkgs/nix:nixos-22.05
commands:
- nix-shell --run ./script/test-smoke.sh || (cat /tmp/garage.log; false)
- nix-shell --attr integration --run ./script/test-smoke.sh || (cat /tmp/garage.log; false)
- name: push static binary
image: nixpkgs/nix:nixos-21.05
volumes:
- name: nix_store
path: /nix
- name: nix_config
path: /etc/nix
image: nixpkgs/nix:nixos-22.05
environment:
AWS_ACCESS_KEY_ID:
from_secret: garagehq_aws_access_key_id
AWS_SECRET_ACCESS_KEY:
from_secret: garagehq_aws_secret_access_key
TARGET: "i686-unknown-linux-musl"
commands:
- nix-shell --arg rust false --arg integration false --run "to_s3"
- nix-shell --attr release --run "to_s3"
- name: docker build and publish
image: nixpkgs/nix:nixos-21.05
volumes:
- name: nix_store
path: /nix
- name: nix_config
path: /etc/nix
image: nixpkgs/nix:nixos-22.05
environment:
DOCKER_AUTH:
from_secret: docker_auth
@ -264,75 +139,41 @@ steps:
- mkdir -p /kaniko/.docker
- echo $DOCKER_AUTH > /kaniko/.docker/config.json
- export CONTAINER_TAG=${DRONE_TAG:-$DRONE_COMMIT}
- nix-shell --arg rust false --arg integration false --run "to_docker"
- nix-shell --attr release --run "to_docker"
trigger:
event:
- promote
- cron
node:
nix: 1
---
kind: pipeline
type: docker
name: release-linux-aarch64
name: release-linux-arm64
volumes:
- name: nix_store
host:
path: /var/lib/drone/nix
- name: nix_config
temp: {}
environment:
TARGET: aarch64-unknown-linux-musl
node:
nix-daemon: 1
steps:
- name: setup nix
image: nixpkgs/nix:nixos-21.05
volumes:
- name: nix_store
path: /nix
- name: nix_config
path: /etc/nix
commands:
- cp nix/nix.conf /etc/nix/nix.conf
- nix-build --no-build-output --no-out-link ./shell.nix --arg rust false --arg integration false -A inputDerivation
- name: build
image: nixpkgs/nix:nixos-21.05
volumes:
- name: nix_store
path: /nix
- name: nix_config
path: /etc/nix
image: nixpkgs/nix:nixos-22.05
commands:
- nix-build --no-build-output --argstr target $TARGET --arg release true --argstr git_version $DRONE_COMMIT
- nix-build --no-build-output --attr pkgs.arm64.release --argstr git_version ${DRONE_TAG:-$DRONE_COMMIT}
- nix-shell --attr rust --run "./script/not-dynamic.sh result/bin/garage"
- name: push static binary
image: nixpkgs/nix:nixos-21.05
volumes:
- name: nix_store
path: /nix
- name: nix_config
path: /etc/nix
image: nixpkgs/nix:nixos-22.05
environment:
AWS_ACCESS_KEY_ID:
from_secret: garagehq_aws_access_key_id
AWS_SECRET_ACCESS_KEY:
from_secret: garagehq_aws_secret_access_key
TARGET: "aarch64-unknown-linux-musl"
commands:
- nix-shell --arg rust false --arg integration false --run "to_s3"
- nix-shell --attr release --run "to_s3"
- name: docker build and publish
image: nixpkgs/nix:nixos-21.05
volumes:
- name: nix_store
path: /nix
- name: nix_config
path: /etc/nix
image: nixpkgs/nix:nixos-22.05
environment:
DOCKER_AUTH:
from_secret: docker_auth
@ -343,75 +184,41 @@ steps:
- mkdir -p /kaniko/.docker
- echo $DOCKER_AUTH > /kaniko/.docker/config.json
- export CONTAINER_TAG=${DRONE_TAG:-$DRONE_COMMIT}
- nix-shell --arg rust false --arg integration false --run "to_docker"
- nix-shell --attr release --run "to_docker"
trigger:
event:
- promote
- cron
node:
nix: 1
---
kind: pipeline
type: docker
name: release-linux-armv6l
name: release-linux-arm
volumes:
- name: nix_store
host:
path: /var/lib/drone/nix
- name: nix_config
temp: {}
environment:
TARGET: armv6l-unknown-linux-musleabihf
node:
nix-daemon: 1
steps:
- name: setup nix
image: nixpkgs/nix:nixos-21.05
volumes:
- name: nix_store
path: /nix
- name: nix_config
path: /etc/nix
commands:
- cp nix/nix.conf /etc/nix/nix.conf
- nix-build --no-build-output --no-out-link --arg rust false --arg integration false -A inputDerivation
- name: build
image: nixpkgs/nix:nixos-21.05
volumes:
- name: nix_store
path: /nix
- name: nix_config
path: /etc/nix
image: nixpkgs/nix:nixos-22.05
commands:
- nix-build --no-build-output --argstr target $TARGET --arg release true --argstr git_version $DRONE_COMMIT
- nix-build --no-build-output --attr pkgs.arm.release --argstr git_version ${DRONE_TAG:-$DRONE_COMMIT}
- nix-shell --attr rust --run "./script/not-dynamic.sh result/bin/garage"
- name: push static binary
image: nixpkgs/nix:nixos-21.05
volumes:
- name: nix_store
path: /nix
- name: nix_config
path: /etc/nix
image: nixpkgs/nix:nixos-22.05
environment:
AWS_ACCESS_KEY_ID:
from_secret: garagehq_aws_access_key_id
AWS_SECRET_ACCESS_KEY:
from_secret: garagehq_aws_secret_access_key
TARGET: "armv6l-unknown-linux-musleabihf"
commands:
- nix-shell --arg integration false --arg rust false --run "to_s3"
- nix-shell --attr release --run "to_s3"
- name: docker build and publish
image: nixpkgs/nix:nixos-21.05
volumes:
- name: nix_store
path: /nix
- name: nix_config
path: /etc/nix
image: nixpkgs/nix:nixos-22.05
environment:
DOCKER_AUTH:
from_secret: docker_auth
@ -422,32 +229,35 @@ steps:
- mkdir -p /kaniko/.docker
- echo $DOCKER_AUTH > /kaniko/.docker/config.json
- export CONTAINER_TAG=${DRONE_TAG:-$DRONE_COMMIT}
- nix-shell --arg rust false --arg integration false --run "to_docker"
- nix-shell --attr release --run "to_docker"
trigger:
event:
- promote
- cron
node:
nix: 1
---
kind: pipeline
type: docker
name: refresh-release-page
volumes:
- name: nix_store
host:
path: /var/lib/drone/nix
node:
nix-daemon: 1
steps:
- name: multiarch-docker
image: nixpkgs/nix:nixos-22.05
environment:
DOCKER_AUTH:
from_secret: docker_auth
HOME: "/root"
commands:
- mkdir -p /root/.docker
- echo $DOCKER_AUTH > /root/.docker/config.json
- export CONTAINER_TAG=${DRONE_TAG:-$DRONE_COMMIT}
- nix-shell --attr release --run "multiarch_docker"
- name: refresh-index
image: nixpkgs/nix:nixos-21.05
volumes:
- name: nix_store
path: /nix
image: nixpkgs/nix:nixos-22.05
environment:
AWS_ACCESS_KEY_ID:
from_secret: garagehq_aws_access_key_id
@ -455,24 +265,21 @@ steps:
from_secret: garagehq_aws_secret_access_key
commands:
- mkdir -p /etc/nix && cp nix/nix.conf /etc/nix/nix.conf
- nix-shell --arg integration false --arg rust false --run "refresh_index"
- nix-shell --attr release --run "refresh_index"
depends_on:
- release-linux-x86_64
- release-linux-i686
- release-linux-aarch64
- release-linux-armv6l
- release-linux-amd64
- release-linux-i386
- release-linux-arm64
- release-linux-arm
trigger:
event:
- promote
- cron
node:
nix: 1
---
kind: signature
hmac: 3fc19d6f9a3555519c8405e3281b2e74289bb802f644740d5481d53df3a01fa4
hmac: 103a04785c98f5376a63ce22865c2576963019bbc4d828f200d2a470a3c821ea
...

1195
Cargo.lock generated

File diff suppressed because it is too large Load diff

2378
Cargo.nix

File diff suppressed because it is too large Load diff

View file

@ -1,16 +1,20 @@
[workspace]
resolver = "2"
members = [
"src/db",
"src/util",
"src/rpc",
"src/table",
"src/block",
"src/model",
"src/admin",
"src/api",
"src/web",
"src/garage"
"src/garage",
"src/k2v-client",
]
default-members = ["src/garage"]
[profile.dev]
lto = "off"

View file

@ -1,13 +1,27 @@
.PHONY: doc all release shell
.PHONY: doc all release shell run1 run2 run3
all:
clear; cargo build
doc:
cd doc/book; mdbook build
release:
nix-build --arg release true
shell:
nix-shell
# ----
run1:
RUST_LOG=garage=debug ./target/debug/garage -c tmp/config1.toml server
run1rel:
RUST_LOG=garage=debug ./target/release/garage -c tmp/config1.toml server
run2:
RUST_LOG=garage=debug ./target/debug/garage -c tmp/config2.toml server
run2rel:
RUST_LOG=garage=debug ./target/release/garage -c tmp/config2.toml server
run3:
RUST_LOG=garage=debug ./target/debug/garage -c tmp/config3.toml server
run3rel:
RUST_LOG=garage=debug ./target/release/garage -c tmp/config3.toml server

View file

@ -15,18 +15,24 @@ Garage [![Build Status](https://drone.deuxfleurs.fr/api/badges/Deuxfleurs/garage
]
</p>
Garage is a lightweight S3-compatible distributed object store, with the following goals:
Garage is an S3-compatible distributed object storage service
designed for self-hosting at a small-to-medium scale.
- As self-contained as possible
- Easy to set up
- Highly resilient to network failures, network latency, disk failures, sysadmin failures
- Relatively simple
- Made for multi-datacenter deployments
Garage is designed for storage clusters composed of nodes running
at different physical locations,
in order to easily provide a storage service that replicates data at these different
locations and stays available even when some servers are unreachable.
Garage also focuses on being lightweight, easy to operate, and highly resilient to
machine failures.
Non-goals include:
Garage is built by [Deuxfleurs](https://deuxfleurs.fr),
an experimental small-scale self hosted service provider,
which has been using it in production since its first release in 2020.
- Extremely high performance
- Complete implementation of the S3 API
- Erasure coding (our replication model is simply to copy the data as is on several nodes, in different datacenters if possible)
Learn more on our dedicated documentation pages:
Our main use case is to provide a distributed storage layer for small-scale self hosted services such as [Deuxfleurs](https://deuxfleurs.fr).
- [Goals and use cases](https://garagehq.deuxfleurs.fr/documentation/design/goals/)
- [Features](https://garagehq.deuxfleurs.fr/documentation/reference-manual/features/)
- [Quick start](https://garagehq.deuxfleurs.fr/documentation/quick-start/)
Garage is entirely free software released under the terms of the AGPLv3.

View file

@ -1,147 +1,33 @@
{
system ? builtins.currentSystem,
release ? false,
target ? "x86_64-unknown-linux-musl",
compileMode ? null,
git_version ? null,
}:
with import ./nix/common.nix;
let
crossSystem = { config = target; };
in let
log = v: builtins.trace v v;
let
pkgs = import pkgsSrc { };
compile = import ./nix/compile.nix;
build_debug_and_release = (target: {
debug = (compile { inherit target git_version; release = false; }).workspace.garage { compileMode = "build"; };
release = (compile { inherit target git_version; release = true; }).workspace.garage { compileMode = "build"; };
});
test = (rustPkgs: pkgs.symlinkJoin {
name ="garage-tests";
paths = builtins.map (key: rustPkgs.workspace.${key} { compileMode = "test"; }) (builtins.attrNames rustPkgs.workspace);
});
pkgs = import pkgsSrc {
inherit system crossSystem;
overlays = [ cargo2nixOverlay ];
in {
pkgs = {
amd64 = build_debug_and_release "x86_64-unknown-linux-musl";
i386 = build_debug_and_release "i686-unknown-linux-musl";
arm64 = build_debug_and_release "aarch64-unknown-linux-musl";
arm = build_debug_and_release "armv6l-unknown-linux-musleabihf";
};
/*
Rust and Nix triples are not the same. Cargo2nix has a dedicated library
to convert Nix triples to Rust ones. We need this conversion as we want to
set later options linked to our (rust) target in a generic way. Not only
the triple terminology is different, but also the "roles" are named differently.
Nix uses a build/host/target terminology where Nix's "host" maps to Cargo's "target".
*/
rustTarget = log (pkgs.rustBuilder.rustLib.rustTriple pkgs.stdenv.hostPlatform);
/*
Cargo2nix is built for rustOverlay which installs Rust from Mozilla releases.
We want our own Rust to avoid incompatibilities, like we had with musl 1.2.0.
rustc was built with musl < 1.2.0 and nix shipped musl >= 1.2.0 which lead to compilation breakage.
So we want a Rust release that is bound to our Nix repository to avoid these problems.
See here for more info: https://musl.libc.org/time64.html
Because Cargo2nix does not support the Rust environment shipped by NixOS,
we emulate the structure of the Rust object created by rustOverlay.
In practise, rustOverlay ships rustc+cargo in a single derivation while
NixOS ships them in separate ones. We reunite them with symlinkJoin.
*/
rustChannel = pkgs.symlinkJoin {
name ="rust-channel";
paths = [
pkgs.rustPlatform.rust.rustc
pkgs.rustPlatform.rust.cargo
];
test = {
amd64 = test (compile { inherit git_version; target = "x86_64-unknown-linux-musl"; });
};
/*
Cargo2nix provides many overrides by default, you can take inspiration from them:
https://github.com/cargo2nix/cargo2nix/blob/master/overlay/overrides.nix
You can have a complete list of the available options by looking at the overriden object, mkcrate:
https://github.com/cargo2nix/cargo2nix/blob/master/overlay/mkcrate.nix
*/
overrides = pkgs.rustBuilder.overrides.all ++ [
/*
[1] We need to alter Nix hardening to be able to statically compile: PIE,
Position Independent Executables seems to be supported only on amd64. Having
this flags set either make our executables crash or compile as dynamic on many platforms.
In the following section codegenOpts, we reactive it for the supported targets
(only amd64 curently) through the `-static-pie` flag. PIE is a feature used
by ASLR, which helps mitigate security issues.
Learn more about Nix Hardening: https://github.com/NixOS/nixpkgs/blob/master/pkgs/build-support/cc-wrapper/add-hardening.sh
[2] We want to inject the git version while keeping the build deterministic.
As we do not want to consider the .git folder as part of the input source,
we ask the user (the CI often) to pass the value to Nix.
*/
(pkgs.rustBuilder.rustLib.makeOverride {
name = "garage";
overrideAttrs = drv:
/* [1] */ { hardeningDisable = [ "pie" ]; }
//
/* [2] */ (if git_version != null then {
preConfigure = ''
${drv.preConfigure or ""}
export GIT_VERSION="${git_version}"
'';
} else {});
})
/*
We ship some parts of the code disabled by default by putting them behind a flag.
It speeds up the compilation (when the feature is not required) and released crates have less dependency by default (less attack surface, disk space, etc.).
But we want to ship these additional features when we release Garage.
In the end, we chose to exclude all features from debug builds while putting (all of) them in the release builds.
Currently, the only feature of Garage is kubernetes-discovery from the garage_rpc crate.
*/
(pkgs.rustBuilder.rustLib.makeOverride {
name = "garage_rpc";
overrideArgs = old:
{
features = if release then [ "kubernetes-discovery" ] else [];
};
})
];
packageFun = import ./Cargo.nix;
/*
We compile fully static binaries with musl to simplify deployment on most systems.
When possible, we reactivate PIE hardening (see above).
Also, if you set the RUSTFLAGS environment variable, the following parameters will
be ignored.
For more information on static builds, please refer to Rust's RFC 1721.
https://rust-lang.github.io/rfcs/1721-crt-static.html#specifying-dynamicstatic-c-runtime-linkage
*/
codegenOpts = {
"armv6l-unknown-linux-musleabihf" = [ "target-feature=+crt-static" "link-arg=-static" ]; /* compile as dynamic with static-pie */
"aarch64-unknown-linux-musl" = [ "target-feature=+crt-static" "link-arg=-static" ]; /* segfault with static-pie */
"i686-unknown-linux-musl" = [ "target-feature=+crt-static" "link-arg=-static" ]; /* segfault with static-pie */
"x86_64-unknown-linux-musl" = [ "target-feature=+crt-static" "link-arg=-static-pie" ];
clippy = {
amd64 = (compile { inherit git_version; compiler = "clippy"; }).workspace.garage { compileMode = "build"; } ;
};
/*
The following definition is not elegant as we use a low level function of Cargo2nix
that enables us to pass our custom rustChannel object. We need this low level definition
to pass Nix's Rust toolchains instead of Mozilla's one.
target is mandatory but must be kept to null to allow cargo2nix to set it to the appropriate value
for each crate.
*/
rustPkgs = pkgs.rustBuilder.makePackageSet {
inherit packageFun rustChannel release codegenOpts;
packageOverrides = overrides;
target = null;
buildRustPackages = pkgs.buildPackages.rustBuilder.makePackageSet {
inherit rustChannel packageFun codegenOpts;
packageOverrides = overrides;
target = null;
};
};
in
if compileMode == "test"
then pkgs.symlinkJoin {
name ="garage-tests";
paths = builtins.map (key: rustPkgs.workspace.${key} { inherit compileMode; }) (builtins.attrNames rustPkgs.workspace);
}
else rustPkgs.workspace.garage { inherit compileMode; }
}

View file

@ -17,6 +17,61 @@ If you still want to use Borg, you can use it with `rclone mount`.
## Restic
Create your key and bucket:
```bash
garage key new my-key
garage bucket create backup
garage bucket allow backup --read --write --key my-key
```
Then register your Key ID and Secret key in your environment:
```bash
export AWS_ACCESS_KEY_ID=GKxxx
export AWS_SECRET_ACCESS_KEY=xxxx
```
Configure restic from environment too:
```bash
export RESTIC_REPOSITORY="s3:http://localhost:3900/backups"
echo "Generated password (save it safely): $(openssl rand -base64 32)"
export RESTIC_PASSWORD=xxx # copy paste your generated password here
```
Do not forget to save your password safely (in your password manager or print it). It will be needed to decrypt your backups.
Now you can use restic:
```bash
# Initialize the bucket, must be run once
restic init
# Backup your PostgreSQL database
# (We suppose your PostgreSQL daemon is stopped for all commands)
restic backup /var/lib/postgresql
# Show backup history
restic snapshots
# Backup again your PostgreSQL database, it will be faster as only changes will be uploaded
restic backup /var/lib/postgresql
# Show backup history (again)
restic snapshots
# Restore a backup
# (79766175 is the ID of the snapshot you want to restore)
mv /var/lib/postgresql /var/lib/postgresql.broken
restic restore 79766175 --target /var/lib/postgresql
```
Restic has way more features than the ones presented here.
You can discover all of them by accessing its documentation from the link below.
*External links:* [Restic Documentation > Amazon S3](https://restic.readthedocs.io/en/stable/030_preparing_a_new_repo.html#amazon-s3)
## Duplicity

View file

@ -3,7 +3,7 @@ title = "Websites (Hugo, Jekyll, Publii...)"
weight = 10
+++
Garage is also suitable to host static websites.
Garage is also suitable [to host static websites](@/documentation/cookbook/exposing-websites.md).
While they can be deployed with traditional CLI tools, some static website generators have integrated options to ease your workflow.
| Name | Status | Note |

View file

@ -5,12 +5,14 @@ weight = 25
## Configuring a bucket for website access
There are two methods to expose buckets as website:
There are three methods to expose buckets as website:
1. using the PutBucketWebsite S3 API call, which is allowed for access keys that have the owner permission bit set
2. from the Garage CLI, by an adminstrator of the cluster
3. using the Garage administration API
The `PutBucketWebsite` API endpoint [is documented](https://docs.aws.amazon.com/AmazonS3/latest/API/API_PutBucketWebsite.html) in the official AWS docs.
This endpoint can also be called [using `aws s3api`](https://docs.aws.amazon.com/cli/latest/reference/s3api/put-bucket-website.html) on the command line.
The website configuration supported by Garage is only a subset of the possibilities on Amazon S3: redirections are not supported, only the index document and error document can be specified.

View file

@ -20,40 +20,76 @@ sudo apt-get update
sudo apt-get install build-essential
```
## Using source from `crates.io`
Garage's source code is published on `crates.io`, Rust's official package repository.
This means you can simply ask `cargo` to download and build this source code for you:
```bash
cargo install garage
```
That's all, `garage` should be in `$HOME/.cargo/bin`.
You can add this folder to your `$PATH` or copy the binary somewhere else on your system.
For instance:
```bash
sudo cp $HOME/.cargo/bin/garage /usr/local/bin/garage
```
## Using source from the Gitea repository
## Building from source from the Gitea repository
The primary location for Garage's source code is the
[Gitea repository](https://git.deuxfleurs.fr/Deuxfleurs/garage).
[Gitea repository](https://git.deuxfleurs.fr/Deuxfleurs/garage),
which contains all of the released versions as well as the code
for the developpement of the next version.
Clone the repository and build Garage with the following commands:
Clone the repository and enter it as follows:
```bash
git clone https://git.deuxfleurs.fr/Deuxfleurs/garage.git
cd garage
cargo build
```
Be careful, as this will make a debug build of Garage, which will be extremely slow!
To make a release build, invoke `cargo build --release` (this takes much longer).
If you wish to build a specific version of Garage, check out the corresponding tag. For instance:
The binaries built this way are found in `target/{debug,release}/garage`.
```bash
git tag # List available tags
git checkout v0.8.0 # Change v0.8.0 with the version you wish to build
```
Otherwise you will be building a developpement build from the `main` branch
that includes all of the changes to be released in the next version.
Be careful that such a build might be unstable or contain bugs,
and could be incompatible with nodes that run stable versions of Garage.
Finally, build Garage with the following command:
```bash
cargo build --release
```
The binary built this way can now be found in `target/release/garage`.
You may simply copy this binary to somewhere in your `$PATH` in order to
have the `garage` command available in your shell, for instance:
```bash
sudo cp target/release/garage /usr/local/bin/garage
```
If you are planning to develop Garage,
you might be interested in producing debug builds, which compile faster but run slower:
this can be done by removing the `--release` flag, and the resulting build can then
be found in `target/debug/garage`.
## List of available Cargo feature flags
Garage supports a number of compilation options in the form of Cargo feature flags,
which can be used to provide builds adapted to your system and your use case.
To produce a build with a given set of features, invoke the `cargo build` command
as follows:
```bash
# This will build the default feature set plus feature1, feature2 and feature3
cargo build --release --features feature1,feature2,feature3
# This will build ONLY feature1, feature2 and feature3
cargo build --release --no-default-features \
--features feature1,feature2,feature3
```
The following feature flags are available in v0.8.0:
| Feature flag | Enabled | Description |
| ------------ | ------- | ----------- |
| `bundled-libs` | *by default* | Use bundled version of sqlite3, zstd, lmdb and libsodium |
| `system-libs` | optional | Use system version of sqlite3, zstd, lmdb and libsodium<br>if available (exclusive with `bundled-libs`, build using<br>`cargo build --no-default-features --features system-libs`) |
| `k2v` | optional | Enable the experimental K2V API (if used, all nodes on your<br>Garage cluster must have it enabled as well) |
| `kubernetes-discovery` | optional | Enable automatic registration and discovery<br>of cluster nodes through the Kubernetes API |
| `metrics` | *by default* | Enable collection of metrics in Prometheus format on the admin API |
| `telemetry-otlp` | optional | Enable collection of execution traces using OpenTelemetry |
| `sled` | *by default* | Enable using Sled to store Garage's metadata |
| `lmdb` | optional | Enable using LMDB to store Garage's metadata |
| `sqlite` | optional | Enable using Sqlite3 to store Garage's metadata |

View file

@ -0,0 +1,87 @@
+++
title = "Deploying on Kubernetes"
weight = 32
+++
Garage can also be deployed on a kubernetes cluster via helm chart.
## Deploying
Firstly clone the repository:
```bash
git clone https://git.deuxfleurs.fr/Deuxfleurs/garage
cd garage/scripts/helm
```
Deploy with default options:
```bash
helm install --create-namespace --namespace garage garage ./garage
```
Or deploy with custom values:
```bash
helm install --create-namespace --namespace garage garage ./garage -f values.override.yaml
```
After deploying, cluster layout must be configured manually as described in [Creating a cluster layout](@/documentation/quick-start/_index.md#creating-a-cluster-layout). Use the following command to access garage CLI:
```bash
kubectl exec --stdin --tty -n garage garage-0 -- ./garage status
```
## Overriding default values
All possible configuration values can be found with:
```bash
helm show values ./garage
```
This is an example `values.overrride.yaml` for deploying in a microk8s cluster with a https s3 api ingress route:
```yaml
garage:
# Use only 2 replicas per object
replicationMode: "2"
# Start 4 instances (StatefulSets) of garage
replicaCount: 4
# Override default storage class and size
persistence:
meta:
storageClass: "openebs-hostpath"
size: 100Mi
data:
storageClass: "openebs-hostpath"
size: 1Gi
ingress:
s3:
api:
enabled: true
className: "public"
annotations:
cert-manager.io/cluster-issuer: "letsencrypt-prod"
nginx.ingress.kubernetes.io/proxy-body-size: 500m
hosts:
- host: s3-api.my-domain.com
paths:
- path: /
pathType: Prefix
tls:
- secretName: garage-ingress-cert
hosts:
- s3-api.my-domain.com
```
## Removing
```bash
helm delete --namespace garage garage
```
Note that this will leave behind custom CRD `garagenodes.deuxfleurs.fr`, which must be removed manually if desired.

View file

@ -51,15 +51,15 @@ to store 2 TB of data in total.
## Get a Docker image
Our docker image is currently named `dxflrs/amd64_garage` and is stored on the [Docker Hub](https://hub.docker.com/r/dxflrs/amd64_garage/tags?page=1&ordering=last_updated).
We encourage you to use a fixed tag (eg. `v0.4.0`) and not the `latest` tag.
For this example, we will use the latest published version at the time of the writing which is `v0.4.0` but it's up to you
to check [the most recent versions on the Docker Hub](https://hub.docker.com/r/dxflrs/amd64_garage/tags?page=1&ordering=last_updated).
Our docker image is currently named `dxflrs/garage` and is stored on the [Docker Hub](https://hub.docker.com/r/dxflrs/garage/tags?page=1&ordering=last_updated).
We encourage you to use a fixed tag (eg. `v0.8.0`) and not the `latest` tag.
For this example, we will use the latest published version at the time of the writing which is `v0.8.0` but it's up to you
to check [the most recent versions on the Docker Hub](https://hub.docker.com/r/dxflrs/garage/tags?page=1&ordering=last_updated).
For example:
```
sudo docker pull dxflrs/amd64_garage:v0.4.0
sudo docker pull dxflrs/garage:v0.8.0
```
## Deploying and configuring Garage
@ -125,7 +125,7 @@ docker run \
-v /etc/garage.toml:/etc/garage.toml \
-v /var/lib/garage/meta:/var/lib/garage/meta \
-v /var/lib/garage/data:/var/lib/garage/data \
lxpz/garage_amd64:v0.4.0
dxflrs/garage:v0.8.0
```
It should be restarted automatically at each reboot.

View file

@ -100,7 +100,7 @@ server {
}
```
## Exposing the web endpoint
### Exposing the web endpoint
To better understand the logic involved, you can refer to the [Exposing buckets as websites](/cookbook/exposing_websites.html) section.
Otherwise, the configuration is very similar to the S3 endpoint.
@ -140,6 +140,165 @@ server {
@TODO
## Traefik
## Traefik v2
We will see in this part how to set up a reverse proxy with [Traefik](https://docs.traefik.io/).
Here is [a basic configuration file](https://doc.traefik.io/traefik/https/acme/#configuration-examples):
```toml
[entryPoints]
[entryPoints.web]
address = ":80"
[entryPoints.websecure]
address = ":443"
[certificatesResolvers.myresolver.acme]
email = "your-email@example.com"
storage = "acme.json"
[certificatesResolvers.myresolver.acme.httpChallenge]
# used during the challenge
entryPoint = "web"
```
### Add Garage service
To add Garage on Traefik you should declare a new service using its IP address (or hostname) and port:
```toml
[http.services]
[http.services.my_garage_service.loadBalancer]
[[http.services.my_garage_service.loadBalancer.servers]]
url = "http://xxx.xxx.xxx.xxx"
port = 3900
```
It's possible to declare multiple Garage servers as back-ends:
```toml
[http.services]
[[http.services.my_garage_service.loadBalancer.servers]]
url = "http://xxx.xxx.xxx.xxx"
port = 3900
[[http.services.my_garage_service.loadBalancer.servers]]
url = "http://yyy.yyy.yyy.yyy"
port = 3900
[[http.services.my_garage_service.loadBalancer.servers]]
url = "http://zzz.zzz.zzz.zzz"
port = 3900
```
Traefik can remove unhealthy servers automatically with [a health check configuration](https://doc.traefik.io/traefik/routing/services/#health-check):
```
[http.services]
[http.services.my_garage_service.loadBalancer]
[http.services.my_garage_service.loadBalancer.healthCheck]
path = "/"
interval = "60s"
timeout = "5s"
```
### Adding a website
To add a new website, add the following declaration to your Traefik configuration file:
```toml
[http.routers]
[http.routers.my_website]
rule = "Host(`yoururl.example.org`)"
service = "my_garage_service"
entryPoints = ["web"]
```
Enable HTTPS access to your website with the following configuration section ([documentation](https://doc.traefik.io/traefik/https/overview/)):
```toml
...
entryPoints = ["websecure"]
[http.routers.my_website.tls]
certResolver = "myresolver"
...
```
### Adding gzip compression
Add the following configuration section [to compress response](https://doc.traefik.io/traefik/middlewares/http/compress/) using [gzip](https://developer.mozilla.org/en-US/docs/Glossary/GZip_compression) before sending them to the client:
```toml
[http.routers]
[http.routers.my_website]
...
middlewares = ["gzip_compress"]
...
[http.middlewares]
[http.middlewares.gzip_compress.compress]
```
### Add caching response
Traefik's caching middleware is only available on [entreprise version](https://doc.traefik.io/traefik-enterprise/middlewares/http-cache/), however the freely-available [Souin plugin](https://github.com/darkweak/souin#tr%C3%A6fik-container) can also do the job. (section to be completed)
### Complete example
```toml
[entryPoints]
[entryPoints.web]
address = ":80"
[entryPoints.websecure]
address = ":443"
[certificatesResolvers.myresolver.acme]
email = "your-email@example.com"
storage = "acme.json"
[certificatesResolvers.myresolver.acme.httpChallenge]
# used during the challenge
entryPoint = "web"
[http.routers]
[http.routers.my_website]
rule = "Host(`yoururl.example.org`)"
service = "my_garage_service"
middlewares = ["gzip_compress"]
entryPoints = ["websecure"]
[http.services]
[http.services.my_garage_service.loadBalancer]
[http.services.my_garage_service.loadBalancer.healthCheck]
path = "/"
interval = "60s"
timeout = "5s"
[[http.services.my_garage_service.loadBalancer.servers]]
url = "http://xxx.xxx.xxx.xxx"
[[http.services.my_garage_service.loadBalancer.servers]]
url = "http://yyy.yyy.yyy.yyy"
[[http.services.my_garage_service.loadBalancer.servers]]
url = "http://zzz.zzz.zzz.zzz"
[http.middlewares]
[http.middlewares.gzip_compress.compress]
```
## Caddy
Your Caddy configuration can be as simple as:
```caddy
s3.garage.tld, *.s3.garage.tld {
reverse_proxy localhost:3900 192.168.1.2:3900 example.tld:3900
}
*.web.garage.tld {
reverse_proxy localhost:3902 192.168.1.2:3900 example.tld:3900
}
admin.garage.tld {
reverse_proxy localhost:3903
}
```
But at the same time, the `reverse_proxy` is very flexible.
For a production deployment, you should [read its documentation](https://caddyserver.com/docs/caddyfile/directives/reverse_proxy) as it supports features like DNS discovery of upstreams, load balancing with checks, streaming parameters, etc.
@TODO

View file

@ -1,6 +1,6 @@
+++
title = "Benchmarks"
weight = 10
weight = 40
+++
With Garage, we wanted to build a software defined storage service that follow the [KISS principle](https://en.wikipedia.org/wiki/KISS_principle),

View file

@ -1,23 +1,23 @@
+++
title = "Goals and use cases"
weight = 5
weight = 10
+++
## Goals and non-goals
Garage is a lightweight geo-distributed data store that implements the
[Amazon S3](https://docs.aws.amazon.com/AmazonS3/latest/API/Welcome.html)
object storage protocole. It enables applications to store large blobs such
object storage protocol. It enables applications to store large blobs such
as pictures, video, images, documents, etc., in a redundant multi-node
setting. S3 is versatile enough to also be used to publish a static
website.
Garage is an opinionated object storage solutoin, we focus on the following **desirable properties**:
- **Internet enabled**: made for multi-sites (eg. datacenters, offices, households, etc.) interconnected through regular Internet connections.
- **Self-contained & lightweight**: works everywhere and integrates well in existing environments to target [hyperconverged infrastructures](https://en.wikipedia.org/wiki/Hyper-converged_infrastructure).
- **Highly resilient**: highly resilient to network failures, network latency, disk failures, sysadmin failures.
- **Simple**: simple to understand, simple to operate, simple to debug.
- **Internet enabled**: made for multi-sites (eg. datacenters, offices, households, etc.) interconnected through regular Internet connections.
We also noted that the pursuit of some other goals are detrimental to our initial goals.
The following has been identified as **non-goals** (if these points matter to you, you should not use Garage):

View file

@ -20,6 +20,49 @@ In the meantime, you can find some information at the following links:
- [an old design draft](@/documentation/working-documents/design-draft.md)
## Request routing logic
Data retrieval requests to Garage endpoints (S3 API and websites) are resolved
to an individual object in a bucket. Since objects are replicated to multiple nodes
Garage must ensure consistency before answering the request.
### Using quorum to ensure consistency
Garage ensures consistency by attempting to establish a quorum with the
data nodes responsible for the object. When a majority of the data nodes
have provided metadata on a object Garage can then answer the request.
When a request arrives Garage will, assuming the recommended 3 replicas, perform the following actions:
- Make a request to the two preferred nodes for object metadata
- Try the third node if one of the two initial requests fail
- Check that the metadata from at least 2 nodes match
- Check that the object hasn't been marked deleted
- Answer the request with inline data from metadata if object is small enough
- Or get data blocks from the preferred nodes and answer using the assembled object
Garage dynamically determines which nodes to query based on health, preference, and
which nodes actually host a given data. Garage has no concept of "primary" so any
healthy node with the data can be used as long as a quorum is reached for the metadata.
### Node health
Garage keeps a TCP session open to each node in the cluster and periodically pings them. If a connection
cannot be established, or a node fails to answer a number of pings, the target node is marked as failed.
Failed nodes are not used for quorum or other internal requests.
### Node preference
Garage prioritizes which nodes to query according to a few criteria:
- A node always prefers itself if it can answer the request
- Then the node prioritizes nodes in the same zone
- Finally the nodes with the lowest latency are prioritized
For further reading on the cluster structure look at the [gateway](@/documentation/cookbook/gateways.md)
and [cluster layout management](@/documentation/reference-manual/layout.md) pages.
## Garbage collection
A faulty garbage collection procedure has been the cause of

View file

@ -1,6 +1,6 @@
+++
title = "Related work"
weight = 15
weight = 50
+++
## Context

View file

@ -9,6 +9,15 @@ Let's start your Garage journey!
In this chapter, we explain how to deploy Garage as a single-node server
and how to interact with it.
## What is Garage?
Before jumping in, you might be interested in reading the following pages:
- [Goals and use cases](@/documentation/design/goals.md)
- [List of features](@/documentation/reference-manual/features.md)
## Scope of this tutorial
Our goal is to introduce you to Garage's workflows.
Following this guide is recommended before moving on to
[configuring a multi-node cluster](@/documentation/cookbook/real-world.md).
@ -249,16 +258,6 @@ mc alias set \
--api S3v4
```
You must also add an environment variable to your configuration to
inform MinIO of our region (`garage` by default, corresponding to the `s3_region` parameter
in the configuration file).
The best way is to add the following snippet to your `$HOME/.bash_profile`
or `$HOME/.bashrc` file:
```bash
export MC_REGION=garage
```
### Use `mc`
You can not list buckets from `mc` currently.

View file

@ -0,0 +1,644 @@
+++
title = "Administration API"
weight = 60
+++
The Garage administration API is accessible through a dedicated server whose
listen address is specified in the `[admin]` section of the configuration
file (see [configuration file
reference](@/documentation/reference-manual/configuration.md))
**WARNING.** At this point, there is no comittement to stability of the APIs described in this document.
We will bump the version numbers prefixed to each API endpoint at each time the syntax
or semantics change, meaning that code that relies on these endpoint will break
when changes are introduced.
The Garage administration API was introduced in version 0.7.2, this document
does not apply to older versions of Garage.
## Access control
The admin API uses two different tokens for acces control, that are specified in the config file's `[admin]` section:
- `metrics_token`: the token for accessing the Metrics endpoint (if this token
is not set in the config file, the Metrics endpoint can be accessed without
access control);
- `admin_token`: the token for accessing all of the other administration
endpoints (if this token is not set in the config file, access to these
endpoints is disabled entirely).
These tokens are used as simple HTTP bearer tokens. In other words, to
authenticate access to an admin API endpoint, add the following HTTP header
to your request:
```
Authorization: Bearer <token>
```
## Administration API endpoints
### Metrics-related endpoints
#### Metrics `GET /metrics`
Returns internal Garage metrics in Prometheus format.
### Cluster operations
#### GetClusterStatus `GET /v0/status`
Returns the cluster's current status in JSON, including:
- ID of the node being queried and its version of the Garage daemon
- Live nodes
- Currently configured cluster layout
- Staged changes to the cluster layout
Example response body:
```json
{
"node": "ec79480e0ce52ae26fd00c9da684e4fa56658d9c64cdcecb094e936de0bfe71f",
"garage_version": "git:v0.8.0",
"knownNodes": {
"ec79480e0ce52ae26fd00c9da684e4fa56658d9c64cdcecb094e936de0bfe71f": {
"addr": "10.0.0.11:3901",
"is_up": true,
"last_seen_secs_ago": 9,
"hostname": "node1"
},
"4a6ae5a1d0d33bf895f5bb4f0a418b7dc94c47c0dd2eb108d1158f3c8f60b0ff": {
"addr": "10.0.0.12:3901",
"is_up": true,
"last_seen_secs_ago": 1,
"hostname": "node2"
},
"23ffd0cdd375ebff573b20cc5cef38996b51c1a7d6dbcf2c6e619876e507cf27": {
"addr": "10.0.0.21:3901",
"is_up": true,
"last_seen_secs_ago": 7,
"hostname": "node3"
},
"e2ee7984ee65b260682086ec70026165903c86e601a4a5a501c1900afe28d84b": {
"addr": "10.0.0.22:3901",
"is_up": true,
"last_seen_secs_ago": 1,
"hostname": "node4"
}
},
"layout": {
"version": 12,
"roles": {
"ec79480e0ce52ae26fd00c9da684e4fa56658d9c64cdcecb094e936de0bfe71f": {
"zone": "dc1",
"capacity": 4,
"tags": [
"node1"
]
},
"4a6ae5a1d0d33bf895f5bb4f0a418b7dc94c47c0dd2eb108d1158f3c8f60b0ff": {
"zone": "dc1",
"capacity": 6,
"tags": [
"node2"
]
},
"23ffd0cdd375ebff573b20cc5cef38996b51c1a7d6dbcf2c6e619876e507cf27": {
"zone": "dc2",
"capacity": 10,
"tags": [
"node3"
]
}
},
"stagedRoleChanges": {
"e2ee7984ee65b260682086ec70026165903c86e601a4a5a501c1900afe28d84b": {
"zone": "dc2",
"capacity": 5,
"tags": [
"node4"
]
}
}
}
}
```
#### ConnectClusterNodes `POST /v0/connect`
Instructs this Garage node to connect to other Garage nodes at specified addresses.
Example request body:
```json
[
"ec79480e0ce52ae26fd00c9da684e4fa56658d9c64cdcecb094e936de0bfe71f@10.0.0.11:3901",
"4a6ae5a1d0d33bf895f5bb4f0a418b7dc94c47c0dd2eb108d1158f3c8f60b0ff@10.0.0.12:3901"
]
```
The format of the string for a node to connect to is: `<node ID>@<ip address>:<port>`, same as in the `garage node connect` CLI call.
Example response:
```json
[
{
"success": true,
"error": null
},
{
"success": false,
"error": "Handshake error"
}
]
```
#### GetClusterLayout `GET /v0/layout`
Returns the cluster's current layout in JSON, including:
- Currently configured cluster layout
- Staged changes to the cluster layout
(the info returned by this endpoint is a subset of the info returned by GetClusterStatus)
Example response body:
```json
{
"version": 12,
"roles": {
"ec79480e0ce52ae26fd00c9da684e4fa56658d9c64cdcecb094e936de0bfe71f": {
"zone": "dc1",
"capacity": 4,
"tags": [
"node1"
]
},
"4a6ae5a1d0d33bf895f5bb4f0a418b7dc94c47c0dd2eb108d1158f3c8f60b0ff": {
"zone": "dc1",
"capacity": 6,
"tags": [
"node2"
]
},
"23ffd0cdd375ebff573b20cc5cef38996b51c1a7d6dbcf2c6e619876e507cf27": {
"zone": "dc2",
"capacity": 10,
"tags": [
"node3"
]
}
},
"stagedRoleChanges": {
"e2ee7984ee65b260682086ec70026165903c86e601a4a5a501c1900afe28d84b": {
"zone": "dc2",
"capacity": 5,
"tags": [
"node4"
]
}
}
}
```
#### UpdateClusterLayout `POST /v0/layout`
Send modifications to the cluster layout. These modifications will
be included in the staged role changes, visible in subsequent calls
of `GetClusterLayout`. Once the set of staged changes is satisfactory,
the user may call `ApplyClusterLayout` to apply the changed changes,
or `Revert ClusterLayout` to clear all of the staged changes in
the layout.
Request body format:
```json
{
<node_id>: {
"capacity": <new_capacity>,
"zone": <new_zone>,
"tags": [
<new_tag>,
...
]
},
<node_id_to_remove>: null,
...
}
```
Contrary to the CLI that may update only a subset of the fields
`capacity`, `zone` and `tags`, when calling this API all of these
values must be specified.
#### ApplyClusterLayout `POST /v0/layout/apply`
Applies to the cluster the layout changes currently registered as
staged layout changes.
Request body format:
```json
{
"version": 13
}
```
Similarly to the CLI, the body must include the version of the new layout
that will be created, which MUST be 1 + the value of the currently
existing layout in the cluster.
#### RevertClusterLayout `POST /v0/layout/revert`
Clears all of the staged layout changes.
Request body format:
```json
{
"version": 13
}
```
Reverting the staged changes is done by incrementing the version number
and clearing the contents of the staged change list.
Similarly to the CLI, the body must include the incremented
version number, which MUST be 1 + the value of the currently
existing layout in the cluster.
### Access key operations
#### ListKeys `GET /v0/key`
Returns all API access keys in the cluster.
Example response:
```json
[
{
"id": "GK31c2f218a2e44f485b94239e",
"name": "test"
},
{
"id": "GKe10061ac9c2921f09e4c5540",
"name": "test2"
}
]
```
#### CreateKey `POST /v0/key`
Creates a new API access key.
Request body format:
```json
{
"name": "NameOfMyKey"
}
```
#### ImportKey `POST /v0/key/import`
Imports an existing API key.
Request body format:
```json
{
"accessKeyId": "GK31c2f218a2e44f485b94239e",
"secretAccessKey": "b892c0665f0ada8a4755dae98baa3b133590e11dae3bcc1f9d769d67f16c3835",
"name": "NameOfMyKey"
}
```
#### GetKeyInfo `GET /v0/key?id=<acces key id>`
#### GetKeyInfo `GET /v0/key?search=<pattern>`
Returns information about the requested API access key.
If `id` is set, the key is looked up using its exact identifier (faster).
If `search` is set, the key is looked up using its name or prefix
of identifier (slower, all keys are enumerated to do this).
Example response:
```json
{
"name": "test",
"accessKeyId": "GK31c2f218a2e44f485b94239e",
"secretAccessKey": "b892c0665f0ada8a4755dae98baa3b133590e11dae3bcc1f9d769d67f16c3835",
"permissions": {
"createBucket": false
},
"buckets": [
{
"id": "70dc3bed7fe83a75e46b66e7ddef7d56e65f3c02f9f80b6749fb97eccb5e1033",
"globalAliases": [
"test2"
],
"localAliases": [],
"permissions": {
"read": true,
"write": true,
"owner": false
}
},
{
"id": "d7452a935e663fc1914f3a5515163a6d3724010ce8dfd9e4743ca8be5974f995",
"globalAliases": [
"test3"
],
"localAliases": [],
"permissions": {
"read": true,
"write": true,
"owner": false
}
},
{
"id": "e6a14cd6a27f48684579ec6b381c078ab11697e6bc8513b72b2f5307e25fff9b",
"globalAliases": [],
"localAliases": [
"test"
],
"permissions": {
"read": true,
"write": true,
"owner": true
}
},
{
"id": "96470e0df00ec28807138daf01915cfda2bee8eccc91dea9558c0b4855b5bf95",
"globalAliases": [
"alex"
],
"localAliases": [],
"permissions": {
"read": true,
"write": true,
"owner": true
}
}
]
}
```
#### DeleteKey `DELETE /v0/key?id=<acces key id>`
Deletes an API access key.
#### UpdateKey `POST /v0/key?id=<acces key id>`
Updates information about the specified API access key.
Request body format:
```json
{
"name": "NameOfMyKey",
"allow": {
"createBucket": true,
},
"deny": {}
}
```
All fields (`name`, `allow` and `deny`) are optionnal.
If they are present, the corresponding modifications are applied to the key, otherwise nothing is changed.
The possible flags in `allow` and `deny` are: `createBucket`.
### Bucket operations
#### ListBuckets `GET /v0/bucket`
Returns all storage buckets in the cluster.
Example response:
```json
[
{
"id": "70dc3bed7fe83a75e46b66e7ddef7d56e65f3c02f9f80b6749fb97eccb5e1033",
"globalAliases": [
"test2"
],
"localAliases": []
},
{
"id": "96470e0df00ec28807138daf01915cfda2bee8eccc91dea9558c0b4855b5bf95",
"globalAliases": [
"alex"
],
"localAliases": []
},
{
"id": "d7452a935e663fc1914f3a5515163a6d3724010ce8dfd9e4743ca8be5974f995",
"globalAliases": [
"test3"
],
"localAliases": []
},
{
"id": "e6a14cd6a27f48684579ec6b381c078ab11697e6bc8513b72b2f5307e25fff9b",
"globalAliases": [],
"localAliases": [
{
"accessKeyId": "GK31c2f218a2e44f485b94239e",
"alias": "test"
}
]
}
]
```
#### GetBucketInfo `GET /v0/bucket?id=<bucket id>`
#### GetBucketInfo `GET /v0/bucket?globalAlias=<alias>`
Returns information about the requested storage bucket.
If `id` is set, the bucket is looked up using its exact identifier.
If `globalAlias` is set, the bucket is looked up using its global alias.
(both are fast)
Example response:
```json
{
"id": "afa8f0a22b40b1247ccd0affb869b0af5cff980924a20e4b5e0720a44deb8d39",
"globalAliases": [],
"websiteAccess": false,
"websiteConfig": null,
"keys": [
{
"accessKeyId": "GK31c2f218a2e44f485b94239e",
"name": "Imported key",
"permissions": {
"read": true,
"write": true,
"owner": true
},
"bucketLocalAliases": [
"debug"
]
}
],
"objects": 14827,
"bytes": 13189855625,
"unfinshedUploads": 0,
"quotas": {
"maxSize": null,
"maxObjects": null
}
}
```
#### CreateBucket `POST /v0/bucket`
Creates a new storage bucket.
Request body format:
```json
{
"globalAlias": "NameOfMyBucket"
}
```
OR
```json
{
"localAlias": {
"accessKeyId": "GK31c2f218a2e44f485b94239e",
"alias": "NameOfMyBucket",
"allow": {
"read": true,
"write": true,
"owner": false
}
}
}
```
OR
```json
{}
```
Creates a new bucket, either with a global alias, a local one,
or no alias at all.
Technically, you can also specify both `globalAlias` and `localAlias` and that would create
two aliases, but I don't see why you would want to do that.
#### DeleteBucket `DELETE /v0/bucket?id=<bucket id>`
Deletes a storage bucket. A bucket cannot be deleted if it is not empty.
Warning: this will delete all aliases associated with the bucket!
#### UpdateBucket `PUT /v0/bucket?id=<bucket id>`
Updates configuration of the given bucket.
Request body format:
```json
{
"websiteAccess": {
"enabled": true,
"indexDocument": "index.html",
"errorDocument": "404.html"
},
"quotas": {
"maxSize": 19029801,
"maxObjects": null,
}
}
```
All fields (`websiteAccess` and `quotas`) are optionnal.
If they are present, the corresponding modifications are applied to the bucket, otherwise nothing is changed.
In `websiteAccess`: if `enabled` is `true`, `indexDocument` must be specified.
The field `errorDocument` is optional, if no error document is set a generic
error message is displayed when errors happen. Conversely, if `enabled` is
`false`, neither `indexDocument` nor `errorDocument` must be specified.
In `quotas`: new values of `maxSize` and `maxObjects` must both be specified, or set to `null`
to remove the quotas. An absent value will be considered the same as a `null`. It is not possible
to change only one of the two quotas.
### Operations on permissions for keys on buckets
#### BucketAllowKey `POST /v0/bucket/allow`
Allows a key to do read/write/owner operations on a bucket.
Request body format:
```json
{
"bucketId": "e6a14cd6a27f48684579ec6b381c078ab11697e6bc8513b72b2f5307e25fff9b",
"accessKeyId": "GK31c2f218a2e44f485b94239e",
"permissions": {
"read": true,
"write": true,
"owner": true
},
}
```
Flags in `permissions` which have the value `true` will be activated.
Other flags will remain unchanged.
#### BucketDenyKey `POST /v0/bucket/deny`
Denies a key from doing read/write/owner operations on a bucket.
Request body format:
```json
{
"bucketId": "e6a14cd6a27f48684579ec6b381c078ab11697e6bc8513b72b2f5307e25fff9b",
"accessKeyId": "GK31c2f218a2e44f485b94239e",
"permissions": {
"read": false,
"write": false,
"owner": true
},
}
```
Flags in `permissions` which have the value `true` will be deactivated.
Other flags will remain unchanged.
### Operations on bucket aliases
#### GlobalAliasBucket `PUT /v0/bucket/alias/global?id=<bucket id>&alias=<global alias>`
Empty body. Creates a global alias for a bucket.
#### GlobalUnaliasBucket `DELETE /v0/bucket/alias/global?id=<bucket id>&alias=<global alias>`
Removes a global alias for a bucket.
#### LocalAliasBucket `PUT /v0/bucket/alias/local?id=<bucket id>&accessKeyId=<access key ID>&alias=<local alias>`
Empty body. Creates a local alias for a bucket in the namespace of a specific access key.
#### LocalUnaliasBucket `DELETE /v0/bucket/alias/local?id=<bucket id>&accessKeyId<access key ID>&alias=<local alias>`
Removes a local alias for a bucket in the namespace of a specific access key.

View file

@ -1,6 +1,6 @@
+++
title = "Garage CLI"
weight = 15
weight = 30
+++
The Garage CLI is mostly self-documented. Make use of the `help` subcommand

View file

@ -1,6 +1,6 @@
+++
title = "Configuration file format"
weight = 5
weight = 20
+++
Here is an example `garage.toml` configuration file that illustrates all of the possible options:
@ -9,6 +9,8 @@ Here is an example `garage.toml` configuration file that illustrates all of the
metadata_dir = "/var/lib/garage/meta"
data_dir = "/var/lib/garage/data"
db_engine = "lmdb"
block_size = 1048576
replication_mode = "3"
@ -47,6 +49,8 @@ root_domain = ".web.garage"
[admin]
api_bind_addr = "0.0.0.0:3903"
metrics_token = "cacce0b2de4bc2d9f5b5fdff551e01ac1496055aed248202d415398987e35f81"
admin_token = "ae8cb40ea7368bbdbb6430af11cca7da833d3458a5f52086f4e805a570fb5c2a"
trace_sink = "http://localhost:4317"
```
@ -69,6 +73,47 @@ This folder can be placed on an HDD. The space available for `data_dir`
should be counted to determine a node's capacity
when [adding it to the cluster layout](@/documentation/cookbook/real-world.md).
### `db_engine` (since `v0.8.0`)
By default, Garage uses the Sled embedded database library
to store its metadata on-disk. Since `v0.8.0`, Garage can use alternative storage backends as follows:
| DB engine | `db_engine` value | Database path |
| --------- | ----------------- | ------------- |
| [Sled](https://sled.rs) | `"sled"` | `<metadata_dir>/db/` |
| [LMDB](https://www.lmdb.tech) | `"lmdb"` | `<metadata_dir>/db.lmdb/` |
| [Sqlite](https://sqlite.org) | `"sqlite"` | `<metadata_dir>/db.sqlite` |
Performance characteristics of the different DB engines are as follows:
- Sled: the default database engine, which tends to produce
large data files and also has performance issues, especially when the metadata folder
is on a traditionnal HDD and not on SSD.
- LMDB: the recommended alternative on 64-bit systems,
much more space-efficiant and slightly faster. Note that the data format of LMDB is not portable
between architectures, so for instance the Garage database of an x86-64
node cannot be moved to an ARM64 node. Also note that, while LMDB can technically be used on 32-bit systems,
this will limit your node to very small database sizes due to how LMDB works; it is therefore not recommended.
- Sqlite: Garage supports Sqlite as a storage backend for metadata,
however it may have issues and is also very slow in its current implementation,
so it is not recommended to be used for now.
It is possible to convert Garage's metadata directory from one format to another with a small utility named `convert_db`,
which can be downloaded at the following locations:
[for amd64](https://garagehq.deuxfleurs.fr/_releases/convert_db/amd64/convert_db),
[for i386](https://garagehq.deuxfleurs.fr/_releases/convert_db/i386/convert_db),
[for arm64](https://garagehq.deuxfleurs.fr/_releases/convert_db/arm64/convert_db),
[for arm](https://garagehq.deuxfleurs.fr/_releases/convert_db/arm/convert_db).
The `convert_db` utility is used as folows:
```
convert-db -a <input db engine> -i <input db path> \
-b <output db engine> -o <output db path>
```
Make sure to specify the full database path as presented in the table above,
and not just the path to the metadata directory.
### `block_size`
Garage splits stored objects in consecutive chunks of size `block_size`
@ -326,10 +371,24 @@ Garage has a few administration capabilities, in particular to allow remote moni
### `api_bind_addr`
If specified, Garage will bind an HTTP server to this port and address, on
which it will listen to requests for administration features. Currently,
this endpoint only exposes Garage metrics in the Prometheus format at
`/metrics`. This endpoint is not authenticated. In the future, bucket and
access key management might be possible by REST calls to this endpoint.
which it will listen to requests for administration features.
See [administration API reference](@/documentation/reference-manual/admin-api.md) to learn more about these features.
### `metrics_token` (since version 0.7.2)
The token for accessing the Metrics endpoint. If this token is not set in
the config file, the Metrics endpoint can be accessed without access
control.
You can use any random string for this value. We recommend generating a random token with `openssl rand -hex 32`.
### `admin_token` (since version 0.7.2)
The token for accessing all of the other administration endpoints. If this
token is not set in the config file, access to these endpoints is disabled
entirely.
You can use any random string for this value. We recommend generating a random token with `openssl rand -hex 32`.
### `trace_sink`

View file

@ -0,0 +1,125 @@
+++
title = "List of Garage features"
weight = 10
+++
### S3 API
The main goal of Garage is to provide an object storage service that is compatible with the
[S3 API](https://docs.aws.amazon.com/AmazonS3/latest/API/Welcome.html) from Amazon Web Services.
We try to adhere as strictly as possible to the semantics of the API as implemented by Amazon
and other vendors such as Minio or CEPH.
Of course Garage does not implement the full span of API endpoints that AWS S3 does;
the exact list of S3 features implemented by Garage can be found [on our S3 compatibility page](@/documentation/reference-manual/s3-compatibility.md).
### Geo-distribution
Garage allows you to store copies of your data in multiple geographical locations in order to maximize resilience
to adverse events, such as network/power outages or hardware failures.
This allows Garage to run very well even at home, using consumer-grade Internet connectivity
(such as FTTH) and power, as long as cluster nodes can be spawned at several physical locations.
Garage exploits knowledge of the capacity and physical location of each storage node to design
a storage plan that best exploits the available storage capacity while satisfying the geo-distributed replication constraint.
To learn more about geo-distributed Garage clusters,
read our documentation on [setting up a real-world deployment](@/documentation/cookbook/real-world.md).
### Standalone/self-contained
Garage is extremely simple to deploy, and does not depend on any external service to run.
This makes setting up and administering storage clusters, we hope, as easy as it could be.
### Flexible topology
A Garage cluster can very easily evolve over time, as storage nodes are added or removed.
Garage will automatically rebalance data between nodes as needed to ensure the desired number of copies.
Read about cluster layout management [here](@/documentation/reference-manual/layout.md).
### No RAFT slowing you down
It might seem strange to tout the absence of something as a desirable feature,
but this is in fact a very important point! Garage does not use RAFT or another
consensus algorithm internally to order incoming requests: this means that all requests
directed to a Garage cluster can be handled independently of one another instead
of going through a central bottleneck (the leader node).
As a consequence, requests can be handled much faster, even in cases where latency
between cluster nodes is important (see our [benchmarks](@/documentation/design/benchmarks/index.md) for data on this).
This is particularly usefull when nodes are far from one another and talk to one other through standard Internet connections.
### Several replication modes
Garage supports a variety of replication modes, with 1 copy, 2 copies or 3 copies of your data,
and with various levels of consistency, in order to adapt to a variety of usage scenarios.
Read our reference page on [supported replication modes](@/documentation/reference-manual/configuration.md#replication-mode)
to select the replication mode best suited to your use case (hint: in most cases, `replication_mode = "3"` is what you want).
### Web server for static websites
A storage bucket can easily be configured to be served directly by Garage as a static web site.
Domain names for multiple websites directly map to bucket names, making it easy to build
a platform for your users to autonomously build and host their websites over Garage.
Surprisingly, none of the other alternative S3 implementations we surveyed (such as Minio
or CEPH) support publishing static websites from S3 buckets, a feature that is however
directly inherited from S3 on AWS.
Read more on our [dedicated documentation page](@/documentation/cookbook/exposing-websites.md).
### Bucket names as aliases
In Garage, a bucket may have several names, known as aliases.
Aliases can easily be added and removed on demand:
this allows to easily rename buckets if needed
without having to copy all of their content, something that cannot be done on AWS.
For buckets served as static websites, having multiple aliases for a bucket can allow
exposing the same content under different domain names.
Garage also supports bucket aliases which are local to a single user:
this allows different users to have different buckets with the same name, thus avoiding naming collisions.
This can be helpfull for instance if you want to write an application that creates per-user buckets with always the same name.
This feature is totally invisible to S3 clients and does not break compatibility with AWS.
### Cluster administration API
Garage provides a fully-fledged REST API to administer your cluster programatically.
Functionnality included in the admin API include: setting up and monitoring
cluster nodes, managing access credentials, and managing storage buckets and bucket aliases.
A full reference of the administration API is available [here](@/documentation/reference-manual/admin-api.md).
### Metrics and traces
Garage makes some internal metrics available in the Prometheus data format,
which allows you to build interactive dashboards to visualize the load and internal state of your storage cluster.
For developpers and performance-savvy administrators,
Garage also supports exporting traces of what it does internally in OpenTelemetry format.
This allows to monitor the time spent at various steps of the processing of requests,
in order to detect potential performance bottlenecks.
### Kubernetes and Nomad integrations
Garage can automatically discover other nodes in the cluster thanks to integration
with orchestrators such as Kubernetes and Nomad (when used with Consul).
This eases the configuration of your cluster as it removes one step where nodes need
to be manually connected to one another.
### Support for changing IP addresses
As long as all of your nodes don't thange their IP address at the same time,
Garage should be able to tolerate nodes with changing/dynamic IP addresses,
as nodes will regularly exchange the IP addresses of their peers and try to
reconnect using newer addresses when existing connections are broken.
### K2V API (experimental)
As part of an ongoing research project, Garage can expose an experimental key/value storage API called K2V.
K2V is made for the storage and retrieval of many small key/value pairs that need to be processed in bulk.
This completes the S3 API with an alternative that can be used to easily store and access metadata
related to objects stored in an S3 bucket.
In the context of our research project, [Aérogramme](https://aerogramme.deuxfleurs.fr),
K2V is used to provide metadata and log storage for operations on encrypted e-mail storage.
Learn more on the specification of K2V [here](https://git.deuxfleurs.fr/Deuxfleurs/garage/src/branch/k2v/doc/drafts/k2v-spec.md)
and on how to enable it in Garage [here](@/documentation/reference-manual/k2v.md).

View file

@ -0,0 +1,58 @@
+++
title = "K2V"
weight = 70
+++
Starting with version 0.7.2, Garage introduces an optionnal feature, K2V,
which is an alternative storage API designed to help efficiently store
many small values in buckets (in opposition to S3 which is more designed
to store large blobs).
K2V is currently disabled at compile time in all builds, as the
specification is still subject to changes. To build a Garage version with
K2V, the Cargo feature flag `k2v` must be activated. Special builds with
the `k2v` feature flag enabled can be obtained from our download page under
"Extra builds": such builds can be identified easily as their tag name ends
with `-k2v` (example: `v0.7.2-k2v`).
The specification of the K2V API can be found
[here](https://git.deuxfleurs.fr/Deuxfleurs/garage/src/branch/k2v/doc/drafts/k2v-spec.md).
This document also includes a high-level overview of K2V's design.
The K2V API uses AWSv4 signatures for authentification, same as the S3 API.
The AWS region used for signature calculation is always the same as the one
defined for the S3 API in the config file.
## Enabling and using K2V
To enable K2V, download and run a build that has the `k2v` feature flag
enabled, or produce one yourself. Then, add the following section to your
configuration file:
```toml
[k2v_api]
api_bind_addr = "<ip>:<port>"
```
Please select a port number that is not already in use by another API
endpoint (S3 api, admin API) or by the RPC server.
We provide an early-stage K2V client library for Rust which can be imported by adding the following to your `Cargo.toml` file:
```toml
k2v-client = { git = "https://git.deuxfleurs.fr/Deuxfleurs/garage.git" }
```
There is also a simple CLI utility which can be built from source in the
following way:
```sh
git clone https://git.deuxfleurs.fr/Deuxfleurs/garage.git
cd garage/src/k2v-client
cargo build --features cli --bin k2v-cli
```
The CLI utility is self-documented, run `k2v-cli --help` to learn how to use
it. There is also a short README.md in the `src/k2v-client` folder with some
instructions.

View file

@ -1,6 +1,6 @@
+++
title = "Cluster layout management"
weight = 10
weight = 50
+++
The cluster layout in Garage is a table that assigns to each node a role in

View file

@ -1,45 +0,0 @@
+++
title = "Request routing logic"
weight = 10
+++
Data retrieval requests to Garage endpoints (S3 API and websites) are resolved
to an individual object in a bucket. Since objects are replicated to multiple nodes
Garage must ensure consistency before answering the request.
## Using quorum to ensure consistency
Garage ensures consistency by attempting to establish a quorum with the
data nodes responsible for the object. When a majority of the data nodes
have provided metadata on a object Garage can then answer the request.
When a request arrives Garage will, assuming the recommended 3 replicas, perform the following actions:
- Make a request to the two preferred nodes for object metadata
- Try the third node if one of the two initial requests fail
- Check that the metadata from at least 2 nodes match
- Check that the object hasn't been marked deleted
- Answer the request with inline data from metadata if object is small enough
- Or get data blocks from the preferred nodes and answer using the assembled object
Garage dynamically determines which nodes to query based on health, preference, and
which nodes actually host a given data. Garage has no concept of "primary" so any
healthy node with the data can be used as long as a quorum is reached for the metadata.
## Node health
Garage keeps a TCP session open to each node in the cluster and periodically pings them. If a connection
cannot be established, or a node fails to answer a number of pings, the target node is marked as failed.
Failed nodes are not used for quorum or other internal requests.
## Node preference
Garage prioritizes which nodes to query according to a few criteria:
- A node always prefers itself if it can answer the request
- Then the node prioritizes nodes in the same zone
- Finally the nodes with the lowest latency are prioritized
For further reading on the cluster structure look at the [gateway](@/documentation/cookbook/gateways.md)
and [cluster layout management](@/documentation/reference-manual/layout.md) pages.

View file

@ -1,53 +1,79 @@
+++
title = "S3 Compatibility status"
weight = 20
weight = 40
+++
## Endpoint implementation
## DISCLAIMER
All APIs that are missing on Garage will return a 501 Not Implemented.
Some `x-amz-` headers are not implemented.
**The compatibility list for other platforms is given only for informational
purposes and based on available documentation.** They are sometimes completed,
in a best effort approach, with the source code and inputs from maintainers
when documentation is lacking. We are not proactively monitoring new versions
of each software: check the modification history to know when the page has been
updated for the last time. Some entries will be inexact or outdated. For any
serious decision, you must make your own tests.
**The official documentation of each project can be accessed by clicking on the
project name in the column header.**
*The compatibility list for other platforms is given only for information purposes and based on available documentation. Some entries might be inexact. Feel free to open a PR to fix this table. Minio is missing because they do not provide a public S3 compatibility list.*
Feel free to open a PR to suggest fixes this table. Minio is missing because they do not provide a public S3 compatibility list.
### Features
## Update history
- 2022-02-07 - First version of this page
- 2022-05-25 - Many Ceph S3 endpoints are not documented but implemented. Following a notification from the Ceph community, we added them.
## High-level features
| Feature | Garage | [Openstack Swift](https://docs.openstack.org/swift/latest/s3_compat.html) | [Ceph Object Gateway](https://docs.ceph.com/en/latest/radosgw/s3/) | [Riak CS](https://docs.riak.com/riak/cs/2.1.1/references/apis/storage/s3/index.html) | [OpenIO](https://docs.openio.io/latest/source/arch-design/s3_compliancy.html) |
|------------------------------|----------------------------------|-----------------|---------------|---------|-----|
| [signature v2](https://docs.aws.amazon.com/general/latest/gr/signature-version-2.html) (deprecated) | ❌ Missing | ✅ | ❌ | ✅ | ✅ |
| [signature v2](https://docs.aws.amazon.com/general/latest/gr/signature-version-2.html) (deprecated) | ❌ Missing | ✅ | | ✅ | ✅ |
| [signature v4](https://docs.aws.amazon.com/AmazonS3/latest/API/sig-v4-authenticating-requests.html) | ✅ Implemented | ✅ | ✅ | ❌ | ✅ |
| [URL path-style](https://docs.aws.amazon.com/AmazonS3/latest/userguide/VirtualHosting.html#path-style-access) (eg. `host.tld/bucket/key`) | ✅ Implemented | ✅ | ✅ | ❓| ✅ |
| [URL vhost-style](https://docs.aws.amazon.com/AmazonS3/latest/userguide/VirtualHosting.html#virtual-hosted-style-access) URL (eg. `bucket.host.tld/key`) | ✅ Implemented | ❌| ✅| ✅ | ✅ |
| [Presigned URLs](https://docs.aws.amazon.com/AmazonS3/latest/userguide/ShareObjectPreSignedURL.html) | ✅ Implemented | ❌| ✅ | ✅ | ✅(❓) |
*Note:* OpenIO does not says if it supports presigned URLs. Because it is part of signature v4 and they claim they support it without additional precisions, we suppose that OpenIO supports presigned URLs.
*Note:* OpenIO does not says if it supports presigned URLs. Because it is part
of signature v4 and they claim they support it without additional precisions,
we suppose that OpenIO supports presigned URLs.
## Endpoint implementation
All endpoints that are missing on Garage will return a 501 Not Implemented.
Some `x-amz-` headers are not implemented.
### Core endoints
| Endpoint | Garage | [Openstack Swift](https://docs.openstack.org/swift/latest/s3_compat.html) | [Ceph Object Gateway](https://docs.ceph.com/en/latest/radosgw/s3/) | [Riak CS](https://docs.riak.com/riak/cs/2.1.1/references/apis/storage/s3/index.html) | [OpenIO](https://docs.openio.io/latest/source/arch-design/s3_compliancy.html) |
| Endpoint | Garage | [Openstack Swift](https://docs.openstack.org/swift/latest/s3_compat.html) | [Ceph Object Gateway](https://docs.ceph.com/en/latest/radosgw/s3/) | [Riak CS](https://docs.riak.com/riak/cs/2.1.1/references/apis/storage/s3/index.html) | [OpenIO](https://docs.openio.io/latest/source/arch-design/s3_compliancy.html) |
|------------------------------|----------------------------------|-----------------|---------------|---------|-----|
| [CreateBucket](https://docs.aws.amazon.com/AmazonS3/latest/API/API_CreateBucket.html) | ✅ Implemented | ✅ | ✅ | ✅ | ✅ |
| [DeleteBucket](https://docs.aws.amazon.com/AmazonS3/latest/API/API_DeleteBucket.html) | ✅ Implemented | ✅ | ✅ | ✅ | ✅ |
| [GetBucketLocation](https://docs.aws.amazon.com/AmazonS3/latest/API/API_GetBucketLocation.html) | ✅ Implemented | ✅ | ✅ | ❌ | ✅ |
| [HeadBucket](https://docs.aws.amazon.com/AmazonS3/latest/API/API_HeadBucket.html) | ✅ Implemented | ✅ | ✅ | ✅ | ✅ |
| [ListBuckets](https://docs.aws.amazon.com/AmazonS3/latest/API/API_ListBuckets.html) | ✅ Implemented | ❌| ✅ | ✅ | ✅ |
| [HeadObject](https://docs.aws.amazon.com/AmazonS3/latest/API/API_HeadObject.html) | ✅ Implemented | ✅ | ✅ | ✅ | ✅ |
| [CopyObject](https://docs.aws.amazon.com/AmazonS3/latest/API/API_CopyObject.html) | ✅ Implemented | ✅ | ✅ | ✅ | ✅ |
| [HeadObject](https://docs.aws.amazon.com/AmazonS3/latest/API/API_HeadObject.html) | ✅ Implemented | ✅ | ✅ | ✅ | ✅ |
| [CopyObject](https://docs.aws.amazon.com/AmazonS3/latest/API/API_CopyObject.html) | ✅ Implemented | ✅ | ✅ | ✅ | ✅ |
| [DeleteObject](https://docs.aws.amazon.com/AmazonS3/latest/API/API_DeleteObject.html) | ✅ Implemented | ✅ | ✅ | ✅ | ✅ |
| [DeleteObjects](https://docs.aws.amazon.com/AmazonS3/latest/API/API_DeleteObjects.html) | ✅ Implemented | ✅ | ✅ | ✅ | ✅ |
| [GetObject](https://docs.aws.amazon.com/AmazonS3/latest/API/API_GetObject.html) | ✅ Implemented | ✅ | ✅ | ✅ | ✅ |
| [ListObjects](https://docs.aws.amazon.com/AmazonS3/latest/API/API_ListObjects.html) | ✅ Implemented (see details below) | ✅ | ✅ | ✅ | ❌|
| [ListObjectsV2](https://docs.aws.amazon.com/AmazonS3/latest/API/API_ListObjectsV2.html) | ✅ Implemented | ❌| | ❌| ✅ |
| [PostObject](https://docs.aws.amazon.com/AmazonS3/latest/API/RESTObjectPOST.html) (compatibility API) | ❌ Missing | ❌| ✅ | ❌| ❌|
| [ListObjectsV2](https://docs.aws.amazon.com/AmazonS3/latest/API/API_ListObjectsV2.html) | ✅ Implemented | ❌| | ❌| ✅ |
| [PostObject](https://docs.aws.amazon.com/AmazonS3/latest/API/RESTObjectPOST.html) | ✅ Implemented | ❌| ✅ | ❌| ❌|
| [PutObject](https://docs.aws.amazon.com/AmazonS3/latest/API/API_PutObject.html) | ✅ Implemented | ✅ | ✅ | ✅ | ✅ |
**ListObjects:** Implemented, but there isn't a very good specification of what `encoding-type=url` covers so there might be some encoding bugs. In our implementation the url-encoded fields are in the same in ListObjects as they are in ListObjectsV2.
**ListObjects:** Implemented, but there isn't a very good specification of what
`encoding-type=url` covers so there might be some encoding bugs. In our
implementation the url-encoded fields are in the same in ListObjects as they
are in ListObjectsV2.
*Note: Ceph API documentation is incomplete and miss at least HeadBucket and UploadPartCopy, but these endpoints are documented in [Red Hat Ceph Storage - Chapter 2. Ceph Object Gateway and the S3 API](https://access.redhat.com/documentation/en-us/red_hat_ceph_storage/4/html/developer_guide/ceph-object-gateway-and-the-s3-api)*
*Note: Ceph API documentation is incomplete and lacks at least HeadBucket and UploadPartCopy,
but these endpoints are documented in [Red Hat Ceph Storage - Chapter 2. Ceph Object Gateway and the S3 API](https://access.redhat.com/documentation/en-us/red_hat_ceph_storage/4/html/developer_guide/ceph-object-gateway-and-the-s3-api)*
### Multipart Upload endpoints
| Endpoint | Garage | [Openstack Swift](https://docs.openstack.org/swift/latest/s3_compat.html) | [Ceph Object Gateway](https://docs.ceph.com/en/latest/radosgw/s3/) | [Riak CS](https://docs.riak.com/riak/cs/2.1.1/references/apis/storage/s3/index.html) | [OpenIO](https://docs.openio.io/latest/source/arch-design/s3_compliancy.html) |
| Endpoint | Garage | [Openstack Swift](https://docs.openstack.org/swift/latest/s3_compat.html) | [Ceph Object Gateway](https://docs.ceph.com/en/latest/radosgw/s3/) | [Riak CS](https://docs.riak.com/riak/cs/2.1.1/references/apis/storage/s3/index.html) | [OpenIO](https://docs.openio.io/latest/source/arch-design/s3_compliancy.html) |
|------------------------------|----------------------------------|-----------------|---------------|---------|-----|
| [AbortMultipartUpload](https://docs.aws.amazon.com/AmazonS3/latest/API/API_AbortMultipartUpload.html) | ✅ Implemented | ✅ | ✅ | ✅ | ✅ |
| [CompleteMultipartUpload](https://docs.aws.amazon.com/AmazonS3/latest/API/API_CompleteMultipartUpload.html) | ✅ Implemented (see details below) | ✅ | ✅ | ✅ | ✅ |
@ -62,18 +88,18 @@ For more information, please refer to our [issue tracker](https://git.deuxfleurs
### Website endpoints
| Endpoint | Garage | [Openstack Swift](https://docs.openstack.org/swift/latest/s3_compat.html) | [Ceph Object Gateway](https://docs.ceph.com/en/latest/radosgw/s3/) | [Riak CS](https://docs.riak.com/riak/cs/2.1.1/references/apis/storage/s3/index.html) | [OpenIO](https://docs.openio.io/latest/source/arch-design/s3_compliancy.html) |
| Endpoint | Garage | [Openstack Swift](https://docs.openstack.org/swift/latest/s3_compat.html) | [Ceph Object Gateway](https://docs.ceph.com/en/latest/radosgw/s3/) | [Riak CS](https://docs.riak.com/riak/cs/2.1.1/references/apis/storage/s3/index.html) | [OpenIO](https://docs.openio.io/latest/source/arch-design/s3_compliancy.html) |
|------------------------------|----------------------------------|-----------------|---------------|---------|-----|
| [DeleteBucketWebsite](https://docs.aws.amazon.com/AmazonS3/latest/API/API_DeleteBucketWebsite.html) | ✅ Implemented | ❌| ❌| ❌| ❌|
| [GetBucketWebsite](https://docs.aws.amazon.com/AmazonS3/latest/API/API_GetBucketWebsite.html) | ✅ Implemented | ❌ | ❌| ❌| ❌|
| [PutBucketWebsite](https://docs.aws.amazon.com/AmazonS3/latest/API/API_PutBucketWebsite.html) | ⚠ Partially implemented (see below)| ❌| ❌| ❌| ❌|
| [DeleteBucketCors](https://docs.aws.amazon.com/AmazonS3/latest/API/API_DeleteBucketCors.html) | ✅ Implemented | ❌| | ❌| ✅ |
| [GetBucketCors](https://docs.aws.amazon.com/AmazonS3/latest/API/API_GetBucketCors.html) | ✅ Implemented | ❌ | | ❌| ✅ |
| [PutBucketCors](https://docs.aws.amazon.com/AmazonS3/latest/API/API_PutBucketCors.html) | ✅ Implemented | ❌| | ❌| ✅ |
| [DeleteBucketCors](https://docs.aws.amazon.com/AmazonS3/latest/API/API_DeleteBucketCors.html) | ✅ Implemented | ❌| | ❌| ✅ |
| [GetBucketCors](https://docs.aws.amazon.com/AmazonS3/latest/API/API_GetBucketCors.html) | ✅ Implemented | ❌ | | ❌| ✅ |
| [PutBucketCors](https://docs.aws.amazon.com/AmazonS3/latest/API/API_PutBucketCors.html) | ✅ Implemented | ❌| | ❌| ✅ |
**PutBucketWebsite:** Implemented, but only stores the index document suffix and the error document path. Redirects are not supported.
*Note: Ceph radosgw has some support for static websites but it is different from Amazon one plus it does not implement its configuration endpoints.*
*Note: Ceph radosgw has some support for static websites but it is different from the Amazon one. It also does not implement its configuration endpoints.*
### ACL, Policies endpoints
@ -81,29 +107,29 @@ Amazon has 2 access control mechanisms in S3: ACL (legacy) and policies (new one
Garage implements none of them, and has its own system instead, built around a per-access-key-per-bucket logic.
See Garage CLI reference manual to learn how to use Garage's permission system.
| Endpoint | Garage | [Openstack Swift](https://docs.openstack.org/swift/latest/s3_compat.html) | [Ceph Object Gateway](https://docs.ceph.com/en/latest/radosgw/s3/) | [Riak CS](https://docs.riak.com/riak/cs/2.1.1/references/apis/storage/s3/index.html) | [OpenIO](https://docs.openio.io/latest/source/arch-design/s3_compliancy.html) |
| Endpoint | Garage | [Openstack Swift](https://docs.openstack.org/swift/latest/s3_compat.html) | [Ceph Object Gateway](https://docs.ceph.com/en/latest/radosgw/s3/) | [Riak CS](https://docs.riak.com/riak/cs/2.1.1/references/apis/storage/s3/index.html) | [OpenIO](https://docs.openio.io/latest/source/arch-design/s3_compliancy.html) |
|------------------------------|----------------------------------|-----------------|---------------|---------|-----|
| [DeleteBucketPolicy](https://docs.aws.amazon.com/AmazonS3/latest/API/API_DeleteBucketPolicy.html) | ❌ Missing | ❌| | ✅ | ❌|
| [GetBucketPolicy](https://docs.aws.amazon.com/AmazonS3/latest/API/API_GetBucketPolicy.html) | ❌ Missing | ❌| | ⚠ | ❌|
| [GetBucketPolicyStatus](https://docs.aws.amazon.com/AmazonS3/latest/API/API_GetBucketPolicyStatus.html) | ❌ Missing | ❌| | ❌| ❌|
| [PutBucketPolicy](https://docs.aws.amazon.com/AmazonS3/latest/API/API_PutBucketPolicy.html) | ❌ Missing | ❌| | ⚠ | ❌|
| [DeleteBucketPolicy](https://docs.aws.amazon.com/AmazonS3/latest/API/API_DeleteBucketPolicy.html) | ❌ Missing | ❌| | ✅ | ❌|
| [GetBucketPolicy](https://docs.aws.amazon.com/AmazonS3/latest/API/API_GetBucketPolicy.html) | ❌ Missing | ❌| | ⚠ | ❌|
| [GetBucketPolicyStatus](https://docs.aws.amazon.com/AmazonS3/latest/API/API_GetBucketPolicyStatus.html) | ❌ Missing | ❌| | ❌| ❌|
| [PutBucketPolicy](https://docs.aws.amazon.com/AmazonS3/latest/API/API_PutBucketPolicy.html) | ❌ Missing | ❌| | ⚠ | ❌|
| [GetBucketAcl](https://docs.aws.amazon.com/AmazonS3/latest/API/API_GetBucketAcl.html) | ❌ Missing | ✅ | ✅ | ✅ | ✅ |
| [PutBucketAcl](https://docs.aws.amazon.com/AmazonS3/latest/API/API_PutBucketAcl.html) | ❌ Missing | ✅ | ✅ | ✅ | ✅ |
| [GetObjectAcl](https://docs.aws.amazon.com/AmazonS3/latest/API/API_GetObjectAcl.html) | ❌ Missing | ✅ | ✅ | ✅ | ✅ |
| [PutObjectAcl](https://docs.aws.amazon.com/AmazonS3/latest/API/API_PutObjectAcl.html) | ❌ Missing | ✅ | ✅ | ✅ | ✅ |
*Notes:* Ceph claims that it supports bucket policies but does not implement any Policy endpoints. They probably refer to their own permission system. Riak CS only supports a subset of the policy configuration.
*Notes:* Riak CS only supports a subset of the policy configuration.
### Versioning, Lifecycle endpoints
Garage does not support (yet) object versioning.
Garage does not (yet) support object versioning.
If you need this feature, please [share your use case in our dedicated issue](https://git.deuxfleurs.fr/Deuxfleurs/garage/issues/166).
| Endpoint | Garage | [Openstack Swift](https://docs.openstack.org/swift/latest/s3_compat.html) | [Ceph Object Gateway](https://docs.ceph.com/en/latest/radosgw/s3/) | [Riak CS](https://docs.riak.com/riak/cs/2.1.1/references/apis/storage/s3/index.html) | [OpenIO](https://docs.openio.io/latest/source/arch-design/s3_compliancy.html) |
| Endpoint | Garage | [Openstack Swift](https://docs.openstack.org/swift/latest/s3_compat.html) | [Ceph Object Gateway](https://docs.ceph.com/en/latest/radosgw/s3/) | [Riak CS](https://docs.riak.com/riak/cs/2.1.1/references/apis/storage/s3/index.html) | [OpenIO](https://docs.openio.io/latest/source/arch-design/s3_compliancy.html) |
|------------------------------|----------------------------------|-----------------|---------------|---------|-----|
| [DeleteBucketLifecycle](https://docs.aws.amazon.com/AmazonS3/latest/API/API_DeleteBucketLifecycle.html) | ❌ Missing | ❌| ✅| ❌| ✅|
| [GetBucketLifecycleConfiguration](https://docs.aws.amazon.com/AmazonS3/latest/API/API_GetBucketLifecycleConfiguration.html) | ❌ Missing | ❌| | ❌| ✅|
| [PutBucketLifecycleConfiguration](https://docs.aws.amazon.com/AmazonS3/latest/API/API_PutBucketLifecycleConfiguration.html) | ❌ Missing | ❌| | ❌| ✅|
| [GetBucketLifecycleConfiguration](https://docs.aws.amazon.com/AmazonS3/latest/API/API_GetBucketLifecycleConfiguration.html) | ❌ Missing | ❌| | ❌| ✅|
| [PutBucketLifecycleConfiguration](https://docs.aws.amazon.com/AmazonS3/latest/API/API_PutBucketLifecycleConfiguration.html) | ❌ Missing | ❌| | ❌| ✅|
| [GetBucketVersioning](https://docs.aws.amazon.com/AmazonS3/latest/API/API_GetBucketVersioning.html) | ❌ Stub (see below) | ✅| ✅ | ❌| ✅|
| [ListObjectVersions](https://docs.aws.amazon.com/AmazonS3/latest/API/API_ListObjectVersions.html) | ❌ Missing | ❌| ✅ | ❌| ✅|
| [PutBucketVersioning](https://docs.aws.amazon.com/AmazonS3/latest/API/API_PutBucketVersioning.html) | ❌ Missing | ❌| ✅| ❌| ✅|
@ -111,64 +137,65 @@ If you need this feature, please [share your use case in our dedicated issue](ht
**GetBucketVersioning:** Stub implementation (Garage does not yet support versionning so this always returns "versionning not enabled").
*Note: Ceph only supports `Expiration`, `NoncurrentVersionExpiration` and `AbortIncompleteMultipartUpload` on its Lifecycle endpoints.*
### Replication endpoints
Please open an issue if you have a use case for replication.
| Endpoint | Garage | [Openstack Swift](https://docs.openstack.org/swift/latest/s3_compat.html) | [Ceph Object Gateway](https://docs.ceph.com/en/latest/radosgw/s3/) | [Riak CS](https://docs.riak.com/riak/cs/2.1.1/references/apis/storage/s3/index.html) | [OpenIO](https://docs.openio.io/latest/source/arch-design/s3_compliancy.html) |
| Endpoint | Garage | [Openstack Swift](https://docs.openstack.org/swift/latest/s3_compat.html) | [Ceph Object Gateway](https://docs.ceph.com/en/latest/radosgw/s3/) | [Riak CS](https://docs.riak.com/riak/cs/2.1.1/references/apis/storage/s3/index.html) | [OpenIO](https://docs.openio.io/latest/source/arch-design/s3_compliancy.html) |
|------------------------------|----------------------------------|-----------------|---------------|---------|-----|
| [DeleteBucketReplication](https://docs.aws.amazon.com/AmazonS3/latest/API/API_DeleteBucketReplication.html) | ❌ Missing | ❌| ✅ | ❌| ❌|
| [GetBucketReplication](https://docs.aws.amazon.com/AmazonS3/latest/API/API_GetBucketReplication.html) | ❌ Missing | ❌| ✅ | ❌| ❌|
| [PutBucketReplication](https://docs.aws.amazon.com/AmazonS3/latest/API/API_PutBucketReplication.html) | ❌ Missing | ❌| ⚠ | ❌| ❌|
*Note: Ceph documentation briefly says that Ceph supports [replication though the S3 API](https://docs.ceph.com/en/latest/radosgw/multisite-sync-policy/#s3-replication-api) but with some limitations. Additionaly, replication endpoints are not documented in the S3 compatibility page so I don't know what kind of support we can expect.*
*Note: Ceph documentation briefly says that Ceph supports
[replication through the S3 API](https://docs.ceph.com/en/latest/radosgw/multisite-sync-policy/#s3-replication-api)
but with some limitations.
Additionaly, replication endpoints are not documented in the S3 compatibility page so I don't know what kind of support we can expect.*
### Locking objects
Amazon defines a concept of [object locking](https://docs.aws.amazon.com/AmazonS3/latest/userguide/object-lock.html) that can be achieved either through a Retention period or a Legal hold.
| Endpoint | Garage | [Openstack Swift](https://docs.openstack.org/swift/latest/s3_compat.html) | [Ceph Object Gateway](https://docs.ceph.com/en/latest/radosgw/s3/) | [Riak CS](https://docs.riak.com/riak/cs/2.1.1/references/apis/storage/s3/index.html) | [OpenIO](https://docs.openio.io/latest/source/arch-design/s3_compliancy.html) |
| Endpoint | Garage | [Openstack Swift](https://docs.openstack.org/swift/latest/s3_compat.html) | [Ceph Object Gateway](https://docs.ceph.com/en/latest/radosgw/s3/) | [Riak CS](https://docs.riak.com/riak/cs/2.1.1/references/apis/storage/s3/index.html) | [OpenIO](https://docs.openio.io/latest/source/arch-design/s3_compliancy.html) |
|------------------------------|----------------------------------|-----------------|---------------|---------|-----|
| [GetObjectLegalHold](https://docs.aws.amazon.com/AmazonS3/latest/API/API_GetObjectLegalHold.html) | ❌ Missing | ❌| ✅ | ❌| ❌|
| [PutObjectLegalHold](https://docs.aws.amazon.com/AmazonS3/latest/API/API_PutObjectLegalHold.html) | ❌ Missing | ❌| ✅ | ❌| ❌|
| [GetObjectRetention](https://docs.aws.amazon.com/AmazonS3/latest/API/API_GetObjectRetention.html) | ❌ Missing | ❌| ✅ | ❌| ❌|
| [PutObjectRetention](https://docs.aws.amazon.com/AmazonS3/latest/API/API_PutObjectRetention.html) | ❌ Missing | ❌| ✅ | ❌| ❌|
| [GetObjectLockConfiguration](https://docs.aws.amazon.com/AmazonS3/latest/API/API_GetObjectLockConfiguration.html) | ❌ Missing | ❌| | ❌| ❌|
| [PutObjectLockConfiguration](https://docs.aws.amazon.com/AmazonS3/latest/API/API_PutObjectLockConfiguration.html) | ❌ Missing | ❌| | ❌| ❌|
| [GetObjectLockConfiguration](https://docs.aws.amazon.com/AmazonS3/latest/API/API_GetObjectLockConfiguration.html) | ❌ Missing | ❌| | ❌| ❌|
| [PutObjectLockConfiguration](https://docs.aws.amazon.com/AmazonS3/latest/API/API_PutObjectLockConfiguration.html) | ❌ Missing | ❌| | ❌| ❌|
### (Server-side) encryption
We think that you can either encrypt your server partition or do client-side encryption, so we did not implement server-side encryption for Garage.
Please open an issue if you have a use case.
| Endpoint | Garage | [Openstack Swift](https://docs.openstack.org/swift/latest/s3_compat.html) | [Ceph Object Gateway](https://docs.ceph.com/en/latest/radosgw/s3/) | [Riak CS](https://docs.riak.com/riak/cs/2.1.1/references/apis/storage/s3/index.html) | [OpenIO](https://docs.openio.io/latest/source/arch-design/s3_compliancy.html) |
| Endpoint | Garage | [Openstack Swift](https://docs.openstack.org/swift/latest/s3_compat.html) | [Ceph Object Gateway](https://docs.ceph.com/en/latest/radosgw/s3/) | [Riak CS](https://docs.riak.com/riak/cs/2.1.1/references/apis/storage/s3/index.html) | [OpenIO](https://docs.openio.io/latest/source/arch-design/s3_compliancy.html) |
|------------------------------|----------------------------------|-----------------|---------------|---------|-----|
| [DeleteBucketEncryption](https://docs.aws.amazon.com/AmazonS3/latest/API/API_DeleteBucketEncryption.html) | ❌ Missing | ❌| | ❌| ❌|
| [GetBucketEncryption](https://docs.aws.amazon.com/AmazonS3/latest/API/API_GetBucketEncryption.html) | ❌ Missing | ❌| | ❌| ❌|
| [PutBucketEncryption](https://docs.aws.amazon.com/AmazonS3/latest/API/API_PutBucketEncryption.html) | ❌ Missing | ❌| | ❌| ❌|
| [DeleteBucketEncryption](https://docs.aws.amazon.com/AmazonS3/latest/API/API_DeleteBucketEncryption.html) | ❌ Missing | ❌| | ❌| ❌|
| [GetBucketEncryption](https://docs.aws.amazon.com/AmazonS3/latest/API/API_GetBucketEncryption.html) | ❌ Missing | ❌| | ❌| ❌|
| [PutBucketEncryption](https://docs.aws.amazon.com/AmazonS3/latest/API/API_PutBucketEncryption.html) | ❌ Missing | ❌| | ❌| ❌|
### Misc endpoints
| Endpoint | Garage | [Openstack Swift](https://docs.openstack.org/swift/latest/s3_compat.html) | [Ceph Object Gateway](https://docs.ceph.com/en/latest/radosgw/s3/) | [Riak CS](https://docs.riak.com/riak/cs/2.1.1/references/apis/storage/s3/index.html) | [OpenIO](https://docs.openio.io/latest/source/arch-design/s3_compliancy.html) |
| Endpoint | Garage | [Openstack Swift](https://docs.openstack.org/swift/latest/s3_compat.html) | [Ceph Object Gateway](https://docs.ceph.com/en/latest/radosgw/s3/) | [Riak CS](https://docs.riak.com/riak/cs/2.1.1/references/apis/storage/s3/index.html) | [OpenIO](https://docs.openio.io/latest/source/arch-design/s3_compliancy.html) |
|------------------------------|----------------------------------|-----------------|---------------|---------|-----|
| [GetBucketNotificationConfiguration](https://docs.aws.amazon.com/AmazonS3/latest/API/API_GetBucketNotificationConfiguration.html) | ❌ Missing | ❌| ✅ | ❌| ❌|
| [PutBucketNotificationConfiguration](https://docs.aws.amazon.com/AmazonS3/latest/API/API_PutBucketNotificationConfiguration.html) | ❌ Missing | ❌| ✅ | ❌| ❌|
| [DeleteBucketTagging](https://docs.aws.amazon.com/AmazonS3/latest/API/API_DeleteBucketTagging.html) | ❌ Missing | ❌| | ❌| ✅ |
| [GetBucketTagging](https://docs.aws.amazon.com/AmazonS3/latest/API/API_GetBucketTagging.html) | ❌ Missing | ❌| | ❌| ✅ |
| [PutBucketTagging](https://docs.aws.amazon.com/AmazonS3/latest/API/API_PutBucketTagging.html) | ❌ Missing | ❌| | ❌| ✅ |
| [DeleteObjectTagging](https://docs.aws.amazon.com/AmazonS3/latest/API/API_DeleteObjectTagging.html) | ❌ Missing | ❌| | ❌| ✅ |
| [GetObjectTagging](https://docs.aws.amazon.com/AmazonS3/latest/API/API_GetObjectTagging.html) | ❌ Missing | ❌| | ❌| ✅ |
| [PutObjectTagging](https://docs.aws.amazon.com/AmazonS3/latest/API/API_PutObjectTagging.html) | ❌ Missing | ❌| | ❌| ✅ |
| [GetObjectTorrent](https://docs.aws.amazon.com/AmazonS3/latest/API/API_GetObjectTorrent.html) | ❌ Missing | ❌| | ❌| ❌|
| [DeleteBucketTagging](https://docs.aws.amazon.com/AmazonS3/latest/API/API_DeleteBucketTagging.html) | ❌ Missing | ❌| | ❌| ✅ |
| [GetBucketTagging](https://docs.aws.amazon.com/AmazonS3/latest/API/API_GetBucketTagging.html) | ❌ Missing | ❌| | ❌| ✅ |
| [PutBucketTagging](https://docs.aws.amazon.com/AmazonS3/latest/API/API_PutBucketTagging.html) | ❌ Missing | ❌| | ❌| ✅ |
| [DeleteObjectTagging](https://docs.aws.amazon.com/AmazonS3/latest/API/API_DeleteObjectTagging.html) | ❌ Missing | ❌| | ❌| ✅ |
| [GetObjectTagging](https://docs.aws.amazon.com/AmazonS3/latest/API/API_GetObjectTagging.html) | ❌ Missing | ❌| | ❌| ✅ |
| [PutObjectTagging](https://docs.aws.amazon.com/AmazonS3/latest/API/API_PutObjectTagging.html) | ❌ Missing | ❌| | ❌| ✅ |
| [GetObjectTorrent](https://docs.aws.amazon.com/AmazonS3/latest/API/API_GetObjectTorrent.html) | ❌ Missing | ❌| | ❌| ❌|
### Vendor specific endpoints
<details><summary>Display Amazon specifc endpoints</summary>
| Endpoint | Garage | [Openstack Swift](https://docs.openstack.org/swift/latest/s3_compat.html) | [Ceph Object Gateway](https://docs.ceph.com/en/latest/radosgw/s3/) | [Riak CS](https://docs.riak.com/riak/cs/2.1.1/references/apis/storage/s3/index.html) | [OpenIO](https://docs.openio.io/latest/source/arch-design/s3_compliancy.html) |
| Endpoint | Garage | [Openstack Swift](https://docs.openstack.org/swift/latest/s3_compat.html) | [Ceph Object Gateway](https://docs.ceph.com/en/latest/radosgw/s3/) | [Riak CS](https://docs.riak.com/riak/cs/2.1.1/references/apis/storage/s3/index.html) | [OpenIO](https://docs.openio.io/latest/source/arch-design/s3_compliancy.html) |
|------------------------------|----------------------------------|-----------------|---------------|---------|-----|
| [DeleteBucketAnalyticsConfiguration](https://docs.aws.amazon.com/AmazonS3/latest/API/API_DeleteBucketAnalyticsConfiguration.html) | ❌ Missing | ❌| ❌| ❌| ❌|
| [DeleteBucketIntelligentTieringConfiguration](https://docs.aws.amazon.com/AmazonS3/latest/API/API_DeleteBucketIntelligentTieringConfiguration.html) | ❌ Missing | ❌| ❌| ❌| ❌|

View file

@ -1,6 +1,6 @@
+++
title = "Design draft"
weight = 25
title = "Design draft (obsolete)"
weight = 50
+++
**WARNING: this documentation is a design draft which was written before Garage's actual implementation.

View file

@ -1,6 +1,6 @@
+++
title = "Load balancing data"
weight = 10
title = "Load balancing data (obsolete)"
weight = 60
+++
**This is being yet improved in release 0.5. The working document has not been updated yet, it still only applies to Garage 0.2 through 0.4.**

View file

@ -16,7 +16,7 @@ The migration steps are as follows:
1. Do `garage repair --all-nodes --yes tables` and `garage repair --all-nodes --yes blocks`,
check the logs and check that all data seems to be synced correctly between
nodes. If you have time, do additional checks (`scrub`, `block_refs`, etc.)
2. Disable api and web access. Garage does not support disabling
2. Disable API and web access. Garage does not support disabling
these endpoints but you can change the port number or stop your reverse
proxy for instance.
3. Check once again that your cluster is healty. Run again `garage repair --all-nodes --yes tables` which is quick.

View file

@ -0,0 +1,34 @@
+++
title = "Migrating from 0.7 to 0.8"
weight = 13
+++
**This guide explains how to migrate to 0.8 if you have an existing 0.7 cluster.
We don't recommend trying to migrate to 0.8 directly from 0.6 or older.**
**We make no guarantee that this migration will work perfectly:
back up all your data before attempting it!**
Garage v0.8 introduces new data tables that allow the counting of objects in buckets in order to implement bucket quotas.
A manual migration step is required to first count objects in Garage buckets and populate these tables with accurate data.
The migration steps are as follows:
1. Disable API and web access. Garage v0.7 does not support disabling
these endpoints but you can change the port number or stop your reverse proxy for instance.
2. Do `garage repair --all-nodes --yes tables` and `garage repair --all-nodes --yes blocks`,
check the logs and check that all data seems to be synced correctly between
nodes. If you have time, do additional checks (`scrub`, `block_refs`, etc.)
3. Check that queues are empty: run `garage stats` to query them or inspect metrics in the Grafana dashboard.
4. Turn off Garage v0.7
5. **Backup the metadata folder of all your nodes!** For instance, use the following command
if your metadata directory is `/var/lib/garage/meta`: `cd /var/lib/garage ; tar -acf meta-v0.7.tar.zst meta/`
6. Install Garage v0.8
7. **Before starting Garage v0.8**, run the offline migration step: `garage offline-repair --yes object_counters`.
This can take a while to run, depending on the number of objects stored in your cluster.
8. Turn on Garage v0.8
9. Do `garage repair --all-nodes --yes tables` and `garage repair --all-nodes --yes blocks`.
Wait for a full table sync to run.
10. Your upgraded cluster should be in a working state. Re-enable API and Web
access and check that everything went well.
11. Monitor your cluster in the next hours to see if it works well under your production load, report any issue.

717
doc/drafts/k2v-spec.md Normal file
View file

@ -0,0 +1,717 @@
# Specification of the Garage K2V API (K2V = Key/Key/Value)
- We are storing triplets of the form `(partition key, sort key, value)` -> no
user-defined fields, the client is responsible of writing whatever he wants
in the value (typically an encrypted blob). Values are binary blobs, which
are always represented as their base64 encoding in the JSON API. Partition
keys and sort keys are utf8 strings.
- Triplets are stored in buckets; each bucket stores a separate set of triplets
- Bucket names and access keys are the same as for accessing the S3 API
- K2V triplets exist separately from S3 objects. K2V triplets don't exist for
the S3 API, and S3 objects don't exist for the K2V API.
- Values stored for triplets have associated causality information, that enables
Garage to detect concurrent writes. In case of concurrent writes, Garage
keeps the concurrent values until a further write supersedes the concurrent
values. This is the same method as Riak KV implements. The method used is
based on DVVS (dotted version vector sets), described in the paper "Scalable
and Accurate Causality Tracking for Eventually Consistent Data Stores", as
well as [here](https://github.com/ricardobcl/Dotted-Version-Vectors)
## Data format
### Triple format
Triples in K2V are constituted of three fields:
- a partition key (`pk`), an utf8 string that defines in what partition the
triplet is stored; triplets in different partitions cannot be listed together
in a ReadBatch command, or deleted together in a DeleteBatch command: a
separate command must be included in the ReadBatch/DeleteBatch call for each
partition key in which the client wants to read/delete lists of items
- a sort key (`sk`), an utf8 string that defines the index of the triplet inside its
partition; triplets are uniquely idendified by their partition key + sort key
- a value (`v`), an opaque binary blob associated to the partition key + sort key;
they are transmitted as binary when possible but in most case in the JSON API
they will be represented as strings using base64 encoding; a value can also
be `null` to indicate a deleted triplet (a `null` value is called a tombstone)
### Causality information
K2V supports storing several concurrent values associated to a pk+sk, in the
case where insertion or deletion operations are detected to be concurrent (i.e.
there is not one that was aware of the other, they are not causally dependant
one on the other). In practice, it even looks more like the opposite: to
overwrite a previously existing value, the client must give a "causality token"
that "proves" (not in a cryptographic sense) that it had seen a previous value.
Otherwise, the value written will not overwrite an existing value, it will just
create a new concurrent value.
The causality token is a binary/b64-encoded representation of a context,
specified below.
A set of concurrent values looks like this:
```
(node1, tdiscard1, (v1, t1), (v2, t2)) ; tdiscard1 < t1 < t2
(node2, tdiscard2, (v3, t3) ; tdiscard2 < t3
```
`tdiscard` for a node `i` means that all values inserted by node `i` with times
`<= tdiscard` are obsoleted, i.e. have been read by a client that overwrote it
afterwards.
The associated context would be the following: `[(node1, t2), (node2, t3)]`,
i.e. if a node reads this set of values and inserts a new values, we will now
have `tdiscard1 = t2` and `tdiscard2 = t3`, to indicate that values v1, v2 and v3
are obsoleted by the new write.
**Basic insertion.** To insert a new value `v4` with context `[(node1, t2), (node2, t3)]`, in a
simple case where there was no insertion in-between reading the value
mentionned above and writing `v4`, and supposing that node2 receives the
InsertItem query:
- `node2` generates a timestamp `t4` such that `t4 > t3`.
- the new state is as follows:
```
(node1, tdiscard1', ()) ; tdiscard1' = t2
(node2, tdiscard2', (v4, t4)) ; tdiscard2' = t3
```
**A more complex insertion example.** In the general case, other intermediate values could have
been written before `v4` with context `[(node1, t2), (node2, t3)]` is sent to the system.
For instance, here is a possible sequence of events:
1. First we have the set of values v1, v2 and v3 described above.
A node reads it, it obtains values v1, v2 and v3 with context `[(node1, t2), (node2, t3)]`.
2. A node writes a value `v5` with context `[(node1, t1)]`, i.e. `v5` is only a
successor of v1 but not of v2 or v3. Suppose node1 receives the write, it
will generate a new timestamp `t5` larger than all of the timestamps it
knows of, i.e. `t5 > t2`. We will now have:
```
(node1, tdiscard1'', (v2, t2), (v5, t5)) ; tdiscard1'' = t1 < t2 < t5
(node2, tdiscard2, (v3, t3) ; tdiscard2 < t3
```
3. Now `v4` is written with context `[(node1, t2), (node2, t3)]`, and node2
processes the query. It will generate `t4 > t3` and the state will become:
```
(node1, tdiscard1', (v5, t5)) ; tdiscard1' = t2 < t5
(node2, tdiscard2', (v4, t4)) ; tdiscard2' = t3
```
**Generic algorithm for handling insertions:** A certain node n handles the
InsertItem and is responsible for the correctness of this procedure.
1. Lock the key (or the whole table?) at this node to prevent concurrent updates of the value that would mess things up
2. Read current set of values
3. Generate a new timestamp that is larger than the largest timestamp for node n
4. Add the inserted value in the list of values of node n
5. Update the discard times to be the times set in the context, and accordingly discard overwritten values
6. Release lock
7. Propagate updated value to other nodes
8. Return to user when propagation achieved the write quorum (propagation to other nodes continues asynchronously)
**Encoding of contexts:**
Contexts consist in a list of (node id, timestamp) pairs.
They are encoded in binary as follows:
```
checksum: u64, [ node: u64, timestamp: u64 ]*
```
The checksum is just the XOR of all of the node IDs and timestamps.
Once encoded in binary, contexts are written and transmitted in base64.
### Indexing
K2V keeps an index, a secondary data structure that is updated asynchronously,
that keeps tracks of the number of triplets stored for each partition key.
This allows easy listing of all of the partition keys for which triplets exist
in a bucket, as the partition key becomes the sort key in the index.
How indexing works:
- Each node keeps a local count of how many items it stores for each partition,
in a local Sled tree that is updated atomically when an item is modified.
- These local counters are asynchronously stored in the index table which is
a regular Garage table spread in the network. Counters are stored as LWW values,
so basically the final table will have the following structure:
```
- pk: bucket
- sk: partition key for which we are counting
- v: lwwmap (node id -> number of items)
```
The final number of items present in the partition can be estimated by taking
the maximum of the values (i.e. the value for the node that announces having
the most items for that partition). In most cases the values for different node
IDs should all be the same; more precisely, three node IDs should map to the
same non-zero value, and all other node IDs that are present are tombstones
that map to zeroes. Note that we need to filter out values from nodes that are
no longer part of the cluster layout, as when nodes are removed they won't
necessarily have had the time to set their counters to zero.
## Important details
**THIS SECTION CONTAINS A FEW WARNINGS ON THE K2V API WHICH ARE IMPORTANT
TO UNDERSTAND IN ORDER TO USE IT CORRECTLY.**
- **Internal server errors on updates do not mean that the update isn't stored.**
K2V will return an internal server error when it cannot reach a quorum of nodes on
which to save an updated value. However the value may still be stored on just one
node, which will then propagate it to other nodes asynchronously via anti-entropy.
- **Batch operations are not transactions.** When calling InsertBatch or DeleteBatch,
items may appear partially inserted/deleted while the operation is being processed.
More importantly, if InsertBatch or DeleteBatch returns an internal server error,
some of the items to be inserted/deleted might end up inserted/deleted on the server,
while others may still have their old value.
- **Concurrent values are deduplicated.** When inserting a value for a key,
Garage might internally end up
storing the value several times if there are network errors. These values will end up as
concurrent values for a key, with the same byte string (or `null` for a deletion).
Garage fixes this by deduplicating concurrent values when they are returned to the
user on read operations. Importantly, *Garage does not differentiate between duplicate
concurrent values due to the user making the same call twice, or Garage having to
do an internal retry*. This means that all duplicate concurrent values are deduplicated
when an item is read: if the user inserts twice concurrently the same value, they will
only read it once.
## API Endpoints
**Remark.** Example queries and responses here are given in JSON5 format
for clarity. However the actual K2V API uses basic JSON so all examples
and responses need to be translated.
### Operations on single items
**ReadItem: `GET /<bucket>/<partition key>?sort_key=<sort key>`**
Query parameters:
| name | default value | meaning |
| - | - | - |
| `sort_key` | **mandatory** | The sort key of the item to read |
Returns the item with specified partition key and sort key. Values can be
returned in either of two ways:
1. a JSON array of base64-encoded values, or `null`'s for tombstones, with
header `Content-Type: application/json`
2. in the case where there are no concurrent values, the single present value
can be returned directly as the response body (or an HTTP 204 NO CONTENT for
a tombstone), with header `Content-Type: application/octet-stream`
The choice between return formats 1 and 2 is directed by the `Accept` HTTP header:
- if the `Accept` header is not present, format 1 is always used
- if `Accept` contains `application/json` but not `application/octet-stream`,
format 1 is always used
- if `Accept` contains `application/octet-stream` but not `application/json`,
format 2 is used when there is a single value, and an HTTP error 409 (HTTP
409 CONFLICT) is returned in the case of multiple concurrent values
(including concurrent tombstones)
- if `Accept` contains both, format 2 is used when there is a single value, and
format 1 is used as a fallback in case of concurrent values
- if `Accept` contains none, HTTP 406 NOT ACCEPTABLE is raised
Example query:
```
GET /my_bucket/mailboxes?sort_key=INBOX HTTP/1.1
```
Example response:
```json
HTTP/1.1 200 OK
X-Garage-Causality-Token: opaquetoken123
Content-Type: application/json
[
"b64cryptoblob123",
"b64cryptoblob'123"
]
```
Example response in case the item is a tombstone:
```
HTTP/1.1 200 OK
X-Garage-Causality-Token: opaquetoken999
Content-Type: application/json
[
null
]
```
Example query 2:
```
GET /my_bucket/mailboxes?sort_key=INBOX HTTP/1.1
Accept: application/octet-stream
```
Example response if multiple concurrent versions exist:
```
HTTP/1.1 409 CONFLICT
X-Garage-Causality-Token: opaquetoken123
Content-Type: application/octet-stream
```
Example response in case of single value:
```
HTTP/1.1 200 OK
X-Garage-Causality-Token: opaquetoken123
Content-Type: application/octet-stream
cryptoblob123
```
Example response in case of a single value that is a tombstone:
```
HTTP/1.1 204 NO CONTENT
X-Garage-Causality-Token: opaquetoken123
Content-Type: application/octet-stream
```
**PollItem: `GET /<bucket>/<partition key>?sort_key=<sort key>&causality_token=<causality token>`**
This endpoint will block until a new value is written to a key.
The GET parameter `causality_token` should be set to the causality
token returned with the last read of the key, so that K2V knows
what values are concurrent or newer than the ones that the
client previously knew.
This endpoint returns the new value in the same format as ReadItem.
If no new value is written and the timeout elapses,
an HTTP 304 NOT MODIFIED is returned.
Query parameters:
| name | default value | meaning |
| - | - | - |
| `sort_key` | **mandatory** | The sort key of the item to read |
| `causality_token` | **mandatory** | The causality token of the last known value or set of values |
| `timeout` | 300 | The timeout before 304 NOT MODIFIED is returned if the value isn't updated |
The timeout can be set to any number of seconds, with a maximum of 600 seconds (10 minutes).
**InsertItem: `PUT /<bucket>/<partition key>?sort_key=<sort_key>`**
Inserts a single item. This request does not use JSON, the body is sent directly as a binary blob.
To supersede previous values, the HTTP header `X-Garage-Causality-Token` should
be set to the causality token returned by a previous read on this key. This
header can be ommitted for the first writes to the key.
Example query:
```
PUT /my_bucket/mailboxes?sort_key=INBOX HTTP/1.1
X-Garage-Causality-Token: opaquetoken123
myblobblahblahblah
```
Example response:
```
HTTP/1.1 200 OK
```
**DeleteItem: `DELETE /<bucket>/<partition key>?sort_key=<sort_key>`**
Deletes a single item. The HTTP header `X-Garage-Causality-Token` must be set
to the causality token returned by a previous read on this key, to indicate
which versions of the value should be deleted. The request will not process if
`X-Garage-Causality-Token` is not set.
Example query:
```
DELETE /my_bucket/mailboxes?sort_key=INBOX HTTP/1.1
X-Garage-Causality-Token: opaquetoken123
```
Example response:
```
HTTP/1.1 204 NO CONTENT
```
### Operations on index
**ReadIndex: `GET /<bucket>?start=<start>&end=<end>&limit=<limit>`**
Lists all partition keys in the bucket for which some triplets exist, and gives
for each the number of triplets, total number of values (which might be bigger
than the number of triplets in case of conflicts), total number of bytes of
these values, and number of triplets that are in a state of conflict.
The values returned are an approximation of the true counts in the bucket,
as these values are asynchronously updated, and thus eventually consistent.
Query parameters:
| name | default value | meaning |
| - | - | - |
| `prefix` | `null` | Restrict listing to partition keys that start with this prefix |
| `start` | `null` | First partition key to list, in lexicographical order |
| `end` | `null` | Last partition key to list (excluded) |
| `limit` | `null` | Maximum number of partition keys to list |
| `reverse` | `false` | Iterate in reverse lexicographical order |
The response consists in a JSON object that repeats the parameters of the query and gives the result (see below).
The listing starts at partition key `start`, or if not specified at the
smallest partition key that exists. It returns partition keys in increasing
order, or decreasing order if `reverse` is set to `true`,
and stops when either of the following conditions is met:
1. if `end` is specfied, the partition key `end` is reached or surpassed (if it
is reached exactly, it is not included in the result)
2. if `limit` is specified, `limit` partition keys have been listed
3. no more partition keys are available to list
In case 2, and if there are more partition keys to list before condition 1
triggers, then in the result `more` is set to `true` and `nextStart` is set to
the first partition key that couldn't be listed due to the limit. In the first
case (if the listing stopped because of the `end` parameter), `more` is not set
and the `nextStart` key is not specified.
Note that if `reverse` is set to `true`, `start` is the highest key
(in lexicographical order) for which values are returned.
This means that if an `end` is specified, it must be smaller than `start`,
otherwise no values will be returned.
Example query:
```
GET /my_bucket HTTP/1.1
```
Example response:
```json
HTTP/1.1 200 OK
{
prefix: null,
start: null,
end: null,
limit: null,
reverse: false,
partitionKeys: [
{
pk: "keys",
entries: 3043,
conflicts: 0,
values: 3043,
bytes: 121720,
},
{
pk: "mailbox:INBOX",
entries: 42,
conflicts: 1,
values: 43,
bytes: 142029,
},
{
pk: "mailbox:Junk",
entries: 2991
conflicts: 0,
values: 2991,
bytes: 12019322,
},
{
pk: "mailbox:Trash",
entries: 10,
conflicts: 0,
values: 10,
bytes: 32401,
},
{
pk: "mailboxes",
entries: 3,
conflicts: 0,
values: 3,
bytes: 3019,
},
],
more: false,
nextStart: null,
}
```
### Operations on batches of items
**InsertBatch: `POST /<bucket>`**
Simple insertion and deletion of triplets. The body is just a list of items to
insert in the following format:
`{ pk: "<partition key>", sk: "<sort key>", ct: "<causality token>"|null, v: "<value>"|null }`.
The causality token should be the one returned in a previous read request (e.g.
by ReadItem or ReadBatch), to indicate that this write takes into account the
values that were returned from these reads, and supersedes them causally. If
the triplet is inserted for the first time, the causality token should be set to
`null`.
The value is expected to be a base64-encoded binary blob. The value `null` can
also be used to delete the triplet while preserving causality information: this
allows to know if a delete has happenned concurrently with an insert, in which
case both are preserved and returned on reads (see below).
Partition keys and sort keys are utf8 strings which are stored sorted by
lexicographical ordering of their binary representation.
Example query:
```json
POST /my_bucket HTTP/1.1
[
{ pk: "mailbox:INBOX", sk: "001892831", ct: "opaquetoken321", v: "b64cryptoblob321updated" },
{ pk: "mailbox:INBOX", sk: "001892912", ct: null, v: "b64cryptoblob444" },
{ pk: "mailbox:INBOX", sk: "001892932", ct: "opaquetoken654", v: null },
]
```
Example response:
```
HTTP/1.1 200 OK
```
**ReadBatch: `POST /<bucket>?search`**, or alternatively<br/>
**ReadBatch: `SEARCH /<bucket>`**
Batch read of triplets in a bucket.
The request body is a JSON list of searches, that each specify a range of
items to get (to get single items, set `singleItem` to `true`). A search is a
JSON struct with the following fields:
| name | default value | meaning |
| - | - | - |
| `partitionKey` | **mandatory** | The partition key in which to search |
| `prefix` | `null` | Restrict items to list to those whose sort keys start with this prefix |
| `start` | `null` | The sort key of the first item to read |
| `end` | `null` | The sort key of the last item to read (excluded) |
| `limit` | `null` | The maximum number of items to return |
| `reverse` | `false` | Iterate in reverse lexicographical order on sort keys |
| `singleItem` | `false` | Whether to return only the item with sort key `start` |
| `conflictsOnly` | `false` | Whether to return only items that have several concurrent values |
| `tombstones` | `false` | Whether or not to return tombstone lines to indicate the presence of old deleted items |
For each of the searches, triplets are listed and returned separately. The
semantics of `prefix`, `start`, `end`, `limit` and `reverse` are the same as for ReadIndex. The
additionnal parameter `singleItem` allows to get a single item, whose sort key
is the one given in `start`. Parameters `conflictsOnly` and `tombstones`
control additional filters on the items that are returned.
The result is a list of length the number of searches, that consists in for
each search a JSON object specified similarly to the result of ReadIndex, but
that lists triplets within a partition key.
The format of returned tuples is as follows: `{ sk: "<sort key>", ct: "<causality
token>", v: ["<value1>", ...] }`, with the following fields:
- `sk` (sort key): any unicode string used as a sort key
- `ct` (causality token): an opaque token served by the server (generally
base64-encoded) to be used in subsequent writes to this key
- `v` (list of values): each value is a binary blob, always base64-encoded;
contains multiple items when concurrent values exists
- in case of concurrent update and deletion, a `null` is added to the list of concurrent values
- if the `tombstones` query parameter is set to `true`, tombstones are returned
for items that have been deleted (this can be usefull for inserting after an
item that has been deleted, so that the insert is not considered
concurrent with the delete). Tombstones are returned as tuples in the
same format with only `null` values
Example query:
```json
POST /my_bucket?search HTTP/1.1
[
{
partitionKey: "mailboxes",
},
{
partitionKey: "mailbox:INBOX",
start: "001892831",
limit: 3,
},
{
partitionKey: "keys",
start: "0",
singleItem: true,
},
]
```
Example associated response body:
```json
HTTP/1.1 200 OK
[
{
partitionKey: "mailboxes",
prefix: null,
start: null,
end: null,
limit: null,
reverse: false,
conflictsOnly: false,
tombstones: false,
singleItem: false,
items: [
{ sk: "INBOX", ct: "opaquetoken123", v: ["b64cryptoblob123", "b64cryptoblob'123"] },
{ sk: "Trash", ct: "opaquetoken456", v: ["b64cryptoblob456"] },
{ sk: "Junk", ct: "opaquetoken789", v: ["b64cryptoblob789"] },
],
more: false,
nextStart: null,
},
{
partitionKey: "mailbox::INBOX",
prefix: null,
start: "001892831",
end: null,
limit: 3,
reverse: false,
conflictsOnly: false,
tombstones: false,
singleItem: false,
items: [
{ sk: "001892831", ct: "opaquetoken321", v: ["b64cryptoblob321"] },
{ sk: "001892832", ct: "opaquetoken654", v: ["b64cryptoblob654"] },
{ sk: "001892874", ct: "opaquetoken987", v: ["b64cryptoblob987"] },
],
more: true,
nextStart: "001892898",
},
{
partitionKey: "keys",
prefix: null,
start: "0",
end: null,
conflictsOnly: false,
tombstones: false,
limit: null,
reverse: false,
singleItem: true,
items: [
{ sk: "0", ct: "opaquetoken999", v: ["b64binarystuff999"] },
],
more: false,
nextStart: null,
},
]
```
**DeleteBatch: `POST /<bucket>?delete`**
Batch deletion of triplets. The request format is the same for `POST
/<bucket>?search` to indicate items or range of items, except that here they
are deleted instead of returned, but only the fields `partitionKey`, `prefix`, `start`,
`end`, and `singleItem` are supported. Causality information is not given by
the user: this request will internally list all triplets and write deletion
markers that supersede all of the versions that have been read.
This request returns for each series of items to be deleted, the number of
matching items that have been found and deleted.
Example query:
```json
POST /my_bucket?delete HTTP/1.1
[
{
partitionKey: "mailbox:OldMailbox",
},
{
partitionKey: "mailbox:INBOX",
start: "0018928321",
singleItem: true,
},
]
```
Example response:
```
HTTP/1.1 200 OK
[
{
partitionKey: "mailbox:OldMailbox",
prefix: null,
start: null,
end: null,
singleItem: false,
deletedItems: 35,
},
{
partitionKey: "mailbox:INBOX",
prefix: null,
start: "0018928321",
end: null,
singleItem: true,
deletedItems: 1,
},
]
```
## Internals: causality tokens
The method used is based on DVVS (dotted version vector sets). See:
- the paper "Scalable and Accurate Causality Tracking for Eventually Consistent Data Stores"
- <https://github.com/ricardobcl/Dotted-Version-Vectors>
For DVVS to work, write operations (at each node) must take a lock on the data table.

14
doc/talks/2022-06-23-stack/.gitignore vendored Normal file
View file

@ -0,0 +1,14 @@
*
!assets
!.gitignore
!*.svg
!*.png
!*.jpg
!*.tex
!Makefile
!.gitignore
!assets/*.drawio.pdf
!talk.pdf

View file

@ -0,0 +1,5 @@
talk.pdf: talk.tex assets/consistent_hashing_1.pdf assets/consistent_hashing_2.pdf assets/consistent_hashing_3.pdf assets/consistent_hashing_4.pdf assets/garage_tables.pdf assets/deuxfleurs.pdf
pdflatex talk.tex
assets/%.pdf: assets/%.svg
inkscape -D -z --file=$^ --export-pdf=$@

Binary file not shown.

After

Width:  |  Height:  |  Size: 32 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 115 KiB

File diff suppressed because one or more lines are too long

After

Width:  |  Height:  |  Size: 184 KiB

Binary file not shown.

Binary file not shown.

After

Width:  |  Height:  |  Size: 26 KiB

Binary file not shown.

Binary file not shown.

After

Width:  |  Height:  |  Size: 27 KiB

Binary file not shown.

Binary file not shown.

After

Width:  |  Height:  |  Size: 8.9 KiB

BIN
doc/talks/2022-06-23-stack/assets/aerogramme_keys.drawio.pdf (Stored with Git LFS) Normal file

Binary file not shown.

Binary file not shown.

After

Width:  |  Height:  |  Size: 18 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 4.8 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 263 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 82 KiB

File diff suppressed because one or more lines are too long

After

Width:  |  Height:  |  Size: 53 KiB

File diff suppressed because one or more lines are too long

After

Width:  |  Height:  |  Size: 54 KiB

File diff suppressed because one or more lines are too long

After

Width:  |  Height:  |  Size: 56 KiB

File diff suppressed because one or more lines are too long

After

Width:  |  Height:  |  Size: 57 KiB

View file

@ -0,0 +1,91 @@
<?xml version="1.0" encoding="UTF-8" standalone="no"?>
<svg
viewBox="0 0 70.424515 70.300102"
version="1.1"
id="svg8"
sodipodi:docname="logo.svg"
inkscape:version="1.1 (c68e22c387, 2021-05-23)"
inkscape:export-filename="/home/quentin/Documents/dev/deuxfleurs/site/src/img/logo.png"
inkscape:export-xdpi="699.30194"
inkscape:export-ydpi="699.30194"
width="70.424515"
height="70.300102"
xmlns:inkscape="http://www.inkscape.org/namespaces/inkscape"
xmlns:sodipodi="http://sodipodi.sourceforge.net/DTD/sodipodi-0.dtd"
xmlns="http://www.w3.org/2000/svg"
xmlns:svg="http://www.w3.org/2000/svg">
<defs
id="defs12" />
<sodipodi:namedview
id="namedview10"
pagecolor="#ffffff"
bordercolor="#666666"
borderopacity="1.0"
inkscape:pageshadow="2"
inkscape:pageopacity="0.0"
inkscape:pagecheckerboard="0"
showgrid="false"
inkscape:zoom="12.125"
inkscape:cx="43.092783"
inkscape:cy="48.082474"
inkscape:window-width="3072"
inkscape:window-height="1659"
inkscape:window-x="0"
inkscape:window-y="0"
inkscape:window-maximized="1"
inkscape:current-layer="svg8" />
<g
id="g79969"
transform="translate(-0.827,34.992103)">
<path
fill="#ffffff"
d="m 15.632,34.661 c -0.799,-0.597 -1.498,-1.484 -2.035,-2.592 l -0.228,-0.47 -0.46,0.249 c -0.975,0.528 -1.913,0.858 -2.744,0.969 L 9.963,29.061 6.327,30.029 C 6.17,29.175 6.202,28.142 6.423,27.007 L 6.526,26.482 5.994,26.416 C 4.752,26.262 3.688,25.891 2.89,25.336 L 4.411,22.419 1.423,20.896 C 1.742,19.952 2.371,19.014 3.257,18.161 L 3.634,17.798 3.255,17.438 C 2.452,16.674 1.847,15.884 1.485,15.127 L 4.995,13.774 2.95,10.615 C 3.69,10.213 4.643,9.929 5.739,9.783 L 6.258,9.715 6.167,9.201 C 5.952,7.99 5.995,6.863 6.291,5.913 l 3.308,0.523 0.524,-3.308 c 0.988,0.013 2.08,0.326 3.164,0.907 L 13.749,4.283 13.975,3.81 C 14.454,2.807 15.019,1.986 15.628,1.406 L 18,4.326 20.372,1.406 c 0.609,0.58 1.175,1.401 1.653,2.404 l 0.226,0.473 0.462,-0.247 C 23.798,3.455 24.891,3.142 25.877,3.13 L 26.4,6.438 29.71,5.913 c 0.296,0.951 0.34,2.078 0.124,3.288 l -0.092,0.515 0.518,0.069 c 1.095,0.145 2.048,0.43 2.788,0.832 l -2.046,3.156 3.511,1.355 c -0.361,0.757 -0.966,1.547 -1.77,2.311 l -0.379,0.36 0.377,0.363 c 0.888,0.854 1.516,1.793 1.835,2.736 l -2.984,1.52 1.521,2.984 c -0.812,0.574 -1.871,0.964 -3.094,1.134 l -0.518,0.072 0.096,0.514 c 0.201,1.089 0.226,2.083 0.073,2.909 l -3.634,-0.97 -0.204,3.757 c -0.83,-0.11 -1.768,-0.44 -2.742,-0.968 l -0.459,-0.249 -0.228,0.47 c -0.539,1.107 -1.237,1.994 -2.036,2.591 L 18,32.293 Z"
id="path2" />
<path
d="M 7.092,10.678 C 6.562,9.189 6.394,7.708 6.66,6.478 l 2.368,0.375 0.987,0.156 0.157,-0.988 0.375,-2.368 C 11.808,3.78 13.16,4.396 14.409,5.359 14.527,5.022 14.653,4.696 14.791,4.392 13.24,3.257 11.568,2.629 10.061,2.629 9.938,2.629 9.816,2.633 9.695,2.642 L 9.184,5.865 5.96,5.354 C 5.36,6.841 5.395,8.769 6.045,10.747 6.38,10.71 6.729,10.686 7.092,10.678 Z M 21.593,5.359 c 1.248,-0.962 2.6,-1.578 3.86,-1.705 l 0.376,2.368 0.156,0.988 0.987,-0.157 2.369,-0.376 c 0.266,1.23 0.098,2.71 -0.432,4.2 0.361,0.009 0.711,0.032 1.046,0.07 C 30.606,8.769 30.64,6.841 30.04,5.353 L 26.815,5.865 26.304,2.641 c -0.12,-0.008 -0.242,-0.012 -0.365,-0.012 -1.507,0 -3.179,0.628 -4.73,1.762 0.14,0.306 0.266,0.631 0.384,0.968 z M 7.368,27 h 0.035 c 0.067,0 0.157,-0.604 0.26,-0.947 -0.098,0.004 -0.197,0.046 -0.294,0.046 -1.496,0 -2.826,-0.303 -3.83,-0.89 L 4.628,23.081 5.082,22.194 4.191,21.742 2.055,20.654 C 2.563,19.503 3.57,18.404 4.873,17.511 4.586,17.292 4.312,17.07 4.063,16.842 2.376,18.059 1.217,19.597 0.828,21.152 l 2.908,1.483 -1.482,2.843 C 3.475,26.501 5.303,27 7.368,27 Z m 27.806,-5.846 c -0.39,-1.555 -1.548,-3.093 -3.234,-4.311 -0.25,0.228 -0.523,0.451 -0.81,0.669 1.304,0.893 2.31,1.992 2.817,3.145 l -2.136,1.088 -0.891,0.453 0.454,0.892 1.089,2.137 c -1.004,0.587 -2.332,0.904 -3.828,0.904 -0.099,0 -0.199,-0.01 -0.299,-0.013 0.103,0.344 0.192,0.683 0.26,1.011 l 0.039,0.002 c 2.066,0 3.892,-0.563 5.112,-1.587 l -1.482,-2.908 z m -12.653,9.182 c -0.447,1.517 -1.181,2.812 -2.119,3.651 L 18.707,32.293 18,31.586 l -0.707,0.707 -1.695,1.694 c -0.938,-0.839 -1.673,-2.136 -2.12,-3.652 -0.296,0.206 -0.593,0.397 -0.886,0.563 0.636,1.98 1.741,3.559 3.1,4.409 L 18,33 l 2.308,2.308 c 1.358,-0.851 2.464,-2.428 3.101,-4.408 -0.295,-0.168 -0.591,-0.359 -0.888,-0.564 z"
fill="#ea596e"
id="path4" />
<path
fill="#ea596e"
d="m 20.118,5.683 c 0.426,1.146 0.748,2.596 0.841,4.284 l 0.2,3.683 3.564,-0.946 c 1.32,-0.351 2.655,-0.536 3.86,-0.536 0.16,0 0.318,0.003 0.474,0.01 l -1.827,2.819 3.139,1.211 c -0.958,0.759 -2.237,1.514 -3.814,2.123 l -3.441,1.328 2.001,3.099 c 0.918,1.42 1.509,2.782 1.838,3.96 L 23.709,25.853 23.527,29.21 C 22.508,28.533 21.395,27.55 20.329,26.237 L 18,23.374 15.672,26.236 c -1.066,1.312 -2.179,2.295 -3.198,2.972 l -0.18,-3.354 -3.248,0.864 c 0.329,-1.178 0.921,-2.54 1.839,-3.961 L 12.889,19.658 9.447,18.33 C 7.87,17.721 6.591,16.967 5.633,16.208 L 8.768,15 6.941,12.177 c 0.155,-0.006 0.313,-0.01 0.473,-0.01 1.206,0 2.541,0.185 3.861,0.536 l 3.564,0.947 0.202,-3.683 c 0.092,-1.688 0.415,-3.138 0.84,-4.284 L 18,8.292 20.118,5.683 M 20.308,0.692 18,3.533 15.692,0.692 C 13.703,2.224 12.271,5.684 12.046,9.804 10.429,9.374 8.854,9.167 7.414,9.167 c -2.11,0 -3.929,0.445 -5.161,1.289 l 1.989,3.073 -3.415,1.316 c 0.842,2.366 3.69,4.797 7.54,6.283 -2.241,3.465 -3.116,7.106 -2.407,9.516 l 3.537,-0.941 0.196,3.654 c 2.512,-0.07 5.703,-2.027 8.307,-5.228 2.603,3.201 5.796,5.158 8.306,5.228 l 0.198,-3.655 3.535,0.943 c 0.71,-2.411 -0.165,-6.05 -2.404,-9.517 3.849,-1.485 6.696,-3.918 7.538,-6.283 l -3.415,-1.318 1.99,-3.07 c -1.233,-0.844 -3.053,-1.29 -5.164,-1.29 -1.438,0 -3.013,0.207 -4.63,0.636 C 23.729,5.684 22.297,2.224 20.308,0.692 Z"
id="path6" />
</g>
<g
id="g79964"
transform="translate(-1.043816,35.993714)">
<path
fill="#ffffff"
d="m 51.92633,-2.0247139 c -0.799,-0.597 -1.498,-1.484 -2.035,-2.592 l -0.228,-0.47 -0.46,0.249 c -0.975,0.528 -1.913,0.858 -2.744,0.969 l -0.202,-3.7560001 -3.636,0.968 c -0.157,-0.854 -0.125,-1.887 0.096,-3.022 l 0.103,-0.525 -0.532,-0.066 c -1.242,-0.154 -2.306,-0.525 -3.104,-1.08 l 1.521,-2.917 -2.988,-1.523 c 0.319,-0.944 0.948,-1.882 1.834,-2.735 l 0.377,-0.363 -0.379,-0.36 c -0.803,-0.764 -1.408,-1.554 -1.77,-2.311 l 3.51,-1.353 -2.045,-3.159 c 0.74,-0.402 1.693,-0.686 2.789,-0.832 l 0.519,-0.068 -0.091,-0.514 c -0.215,-1.211 -0.172,-2.338 0.124,-3.288 l 3.308,0.523 0.524,-3.308 c 0.988,0.013 2.08,0.326 3.164,0.907 l 0.462,0.248 0.226,-0.473 c 0.479,-1.003 1.044,-1.824 1.653,-2.404 l 2.372,2.92 2.372,-2.92 c 0.609,0.58 1.175,1.401 1.653,2.404 l 0.226,0.473 0.462,-0.247 c 1.085,-0.581 2.178,-0.894 3.164,-0.906 l 0.523,3.308 3.31,-0.525 c 0.296,0.951 0.34,2.078 0.124,3.288 l -0.092,0.515 0.518,0.069 c 1.095,0.145 2.048,0.43 2.788,0.832 l -2.046,3.156 3.511,1.355 c -0.361,0.757 -0.966,1.547 -1.77,2.311 l -0.379,0.36 0.377,0.363 c 0.888,0.854 1.516,1.793 1.835,2.736 l -2.984,1.52 1.521,2.984 c -0.812,0.574 -1.871,0.964 -3.094,1.134 l -0.518,0.072 0.096,0.514 c 0.201,1.089 0.226,2.083 0.073,2.909 l -3.634,-0.97 -0.204,3.7570001 c -0.83,-0.11 -1.768,-0.44 -2.742,-0.968 l -0.459,-0.249 -0.228,0.47 c -0.539,1.107 -1.237,1.994 -2.036,2.591 l -2.367,-2.369 z"
id="path2-9" />
<path
d="m 43.38633,-26.007714 c -0.53,-1.489 -0.698,-2.97 -0.432,-4.2 l 2.368,0.375 0.987,0.156 0.157,-0.988 0.375,-2.368 c 1.261,0.127 2.613,0.743 3.862,1.706 0.118,-0.337 0.244,-0.663 0.382,-0.967 -1.551,-1.135 -3.223,-1.763 -4.73,-1.763 -0.123,0 -0.245,0.004 -0.366,0.013 l -0.511,3.223 -3.224,-0.511 c -0.6,1.487 -0.565,3.415 0.085,5.393 0.335,-0.037 0.684,-0.061 1.047,-0.069 z m 14.501,-5.319 c 1.248,-0.962 2.6,-1.578 3.86,-1.705 l 0.376,2.368 0.156,0.988 0.987,-0.157 2.369,-0.376 c 0.266,1.23 0.098,2.71 -0.432,4.2 0.361,0.009 0.711,0.032 1.046,0.07 0.651,-1.978 0.685,-3.906 0.085,-5.394 l -3.225,0.512 -0.511,-3.224 c -0.12,-0.008 -0.242,-0.012 -0.365,-0.012 -1.507,0 -3.179,0.628 -4.73,1.762 0.14,0.306 0.266,0.631 0.384,0.968 z m -14.225,21.641 h 0.035 c 0.067,0 0.157,-0.604 0.26,-0.947 -0.098,0.004 -0.197,0.046 -0.294,0.046 -1.496,0 -2.826,-0.303 -3.83,-0.89 l 1.089,-2.128 0.454,-0.887 -0.891,-0.452 -2.136,-1.088 c 0.508,-1.151 1.515,-2.25 2.818,-3.143 -0.287,-0.219 -0.561,-0.441 -0.81,-0.669 -1.687,1.217 -2.846,2.755 -3.235,4.31 l 2.908,1.483 -1.482,2.843 c 1.221,1.023 3.049,1.522 5.114,1.522 z m 27.806,-5.846 c -0.39,-1.555 -1.548,-3.093 -3.234,-4.311 -0.25,0.228 -0.523,0.451 -0.81,0.669 1.304,0.893 2.31,1.992 2.817,3.145 l -2.136,1.088 -0.891,0.453 0.454,0.892 1.089,2.137 c -1.004,0.587 -2.332,0.904 -3.828,0.904 -0.099,0 -0.199,-0.01 -0.299,-0.013 0.103,0.344 0.192,0.683 0.26,1.011 l 0.039,0.002 c 2.066,0 3.892,-0.563 5.112,-1.587 l -1.482,-2.908 z m -12.653,9.182 c -0.447,1.5170001 -1.181,2.8120001 -2.119,3.6510001 l -1.695,-1.694 -0.707,-0.707 -0.707,0.707 -1.695,1.694 c -0.938,-0.839 -1.673,-2.136 -2.12,-3.6520001 -0.296,0.2060001 -0.593,0.3970001 -0.886,0.5630001 0.636,1.98 1.741,3.559 3.1,4.409 l 2.308,-2.307 2.308,2.308 c 1.358,-0.851 2.464,-2.428 3.101,-4.408 -0.295,-0.168 -0.591,-0.359 -0.888,-0.5640001 z"
fill="#ea596e"
id="path4-3" />
<path
fill="#ea596e"
d="m 56.41233,-31.002714 c 0.426,1.146 0.748,2.596 0.841,4.284 l 0.2,3.683 3.564,-0.946 c 1.32,-0.351 2.655,-0.536 3.86,-0.536 0.16,0 0.318,0.003 0.474,0.01 l -1.827,2.819 3.139,1.211 c -0.958,0.759 -2.237,1.514 -3.814,2.123 l -3.441,1.328 2.001,3.099 c 0.918,1.42 1.509,2.782 1.838,3.96 l -3.244,-0.865 -0.182,3.357 c -1.019,-0.677 -2.132,-1.66 -3.198,-2.973 l -2.329,-2.863 -2.328,2.862 c -1.066,1.312 -2.179,2.295 -3.198,2.972 l -0.18,-3.354 -3.248,0.864 c 0.329,-1.178 0.921,-2.54 1.839,-3.961 l 2.004,-3.099 -3.442,-1.328 c -1.577,-0.609 -2.856,-1.363 -3.814,-2.122 l 3.135,-1.208 -1.827,-2.823 c 0.155,-0.006 0.313,-0.01 0.473,-0.01 1.206,0 2.541,0.185 3.861,0.536 l 3.564,0.947 0.202,-3.683 c 0.092,-1.688 0.415,-3.138 0.84,-4.284 l 2.119,2.609 2.118,-2.609 m 0.19,-4.991 -2.308,2.841 -2.308,-2.841 c -1.989,1.532 -3.421,4.992 -3.646,9.112 -1.617,-0.43 -3.192,-0.637 -4.632,-0.637 -2.11,0 -3.929,0.445 -5.161,1.289 l 1.989,3.073 -3.415,1.316 c 0.842,2.366 3.69,4.797 7.54,6.283 -2.241,3.465 -3.116,7.106 -2.407,9.5160001 l 3.537,-0.9410001 0.196,3.6540001 c 2.512,-0.07 5.703,-2.027 8.307,-5.2280001 2.603,3.2010001 5.796,5.1580001 8.306,5.2280001 l 0.198,-3.6550001 3.535,0.9430001 c 0.71,-2.4110001 -0.165,-6.0500001 -2.404,-9.5170001 3.849,-1.485 6.696,-3.918 7.538,-6.283 l -3.415,-1.318 1.99,-3.07 c -1.233,-0.844 -3.053,-1.29 -5.164,-1.29 -1.438,0 -3.013,0.207 -4.63,0.636 -0.225,-4.119 -1.657,-7.579 -3.646,-9.111 z"
id="path6-6" />
</g>
<text
xml:space="preserve"
style="font-style:normal;font-weight:normal;font-size:42.6667px;line-height:1.25;font-family:sans-serif;fill:#ea596e;fill-opacity:1;stroke:none"
x="2.2188232"
y="31.430677"
id="text46212"><tspan
sodipodi:role="line"
id="tspan46210"
x="2.2188232"
y="31.430677"
style="font-style:normal;font-variant:normal;font-weight:normal;font-stretch:normal;font-size:42.6667px;font-family:'TeX Gyre Termes';-inkscape-font-specification:'TeX Gyre Termes'">D</tspan></text>
<text
xml:space="preserve"
style="font-style:normal;font-weight:normal;font-size:42.6667px;line-height:1.25;font-family:sans-serif;fill:#ea596e;fill-opacity:1;stroke:none"
x="41.347008"
y="67.114784"
id="text46212-1"><tspan
sodipodi:role="line"
id="tspan46210-5"
x="41.347008"
y="67.114784"
style="font-style:normal;font-variant:normal;font-weight:normal;font-stretch:normal;font-size:42.6667px;font-family:'TeX Gyre Termes';-inkscape-font-specification:'TeX Gyre Termes'">F</tspan></text>
</svg>

After

Width:  |  Height:  |  Size: 12 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 129 KiB

BIN
doc/talks/2022-06-23-stack/assets/garage.drawio.pdf (Stored with Git LFS) Normal file

Binary file not shown.

Binary file not shown.

After

Width:  |  Height:  |  Size: 13 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 88 KiB

BIN
doc/talks/2022-06-23-stack/assets/garage2a.drawio.pdf (Stored with Git LFS) Normal file

Binary file not shown.

BIN
doc/talks/2022-06-23-stack/assets/garage2b.drawio.pdf (Stored with Git LFS) Normal file

Binary file not shown.

View file

@ -0,0 +1,537 @@
<?xml version="1.0" encoding="UTF-8" standalone="no"?>
<svg
width="850"
height="480"
viewBox="0 0 224.89584 127"
version="1.1"
id="svg8"
inkscape:version="1.2 (dc2aedaf03, 2022-05-15)"
sodipodi:docname="garage_tables.svg"
xmlns:inkscape="http://www.inkscape.org/namespaces/inkscape"
xmlns:sodipodi="http://sodipodi.sourceforge.net/DTD/sodipodi-0.dtd"
xmlns="http://www.w3.org/2000/svg"
xmlns:svg="http://www.w3.org/2000/svg"
xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#"
xmlns:cc="http://creativecommons.org/ns#"
xmlns:dc="http://purl.org/dc/elements/1.1/">
<defs
id="defs2">
<marker
style="overflow:visible"
id="marker1262"
refX="0"
refY="0"
orient="auto"
inkscape:stockid="Arrow1Mend"
inkscape:isstock="true">
<path
transform="matrix(-0.4,0,0,-0.4,-4,0)"
style="fill:#000000;fill-opacity:1;fill-rule:evenodd;stroke:#000000;stroke-width:1pt;stroke-opacity:1"
d="M 0,0 5,-5 -12.5,0 5,5 Z"
id="path1260" />
</marker>
<marker
style="overflow:visible"
id="Arrow1Mend"
refX="0"
refY="0"
orient="auto"
inkscape:stockid="Arrow1Mend"
inkscape:isstock="true"
inkscape:collect="always">
<path
transform="matrix(-0.4,0,0,-0.4,-4,0)"
style="fill:#000000;fill-opacity:1;fill-rule:evenodd;stroke:#000000;stroke-width:1pt;stroke-opacity:1"
d="M 0,0 5,-5 -12.5,0 5,5 Z"
id="path965" />
</marker>
<marker
style="overflow:visible"
id="Arrow1Lend"
refX="0"
refY="0"
orient="auto"
inkscape:stockid="Arrow1Lend"
inkscape:isstock="true">
<path
transform="matrix(-0.8,0,0,-0.8,-10,0)"
style="fill:#000000;fill-opacity:1;fill-rule:evenodd;stroke:#000000;stroke-width:1pt;stroke-opacity:1"
d="M 0,0 5,-5 -12.5,0 5,5 Z"
id="path959" />
</marker>
</defs>
<sodipodi:namedview
id="base"
pagecolor="#ffffff"
bordercolor="#666666"
borderopacity="1.0"
inkscape:pageopacity="0.0"
inkscape:pageshadow="2"
inkscape:zoom="0.98994949"
inkscape:cx="429.31483"
inkscape:cy="289.40871"
inkscape:document-units="mm"
inkscape:current-layer="layer1"
inkscape:document-rotation="0"
showgrid="false"
units="px"
inkscape:window-width="1678"
inkscape:window-height="993"
inkscape:window-x="0"
inkscape:window-y="0"
inkscape:window-maximized="1"
inkscape:showpageshadow="2"
inkscape:pagecheckerboard="0"
inkscape:deskcolor="#d1d1d1" />
<metadata
id="metadata5">
<rdf:RDF>
<cc:Work
rdf:about="">
<dc:format>image/svg+xml</dc:format>
<dc:type
rdf:resource="http://purl.org/dc/dcmitype/StillImage" />
<dc:title />
</cc:Work>
</rdf:RDF>
</metadata>
<g
inkscape:label="Layer 1"
inkscape:groupmode="layer"
id="layer1">
<text
xml:space="preserve"
style="font-style:normal;font-weight:normal;font-size:5.64444px;line-height:1.25;font-family:sans-serif;fill:#000000;fill-opacity:1;stroke:none;stroke-width:0.264583"
x="39.570904"
y="38.452755"
id="text2025"><tspan
sodipodi:role="line"
id="tspan2023"
x="39.570904"
y="38.452755"
style="font-size:5.64444px;stroke-width:0.264583" /></text>
<text
xml:space="preserve"
style="font-style:normal;font-weight:normal;font-size:10.5833px;line-height:1.25;font-family:sans-serif;fill:#000000;fill-opacity:1;stroke:none;stroke-width:0.264583"
x="101.95796"
y="92.835831"
id="text2139"><tspan
sodipodi:role="line"
id="tspan2137"
x="101.95796"
y="92.835831"
style="stroke-width:0.264583"> </tspan></text>
<g
id="g2316"
transform="translate(-11.455511,1.5722486)">
<g
id="g2277">
<rect
style="fill:none;stroke:#000000;stroke-width:0.8;stroke-miterlimit:4;stroke-dasharray:none;stroke-dashoffset:0;stroke-opacity:1"
id="rect833"
width="47.419891"
height="95.353409"
x="18.534418"
y="24.42766" />
<rect
style="fill:none;stroke:#000000;stroke-width:0.799999;stroke-miterlimit:4;stroke-dasharray:none;stroke-dashoffset:0;stroke-opacity:1"
id="rect833-3"
width="47.419891"
height="86.973076"
x="18.534418"
y="32.807987" />
<text
xml:space="preserve"
style="font-style:normal;font-weight:normal;font-size:5.64444px;line-height:1.25;font-family:sans-serif;fill:#000000;fill-opacity:1;stroke:none;stroke-width:0.264583"
x="32.250839"
y="29.894743"
id="text852"><tspan
sodipodi:role="line"
id="tspan850"
x="32.250839"
y="29.894743"
style="font-style:normal;font-variant:normal;font-weight:bold;font-stretch:normal;font-size:5.64444px;font-family:'Liberation Mono';-inkscape-font-specification:'Liberation Mono Bold';stroke-width:0.264583">Object</tspan></text>
</g>
<g
id="g2066"
transform="translate(-2.1807817,-3.0621439)">
<g
id="g1969"
transform="matrix(0.12763631,0,0,0.12763631,0.7215051,24.717273)"
style="fill:#ff6600;fill-opacity:1;stroke:none;stroke-opacity:1">
<path
style="fill:#ff6600;fill-opacity:1;stroke:none;stroke-width:0.264583;stroke-opacity:1"
d="m 203.71837,154.80038 c -1.11451,3.75057 -2.45288,5.84095 -5.11132,7.98327 -2.2735,1.83211 -4.66721,2.65982 -8.09339,2.79857 -2.59227,0.10498 -2.92868,0.0577 -5.02863,-0.70611 -3.99215,-1.45212 -7.1627,-4.65496 -8.48408,-8.57046 -1.28374,-3.80398 -0.61478,-8.68216 1.64793,-12.01698 0.87317,-1.28689 3.15089,-3.48326 4.18771,-4.03815 l 0.53332,-28.51234 5.78454,-5.09197 6.95158,6.16704 -3.21112,3.49026 3.17616,3.45499 -3.17616,3.40822 2.98973,3.28645 -3.24843,3.3829 4.49203,4.58395 0.0516,5.69106 c 1.06874,0.64848 3.81974,3.24046 4.69548,4.56257 0.452,0.68241 1.06834,2.0197 1.36962,2.97176 0.62932,1.98864 0.88051,5.785 0.47342,7.15497 z m -10.0406,2.32604 c -0.88184,-3.17515 -4.92402,-3.78864 -6.75297,-1.02492 -0.58328,0.8814 -0.6898,1.28852 -0.58362,2.23056 0.26492,2.35041 2.45434,3.95262 4.60856,3.37255 1.19644,-0.32217 2.39435,-1.44872 2.72875,-2.56621 0.30682,-1.02529 0.30686,-0.9045 -7.9e-4,-2.01198 z"
id="path1971"
sodipodi:nodetypes="ssscsscccccccccccssscsssscc" />
</g>
<text
xml:space="preserve"
style="font-style:normal;font-weight:normal;font-size:5.64444px;line-height:1.25;font-family:sans-serif;fill:#000000;fill-opacity:1;stroke:none;stroke-width:0.264583"
x="28.809687"
y="44.070885"
id="text852-9"><tspan
sodipodi:role="line"
id="tspan850-4"
x="28.809687"
y="44.070885"
style="font-style:normal;font-variant:normal;font-weight:normal;font-stretch:normal;font-size:5.64444px;font-family:'Liberation Mono';-inkscape-font-specification:'Liberation Mono Bold';stroke-width:0.264583">bucket </tspan></text>
</g>
<g
id="g2066-7"
transform="translate(-2.1807817,6.2627616)">
<g
id="g1969-8"
transform="matrix(0.12763631,0,0,0.12763631,0.7215051,24.717273)"
style="fill:#ff6600;fill-opacity:1;stroke:none;stroke-opacity:1">
<path
style="fill:#4040ff;fill-opacity:1;stroke:none;stroke-width:0.264583;stroke-opacity:1"
d="m 203.71837,154.80038 c -1.11451,3.75057 -2.45288,5.84095 -5.11132,7.98327 -2.2735,1.83211 -4.66721,2.65982 -8.09339,2.79857 -2.59227,0.10498 -2.92868,0.0577 -5.02863,-0.70611 -3.99215,-1.45212 -7.1627,-4.65496 -8.48408,-8.57046 -1.28374,-3.80398 -0.61478,-8.68216 1.64793,-12.01698 0.87317,-1.28689 3.15089,-3.48326 4.18771,-4.03815 l 0.53332,-28.51234 5.78454,-5.09197 6.95158,6.16704 -3.21112,3.49026 3.17616,3.45499 -3.17616,3.40822 2.98973,3.28645 -3.24843,3.3829 4.49203,4.58395 0.0516,5.69106 c 1.06874,0.64848 3.81974,3.24046 4.69548,4.56257 0.452,0.68241 1.06834,2.0197 1.36962,2.97176 0.62932,1.98864 0.88051,5.785 0.47342,7.15497 z m -10.0406,2.32604 c -0.88184,-3.17515 -4.92402,-3.78864 -6.75297,-1.02492 -0.58328,0.8814 -0.6898,1.28852 -0.58362,2.23056 0.26492,2.35041 2.45434,3.95262 4.60856,3.37255 1.19644,-0.32217 2.39435,-1.44872 2.72875,-2.56621 0.30682,-1.02529 0.30686,-0.9045 -7.9e-4,-2.01198 z"
id="path1971-4"
sodipodi:nodetypes="ssscsscccccccccccssscsssscc" />
</g>
<text
xml:space="preserve"
style="font-style:normal;font-weight:normal;font-size:5.64444px;line-height:1.25;font-family:sans-serif;fill:#000000;fill-opacity:1;stroke:none;stroke-width:0.264583"
x="28.809687"
y="44.070885"
id="text852-9-5"><tspan
sodipodi:role="line"
id="tspan850-4-0"
x="28.809687"
y="44.070885"
style="font-style:normal;font-variant:normal;font-weight:normal;font-stretch:normal;font-size:5.64444px;font-family:'Liberation Mono';-inkscape-font-specification:'Liberation Mono Bold';stroke-width:0.264583">file path </tspan></text>
<path
style="fill:#ff6600;fill-opacity:1;stroke:none;stroke-width:0.0337704;stroke-opacity:1"
d="m 174.20027,104.45585 c -0.14225,0.47871 -0.31308,0.74552 -0.65239,1.01896 -0.29018,0.23384 -0.5957,0.33949 -1.03301,0.3572 -0.33087,0.0134 -0.37381,0.007 -0.64184,-0.0901 -0.50954,-0.18534 -0.91422,-0.59414 -1.08287,-1.0939 -0.16385,-0.48552 -0.0785,-1.10816 0.21033,-1.5338 0.11145,-0.16426 0.40217,-0.44459 0.53451,-0.51542 l 0.0681,-3.639207 0.73832,-0.64992 0.88727,0.787138 -0.40986,0.445484 0.4054,0.440982 -0.4054,0.435013 0.3816,0.41947 -0.41461,0.43178 0.57334,0.58508 0.007,0.72639 c 0.13641,0.0828 0.48753,0.4136 0.59931,0.58235 0.0577,0.0871 0.13636,0.25778 0.17481,0.3793 0.0803,0.25382 0.11239,0.73838 0.0604,0.91323 z m -1.28154,0.29689 c -0.11256,-0.40526 -0.62849,-0.48357 -0.86193,-0.13082 -0.0745,0.1125 -0.088,0.16447 -0.0745,0.2847 0.0338,0.3 0.31326,0.5045 0.58822,0.43046 0.15271,-0.0411 0.30561,-0.1849 0.34829,-0.32754 0.0392,-0.13086 0.0392,-0.11544 -1e-4,-0.2568 z"
id="path1971-3"
sodipodi:nodetypes="ssscsscccccccccccssscsssscc" />
<text
xml:space="preserve"
style="font-style:normal;font-weight:normal;font-size:5.64444px;line-height:1.25;font-family:sans-serif;fill:#000000;fill-opacity:1;stroke:none;stroke-width:0.264583"
x="177.8474"
y="104.05132"
id="text852-9-6"><tspan
sodipodi:role="line"
id="tspan850-4-7"
x="177.8474"
y="104.05132"
style="font-style:normal;font-variant:normal;font-weight:normal;font-stretch:normal;font-size:5.64444px;font-family:'Liberation Mono';-inkscape-font-specification:'Liberation Mono Bold';stroke-width:0.264583">= partition key </tspan></text>
<path
style="fill:#4040ff;fill-opacity:1;stroke:none;stroke-width:0.0337704;stroke-opacity:1"
d="m 174.20027,113.78076 c -0.14225,0.47871 -0.31308,0.74552 -0.65239,1.01895 -0.29018,0.23385 -0.5957,0.33949 -1.03301,0.3572 -0.33087,0.0134 -0.37381,0.007 -0.64184,-0.0901 -0.50954,-0.18534 -0.91422,-0.59414 -1.08287,-1.0939 -0.16385,-0.48553 -0.0785,-1.10816 0.21033,-1.53381 0.11145,-0.16425 0.40217,-0.44459 0.53451,-0.51541 l 0.0681,-3.63921 0.73832,-0.64992 0.88727,0.78714 -0.40986,0.44548 0.4054,0.44098 -0.4054,0.43502 0.3816,0.41947 -0.41461,0.43178 0.57334,0.58508 0.007,0.72638 c 0.13641,0.0828 0.48753,0.4136 0.59931,0.58235 0.0577,0.0871 0.13636,0.25779 0.17481,0.37931 0.0803,0.25382 0.11239,0.73837 0.0604,0.91323 z m -1.28154,0.29689 c -0.11256,-0.40527 -0.62849,-0.48357 -0.86193,-0.13082 -0.0745,0.1125 -0.088,0.16446 -0.0745,0.2847 0.0338,0.3 0.31326,0.5045 0.58822,0.43046 0.15271,-0.0411 0.30561,-0.18491 0.34829,-0.32754 0.0392,-0.13087 0.0392,-0.11545 -1e-4,-0.2568 z"
id="path1971-4-5"
sodipodi:nodetypes="ssscsscccccccccccssscsssscc" />
<text
xml:space="preserve"
style="font-style:normal;font-weight:normal;font-size:5.64444px;line-height:1.25;font-family:sans-serif;fill:#000000;fill-opacity:1;stroke:none;stroke-width:0.264583"
x="177.8474"
y="113.37622"
id="text852-9-5-3"><tspan
sodipodi:role="line"
id="tspan850-4-0-5"
x="177.8474"
y="113.37622"
style="font-style:normal;font-variant:normal;font-weight:normal;font-stretch:normal;font-size:5.64444px;font-family:'Liberation Mono';-inkscape-font-specification:'Liberation Mono Bold';stroke-width:0.264583">= sort key </tspan></text>
</g>
<g
id="g2161"
transform="translate(-62.264403,-59.333115)">
<g
id="g2271"
transform="translate(0,67.042823)">
<rect
style="fill:none;stroke:#000000;stroke-width:0.799999;stroke-miterlimit:4;stroke-dasharray:none;stroke-dashoffset:0;stroke-opacity:1"
id="rect833-6"
width="39.008453"
height="16.775949"
x="84.896881"
y="90.266838" />
<rect
style="fill:none;stroke:#000000;stroke-width:0.799999;stroke-miterlimit:4;stroke-dasharray:none;stroke-dashoffset:0;stroke-opacity:1"
id="rect833-3-1"
width="39.008453"
height="8.673645"
x="84.896881"
y="98.369141" />
<text
xml:space="preserve"
style="font-style:normal;font-weight:normal;font-size:5.64444px;line-height:1.25;font-family:sans-serif;fill:#000000;fill-opacity:1;stroke:none;stroke-width:0.264583"
x="89.826942"
y="96.212921"
id="text852-0"><tspan
sodipodi:role="line"
id="tspan850-6"
x="89.826942"
y="96.212921"
style="font-style:normal;font-variant:normal;font-weight:bold;font-stretch:normal;font-size:5.64444px;font-family:'Liberation Mono';-inkscape-font-specification:'Liberation Mono Bold';stroke-width:0.264583">Version 1</tspan></text>
<text
xml:space="preserve"
style="font-style:normal;font-weight:normal;font-size:5.64444px;line-height:1.25;font-family:sans-serif;fill:#000000;fill-opacity:1;stroke:none;stroke-width:0.264583"
x="89.826942"
y="104.71013"
id="text852-0-3"><tspan
sodipodi:role="line"
id="tspan850-6-2"
x="89.826942"
y="104.71013"
style="font-style:italic;font-variant:normal;font-weight:normal;font-stretch:normal;font-size:5.64444px;font-family:'Liberation Mono';-inkscape-font-specification:'Liberation Mono Bold';fill:#4d4d4d;stroke-width:0.264583">deleted</tspan></text>
</g>
</g>
<g
id="g2263"
transform="translate(0,-22.791204)">
<g
id="g2161-1"
transform="translate(-62.264403,-10.910843)">
<rect
style="fill:none;stroke:#000000;stroke-width:0.799999;stroke-miterlimit:4;stroke-dasharray:none;stroke-dashoffset:0;stroke-opacity:1"
id="rect833-6-5"
width="39.008453"
height="36.749603"
x="84.896881"
y="90.266838" />
<rect
style="fill:none;stroke:#000000;stroke-width:0.799999;stroke-miterlimit:4;stroke-dasharray:none;stroke-dashoffset:0;stroke-opacity:1"
id="rect833-3-1-5"
width="39.008453"
height="28.647301"
x="84.896881"
y="98.369141" />
<text
xml:space="preserve"
style="font-style:normal;font-weight:normal;font-size:5.64444px;line-height:1.25;font-family:sans-serif;fill:#000000;fill-opacity:1;stroke:none;stroke-width:0.264583"
x="89.826942"
y="96.212921"
id="text852-0-4"><tspan
sodipodi:role="line"
id="tspan850-6-7"
x="89.826942"
y="96.212921"
style="font-style:normal;font-variant:normal;font-weight:bold;font-stretch:normal;font-size:5.64444px;font-family:'Liberation Mono';-inkscape-font-specification:'Liberation Mono Bold';stroke-width:0.264583">Version 2</tspan></text>
<text
xml:space="preserve"
style="font-style:normal;font-weight:normal;font-size:5.64444px;line-height:1.25;font-family:sans-serif;fill:#000000;fill-opacity:1;stroke:none;stroke-width:0.264583"
x="89.826942"
y="104.71013"
id="text852-0-3-6"><tspan
sodipodi:role="line"
id="tspan850-6-2-5"
x="89.826942"
y="104.71013"
style="font-style:normal;font-variant:normal;font-weight:normal;font-stretch:normal;font-size:5.64444px;font-family:'Liberation Mono';-inkscape-font-specification:'Liberation Mono Bold';fill:#000000;stroke-width:0.264583">id</tspan></text>
</g>
<text
xml:space="preserve"
style="font-style:normal;font-weight:normal;font-size:5.64444px;line-height:1.25;font-family:sans-serif;fill:#000000;fill-opacity:1;stroke:none;stroke-width:0.264583"
x="27.56254"
y="100.34132"
id="text852-0-3-6-6"><tspan
sodipodi:role="line"
id="tspan850-6-2-5-9"
x="27.56254"
y="100.34132"
style="font-style:normal;font-variant:normal;font-weight:normal;font-stretch:normal;font-size:5.64444px;font-family:'Liberation Mono';-inkscape-font-specification:'Liberation Mono Bold';fill:#000000;stroke-width:0.264583">size</tspan></text>
<text
xml:space="preserve"
style="font-style:normal;font-weight:normal;font-size:5.64444px;line-height:1.25;font-family:sans-serif;fill:#000000;fill-opacity:1;stroke:none;stroke-width:0.264583"
x="27.56254"
y="106.90263"
id="text852-0-3-6-6-3"><tspan
sodipodi:role="line"
id="tspan850-6-2-5-9-7"
x="27.56254"
y="106.90263"
style="font-style:normal;font-variant:normal;font-weight:normal;font-stretch:normal;font-size:5.64444px;font-family:'Liberation Mono';-inkscape-font-specification:'Liberation Mono Bold';fill:#000000;stroke-width:0.264583">MIME type</tspan></text>
<text
xml:space="preserve"
style="font-style:normal;font-weight:normal;font-size:5.64444px;line-height:1.25;font-family:sans-serif;fill:#000000;fill-opacity:1;stroke:none;stroke-width:0.264583"
x="27.56254"
y="111.92816"
id="text852-0-3-6-6-3-4"><tspan
sodipodi:role="line"
id="tspan850-6-2-5-9-7-5"
x="27.56254"
y="111.92816"
style="font-style:normal;font-variant:normal;font-weight:normal;font-stretch:normal;font-size:5.64444px;font-family:'Liberation Mono';-inkscape-font-specification:'Liberation Mono Bold';fill:#000000;stroke-width:0.264583">...</tspan></text>
</g>
</g>
<g
id="g898"
transform="translate(-6.2484318,29.95006)">
<rect
style="fill:none;stroke:#000000;stroke-width:0.799999;stroke-miterlimit:4;stroke-dasharray:none;stroke-dashoffset:0;stroke-opacity:1"
id="rect833-7"
width="47.419891"
height="44.007515"
x="95.443573"
y="24.42766" />
<rect
style="fill:none;stroke:#000000;stroke-width:0.799999;stroke-miterlimit:4;stroke-dasharray:none;stroke-dashoffset:0;stroke-opacity:1"
id="rect833-3-4"
width="47.419891"
height="35.627186"
x="95.443573"
y="32.807987" />
<text
xml:space="preserve"
style="font-style:normal;font-weight:normal;font-size:5.64444px;line-height:1.25;font-family:sans-serif;fill:#000000;fill-opacity:1;stroke:none;stroke-width:0.264583"
x="107.46638"
y="29.894743"
id="text852-4"><tspan
sodipodi:role="line"
id="tspan850-3"
x="107.46638"
y="29.894743"
style="font-style:normal;font-variant:normal;font-weight:bold;font-stretch:normal;font-size:5.64444px;font-family:'Liberation Mono';-inkscape-font-specification:'Liberation Mono Bold';stroke-width:0.264583">Version</tspan></text>
<path
style="fill:#ff6600;fill-opacity:1;stroke:none;stroke-width:0.0337704;stroke-opacity:1"
d="m 102.90563,41.413279 c -0.14226,0.478709 -0.31308,0.745518 -0.65239,1.018956 -0.29019,0.233843 -0.59571,0.339489 -1.03301,0.357199 -0.33087,0.0134 -0.37381,0.0074 -0.64184,-0.09013 -0.50954,-0.185343 -0.914221,-0.594142 -1.082877,-1.093901 -0.163852,-0.485526 -0.07847,-1.108159 0.210335,-1.533803 0.111448,-0.164254 0.402172,-0.444591 0.534502,-0.515415 l 0.0681,-3.63921 0.73832,-0.64992 0.88727,0.787138 -0.40985,0.445484 0.40539,0.440982 -0.40539,0.435013 0.3816,0.41947 -0.41462,0.431781 0.57335,0.585078 0.007,0.726386 c 0.13641,0.08277 0.48753,0.413601 0.59931,0.58235 0.0577,0.0871 0.13636,0.257787 0.17481,0.379304 0.0803,0.253823 0.11239,0.738377 0.0604,0.913234 z m -1.28155,0.296888 c -0.11255,-0.405265 -0.62848,-0.483569 -0.86192,-0.130817 -0.0744,0.112498 -0.088,0.164461 -0.0745,0.2847 0.0338,0.299998 0.31326,0.504498 0.58822,0.43046 0.15271,-0.04112 0.3056,-0.184909 0.34828,-0.327542 0.0392,-0.130864 0.0392,-0.115447 -1e-4,-0.256801 z"
id="path1971-0"
sodipodi:nodetypes="ssscsscccccccccccssscsssscc" />
<text
xml:space="preserve"
style="font-style:normal;font-weight:normal;font-size:5.64444px;line-height:1.25;font-family:sans-serif;fill:#000000;fill-opacity:1;stroke:none;stroke-width:0.264583"
x="104.99195"
y="41.008743"
id="text852-9-7"><tspan
sodipodi:role="line"
id="tspan850-4-8"
x="104.99195"
y="41.008743"
style="font-style:normal;font-variant:normal;font-weight:normal;font-stretch:normal;font-size:5.64444px;font-family:'Liberation Mono';-inkscape-font-specification:'Liberation Mono Bold';stroke-width:0.264583">id </tspan></text>
<text
xml:space="preserve"
style="font-style:normal;font-weight:normal;font-size:5.64444px;line-height:1.25;font-family:sans-serif;fill:#000000;fill-opacity:1;stroke:none;stroke-width:0.264583"
x="104.99195"
y="49.168018"
id="text852-9-7-6"><tspan
sodipodi:role="line"
id="tspan850-4-8-8"
x="104.99195"
y="49.168018"
style="font-style:normal;font-variant:normal;font-weight:normal;font-stretch:normal;font-size:5.64444px;font-family:'Liberation Mono';-inkscape-font-specification:'Liberation Mono Bold';stroke-width:0.264583">h(block 1)</tspan></text>
<text
xml:space="preserve"
style="font-style:normal;font-weight:normal;font-size:5.64444px;line-height:1.25;font-family:sans-serif;fill:#000000;fill-opacity:1;stroke:none;stroke-width:0.264583"
x="104.99195"
y="56.583336"
id="text852-9-7-6-8"><tspan
sodipodi:role="line"
id="tspan850-4-8-8-4"
x="104.99195"
y="56.583336"
style="font-style:normal;font-variant:normal;font-weight:normal;font-stretch:normal;font-size:5.64444px;font-family:'Liberation Mono';-inkscape-font-specification:'Liberation Mono Bold';stroke-width:0.264583">h(block 2)</tspan></text>
<text
xml:space="preserve"
style="font-style:normal;font-weight:normal;font-size:5.64444px;line-height:1.25;font-family:sans-serif;fill:#000000;fill-opacity:1;stroke:none;stroke-width:0.264583"
x="104.99195"
y="64.265732"
id="text852-9-7-6-3"><tspan
sodipodi:role="line"
id="tspan850-4-8-8-1"
x="104.99195"
y="64.265732"
style="font-style:normal;font-variant:normal;font-weight:normal;font-stretch:normal;font-size:5.64444px;font-family:'Liberation Mono';-inkscape-font-specification:'Liberation Mono Bold';stroke-width:0.264583">...</tspan></text>
</g>
<g
id="g898-3"
transform="translate(75.777779,38.888663)">
<rect
style="fill:none;stroke:#000000;stroke-width:0.799999;stroke-miterlimit:4;stroke-dasharray:none;stroke-dashoffset:0;stroke-opacity:1"
id="rect833-7-6"
width="47.419891"
height="29.989157"
x="95.443573"
y="24.42766" />
<rect
style="fill:none;stroke:#000000;stroke-width:0.799999;stroke-miterlimit:4;stroke-dasharray:none;stroke-dashoffset:0;stroke-opacity:1"
id="rect833-3-4-7"
width="47.419891"
height="21.608831"
x="95.443573"
y="32.807987" />
<text
xml:space="preserve"
style="font-style:normal;font-weight:normal;font-size:5.64444px;line-height:1.25;font-family:sans-serif;fill:#000000;fill-opacity:1;stroke:none;stroke-width:0.264583"
x="102.11134"
y="29.894743"
id="text852-4-5"><tspan
sodipodi:role="line"
id="tspan850-3-3"
x="102.11134"
y="29.894743"
style="font-style:normal;font-variant:normal;font-weight:bold;font-stretch:normal;font-size:5.64444px;font-family:'Liberation Mono';-inkscape-font-specification:'Liberation Mono Bold';stroke-width:0.264583">Data block</tspan></text>
<path
style="fill:#ff6600;fill-opacity:1;stroke:none;stroke-width:0.0337704;stroke-opacity:1"
d="m 102.90563,41.413279 c -0.14226,0.478709 -0.31308,0.745518 -0.65239,1.018956 -0.29019,0.233843 -0.59571,0.339489 -1.03301,0.357199 -0.33087,0.0134 -0.37381,0.0074 -0.64184,-0.09013 -0.50954,-0.185343 -0.914221,-0.594142 -1.082877,-1.093901 -0.163852,-0.485526 -0.07847,-1.108159 0.210335,-1.533803 0.111448,-0.164254 0.402172,-0.444591 0.534502,-0.515415 l 0.0681,-3.63921 0.73832,-0.64992 0.88727,0.787138 -0.40985,0.445484 0.40539,0.440982 -0.40539,0.435013 0.3816,0.41947 -0.41462,0.431781 0.57335,0.585078 0.007,0.726386 c 0.13641,0.08277 0.48753,0.413601 0.59931,0.58235 0.0577,0.0871 0.13636,0.257787 0.17481,0.379304 0.0803,0.253823 0.11239,0.738377 0.0604,0.913234 z m -1.28155,0.296888 c -0.11255,-0.405265 -0.62848,-0.483569 -0.86192,-0.130817 -0.0744,0.112498 -0.088,0.164461 -0.0745,0.2847 0.0338,0.299998 0.31326,0.504498 0.58822,0.43046 0.15271,-0.04112 0.3056,-0.184909 0.34828,-0.327542 0.0392,-0.130864 0.0392,-0.115447 -1e-4,-0.256801 z"
id="path1971-0-5"
sodipodi:nodetypes="ssscsscccccccccccssscsssscc" />
<text
xml:space="preserve"
style="font-style:normal;font-weight:normal;font-size:5.64444px;line-height:1.25;font-family:sans-serif;fill:#000000;fill-opacity:1;stroke:none;stroke-width:0.264583"
x="104.99195"
y="41.008743"
id="text852-9-7-62"><tspan
sodipodi:role="line"
id="tspan850-4-8-9"
x="104.99195"
y="41.008743"
style="font-style:normal;font-variant:normal;font-weight:normal;font-stretch:normal;font-size:5.64444px;font-family:'Liberation Mono';-inkscape-font-specification:'Liberation Mono Bold';stroke-width:0.264583">hash </tspan></text>
<text
xml:space="preserve"
style="font-style:normal;font-weight:normal;font-size:5.64444px;line-height:1.25;font-family:sans-serif;fill:#000000;fill-opacity:1;stroke:none;stroke-width:0.264583"
x="104.99195"
y="49.168018"
id="text852-9-7-6-1"><tspan
sodipodi:role="line"
id="tspan850-4-8-8-2"
x="104.99195"
y="49.168018"
style="font-style:normal;font-variant:normal;font-weight:normal;font-stretch:normal;font-size:5.64444px;font-family:'Liberation Mono';-inkscape-font-specification:'Liberation Mono Bold';stroke-width:0.264583">data</tspan></text>
</g>
<path
style="fill:none;stroke:#000000;stroke-width:1;stroke-linecap:butt;stroke-linejoin:miter;stroke-miterlimit:4;stroke-dasharray:none;stroke-opacity:1;marker-end:url(#Arrow1Mend)"
d="M 42.105292,69.455903 89.563703,69.317144"
id="path954"
sodipodi:nodetypes="cc" />
<path
style="fill:none;stroke:#000000;stroke-width:1;stroke-linecap:butt;stroke-linejoin:miter;stroke-miterlimit:4;stroke-dasharray:none;stroke-opacity:1;marker-end:url(#marker1262)"
d="m 134.32612,77.363197 38.12618,0.260865"
id="path1258"
sodipodi:nodetypes="cc" />
<text
xml:space="preserve"
style="font-style:normal;font-weight:normal;font-size:5.64444px;line-height:1.25;font-family:sans-serif;fill:#000000;fill-opacity:1;stroke:none;stroke-width:0.264583"
x="8.6727352"
y="16.687063"
id="text852-3"><tspan
sodipodi:role="line"
id="tspan850-67"
x="8.6727352"
y="16.687063"
style="font-style:normal;font-variant:normal;font-weight:bold;font-stretch:normal;font-size:5.64444px;font-family:'Liberation Mono';-inkscape-font-specification:'Liberation Mono Bold';stroke-width:0.264583">Objects table </tspan></text>
<text
xml:space="preserve"
style="font-style:normal;font-weight:normal;font-size:5.64444px;line-height:1.25;font-family:sans-serif;fill:#000000;fill-opacity:1;stroke:none;stroke-width:0.264583"
x="89.190445"
y="16.687063"
id="text852-3-5"><tspan
sodipodi:role="line"
id="tspan850-67-3"
x="89.190445"
y="16.687063"
style="font-style:normal;font-variant:normal;font-weight:bold;font-stretch:normal;font-size:5.64444px;font-family:'Liberation Mono';-inkscape-font-specification:'Liberation Mono Bold';stroke-width:0.264583">Versions table </tspan></text>
<text
xml:space="preserve"
style="font-style:normal;font-weight:normal;font-size:5.64444px;line-height:1.25;font-family:sans-serif;fill:#000000;fill-opacity:1;stroke:none;stroke-width:0.264583"
x="174.55702"
y="16.687063"
id="text852-3-56"><tspan
sodipodi:role="line"
id="tspan850-67-2"
x="174.55702"
y="16.687063"
style="font-style:normal;font-variant:normal;font-weight:bold;font-stretch:normal;font-size:5.64444px;font-family:'Liberation Mono';-inkscape-font-specification:'Liberation Mono Bold';stroke-width:0.264583">Blocks table</tspan></text>
</g>
</svg>

After

Width:  |  Height:  |  Size: 30 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 52 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 37 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 97 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 199 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 145 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 13 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 174 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 38 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 14 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 87 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 81 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 124 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 84 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 81 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 81 KiB

File diff suppressed because it is too large Load diff

After

Width:  |  Height:  |  Size: 315 KiB

File diff suppressed because one or more lines are too long

After

Width:  |  Height:  |  Size: 286 KiB

BIN
doc/talks/2022-06-23-stack/talk.pdf (Stored with Git LFS) Normal file

Binary file not shown.

View file

@ -0,0 +1,480 @@
%\nonstopmode
\documentclass[aspectratio=169]{beamer}
\usepackage[utf8]{inputenc}
% \usepackage[frenchb]{babel}
\usepackage{amsmath}
\usepackage{mathtools}
\usepackage{breqn}
\usepackage{multirow}
\usetheme{boxes}
\usepackage{graphicx}
\usepackage{adjustbox}
%\useoutertheme[footline=authortitle,subsection=false]{miniframes}
%\useoutertheme[footline=authorinstitute,subsection=false]{miniframes}
\useoutertheme{infolines}
\setbeamertemplate{headline}{}
\beamertemplatenavigationsymbolsempty
\definecolor{TitleOrange}{RGB}{255,137,0}
\setbeamercolor{title}{fg=TitleOrange}
\setbeamercolor{frametitle}{fg=TitleOrange}
\definecolor{ListOrange}{RGB}{255,145,5}
\setbeamertemplate{itemize item}{\color{ListOrange}$\blacktriangleright$}
\definecolor{verygrey}{RGB}{70,70,70}
\setbeamercolor{normal text}{fg=verygrey}
\usepackage{tabu}
\usepackage{multicol}
\usepackage{vwcol}
\usepackage{stmaryrd}
\usepackage{graphicx}
\usepackage[normalem]{ulem}
\title{Introducing Garage}
\subtitle{a new storage platform for self-hosted geo-distributed clusters}
\author{Deuxfleurs Association}
\date{IMT Atlantique, 2022-06-23}
\begin{document}
\begin{frame}
\centering
\includegraphics[width=.3\linewidth]{../../sticker/Garage.pdf}
\vspace{1em}
{\large\bf Deuxfleurs Association}
\vspace{1em}
\url{https://garagehq.deuxfleurs.fr/}
Matrix channel: \texttt{\#garage:deuxfleurs.fr}
\end{frame}
\begin{frame}
\frametitle{Who we are}
\begin{columns}[t]
\begin{column}{.2\textwidth}
\centering
\adjincludegraphics[width=.4\linewidth, valign=t]{assets/alex.jpg}
\end{column}
\begin{column}{.6\textwidth}
\textbf{Alex Auvolat}\\
PhD at Inria, team WIDE; co-founder of Deuxfleurs
\end{column}
\begin{column}{.2\textwidth}
~
\end{column}
\end{columns}
\vspace{1em}
\begin{columns}[t]
\begin{column}{.2\textwidth}
~
\end{column}
\begin{column}{.6\textwidth}
\textbf{Quentin Dufour}\\
PhD at Inria, team WIDE; co-founder of Deuxfleurs
\end{column}
\begin{column}{.2\textwidth}
\centering
\adjincludegraphics[width=.5\linewidth, valign=t]{assets/quentin.jpg}
\end{column}
\end{columns}
\vspace{2em}
\begin{columns}[t]
\begin{column}{.2\textwidth}
\centering
\adjincludegraphics[width=.5\linewidth, valign=t]{assets/deuxfleurs.pdf}
\end{column}
\begin{column}{.6\textwidth}
\textbf{Deuxfleurs}\\
A non-profit self-hosting collective,\\
member of the CHATONS network
\end{column}
\begin{column}{.2\textwidth}
\centering
\adjincludegraphics[width=.7\linewidth, valign=t]{assets/logo_chatons.png}
\end{column}
\end{columns}
\end{frame}
\begin{frame}
\frametitle{Our objective at Deuxfleurs}
\begin{center}
\textbf{Promote self-hosting and small-scale hosting\\
as an alternative to large cloud providers}
\end{center}
\vspace{2em}
\visible<2->{
Why is it hard?
}
\visible<3->{
\vspace{2em}
\begin{center}
\textbf{\underline{Resilience}}\\
{\footnotesize (we want good uptime/availability with low supervision)}
\end{center}
}
\end{frame}
\begin{frame}
\frametitle{How to make a \underline{stable} system}
Enterprise-grade systems typically employ:
\vspace{1em}
\begin{itemize}
\item RAID
\item Redundant power grid + UPS
\item Redundant Internet connections
\item Low-latency links
\item ...
\end{itemize}
\vspace{1em}
$\to$ it's costly and only worth it at DC scale
\end{frame}
\begin{frame}
\frametitle{How to make a \underline{resilient} system}
\only<1,4-5>{
Instead, we use:
\vspace{1em}
\begin{itemize}
\item \textcolor<2->{gray}{Commodity hardware (e.g. old desktop PCs)}
\vspace{.5em}
\item<4-> \textcolor<5->{gray}{Commodity Internet (e.g. FTTB, FTTH) and power grid}
\vspace{.5em}
\item<5-> \textcolor<6->{gray}{\textbf{Geographical redundancy} (multi-site replication)}
\end{itemize}
}
\only<2>{
\begin{center}
\includegraphics[width=.8\linewidth]{assets/atuin.jpg}
\end{center}
}
\only<3>{
\begin{center}
\includegraphics[width=.8\linewidth]{assets/neptune.jpg}
\end{center}
}
\only<6>{
\begin{center}
\includegraphics[width=.5\linewidth]{assets/inframap.jpg}
\end{center}
}
\end{frame}
\begin{frame}
\frametitle{How to make this happen}
\begin{center}
\only<1>{\includegraphics[width=.8\linewidth]{assets/slide1.png}}%
\only<2>{\includegraphics[width=.8\linewidth]{assets/slide2.png}}%
\only<3>{\includegraphics[width=.8\linewidth]{assets/slide3.png}}%
\end{center}
\end{frame}
\begin{frame}
\frametitle{Distributed file systems are slow}
File systems are complex, for example:
\vspace{1em}
\begin{itemize}
\item Concurrent modification by several processes
\vspace{1em}
\item Folder hierarchies
\vspace{1em}
\item Other requirements of the POSIX spec
\end{itemize}
\vspace{1em}
Coordination in a distributed system is costly
\vspace{1em}
Costs explode with commodity hardware / Internet connections\\
{\small (we experienced this!)}
\end{frame}
\begin{frame}
\frametitle{A simpler solution: object storage}
Only two operations:
\vspace{1em}
\begin{itemize}
\item Put an object at a key
\vspace{1em}
\item Retrieve an object from its key
\end{itemize}
\vspace{1em}
{\footnotesize (and a few others)}
\vspace{1em}
Sufficient for many applications!
\end{frame}
\begin{frame}
\frametitle{A simpler solution: object storage}
\begin{center}
\includegraphics[width=.2\linewidth]{../2020-12-02_wide-team/img/Amazon-S3.jpg}
\hspace{5em}
\includegraphics[width=.2\linewidth]{assets/minio.png}
\end{center}
\vspace{1em}
S3: a de-facto standard, many compatible applications
\vspace{1em}
MinIO is self-hostable but not suited for geo-distributed deployments
\end{frame}
\begin{frame}
\frametitle{But what is Garage, exactly?}
\textbf{Garage is a self-hosted drop-in replacement for the Amazon S3 object store}\\
\vspace{.5em}
that implements resilience through geographical redundancy on commodity hardware
\begin{center}
\includegraphics[width=.8\linewidth]{assets/garageuses.png}
\end{center}
\end{frame}
\begin{frame}
\frametitle{Overview}
\begin{center}
\only<1>{\includegraphics[width=.45\linewidth]{assets/garage2a.drawio.pdf}}%
\only<2>{\includegraphics[width=.45\linewidth]{assets/garage2b.drawio.pdf}}%
\end{center}
\end{frame}
\begin{frame}
\frametitle{Garage is \emph{location-aware}}
\begin{center}
\includegraphics[width=\linewidth]{assets/location-aware.png}
\end{center}
\vspace{2em}
Garage replicates data on different zones when possible
\end{frame}
\begin{frame}
\frametitle{Garage is \emph{location-aware}}
\begin{center}
\includegraphics[width=.8\linewidth]{assets/map.png}
\end{center}
\end{frame}
\begin{frame}
\frametitle{How to spread files over different cluster nodes?}
\textbf{Consistent hashing (DynamoDB):}
\vspace{1em}
\begin{center}
\only<1>{\includegraphics[width=.45\columnwidth]{assets/consistent_hashing_1.pdf}}%
\only<2>{\includegraphics[width=.45\columnwidth]{assets/consistent_hashing_2.pdf}}%
\only<3>{\includegraphics[width=.45\columnwidth]{assets/consistent_hashing_3.pdf}}%
\only<4>{\includegraphics[width=.45\columnwidth]{assets/consistent_hashing_4.pdf}}%
\end{center}
\end{frame}
\begin{frame}
\frametitle{How to spread files over different cluster nodes?}
\textbf{Issues with consistent hashing:}
\vspace{1em}
\begin{itemize}
\item Doesn't dispatch data based on geographical location of nodes
\vspace{1em}
\item<2-> Geographically aware adaptation, try 1:\\
data quantities not well balanced between nodes
\vspace{1em}
\item<3-> Geographically aware adaptation, try 2:\\
too many reshuffles when adding/removing nodes
\end{itemize}
\end{frame}
\begin{frame}
\frametitle{How to spread files over different cluster nodes?}
\textbf{Garage's method: build an index table}
\vspace{1em}
Realization: we can actually precompute an optimal solution
\vspace{1em}
\visible<2->{
\begin{center}
\begin{tabular}{|l|l|l|l|}
\hline
\textbf{Partition} & \textbf{Node 1} & \textbf{Node 2} & \textbf{Node 3} \\
\hline
\hline
Partition 0 & Io (jupiter) & Drosera (atuin) & Courgette (neptune) \\
\hline
Partition 1 & Datura (atuin) & Courgette (neptune) & Io (jupiter) \\
\hline
Partition 2 & Io(jupiter) & Celeri (neptune) & Drosera (atuin) \\
\hline
\hspace{1em}$\vdots$ & \hspace{1em}$\vdots$ & \hspace{1em}$\vdots$ & \hspace{1em}$\vdots$ \\
\hline
Partition 255 & Concombre (neptune) & Io (jupiter) & Drosera (atuin) \\
\hline
\end{tabular}
\end{center}
}
\vspace{1em}
\visible<3->{
The index table is built centrally using an optimal* algorithm,\\
then propagated to all nodes\\
\hfill\footnotesize *not yet optimal but will be soon
}
\end{frame}
\begin{frame}
\frametitle{Garage's internal data structures}
\centering
\includegraphics[width=.75\columnwidth]{assets/garage_tables.pdf}
\end{frame}
%\begin{frame}
% \frametitle{Garage's architecture}
% \begin{center}
% \includegraphics[width=.35\linewidth]{assets/garage.drawio.pdf}
% \end{center}
%\end{frame}
\begin{frame}
\frametitle{Garage is \emph{coordination-free}:}
\begin{itemize}
\item No Raft or Paxos
\vspace{1em}
\item Internal data types are CRDTs
\vspace{1em}
\item All nodes are equivalent (no master/leader/index node)
\end{itemize}
\vspace{2em}
$\to$ less sensitive to higher latencies between nodes
\end{frame}
\begin{frame}
\frametitle{Consistency model}
\begin{itemize}
\item Not ACID (not required by S3 spec) / not linearizable
\vspace{1em}
\item \textbf{Read-after-write consistency}\\
{\footnotesize (stronger than eventual consistency)}
\end{itemize}
\end{frame}
\begin{frame}
\frametitle{Impact on performances}
\begin{center}
\includegraphics[width=.8\linewidth]{assets/endpoint-latency-dc.png}
\end{center}
\end{frame}
\begin{frame}
\frametitle{An ever-increasing compatibility list}
\begin{center}
\includegraphics[width=.7\linewidth]{assets/compatibility.png}
\end{center}
\end{frame}
\begin{frame}
\frametitle{Further plans for Garage}
\begin{center}
\only<1>{\includegraphics[width=.8\linewidth]{assets/slideB1.png}}%
\only<2>{\includegraphics[width=.8\linewidth]{assets/slideB2.png}}%
\only<3>{\includegraphics[width=.8\linewidth]{assets/slideB3.png}}%
\end{center}
\end{frame}
\begin{frame}
\frametitle{K2V Design}
\begin{itemize}
\item A new, custom, minimal API
\vspace{1em}
\item<2-> Exposes the partitoning mechanism of Garage\\
K2V = partition key / sort key / value (like Dynamo)
\vspace{1em}
\item<3-> Coordination-free, CRDT-friendly (inspired by Riak)\\
\vspace{1em}
\item<4-> Cryptography-friendly: values are binary blobs
\end{itemize}
\end{frame}
\begin{frame}
\frametitle{Application: an e-mail storage server}
\begin{center}
\only<1>{\includegraphics[width=.9\linewidth]{assets/aerogramme.png}}%
\end{center}
\end{frame}
\begin{frame}
\frametitle{Aerogramme data model}
\begin{center}
\only<1>{\includegraphics[width=.4\linewidth]{assets/aerogramme_datatype.drawio.pdf}}%
\only<2->{\includegraphics[width=.9\linewidth]{assets/aerogramme_keys.drawio.pdf}\vspace{1em}}%
\end{center}
\visible<3->{Aerogramme encrypts all stored values for privacy\\
(Garage server administrators can't read your mail)}
\end{frame}
\begin{frame}
\frametitle{Different deployment scenarios}
\begin{center}
\only<1>{\includegraphics[width=.9\linewidth]{assets/aerogramme_components1.drawio.pdf}}%
\only<2>{\includegraphics[width=.9\linewidth]{assets/aerogramme_components2.drawio.pdf}}%
\end{center}
\end{frame}
\begin{frame}
\frametitle{A new model for building resilient software}
\begin{itemize}
\item Design a data model suited to K2V\\
{\footnotesize (see Cassandra docs on porting SQL data models to Cassandra)}
\vspace{1em}
\begin{itemize}
\item Use CRDTs or other eventually consistent data types (see e.g. Bayou)
\vspace{1em}
\item Store opaque binary blobs to provide End-to-End Encryption\\
\end{itemize}
\vspace{1em}
\item Store big blobs (files) in S3
\vspace{1em}
\item Let Garage manage sharding, replication, failover, etc.
\end{itemize}
\end{frame}
\begin{frame}
\frametitle{Research perspectives}
\begin{itemize}
\item Write about Garage's global architecture \emph{(paper in progress)}
\vspace{1em}
\item Measure and improve Garage's performances
\vspace{1em}
\item Discuss the optimal layout algorithm, provide proofs
\vspace{1em}
\item Write about our proposed architecture for (E2EE) apps over K2V+S3
\end{itemize}
\end{frame}
\begin{frame}
\frametitle{Where to find us}
\begin{center}
\includegraphics[width=.25\linewidth]{../../logo/garage_hires.png}\\
\vspace{-1em}
\url{https://garagehq.deuxfleurs.fr/}\\
\url{mailto:garagehq@deuxfleurs.fr}\\
\texttt{\#garage:deuxfleurs.fr} on Matrix
\vspace{1.5em}
\includegraphics[width=.06\linewidth]{assets/rust_logo.png}
\includegraphics[width=.13\linewidth]{assets/AGPLv3_Logo.png}
\end{center}
\end{frame}
\end{document}
%% vim: set ts=4 sw=4 tw=0 noet spelllang=en :

158
k2v_test.py Executable file
View file

@ -0,0 +1,158 @@
#!/usr/bin/env python
import os
import requests
from datetime import datetime
# let's talk to our AWS Elasticsearch cluster
#from requests_aws4auth import AWS4Auth
#auth = AWS4Auth('GK31c2f218a2e44f485b94239e',
# 'b892c0665f0ada8a4755dae98baa3b133590e11dae3bcc1f9d769d67f16c3835',
# 'us-east-1',
# 's3')
from aws_requests_auth.aws_auth import AWSRequestsAuth
auth = AWSRequestsAuth(aws_access_key='GK31c2f218a2e44f485b94239e',
aws_secret_access_key='b892c0665f0ada8a4755dae98baa3b133590e11dae3bcc1f9d769d67f16c3835',
aws_host='localhost:3812',
aws_region='us-east-1',
aws_service='k2v')
print("-- ReadIndex")
response = requests.get('http://localhost:3812/alex',
auth=auth)
print(response.headers)
print(response.text)
sort_keys = ["a", "b", "c", "d"]
for sk in sort_keys:
print("-- (%s) Put initial (no CT)"%sk)
response = requests.put('http://localhost:3812/alex/root?sort_key=%s'%sk,
auth=auth,
data='{}: Hello, world!'.format(datetime.timestamp(datetime.now())))
print(response.headers)
print(response.text)
print("-- Get")
response = requests.get('http://localhost:3812/alex/root?sort_key=%s'%sk,
auth=auth)
print(response.headers)
print(response.text)
ct = response.headers["x-garage-causality-token"]
print("-- ReadIndex")
response = requests.get('http://localhost:3812/alex',
auth=auth)
print(response.headers)
print(response.text)
print("-- Put with CT")
response = requests.put('http://localhost:3812/alex/root?sort_key=%s'%sk,
auth=auth,
headers={'x-garage-causality-token': ct},
data='{}: Good bye, world!'.format(datetime.timestamp(datetime.now())))
print(response.headers)
print(response.text)
print("-- Get")
response = requests.get('http://localhost:3812/alex/root?sort_key=%s'%sk,
auth=auth)
print(response.headers)
print(response.text)
print("-- Put again with same CT (concurrent)")
response = requests.put('http://localhost:3812/alex/root?sort_key=%s'%sk,
auth=auth,
headers={'x-garage-causality-token': ct},
data='{}: Concurrent value, oops'.format(datetime.timestamp(datetime.now())))
print(response.headers)
print(response.text)
for sk in sort_keys:
print("-- (%s) Get"%sk)
response = requests.get('http://localhost:3812/alex/root?sort_key=%s'%sk,
auth=auth)
print(response.headers)
print(response.text)
ct = response.headers["x-garage-causality-token"]
print("-- Delete")
response = requests.delete('http://localhost:3812/alex/root?sort_key=%s'%sk,
headers={'x-garage-causality-token': ct},
auth=auth)
print(response.headers)
print(response.text)
print("-- ReadIndex")
response = requests.get('http://localhost:3812/alex',
auth=auth)
print(response.headers)
print(response.text)
print("-- InsertBatch")
response = requests.post('http://localhost:3812/alex',
auth=auth,
data='''
[
{"pk": "root", "sk": "a", "ct": null, "v": "aW5pdGlhbCB0ZXN0Cg=="},
{"pk": "root", "sk": "b", "ct": null, "v": "aW5pdGlhbCB0ZXN1Cg=="},
{"pk": "root", "sk": "c", "ct": null, "v": "aW5pdGlhbCB0ZXN2Cg=="}
]
''')
print(response.headers)
print(response.text)
print("-- ReadIndex")
response = requests.get('http://localhost:3812/alex',
auth=auth)
print(response.headers)
print(response.text)
for sk in sort_keys:
print("-- (%s) Get"%sk)
response = requests.get('http://localhost:3812/alex/root?sort_key=%s'%sk,
auth=auth)
print(response.headers)
print(response.text)
ct = response.headers["x-garage-causality-token"]
print("-- ReadBatch")
response = requests.post('http://localhost:3812/alex?search',
auth=auth,
data='''
[
{"partitionKey": "root"},
{"partitionKey": "root", "tombstones": true},
{"partitionKey": "root", "tombstones": true, "limit": 2},
{"partitionKey": "root", "start": "c", "singleItem": true},
{"partitionKey": "root", "start": "b", "end": "d", "tombstones": true}
]
''')
print(response.headers)
print(response.text)
print("-- DeleteBatch")
response = requests.post('http://localhost:3812/alex?delete',
auth=auth,
data='''
[
{"partitionKey": "root", "start": "b", "end": "c"}
]
''')
print(response.headers)
print(response.text)
print("-- ReadBatch")
response = requests.post('http://localhost:3812/alex?search',
auth=auth,
data='''
[
{"partitionKey": "root"}
]
''')
print(response.headers)
print(response.text)

View file

@ -4,18 +4,16 @@ rec {
*/
pkgsSrc = fetchTarball {
# As of 2021-10-04
url ="https://github.com/NixOS/nixpkgs/archive/b27d18a412b071f5d7991d1648cfe78ee7afe68a.tar.gz";
url = "https://github.com/NixOS/nixpkgs/archive/b27d18a412b071f5d7991d1648cfe78ee7afe68a.tar.gz";
sha256 = "1xy9zpypqfxs5gcq5dcla4bfkhxmh5nzn9dyqkr03lqycm9wg5cr";
};
cargo2nixSrc = fetchGit {
# As of 2022-03-17
url = "https://github.com/superboum/cargo2nix";
ref = "main";
rev = "bcbf3ba99e9e01a61eb83a24624419c2dd9dec64";
# As of 2022-08-29, stacking two patches: superboum@dedup_propagate and Alexis211@fix_fetchcrategit
url = "https://github.com/Alexis211/cargo2nix";
ref = "fix_fetchcrategit";
rev = "4b31c0cc05b6394916d46e9289f51263d81973b9";
};
/*
* Shared objects
*/

244
nix/compile.nix Normal file
View file

@ -0,0 +1,244 @@
{
system ? builtins.currentSystem,
target ? null,
compiler ? "rustc",
release ? false,
git_version ? null,
}:
with import ./common.nix;
let
log = v: builtins.trace v v;
pkgs = import pkgsSrc {
inherit system;
${ if target == null then null else "crossSystem" } = { config = target; };
overlays = [ cargo2nixOverlay ];
};
/*
Rust and Nix triples are not the same. Cargo2nix has a dedicated library
to convert Nix triples to Rust ones. We need this conversion as we want to
set later options linked to our (rust) target in a generic way. Not only
the triple terminology is different, but also the "roles" are named differently.
Nix uses a build/host/target terminology where Nix's "host" maps to Cargo's "target".
*/
rustTarget = log (pkgs.rustBuilder.rustLib.rustTriple pkgs.stdenv.hostPlatform);
/*
Cargo2nix is built for rustOverlay which installs Rust from Mozilla releases.
We want our own Rust to avoid incompatibilities, like we had with musl 1.2.0.
rustc was built with musl < 1.2.0 and nix shipped musl >= 1.2.0 which lead to compilation breakage.
So we want a Rust release that is bound to our Nix repository to avoid these problems.
See here for more info: https://musl.libc.org/time64.html
Because Cargo2nix does not support the Rust environment shipped by NixOS,
we emulate the structure of the Rust object created by rustOverlay.
In practise, rustOverlay ships rustc+cargo in a single derivation while
NixOS ships them in separate ones. We reunite them with symlinkJoin.
*/
rustChannel = {
rustc = pkgs.symlinkJoin {
name = "rust-channel";
paths = [
pkgs.rustPlatform.rust.cargo
pkgs.rustPlatform.rust.rustc
];
};
clippy = pkgs.symlinkJoin {
name = "clippy-channel";
paths = [
pkgs.rustPlatform.rust.cargo
pkgs.rustPlatform.rust.rustc
pkgs.clippy
];
};
}.${compiler};
clippyBuilder = pkgs.writeScriptBin "clippy" ''
#!${pkgs.stdenv.shell}
. ${cargo2nixSrc + "/overlay/utils.sh"}
isBuildScript=
args=("$@")
for i in "''${!args[@]}"; do
if [ "xmetadata=" = "x''${args[$i]::9}" ]; then
args[$i]=metadata=$NIX_RUST_METADATA
elif [ "x--crate-name" = "x''${args[$i]}" ] && [ "xbuild_script_" = "x''${args[$i+1]::13}" ]; then
isBuildScript=1
fi
done
if [ "$isBuildScript" ]; then
args+=($NIX_RUST_BUILD_LINK_FLAGS)
else
args+=($NIX_RUST_LINK_FLAGS)
fi
touch invoke.log
echo "''${args[@]}" >>invoke.log
exec ${rustChannel}/bin/clippy-driver --deny warnings "''${args[@]}"
'';
buildEnv = (drv: {
rustc = drv.setBuildEnv;
clippy = ''
${drv.setBuildEnv or "" }
echo
echo --- BUILDING WITH CLIPPY ---
echo
export RUSTC=${clippyBuilder}/bin/clippy
'';
}.${compiler});
/*
Cargo2nix provides many overrides by default, you can take inspiration from them:
https://github.com/cargo2nix/cargo2nix/blob/master/overlay/overrides.nix
You can have a complete list of the available options by looking at the overriden object, mkcrate:
https://github.com/cargo2nix/cargo2nix/blob/master/overlay/mkcrate.nix
*/
overrides = pkgs.rustBuilder.overrides.all ++ [
/*
[1] We add some logic to compile our crates with clippy, it provides us many additional lints
[2] We need to alter Nix hardening to make static binaries: PIE,
Position Independent Executables seems to be supported only on amd64. Having
this flag set either 1. make our executables crash or 2. compile as dynamic on some platforms.
Here, we deactivate it. Later (find `codegenOpts`), we reactivate it for supported targets
(only amd64 curently) through the `-static-pie` flag.
PIE is a feature used by ASLR, which helps mitigate security issues.
Learn more about Nix Hardening at: https://github.com/NixOS/nixpkgs/blob/master/pkgs/build-support/cc-wrapper/add-hardening.sh
[3] We want to inject the git version while keeping the build deterministic.
As we do not want to consider the .git folder as part of the input source,
we ask the user (the CI often) to pass the value to Nix.
[4] We ship some parts of the code disabled by default by putting them behind a flag.
It speeds up the compilation (when the feature is not required) and released crates have less dependency by default (less attack surface, disk space, etc.).
But we want to ship these additional features when we release Garage.
In the end, we chose to exclude all features from debug builds while putting (all of) them in the release builds.
[5] We don't want libsodium-sys and zstd-sys to try to use pkgconfig to build against a system library.
However the features to do so get activated for some reason (due to a bug in cargo2nix?),
so disable them manually here.
*/
(pkgs.rustBuilder.rustLib.makeOverride {
name = "garage";
overrideAttrs = drv:
(if git_version != null then {
/* [3] */ preConfigure = ''
${drv.preConfigure or ""}
export GIT_VERSION="${git_version}"
'';
} else {})
//
{
/* [1] */ setBuildEnv = (buildEnv drv);
/* [2] */ hardeningDisable = [ "pie" ];
};
overrideArgs = old: {
/* [4] */ features = [ "bundled-libs" "sled" "metrics" "k2v" ]
++ (if release then [ "kubernetes-discovery" "telemetry-otlp" "lmdb" "sqlite" ] else []);
};
})
(pkgs.rustBuilder.rustLib.makeOverride {
name = "garage_rpc";
overrideAttrs = drv: { /* [1] */ setBuildEnv = (buildEnv drv); };
})
(pkgs.rustBuilder.rustLib.makeOverride {
name = "garage_db";
overrideAttrs = drv: { /* [1] */ setBuildEnv = (buildEnv drv); };
})
(pkgs.rustBuilder.rustLib.makeOverride {
name = "garage_util";
overrideAttrs = drv: { /* [1] */ setBuildEnv = (buildEnv drv); };
})
(pkgs.rustBuilder.rustLib.makeOverride {
name = "garage_table";
overrideAttrs = drv: { /* [1] */ setBuildEnv = (buildEnv drv); };
})
(pkgs.rustBuilder.rustLib.makeOverride {
name = "garage_block";
overrideAttrs = drv: { /* [1] */ setBuildEnv = (buildEnv drv); };
})
(pkgs.rustBuilder.rustLib.makeOverride {
name = "garage_model";
overrideAttrs = drv: { /* [1] */ setBuildEnv = (buildEnv drv); };
})
(pkgs.rustBuilder.rustLib.makeOverride {
name = "garage_api";
overrideAttrs = drv: { /* [1] */ setBuildEnv = (buildEnv drv); };
})
(pkgs.rustBuilder.rustLib.makeOverride {
name = "garage_web";
overrideAttrs = drv: { /* [1] */ setBuildEnv = (buildEnv drv); };
})
(pkgs.rustBuilder.rustLib.makeOverride {
name = "k2v-client";
overrideAttrs = drv: { /* [1] */ setBuildEnv = (buildEnv drv); };
})
(pkgs.rustBuilder.rustLib.makeOverride {
name = "libsodium-sys";
overrideArgs = old: {
features = [ ]; /* [5] */
};
})
(pkgs.rustBuilder.rustLib.makeOverride {
name = "zstd-sys";
overrideArgs = old: {
features = [ ]; /* [5] */
};
})
];
packageFun = import ../Cargo.nix;
/*
We compile fully static binaries with musl to simplify deployment on most systems.
When possible, we reactivate PIE hardening (see above).
Also, if you set the RUSTFLAGS environment variable, the following parameters will
be ignored.
For more information on static builds, please refer to Rust's RFC 1721.
https://rust-lang.github.io/rfcs/1721-crt-static.html#specifying-dynamicstatic-c-runtime-linkage
*/
codegenOpts = {
"armv6l-unknown-linux-musleabihf" = [ "target-feature=+crt-static" "link-arg=-static" ]; /* compile as dynamic with static-pie */
"aarch64-unknown-linux-musl" = [ "target-feature=+crt-static" "link-arg=-static" ]; /* segfault with static-pie */
"i686-unknown-linux-musl" = [ "target-feature=+crt-static" "link-arg=-static" ]; /* segfault with static-pie */
"x86_64-unknown-linux-musl" = [ "target-feature=+crt-static" "link-arg=-static-pie" ];
};
in
/*
The following definition is not elegant as we use a low level function of Cargo2nix
that enables us to pass our custom rustChannel object. We need this low level definition
to pass Nix's Rust toolchains instead of Mozilla's one.
target is mandatory but must be kept to null to allow cargo2nix to set it to the appropriate value
for each crate.
*/
pkgs.rustBuilder.makePackageSet {
inherit packageFun rustChannel release codegenOpts;
packageOverrides = overrides;
target = null;
buildRustPackages = pkgs.buildPackages.rustBuilder.makePackageSet {
inherit rustChannel packageFun codegenOpts;
packageOverrides = overrides;
target = null;
};
}

23
nix/manifest-tool.nix Normal file
View file

@ -0,0 +1,23 @@
pkgs:
pkgs.buildGoModule rec {
pname = "manifest-tool";
version = "2.0.5";
src = pkgs.fetchFromGitHub {
owner = "estesp";
repo = "manifest-tool";
rev = "v${version}";
sha256 = "hjCGKnE0yrlnF/VIzOwcDzmQX3Wft+21KCny/opqdLg=";
} + "/v2";
vendorSha256 = null;
checkPhase = "true";
meta = with pkgs.lib; {
description = "Command line tool to create and query container image manifest list/indexes";
homepage = "https://github.com/estesp/manifest-tool";
license = licenses.asl20;
platforms = platforms.linux;
};
}

View file

@ -1,4 +1,9 @@
substituters = https://cache.nixos.org https://nix.web.deuxfleurs.fr
trusted-public-keys = cache.nixos.org-1:6NCHdD59X431o0gWypbMrAURkbJ16ZPMQFGspcDShjY= nix.web.deuxfleurs.fr:eTGL6kvaQn6cDR/F9lDYUIP9nCVR/kkshYfLDJf1yKs=
max-jobs = auto
cores = 4
cores = 0
log-lines = 200
filter-syscalls = false
sandbox = false
keep-outputs = true
keep-derivations = true

View file

@ -11,7 +11,7 @@ PATH="${GARAGE_DEBUG}:${GARAGE_RELEASE}:${NIX_RELEASE}:$PATH"
FANCYCOLORS=("41m" "42m" "44m" "45m" "100m" "104m")
export RUST_BACKTRACE=1
export RUST_LOG=garage=info,garage_api=debug
export RUST_LOG=garage=info,garage_api=debug,netapp=trace
MAIN_LABEL="\e[${FANCYCOLORS[0]}[main]\e[49m"
WHICH_GARAGE=$(which garage || exit 1)

3
script/helm/README.md Normal file
View file

@ -0,0 +1,3 @@
# Garage helm3 chart
Documentation is located [here](/doc/book/cookbook/kubernetes.md).

View file

@ -0,0 +1,23 @@
# Patterns to ignore when building packages.
# This supports shell glob matching, relative path matching, and
# negation (prefixed with !). Only one pattern per line.
.DS_Store
# Common VCS dirs
.git/
.gitignore
.bzr/
.bzrignore
.hg/
.hgignore
.svn/
# Common backup files
*.swp
*.bak
*.tmp
*.orig
*~
# Various IDEs
.project
.idea/
*.tmproj
.vscode/

View file

@ -0,0 +1,24 @@
apiVersion: v2
name: garage
description: S3-compatible object store for small self-hosted geo-distributed deployments
# A chart can be either an 'application' or a 'library' chart.
#
# Application charts are a collection of templates that can be packaged into versioned archives
# to be deployed.
#
# Library charts provide useful utilities or functions for the chart developer. They're included as
# a dependency of application charts to inject those utilities and functions into the rendering
# pipeline. Library charts do not define any templates and therefore cannot be deployed.
type: application
# This is the chart version. This version number should be incremented each time you make changes
# to the chart and its templates, including the app version.
# Versions are expected to follow Semantic Versioning (https://semver.org/)
version: 0.1.0
# This is the version number of the application being deployed. This version number should be
# incremented each time you make changes to the application. Versions are not expected to
# follow Semantic Versioning. They should reflect the version the application is using.
# It is recommended to use it with quotes.
appVersion: "v0.7.2.1"

View file

@ -0,0 +1,88 @@
{{/*
Expand the name of the chart.
*/}}
{{- define "garage.name" -}}
{{- default .Chart.Name .Values.nameOverride | trunc 63 | trimSuffix "-" }}
{{- end }}
{{/*
Create a default fully qualified app name.
We truncate at 63 chars because some Kubernetes name fields are limited to this (by the DNS naming spec).
If release name contains chart name it will be used as a full name.
*/}}
{{- define "garage.fullname" -}}
{{- if .Values.fullnameOverride }}
{{- .Values.fullnameOverride | trunc 63 | trimSuffix "-" }}
{{- else }}
{{- $name := default .Chart.Name .Values.nameOverride }}
{{- if contains $name .Release.Name }}
{{- .Release.Name | trunc 63 | trimSuffix "-" }}
{{- else }}
{{- printf "%s-%s" .Release.Name $name | trunc 63 | trimSuffix "-" }}
{{- end }}
{{- end }}
{{- end }}
{{/*
Create the name of the rpc secret
*/}}
{{- define "garage.rpcSecretName" -}}
{{- printf "%s-rpc-secret" (include "garage.fullname" .) -}}
{{- end }}
{{/*
Create chart name and version as used by the chart label.
*/}}
{{- define "garage.chart" -}}
{{- printf "%s-%s" .Chart.Name .Chart.Version | replace "+" "_" | trunc 63 | trimSuffix "-" }}
{{- end }}
{{/*
Common labels
*/}}
{{- define "garage.labels" -}}
helm.sh/chart: {{ include "garage.chart" . }}
{{ include "garage.selectorLabels" . }}
{{- if .Chart.AppVersion }}
app.kubernetes.io/version: {{ .Chart.AppVersion | quote }}
{{- end }}
app.kubernetes.io/managed-by: {{ .Release.Service }}
{{- end }}
{{/*
Selector labels
*/}}
{{- define "garage.selectorLabels" -}}
app.kubernetes.io/name: {{ include "garage.name" . }}
app.kubernetes.io/instance: {{ .Release.Name }}
{{- end }}
{{/*
Create the name of the service account to use
*/}}
{{- define "garage.serviceAccountName" -}}
{{- if .Values.serviceAccount.create }}
{{- default (include "garage.fullname" .) .Values.serviceAccount.name }}
{{- else }}
{{- default "default" .Values.serviceAccount.name }}
{{- end }}
{{- end }}
{{/*
Returns given number of random Hex characters.
In practice, it generates up to 100 randAlphaNum strings
that are filtered from non-hex characters and augmented
to the resulting string that is finally trimmed down.
*/}}
{{- define "jupyterhub.randHex" -}}
{{- $result := "" }}
{{- range $i := until 100 }}
{{- if lt (len $result) . }}
{{- $rand_list := randAlphaNum . | splitList "" -}}
{{- $reduced_list := without $rand_list "g" "h" "i" "j" "k" "l" "m" "n" "o" "p" "q" "r" "s" "t" "u" "v" "w" "x" "y" "z" "A" "B" "C" "D" "E" "F" "G" "H" "I" "J" "K" "L" "M" "N" "O" "P" "Q" "R" "S" "T" "U" "V" "W" "X" "Y" "Z" }}
{{- $rand_string := join "" $reduced_list }}
{{- $result = print $result $rand_string -}}
{{- end }}
{{- end }}
{{- $result | trunc . }}
{{- end }}

View file

@ -0,0 +1,28 @@
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRole
metadata:
name: manage-crds-{{ .Release.Namespace }}-{{ .Release.Name }}
labels:
{{- include "garage.labels" . | nindent 4 }}
rules:
- apiGroups: ["apiextensions.k8s.io"]
resources: ["customresourcedefinitions"]
verbs: ["get", "list", "watch", "create", "patch"]
- apiGroups: ["deuxfleurs.fr"]
resources: ["garagenodes"]
verbs: ["get", "list", "watch", "create", "update", "patch", "delete"]
---
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRoleBinding
metadata:
name: allow-crds-for-{{ .Release.Namespace }}-{{ .Release.Name }}
labels:
{{- include "garage.labels" . | nindent 4 }}
subjects:
- kind: ServiceAccount
name: {{ include "garage.serviceAccountName" . }}
namespace: {{ .Release.Namespace }}
roleRef:
kind: ClusterRole
name: manage-crds-{{ .Release.Namespace }}-{{ .Release.Name }}
apiGroup: rbac.authorization.k8s.io

View file

@ -0,0 +1,30 @@
apiVersion: v1
kind: ConfigMap
metadata:
name: {{ include "garage.fullname" . }}-config
data:
garage.toml: |-
metadata_dir = "{{ .Values.garage.metadataDir }}"
data_dir = "{{ .Values.garage.dataDir }}"
replication_mode = "{{ .Values.garage.replicationMode }}"
rpc_bind_addr = "{{ .Values.garage.rpcBindAddr }}"
# rpc_secret will be populated by the init container from a k8s secret object
rpc_secret = "__RPC_SECRET_REPLACE__"
bootstrap_peers = {{ .Values.garage.bootstrapPeers }}
kubernetes_namespace = "{{ .Release.Namespace }}"
kubernetes_service_name = "{{ include "garage.fullname" . }}"
kubernetes_skip_crd = {{ .Values.garage.kubernetesSkipCrd }}
[s3_api]
s3_region = "{{ .Values.garage.s3.api.region }}"
api_bind_addr = "[::]:3900"
root_domain = "{{ .Values.garage.s3.api.rootDomain }}"
[s3_web]
bind_addr = "[::]:3902"
root_domain = "{{ .Values.garage.s3.web.rootDomain }}"
index = "{{ .Values.garage.s3.web.index }}"

View file

@ -0,0 +1,123 @@
{{- if .Values.ingress.s3.api.enabled -}}
{{- $fullName := include "garage.fullname" . -}}
{{- $svcPort := .Values.service.s3.api.port -}}
{{- if and .Values.ingress.className (not (semverCompare ">=1.18-0" .Capabilities.KubeVersion.GitVersion)) }}
{{- if not (hasKey .Values.ingress.s3.api.annotations "kubernetes.io/ingress.class") }}
{{- $_ := set .Values.ingress.s3.api.annotations "kubernetes.io/ingress.class" .Values.ingress.s3.api.className}}
{{- end }}
{{- end }}
{{- if semverCompare ">=1.19-0" .Capabilities.KubeVersion.GitVersion -}}
apiVersion: networking.k8s.io/v1
{{- else if semverCompare ">=1.14-0" .Capabilities.KubeVersion.GitVersion -}}
apiVersion: networking.k8s.io/v1beta1
{{- else -}}
apiVersion: extensions/v1beta1
{{- end }}
kind: Ingress
metadata:
name: {{ $fullName }}-s3-api
labels:
{{- include "garage.labels" . | nindent 4 }}
{{- with .Values.ingress.s3.api.annotations }}
annotations:
{{- toYaml . | nindent 4 }}
{{- end }}
spec:
{{- if and .Values.ingress.s3.api.className (semverCompare ">=1.18-0" .Capabilities.KubeVersion.GitVersion) }}
ingressClassName: {{ .Values.ingress.s3.api.className }}
{{- end }}
{{- if .Values.ingress.s3.api.tls }}
tls:
{{- range .Values.ingress.s3.api.tls }}
- hosts:
{{- range .hosts }}
- {{ . | quote }}
{{- end }}
secretName: {{ .secretName }}
{{- end }}
{{- end }}
rules:
{{- range .Values.ingress.s3.api.hosts }}
- host: {{ .host | quote }}
http:
paths:
{{- range .paths }}
- path: {{ .path }}
{{- if and .pathType (semverCompare ">=1.18-0" $.Capabilities.KubeVersion.GitVersion) }}
pathType: {{ .pathType }}
{{- end }}
backend:
{{- if semverCompare ">=1.19-0" $.Capabilities.KubeVersion.GitVersion }}
service:
name: {{ $fullName }}
port:
number: {{ $svcPort }}
{{- else }}
serviceName: {{ $fullName }}
servicePort: {{ $svcPort }}
{{- end }}
{{- end }}
{{- end }}
{{- end }}
---
{{- if .Values.ingress.s3.web.enabled -}}
{{- $fullName := include "garage.fullname" . -}}
{{- $svcPort := .Values.service.s3.web.port -}}
{{- if and .Values.ingress.s3.web.className (not (semverCompare ">=1.18-0" .Capabilities.KubeVersion.GitVersion)) }}
{{- if not (hasKey .Values.ingress.s3.web.annotations "kubernetes.io/ingress.class") }}
{{- $_ := set .Values.ingress.s3.web.annotations "kubernetes.io/ingress.class" .Values.ingress.s3.web.className}}
{{- end }}
{{- end }}
{{- if semverCompare ">=1.19-0" .Capabilities.KubeVersion.GitVersion -}}
apiVersion: networking.k8s.io/v1
{{- else if semverCompare ">=1.14-0" .Capabilities.KubeVersion.GitVersion -}}
apiVersion: networking.k8s.io/v1beta1
{{- else -}}
apiVersion: extensions/v1beta1
{{- end }}
kind: Ingress
metadata:
name: {{ $fullName }}-s3-web
labels:
{{- include "garage.labels" . | nindent 4 }}
{{- with .Values.ingress.s3.web.annotations }}
annotations:
{{- toYaml . | nindent 4 }}
{{- end }}
spec:
{{- if and .Values.ingress.s3.web.className (semverCompare ">=1.18-0" .Capabilities.KubeVersion.GitVersion) }}
ingressClassName: {{ .Values.ingress.s3.web.className }}
{{- end }}
{{- if .Values.ingress.s3.web.tls }}
tls:
{{- range .Values.ingress.s3.web.tls }}
- hosts:
{{- range .hosts }}
- {{ . | quote }}
{{- end }}
secretName: {{ .secretName }}
{{- end }}
{{- end }}
rules:
{{- range .Values.ingress.s3.web.hosts }}
- host: {{ .host | quote }}
http:
paths:
{{- range .paths }}
- path: {{ .path }}
{{- if and .pathType (semverCompare ">=1.18-0" $.Capabilities.KubeVersion.GitVersion) }}
pathType: {{ .pathType }}
{{- end }}
backend:
{{- if semverCompare ">=1.19-0" $.Capabilities.KubeVersion.GitVersion }}
service:
name: {{ $fullName }}
port:
number: {{ $svcPort }}
{{- else }}
serviceName: {{ $fullName }}
servicePort: {{ $svcPort }}
{{- end }}
{{- end }}
{{- end }}
{{- end }}

View file

@ -0,0 +1,14 @@
apiVersion: v1
kind: Secret
metadata:
name: {{ include "garage.rpcSecretName" . }}
labels:
{{- include "garage.labels" . | nindent 4 }}
type: Opaque
data:
{{/* retrieve the secret data using lookup function and when not exists, return an empty dictionary / map as result */}}
{{- $prevSecret := (lookup "v1" "Secret" .Release.Namespace (include "garage.rpcSecretName" .)) | default dict }}
{{- $prevSecretData := $prevSecret.data | default dict }}
{{- $prevRpcSecret := $prevSecretData.rpcSecret | default "" | b64dec }}
{{/* Priority is: 1. from values, 2. previous value, 3. generate random */}}
rpcSecret: {{ .Values.garage.rpcSecret | default $prevRpcSecret | default (include "jupyterhub.randHex" 64) | b64enc | quote }}

View file

@ -0,0 +1,19 @@
apiVersion: v1
kind: Service
metadata:
name: {{ include "garage.fullname" . }}
labels:
{{- include "garage.labels" . | nindent 4 }}
spec:
type: {{ .Values.service.type }}
ports:
- port: {{ .Values.service.s3.api.port }}
targetPort: 3900
protocol: TCP
name: s3-api
- port: {{ .Values.service.s3.web.port }}
targetPort: 3902
protocol: TCP
name: s3-web
selector:
{{- include "garage.selectorLabels" . | nindent 4 }}

View file

@ -0,0 +1,12 @@
{{- if .Values.serviceAccount.create -}}
apiVersion: v1
kind: ServiceAccount
metadata:
name: {{ include "garage.serviceAccountName" . }}
labels:
{{- include "garage.labels" . | nindent 4 }}
{{- with .Values.serviceAccount.annotations }}
annotations:
{{- toYaml . | nindent 4 }}
{{- end }}
{{- end }}

View file

@ -0,0 +1,116 @@
apiVersion: apps/v1
kind: StatefulSet
metadata:
name: {{ include "garage.fullname" . }}
labels:
{{- include "garage.labels" . | nindent 4 }}
spec:
replicas: {{ .Values.replicaCount }}
selector:
matchLabels:
{{- include "garage.selectorLabels" . | nindent 6 }}
serviceName: {{ include "garage.fullname" . }}
template:
metadata:
{{- with .Values.podAnnotations }}
annotations:
{{- toYaml . | nindent 8 }}
{{- end }}
labels:
{{- include "garage.selectorLabels" . | nindent 8 }}
spec:
{{- with .Values.imagePullSecrets }}
imagePullSecrets:
{{- toYaml . | nindent 8 }}
{{- end }}
serviceAccountName: {{ include "garage.serviceAccountName" . }}
securityContext:
{{- toYaml .Values.podSecurityContext | nindent 8 }}
initContainers:
# Copies garage.toml from configmap to temporary etc volume and replaces RPC secret placeholder
- name: {{ .Chart.Name }}-init
image: busybox:1.28
command: ["sh", "-c", "sed \"s/__RPC_SECRET_REPLACE__/$RPC_SECRET/\" /mnt/garage.toml > /mnt/etc/garage.toml"]
env:
- name: RPC_SECRET
valueFrom:
secretKeyRef:
name: {{ include "garage.rpcSecretName" . }}
key: rpcSecret
volumeMounts:
- name: configmap
mountPath: /mnt/garage.toml
subPath: garage.toml
- name: etc
mountPath: /mnt/etc
containers:
- name: {{ .Chart.Name }}
securityContext:
{{- toYaml .Values.securityContext | nindent 12 }}
image: "{{ .Values.image.repository }}:{{ .Values.image.tag | default .Chart.AppVersion }}"
imagePullPolicy: {{ .Values.image.pullPolicy }}
ports:
- containerPort: 3900
name: s3-api
- containerPort: 3902
name: web-api
volumeMounts:
- name: meta
mountPath: /mnt/meta
- name: data
mountPath: /mnt/data
- name: etc
mountPath: /etc/garage.toml
subPath: garage.toml
# TODO
# livenessProbe:
# httpGet:
# path: /
# port: 3900
# readinessProbe:
# httpGet:
# path: /
# port: 3900
resources:
{{- toYaml .Values.resources | nindent 12 }}
volumes:
- name: configmap
configMap:
name: {{ include "garage.fullname" . }}-config
- name: etc
emptyDir: {}
{{- with .Values.nodeSelector }}
nodeSelector:
{{- toYaml . | nindent 8 }}
{{- end }}
{{- with .Values.affinity }}
affinity:
{{- toYaml . | nindent 8 }}
{{- end }}
{{- with .Values.tolerations }}
tolerations:
{{- toYaml . | nindent 8 }}
{{- end }}
{{- if .Values.persistence.enabled }}
volumeClaimTemplates:
- metadata:
name: meta
spec:
accessModes: [ "ReadWriteOnce" ]
{{- if hasKey .Values.persistence.meta "storageClass" }}
storageClassName: {{ .Values.persistence.meta.storageClass | quote }}
{{- end }}
resources:
requests:
storage: {{ .Values.persistence.meta.size | quote }}
- metadata:
name: data
spec:
accessModes: [ "ReadWriteOnce" ]
{{- if hasKey .Values.persistence.data "storageClass" }}
storageClassName: {{ .Values.persistence.data.storageClass | quote }}
{{- end }}
resources:
requests:
storage: {{ .Values.persistence.data.size | quote }}
{{- end }}

View file

@ -0,0 +1,142 @@
# Default values for garage.
# This is a YAML-formatted file.
# Declare variables to be passed into your templates.
# Garage configuration. These values go to garage.toml
garage:
metadataDir: "/mnt/meta"
dataDir: "/mnt/data"
# Default to 3 replicas, see the replication_mode section at
# https://garagehq.deuxfleurs.fr/documentation/reference-manual/configuration/
replicationMode: "3"
rpcBindAddr: "[::]:3901"
# If not given, a random secret will be generated and stored in a Secret object
rpcSecret: ""
# This is not required if you use the integrated kubernetes discovery
bootstrapPeers: []
kubernetesSkipCrd: false
s3:
api:
region: "garage"
rootDomain: ".s3.garage.tld"
web:
rootDomain: ".web.garage.tld"
index: "index.html"
# Data persistence
persistence:
enabled: true
meta:
# storageClass: "fast-storage-class"
size: 100Mi
data:
# storageClass: "slow-storage-class"
size: 100Mi
# Number of StatefulSet replicas/garage nodes to start
replicaCount: 3
image:
repository: dxflrs/amd64_garage
# please prefer using the chart version and not this tag
tag: ""
pullPolicy: IfNotPresent
imagePullSecrets: []
nameOverride: ""
fullnameOverride: ""
serviceAccount:
# Specifies whether a service account should be created
create: true
# Annotations to add to the service account
annotations: {}
# The name of the service account to use.
# If not set and create is true, a name is generated using the fullname template
name: ""
podAnnotations: {}
podSecurityContext: {}
# fsGroup: 2000
securityContext:
# The default security context is heavily restricted
# feel free to tune it to your requirements
capabilities:
drop:
- ALL
readOnlyRootFilesystem: true
runAsNonRoot: true
runAsUser: 1000
service:
# You can rely on any service to expose your cluster
# - ClusterIP (+ Ingress)
# - NodePort (+ Ingress)
# - LoadBalancer
type: ClusterIP
s3:
api:
port: 3900
web:
port: 3902
# NOTE: the admin API is excluded for now as it is not consistent across nodes
ingress:
s3:
api:
enabled: true
# Rely either on the className or the annotation below but not both
# replace "nginx" by an Ingress controller
# you can find examples here https://kubernetes.io/docs/concepts/services-networking/ingress-controllers
className: "nginx"
annotations:
# kubernetes.io/ingress.class: "nginx"
# kubernetes.io/tls-acme: "true"
hosts:
- host: "s3.garage.tld" # garage S3 API endpoint
paths:
- path: /
pathType: Prefix
- host: "*.s3.garage.tld" # garage S3 API endpoint, DNS style bucket access
paths:
- path: /
pathType: Prefix
tls: []
# - secretName: my-garage-cluster-tls
# hosts:
# - kubernetes.docker.internal
web:
enabled: true
className: "nginx"
annotations: {}
# kubernetes.io/ingress.class: nginx
# kubernetes.io/tls-acme: "true"
hosts:
- host: "*.web.garage.tld" # wildcard website access with bucket name prefix
paths:
- path: /
pathType: Prefix
- host: "mywebpage.example.com" # specific bucket access with FQDN bucket
paths:
- path: /
pathType: Prefix
tls: []
# - secretName: my-garage-cluster-tls
# hosts:
# - kubernetes.docker.internal
resources: {}
# The following are indicative for a small-size deployement, for anything serious double them.
# limits:
# cpu: 100m
# memory: 1024Mi
# requests:
# cpu: 100m
# memory: 512Mi
nodeSelector: {}
tolerations: []
affinity: {}

14
script/not-dynamic.sh Executable file
View file

@ -0,0 +1,14 @@
#!/usr/bin/env bash
set -e
if [ "$#" -ne 1 ]; then
echo "[fail] usage: $0 binary"
exit 2
fi
if file $1 | grep 'dynamically linked' 2>&1; then
echo "[fail] $1 is dynamic"
exit 1
fi
echo "[ok] $1 is probably static"

132
shell.nix
View file

@ -1,8 +1,5 @@
{
system ? builtins.currentSystem,
rust ? true,
integration ? true,
release ? true,
}:
with import ./nix/common.nix;
@ -13,12 +10,63 @@ let
overlays = [ cargo2nixOverlay ];
};
kaniko = (import ./nix/kaniko.nix) pkgs;
manifest-tool = (import ./nix/manifest-tool.nix) pkgs;
winscp = (import ./nix/winscp.nix) pkgs;
in
{
pkgs.mkShell {
shellHook = ''
/* --- Rust Shell ---
* Use it to compile Garage
*/
rust = pkgs.mkShell {
shellHook = ''
function refresh_toolchain {
nix copy \
--to 's3://nix?endpoint=garage.deuxfleurs.fr&region=garage&secret-key=/etc/nix/signing-key.sec' \
$(nix-store -qR \
$(nix-build --quiet --no-build-output --no-out-link nix/toolchain.nix))
}
'';
nativeBuildInputs = [
#pkgs.rustPlatform.rust.rustc
pkgs.rustPlatform.rust.cargo
#pkgs.clippy
pkgs.rustfmt
#pkgs.perl
#pkgs.protobuf
#pkgs.pkg-config
#pkgs.openssl
pkgs.file
#cargo2nix.packages.x86_64-linux.cargo2nix
];
};
/* --- Integration shell ---
* Use it to test Garage with common S3 clients
*/
integration = pkgs.mkShell {
nativeBuildInputs = [
winscp
pkgs.s3cmd
pkgs.awscli2
pkgs.minio-client
pkgs.rclone
pkgs.socat
pkgs.psmisc
pkgs.which
pkgs.openssl
pkgs.curl
pkgs.jq
];
};
/* --- Release shell ---
* A shell built to make releasing easier
*/
release = pkgs.mkShell {
shellHook = ''
function to_s3 {
aws \
--endpoint-url https://garage.deuxfleurs.fr \
@ -37,6 +85,34 @@ function to_docker {
--verbosity=debug
}
function multiarch_docker {
manifest-tool push from-spec <(cat <<EOF
image: dxflrs/garage:''${CONTAINER_TAG}
manifests:
-
image: dxflrs/arm64_garage:''${CONTAINER_TAG}
platform:
architecture: arm64
os: linux
-
image: dxflrs/amd64_garage:''${CONTAINER_TAG}
platform:
architecture: amd64
os: linux
-
image: dxflrs/386_garage:''${CONTAINER_TAG}
platform:
architecture: 386
os: linux
-
image: dxflrs/arm_garage:''${CONTAINER_TAG}
platform:
architecture: arm
os: linux
EOF
)
}
function refresh_index {
aws \
--endpoint-url https://garage.deuxfleurs.fr \
@ -62,43 +138,13 @@ function refresh_index {
result/share/_releases.html \
s3://garagehq.deuxfleurs.fr/
}
'';
nativeBuildInputs = [
pkgs.awscli2
kaniko
manifest-tool
];
};
}
function refresh_toolchain {
nix copy \
--to 's3://nix?endpoint=garage.deuxfleurs.fr&region=garage&secret-key=/etc/nix/signing-key.sec' \
$(nix-store -qR \
$(nix-build --quiet --no-build-output --no-out-link nix/toolchain.nix))
}
'';
nativeBuildInputs =
(if rust then [
pkgs.rustPlatform.rust.rustc
pkgs.rustPlatform.rust.cargo
pkgs.clippy
pkgs.rustfmt
pkgs.perl
pkgs.protobuf
cargo2nix.packages.x86_64-linux.cargo2nix
] else [])
++
(if integration then [
winscp
pkgs.s3cmd
pkgs.awscli2
pkgs.minio-client
pkgs.rclone
pkgs.socat
pkgs.psmisc
pkgs.which
pkgs.openssl
pkgs.curl
pkgs.jq
] else [])
++
(if release then [
pkgs.awscli2
kaniko
] else [])
;
}

View file

@ -1,29 +0,0 @@
[package]
name = "garage_admin"
version = "0.7.0"
authors = ["Maximilien Richer <code@mricher.fr>"]
edition = "2018"
license = "AGPL-3.0"
description = "Administration and metrics REST HTTP server for Garage"
repository = "https://git.deuxfleurs.fr/Deuxfleurs/garage"
[lib]
path = "lib.rs"
# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
[dependencies]
garage_util = { version = "0.7.0", path = "../util" }
hex = "0.4"
futures = "0.3"
futures-util = "0.3"
http = "0.2"
hyper = "0.14"
tracing = "0.1.30"
opentelemetry = { version = "0.17", features = [ "rt-tokio" ] }
opentelemetry-prometheus = "0.10"
opentelemetry-otlp = "0.10"
prometheus = "0.13"

Some files were not shown because too many files have changed in this diff Show more