Compare commits

..

1 commit

Author SHA1 Message Date
75597a1820 Disable UploadPartCopy min size test 2022-02-22 18:20:52 +01:00
231 changed files with 4103 additions and 35345 deletions

2108
Cargo.lock generated

File diff suppressed because it is too large Load diff

3945
Cargo.nix

File diff suppressed because it is too large Load diff

View file

@ -1,19 +1,14 @@
[workspace] [workspace]
members = [ members = [
"src/db",
"src/util", "src/util",
"src/rpc", "src/rpc",
"src/table", "src/table",
"src/block",
"src/model", "src/model",
"src/api", "src/api",
"src/web", "src/web",
"src/garage", "src/garage",
"src/k2v-client",
] ]
default-members = ["src/garage"]
[profile.dev] [profile.dev]
lto = "off" lto = "off"

View file

@ -1,7 +1,7 @@
.PHONY: doc all release shell .PHONY: doc all release shell
all: all:
clear; cargo build --all-features clear; cargo build
doc: doc:
cd doc/book; mdbook build cd doc/book; mdbook build

View file

@ -11,26 +11,14 @@ with import ./nix/common.nix;
let let
crossSystem = { config = target; }; crossSystem = { config = target; };
in let in let
log = v: builtins.trace v v;
pkgs = import pkgsSrc { pkgs = import pkgsSrc {
inherit system crossSystem; inherit system crossSystem;
overlays = [ cargo2nixOverlay ]; overlays = [ cargo2nixOverlay ];
}; };
/*
Rust and Nix triples are not the same. Cargo2nix has a dedicated library
to convert Nix triples to Rust ones. We need this conversion as we want to
set later options linked to our (rust) target in a generic way. Not only
the triple terminology is different, but also the "roles" are named differently.
Nix uses a build/host/target terminology where Nix's "host" maps to Cargo's "target".
*/
rustTarget = log (pkgs.rustBuilder.rustLib.rustTriple pkgs.stdenv.hostPlatform);
/* /*
Cargo2nix is built for rustOverlay which installs Rust from Mozilla releases. Cargo2nix is built for rustOverlay which installs Rust from Mozilla releases.
We want our own Rust to avoid incompatibilities, like we had with musl 1.2.0. We want our own Rust to avoir incompatibilities, like we had with musl 1.2.0.
rustc was built with musl < 1.2.0 and nix shipped musl >= 1.2.0 which lead to compilation breakage. rustc was built with musl < 1.2.0 and nix shipped musl >= 1.2.0 which lead to compilation breakage.
So we want a Rust release that is bound to our Nix repository to avoid these problems. So we want a Rust release that is bound to our Nix repository to avoid these problems.
See here for more info: https://musl.libc.org/time64.html See here for more info: https://musl.libc.org/time64.html
@ -47,93 +35,53 @@ in let
]; ];
}; };
/*
Cargo2nix provides many overrides by default, you can take inspiration from them:
https://github.com/cargo2nix/cargo2nix/blob/master/overlay/overrides.nix
You can have a complete list of the available options by looking at the overriden object, mkcrate:
https://github.com/cargo2nix/cargo2nix/blob/master/overlay/mkcrate.nix
*/
overrides = pkgs.rustBuilder.overrides.all ++ [ overrides = pkgs.rustBuilder.overrides.all ++ [
/* /*
[1] We need to alter Nix hardening to be able to statically compile: PIE, We want to inject the git version while keeping the build deterministic.
Position Independent Executables seems to be supported only on amd64. Having
this flags set either make our executables crash or compile as dynamic on many platforms.
In the following section codegenOpts, we reactive it for the supported targets
(only amd64 curently) through the `-static-pie` flag. PIE is a feature used
by ASLR, which helps mitigate security issues.
Learn more about Nix Hardening: https://github.com/NixOS/nixpkgs/blob/master/pkgs/build-support/cc-wrapper/add-hardening.sh
[2] We want to inject the git version while keeping the build deterministic.
As we do not want to consider the .git folder as part of the input source, As we do not want to consider the .git folder as part of the input source,
we ask the user (the CI often) to pass the value to Nix. we ask the user (the CI often) to pass the value to Nix.
*/ */
(pkgs.rustBuilder.rustLib.makeOverride { (pkgs.rustBuilder.rustLib.makeOverride {
name = "garage_rpc"; name = "garage";
overrideAttrs = drv: overrideAttrs = drv: if git_version != null then {
/* [1] */ { hardeningDisable = [ "pie" ]; }
//
/* [2] */ (if git_version != null then {
preConfigure = '' preConfigure = ''
${drv.preConfigure or ""} ${drv.preConfigure or ""}
export GIT_VERSION="${git_version}" export GIT_VERSION="${git_version}"
''; '';
} else {}); } else {};
}) })
/* /*
We ship some parts of the code disabled by default by putting them behind a flag. On a sandbox pure NixOS environment, /usr/bin/file is not available.
It speeds up the compilation (when the feature is not required) and released crates have less dependency by default (less attack surface, disk space, etc.). This is a known problem: https://github.com/NixOS/nixpkgs/issues/98440
But we want to ship these additional features when we release Garage. We simply patch the file as suggested
In the end, we chose to exclude all features from debug builds while putting (all of) them in the release builds.
Currently, the only feature of Garage is kubernetes-discovery from the garage_rpc crate.
*/ */
(pkgs.rustBuilder.rustLib.makeOverride { /*(pkgs.rustBuilder.rustLib.makeOverride {
name = "garage_rpc"; name = "libsodium-sys";
overrideArgs = old: overrideAttrs = drv: {
{ preConfigure = ''
features = if release then [ "kubernetes-discovery" ] else []; ${drv.preConfigure or ""}
}; sed -i 's,/usr/bin/file,${file}/bin/file,g' ./configure
}) '';
}
})*/
]; ];
packageFun = import ./Cargo.nix; packageFun = import ./Cargo.nix;
/*
We compile fully static binaries with musl to simplify deployment on most systems.
When possible, we reactivate PIE hardening (see above).
Also, if you set the RUSTFLAGS environment variable, the following parameters will
be ignored.
For more information on static builds, please refer to Rust's RFC 1721.
https://rust-lang.github.io/rfcs/1721-crt-static.html#specifying-dynamicstatic-c-runtime-linkage
*/
codegenOpts = {
"armv6l-unknown-linux-musleabihf" = [ "target-feature=+crt-static" "link-arg=-static" ]; /* compile as dynamic with static-pie */
"aarch64-unknown-linux-musl" = [ "target-feature=+crt-static" "link-arg=-static" ]; /* segfault with static-pie */
"i686-unknown-linux-musl" = [ "target-feature=+crt-static" "link-arg=-static" ]; /* segfault with static-pie */
"x86_64-unknown-linux-musl" = [ "target-feature=+crt-static" "link-arg=-static-pie" ];
};
/* /*
The following definition is not elegant as we use a low level function of Cargo2nix The following definition is not elegant as we use a low level function of Cargo2nix
that enables us to pass our custom rustChannel object. We need this low level definition that enables us to pass our custom rustChannel object
to pass Nix's Rust toolchains instead of Mozilla's one.
target is mandatory but must be kept to null to allow cargo2nix to set it to the appropriate value
for each crate.
*/ */
rustPkgs = pkgs.rustBuilder.makePackageSet { rustPkgs = pkgs.rustBuilder.makePackageSet {
inherit packageFun rustChannel release codegenOpts; inherit packageFun rustChannel release;
packageOverrides = overrides; packageOverrides = overrides;
target = null; target = null; /* we set target to null because we want that cargo2nix computes it automatically */
buildRustPackages = pkgs.buildPackages.rustBuilder.makePackageSet { buildRustPackages = pkgs.buildPackages.rustBuilder.makePackageSet {
inherit rustChannel packageFun codegenOpts; inherit rustChannel packageFun;
packageOverrides = overrides; packageOverrides = overrides;
target = null; target = null; /* we set target to null because we want that cargo2nix computes it automatically */
}; };
}; };

View file

@ -17,61 +17,6 @@ If you still want to use Borg, you can use it with `rclone mount`.
## Restic ## Restic
Create your key and bucket:
```bash
garage key new my-key
garage bucket create backup
garage bucket allow backup --read --write --key my-key
```
Then register your Key ID and Secret key in your environment:
```bash
export AWS_ACCESS_KEY_ID=GKxxx
export AWS_SECRET_ACCESS_KEY=xxxx
```
Configure restic from environment too:
```bash
export RESTIC_REPOSITORY="s3:http://localhost:3900/backups"
echo "Generated password (save it safely): $(openssl rand -base64 32)"
export RESTIC_PASSWORD=xxx # copy paste your generated password here
```
Do not forget to save your password safely (in your password manager or print it). It will be needed to decrypt your backups.
Now you can use restic:
```bash
# Initialize the bucket, must be run once
restic init
# Backup your PostgreSQL database
# (We suppose your PostgreSQL daemon is stopped for all commands)
restic backup /var/lib/postgresql
# Show backup history
restic snapshots
# Backup again your PostgreSQL database, it will be faster as only changes will be uploaded
restic backup /var/lib/postgresql
# Show backup history (again)
restic snapshots
# Restore a backup
# (79766175 is the ID of the snapshot you want to restore)
mv /var/lib/postgresql /var/lib/postgresql.broken
restic restore 79766175 --target /var/lib/postgresql
```
Restic has way more features than the ones presented here.
You can discover all of them by accessing its documentation from the link below.
*External links:* [Restic Documentation > Amazon S3](https://restic.readthedocs.io/en/stable/030_preparing_a_new_repo.html#amazon-s3) *External links:* [Restic Documentation > Amazon S3](https://restic.readthedocs.io/en/stable/030_preparing_a_new_repo.html#amazon-s3)
## Duplicity ## Duplicity
@ -80,22 +25,7 @@ You can discover all of them by accessing its documentation from the link below.
## Duplicati ## Duplicati
*External links:* [Duplicati Documentation > Storage Providers](https://duplicati.readthedocs.io/en/latest/05-storage-providers/#s3-compatible) *External links:* [Duplicati Documentation > Storage Providers](https://github.com/kees-z/DuplicatiDocs/blob/master/docs/05-storage-providers.md#user-content-s3-compatible)
The following fields need to be specified:
```
Storage Type: S3 Compatible
Use SSL: [ ] # Only if you have SSL
Server: Custom server url (s3.garage.localhost:3900)
Bucket name: bucket-name
Bucket create region: Custom region value (garage) # Or as you've specified in garage.toml
AWS Access ID: Key ID from "garage key info key-name"
AWS Access Key: Secret key from "garage key info key-name"
Client Library to use: Minio SDK
```
Click `Test connection` and then no when asked `The bucket name should start with your username, prepend automatically?`. Then it should say `Connection worked!`.
## knoxite ## knoxite
@ -105,24 +35,3 @@ Click `Test connection` and then no when asked `The bucket name should start wit
*External links:* [Kopia Documentation > Repositories](https://kopia.io/docs/repositories/#amazon-s3) *External links:* [Kopia Documentation > Repositories](https://kopia.io/docs/repositories/#amazon-s3)
To create the Kopia repository, you need to specify the region, the HTTP(S) endpoint, the bucket name and the access keys.
For instance, if you have an instance of garage running on `https://garage.example.com`:
```
kopia repository create s3 --region=garage --bucket=mybackups --access-key=KEY_ID --secret-access-key=SECRET_KEY --endpoint=garage.example.com
```
Or if you have an instance running on localhost, without TLS:
```
kopia repository create s3 --region=garage --bucket=mybackups --access-key=KEY_ID --secret-access-key=SECRET_KEY --endpoint=localhost:3900 --disable-tls
```
After the repository has been created, check that everything works as expected:
```
kopia repository validate-provider
```
You can then run all the standard kopia commands: `kopia snapshot create`, `kopia mount`...
Everything should work out-of-the-box.

View file

@ -13,8 +13,7 @@ These tools are particularly suitable for debug, backups, website deployments or
| [rclone](#rclone) | ✅ | | | [rclone](#rclone) | ✅ | |
| [s3cmd](#s3cmd) | ✅ | | | [s3cmd](#s3cmd) | ✅ | |
| [(Cyber)duck](#cyberduck) | ✅ | | | [(Cyber)duck](#cyberduck) | ✅ | |
| [WinSCP (libs3)](#winscp) | ✅ | CLI instructions only | | [WinSCP (libs3)](#winscp) | ✅ | No instructions yet |
| [sftpgo](#sftpgo) | ✅ | |
## Minio client ## Minio client
@ -282,59 +281,5 @@ duck --delete garage:/my-files/an-object.txt
## WinSCP (libs3) {#winscp} ## WinSCP (libs3) {#winscp}
*You can find instructions on how to use the GUI in french [in our wiki](https://wiki.deuxfleurs.fr/fr/Guide/Garage/WinSCP).* *No instruction yet. You can find ones in french [in our wiki](https://wiki.deuxfleurs.fr/fr/Guide/Garage/WinSCP).*
How to use `winscp.com`, the CLI interface of WinSCP:
```
open s3://GKxxxxx:yyyyyyy@127.0.0.1:4443 -certificate=* -rawsettings S3DefaultRegion=garage S3UrlStyle=1
ls
ls my-files/
get my-files/an-object.txt Z:\tmp\object.txt
put Z:\tmp\object.txt my-files/another-object.txt
rm my-files/an-object
exit
```
Notes:
- It seems WinSCP supports only TLS connections for S3
- `-certificate=*` allows self-signed certificates, remove it if you have valid certificates
## sftpgo {#sftpgo}
sftpgo needs a database to work, by default it uses sqlite and does not require additional configuration.
You can then directly init it:
```
sftpgo initprovider
```
Then you can directly launch the daemon that will listen by default on `:8080 (http)` and `:2022 (ssh)`:
```
sftpgo serve
```
Go to the admin web interface (http://[::1]:8080/web/admin/), create the required admin account, then create a user account.
Choose a username (eg: `ada`) and a password.
In the filesystem section, choose:
- Storage: AWS S3 (Compatible)
- Bucket: *your bucket name*
- Region: `garage` (or the one you defined in `config.toml`)
- Access key: *your access key*
- Access secret: *your secret key*
- Endpoint: *your endpoint*, eg. `https://garage.example.tld`, note that the protocol (`https` here) must be specified. Non standard ports and `http` have not been tested yet.
- Keep the default values for other fields
- Tick "Use path-style addressing". It should work without ticking it if you have correctly configured your instance to use URL vhost-style.
Now you can access your bucket through SFTP:
```
sftp -P2022 ada@[::1]
ls
```
And through the web interface at http://[::1]:8080/web/client

View file

@ -3,7 +3,7 @@ title = "Websites (Hugo, Jekyll, Publii...)"
weight = 10 weight = 10
+++ +++
Garage is also suitable [to host static websites](@/documentation/cookbook/exposing-websites.md). Garage is also suitable to host static websites.
While they can be deployed with traditional CLI tools, some static website generators have integrated options to ease your workflow. While they can be deployed with traditional CLI tools, some static website generators have integrated options to ease your workflow.
| Name | Status | Note | | Name | Status | Note |

View file

@ -23,7 +23,7 @@ To run a real-world deployment, make sure the following conditions are met:
- Ideally, each machine should have a SSD available in addition to the HDD you are dedicating - Ideally, each machine should have a SSD available in addition to the HDD you are dedicating
to Garage. This will allow for faster access to metadata and has the potential to Garage. This will allow for faster access to metadata and has the potential
to significantly reduce Garage's response times. to drastically reduce Garage's response times.
- This guide will assume you are using Docker containers to deploy Garage on each node. - This guide will assume you are using Docker containers to deploy Garage on each node.
Garage can also be run independently, for instance as a [Systemd service](@/documentation/cookbook/systemd.md). Garage can also be run independently, for instance as a [Systemd service](@/documentation/cookbook/systemd.md).
@ -35,19 +35,12 @@ For our example, we will suppose the following infrastructure with IPv6 connecti
| Location | Name | IP Address | Disk Space | | Location | Name | IP Address | Disk Space |
|----------|---------|------------|------------| |----------|---------|------------|------------|
| Paris | Mercury | fc00:1::1 | 1 TB | | Paris | Mercury | fc00:1::1 | 1 To |
| Paris | Venus | fc00:1::2 | 2 TB | | Paris | Venus | fc00:1::2 | 2 To |
| London | Earth | fc00:B::1 | 2 TB | | London | Earth | fc00:B::1 | 2 To |
| Brussels | Mars | fc00:F::1 | 1.5 TB | | Brussels | Mars | fc00:F::1 | 1.5 To |
Note that Garage will **always** store the three copies of your data on nodes at different
locations. This means that in the case of this small example, the available capacity
of the cluster is in fact only 1.5 TB, because nodes in Brussels can't store more than that.
This also means that nodes in Paris and London will be under-utilized.
To make better use of the available hardware, you should ensure that the capacity
available in the different locations of your cluster is roughly the same.
For instance, here, the Mercury node could be moved to Brussels; this would allow the cluster
to store 2 TB of data in total.
## Get a Docker image ## Get a Docker image
@ -215,10 +208,10 @@ For our example, we will suppose we have the following infrastructure
| Location | Name | Disk Space | `Capacity` | `Identifier` | `Zone` | | Location | Name | Disk Space | `Capacity` | `Identifier` | `Zone` |
|----------|---------|------------|------------|--------------|--------------| |----------|---------|------------|------------|--------------|--------------|
| Paris | Mercury | 1 TB | `10` | `563e` | `par1` | | Paris | Mercury | 1 To | `10` | `563e` | `par1` |
| Paris | Venus | 2 TB | `20` | `86f0` | `par1` | | Paris | Venus | 2 To | `20` | `86f0` | `par1` |
| London | Earth | 2 TB | `20` | `6814` | `lon1` | | London | Earth | 2 To | `20` | `6814` | `lon1` |
| Brussels | Mars | 1.5 TB | `15` | `212f` | `bru1` | | Brussels | Mars | 1.5 To | `15` | `212f` | `bru1` |
#### Node identifiers #### Node identifiers
@ -268,10 +261,10 @@ have 66% chance of being stored by Venus and 33% chance of being stored by Mercu
Given the information above, we will configure our cluster as follow: Given the information above, we will configure our cluster as follow:
```bash ```bash
garage layout assign 563e -z par1 -c 10 -t mercury garage layout assign -z par1 -c 10 -t mercury 563e
garage layout assign 86f0 -z par1 -c 20 -t venus garage layout assign -z par1 -c 20 -t venus 86f0
garage layout assign 6814 -z lon1 -c 20 -t earth garage layout assign -z lon1 -c 20 -t earth 6814
garage layout assign 212f -z bru1 -c 15 -t mars garage layout assign -z bru1 -c 15 -t mars 212f
``` ```
At this point, the changes in the cluster layout have not yet been applied. At this point, the changes in the cluster layout have not yet been applied.

View file

@ -100,7 +100,7 @@ server {
} }
``` ```
### Exposing the web endpoint ## Exposing the web endpoint
To better understand the logic involved, you can refer to the [Exposing buckets as websites](/cookbook/exposing_websites.html) section. To better understand the logic involved, you can refer to the [Exposing buckets as websites](/cookbook/exposing_websites.html) section.
Otherwise, the configuration is very similar to the S3 endpoint. Otherwise, the configuration is very similar to the S3 endpoint.
@ -140,143 +140,6 @@ server {
@TODO @TODO
## Traefik v2 ## Traefik
We will see in this part how to set up a reverse proxy with [Traefik](https://docs.traefik.io/). @TODO
Here is [a basic configuration file](https://doc.traefik.io/traefik/https/acme/#configuration-examples):
```toml
[entryPoints]
[entryPoints.web]
address = ":80"
[entryPoints.websecure]
address = ":443"
[certificatesResolvers.myresolver.acme]
email = "your-email@example.com"
storage = "acme.json"
[certificatesResolvers.myresolver.acme.httpChallenge]
# used during the challenge
entryPoint = "web"
```
### Add Garage service
To add Garage on Traefik you should declare a new service using its IP address (or hostname) and port:
```toml
[http.services]
[http.services.my_garage_service.loadBalancer]
[[http.services.my_garage_service.loadBalancer.servers]]
url = "http://xxx.xxx.xxx.xxx"
port = 3900
```
It's possible to declare multiple Garage servers as back-ends:
```toml
[http.services]
[[http.services.my_garage_service.loadBalancer.servers]]
url = "http://xxx.xxx.xxx.xxx"
port = 3900
[[http.services.my_garage_service.loadBalancer.servers]]
url = "http://yyy.yyy.yyy.yyy"
port = 3900
[[http.services.my_garage_service.loadBalancer.servers]]
url = "http://zzz.zzz.zzz.zzz"
port = 3900
```
Traefik can remove unhealthy servers automatically with [a health check configuration](https://doc.traefik.io/traefik/routing/services/#health-check):
```
[http.services]
[http.services.my_garage_service.loadBalancer]
[http.services.my_garage_service.loadBalancer.healthCheck]
path = "/"
interval = "60s"
timeout = "5s"
```
### Adding a website
To add a new website, add the following declaration to your Traefik configuration file:
```toml
[http.routers]
[http.routers.my_website]
rule = "Host(`yoururl.example.org`)"
service = "my_garage_service"
entryPoints = ["web"]
```
Enable HTTPS access to your website with the following configuration section ([documentation](https://doc.traefik.io/traefik/https/overview/)):
```toml
...
entryPoints = ["websecure"]
[http.routers.my_website.tls]
certResolver = "myresolver"
...
```
### Adding gzip compression
Add the following configuration section [to compress response](https://doc.traefik.io/traefik/middlewares/http/compress/) using [gzip](https://developer.mozilla.org/en-US/docs/Glossary/GZip_compression) before sending them to the client:
```toml
[http.routers]
[http.routers.my_website]
...
middlewares = ["gzip_compress"]
...
[http.middlewares]
[http.middlewares.gzip_compress.compress]
```
### Add caching response
Traefik's caching middleware is only available on [entreprise version](https://doc.traefik.io/traefik-enterprise/middlewares/http-cache/), however the freely-available [Souin plugin](https://github.com/darkweak/souin#tr%C3%A6fik-container) can also do the job. (section to be completed)
### Complete example
```toml
[entryPoints]
[entryPoints.web]
address = ":80"
[entryPoints.websecure]
address = ":443"
[certificatesResolvers.myresolver.acme]
email = "your-email@example.com"
storage = "acme.json"
[certificatesResolvers.myresolver.acme.httpChallenge]
# used during the challenge
entryPoint = "web"
[http.routers]
[http.routers.my_website]
rule = "Host(`yoururl.example.org`)"
service = "my_garage_service"
middlewares = ["gzip_compress"]
entryPoints = ["websecure"]
[http.services]
[http.services.my_garage_service.loadBalancer]
[http.services.my_garage_service.loadBalancer.healthCheck]
path = "/"
interval = "60s"
timeout = "5s"
[[http.services.my_garage_service.loadBalancer.servers]]
url = "http://xxx.xxx.xxx.xxx"
[[http.services.my_garage_service.loadBalancer.servers]]
url = "http://yyy.yyy.yyy.yyy"
[[http.services.my_garage_service.loadBalancer.servers]]
url = "http://zzz.zzz.zzz.zzz"
[http.middlewares]
[http.middlewares.gzip_compress.compress]
```

View file

@ -1,50 +0,0 @@
+++
title = "Upgrading Garage"
weight = 40
+++
Garage is a stateful clustered application, where all nodes are communicating together and share data structures.
It makes upgrade more difficult than stateless applications so you must be more careful when upgrading.
On a new version release, there is 2 possibilities:
- protocols and data structures remained the same ➡️ this is a **straightforward upgrade**
- protocols or data structures changed ➡️ this is an **advanced upgrade**
You can quickly now what type of update you will have to operate by looking at the version identifier.
Following the [SemVer ](https://semver.org/) terminology, if only the *patch* number changed, it will only need a straightforward upgrade.
Example: an upgrade from v0.6.0 from v0.6.1 is a straightforward upgrade.
If the *minor* or *major* number changed however, you will have to do an advanced upgrade. Example: from v0.6.1 to v0.7.0.
Migrations are designed to be run only between contiguous versions (from a *major*.*minor* perspective, *patches* can be skipped).
Example: migrations from v0.6.1 to v0.7.0 and from v0.6.0 to v0.7.0 are supported but migrations from v0.5.0 to v0.7.0 are not supported.
## Straightforward upgrades
Straightforward upgrades do not imply cluster downtime.
Before upgrading, you should still read [the changelog](https://git.deuxfleurs.fr/Deuxfleurs/garage/releases) and ideally test your deployment on a staging cluster before.
When you are ready, start by checking the health of your cluster.
You can force some checks with `garage repair`, we recommend at least running `garage repair --all-nodes --yes` that is very quick to run (less than a minute).
You will see that the command correctly terminated in the logs of your daemon.
Finally, you can simply upgrades nodes one by one.
For each node: stop it, install the new binary, edit the configuration if needed, restart it.
## Advanced upgrades
Advanced upgrades will imply cluster downtime.
Before upgrading, you must read [the changelog](https://git.deuxfleurs.fr/Deuxfleurs/garage/releases) and you must test your deployment on a staging cluster before.
From a high level perspective, an advanced upgrade looks like this:
1. Make sure the health of your cluster is good (see `garage repair`)
2. Disable API access (comment the configuration in your reverse proxy)
3. Check that your cluster is idle
4. Stop the whole cluster
5. Backup the metadata folder of all your nodes, so that you will be able to restore it quickly if the upgrade fails (blocks being immutable, they should not be impacted)
6. Install the new binary, update the configuration
7. Start the whole cluster
8. If needed, run the corresponding migration from `garage migrate`
9. Make sure the health of your cluster is good
10. Enable API access (uncomment the configuration in your reverse proxy)
11. Monitor your cluster while load comes back, check that all your applications are happy with this new version
We write guides for each advanced upgrade, they are stored under the "Working Documents" section of this documentation.

View file

@ -249,6 +249,16 @@ mc alias set \
--api S3v4 --api S3v4
``` ```
You must also add an environment variable to your configuration to
inform MinIO of our region (`garage` by default, corresponding to the `s3_region` parameter
in the configuration file).
The best way is to add the following snippet to your `$HOME/.bash_profile`
or `$HOME/.bashrc` file:
```bash
export MC_REGION=garage
```
### Use `mc` ### Use `mc`
You can not list buckets from `mc` currently. You can not list buckets from `mc` currently.

View file

@ -1,644 +0,0 @@
+++
title = "Administration API"
weight = 16
+++
The Garage administration API is accessible through a dedicated server whose
listen address is specified in the `[admin]` section of the configuration
file (see [configuration file
reference](@/documentation/reference-manual/configuration.md))
**WARNING.** At this point, there is no comittement to stability of the APIs described in this document.
We will bump the version numbers prefixed to each API endpoint at each time the syntax
or semantics change, meaning that code that relies on these endpoint will break
when changes are introduced.
The Garage administration API was introduced in version 0.7.2, this document
does not apply to older versions of Garage.
## Access control
The admin API uses two different tokens for acces control, that are specified in the config file's `[admin]` section:
- `metrics_token`: the token for accessing the Metrics endpoint (if this token
is not set in the config file, the Metrics endpoint can be accessed without
access control);
- `admin_token`: the token for accessing all of the other administration
endpoints (if this token is not set in the config file, access to these
endpoints is disabled entirely).
These tokens are used as simple HTTP bearer tokens. In other words, to
authenticate access to an admin API endpoint, add the following HTTP header
to your request:
```
Authorization: Bearer <token>
```
## Administration API endpoints
### Metrics-related endpoints
#### Metrics `GET /metrics`
Returns internal Garage metrics in Prometheus format.
### Cluster operations
#### GetClusterStatus `GET /v0/status`
Returns the cluster's current status in JSON, including:
- ID of the node being queried and its version of the Garage daemon
- Live nodes
- Currently configured cluster layout
- Staged changes to the cluster layout
Example response body:
```json
{
"node": "ec79480e0ce52ae26fd00c9da684e4fa56658d9c64cdcecb094e936de0bfe71f",
"garage_version": "git:v0.8.0",
"knownNodes": {
"ec79480e0ce52ae26fd00c9da684e4fa56658d9c64cdcecb094e936de0bfe71f": {
"addr": "10.0.0.11:3901",
"is_up": true,
"last_seen_secs_ago": 9,
"hostname": "node1"
},
"4a6ae5a1d0d33bf895f5bb4f0a418b7dc94c47c0dd2eb108d1158f3c8f60b0ff": {
"addr": "10.0.0.12:3901",
"is_up": true,
"last_seen_secs_ago": 1,
"hostname": "node2"
},
"23ffd0cdd375ebff573b20cc5cef38996b51c1a7d6dbcf2c6e619876e507cf27": {
"addr": "10.0.0.21:3901",
"is_up": true,
"last_seen_secs_ago": 7,
"hostname": "node3"
},
"e2ee7984ee65b260682086ec70026165903c86e601a4a5a501c1900afe28d84b": {
"addr": "10.0.0.22:3901",
"is_up": true,
"last_seen_secs_ago": 1,
"hostname": "node4"
}
},
"layout": {
"version": 12,
"roles": {
"ec79480e0ce52ae26fd00c9da684e4fa56658d9c64cdcecb094e936de0bfe71f": {
"zone": "dc1",
"capacity": 4,
"tags": [
"node1"
]
},
"4a6ae5a1d0d33bf895f5bb4f0a418b7dc94c47c0dd2eb108d1158f3c8f60b0ff": {
"zone": "dc1",
"capacity": 6,
"tags": [
"node2"
]
},
"23ffd0cdd375ebff573b20cc5cef38996b51c1a7d6dbcf2c6e619876e507cf27": {
"zone": "dc2",
"capacity": 10,
"tags": [
"node3"
]
}
},
"stagedRoleChanges": {
"e2ee7984ee65b260682086ec70026165903c86e601a4a5a501c1900afe28d84b": {
"zone": "dc2",
"capacity": 5,
"tags": [
"node4"
]
}
}
}
}
```
#### ConnectClusterNodes `POST /v0/connect`
Instructs this Garage node to connect to other Garage nodes at specified addresses.
Example request body:
```json
[
"ec79480e0ce52ae26fd00c9da684e4fa56658d9c64cdcecb094e936de0bfe71f@10.0.0.11:3901",
"4a6ae5a1d0d33bf895f5bb4f0a418b7dc94c47c0dd2eb108d1158f3c8f60b0ff@10.0.0.12:3901"
]
```
The format of the string for a node to connect to is: `<node ID>@<ip address>:<port>`, same as in the `garage node connect` CLI call.
Example response:
```json
[
{
"success": true,
"error": null
},
{
"success": false,
"error": "Handshake error"
}
]
```
#### GetClusterLayout `GET /v0/layout`
Returns the cluster's current layout in JSON, including:
- Currently configured cluster layout
- Staged changes to the cluster layout
(the info returned by this endpoint is a subset of the info returned by GetClusterStatus)
Example response body:
```json
{
"version": 12,
"roles": {
"ec79480e0ce52ae26fd00c9da684e4fa56658d9c64cdcecb094e936de0bfe71f": {
"zone": "dc1",
"capacity": 4,
"tags": [
"node1"
]
},
"4a6ae5a1d0d33bf895f5bb4f0a418b7dc94c47c0dd2eb108d1158f3c8f60b0ff": {
"zone": "dc1",
"capacity": 6,
"tags": [
"node2"
]
},
"23ffd0cdd375ebff573b20cc5cef38996b51c1a7d6dbcf2c6e619876e507cf27": {
"zone": "dc2",
"capacity": 10,
"tags": [
"node3"
]
}
},
"stagedRoleChanges": {
"e2ee7984ee65b260682086ec70026165903c86e601a4a5a501c1900afe28d84b": {
"zone": "dc2",
"capacity": 5,
"tags": [
"node4"
]
}
}
}
```
#### UpdateClusterLayout `POST /v0/layout`
Send modifications to the cluster layout. These modifications will
be included in the staged role changes, visible in subsequent calls
of `GetClusterLayout`. Once the set of staged changes is satisfactory,
the user may call `ApplyClusterLayout` to apply the changed changes,
or `Revert ClusterLayout` to clear all of the staged changes in
the layout.
Request body format:
```json
{
<node_id>: {
"capacity": <new_capacity>,
"zone": <new_zone>,
"tags": [
<new_tag>,
...
]
},
<node_id_to_remove>: null,
...
}
```
Contrary to the CLI that may update only a subset of the fields
`capacity`, `zone` and `tags`, when calling this API all of these
values must be specified.
#### ApplyClusterLayout `POST /v0/layout/apply`
Applies to the cluster the layout changes currently registered as
staged layout changes.
Request body format:
```json
{
"version": 13
}
```
Similarly to the CLI, the body must include the version of the new layout
that will be created, which MUST be 1 + the value of the currently
existing layout in the cluster.
#### RevertClusterLayout `POST /v0/layout/revert`
Clears all of the staged layout changes.
Request body format:
```json
{
"version": 13
}
```
Reverting the staged changes is done by incrementing the version number
and clearing the contents of the staged change list.
Similarly to the CLI, the body must include the incremented
version number, which MUST be 1 + the value of the currently
existing layout in the cluster.
### Access key operations
#### ListKeys `GET /v0/key`
Returns all API access keys in the cluster.
Example response:
```json
[
{
"id": "GK31c2f218a2e44f485b94239e",
"name": "test"
},
{
"id": "GKe10061ac9c2921f09e4c5540",
"name": "test2"
}
]
```
#### CreateKey `POST /v0/key`
Creates a new API access key.
Request body format:
```json
{
"name": "NameOfMyKey"
}
```
#### ImportKey `POST /v0/key/import`
Imports an existing API key.
Request body format:
```json
{
"accessKeyId": "GK31c2f218a2e44f485b94239e",
"secretAccessKey": "b892c0665f0ada8a4755dae98baa3b133590e11dae3bcc1f9d769d67f16c3835",
"name": "NameOfMyKey"
}
```
#### GetKeyInfo `GET /v0/key?id=<acces key id>`
#### GetKeyInfo `GET /v0/key?search=<pattern>`
Returns information about the requested API access key.
If `id` is set, the key is looked up using its exact identifier (faster).
If `search` is set, the key is looked up using its name or prefix
of identifier (slower, all keys are enumerated to do this).
Example response:
```json
{
"name": "test",
"accessKeyId": "GK31c2f218a2e44f485b94239e",
"secretAccessKey": "b892c0665f0ada8a4755dae98baa3b133590e11dae3bcc1f9d769d67f16c3835",
"permissions": {
"createBucket": false
},
"buckets": [
{
"id": "70dc3bed7fe83a75e46b66e7ddef7d56e65f3c02f9f80b6749fb97eccb5e1033",
"globalAliases": [
"test2"
],
"localAliases": [],
"permissions": {
"read": true,
"write": true,
"owner": false
}
},
{
"id": "d7452a935e663fc1914f3a5515163a6d3724010ce8dfd9e4743ca8be5974f995",
"globalAliases": [
"test3"
],
"localAliases": [],
"permissions": {
"read": true,
"write": true,
"owner": false
}
},
{
"id": "e6a14cd6a27f48684579ec6b381c078ab11697e6bc8513b72b2f5307e25fff9b",
"globalAliases": [],
"localAliases": [
"test"
],
"permissions": {
"read": true,
"write": true,
"owner": true
}
},
{
"id": "96470e0df00ec28807138daf01915cfda2bee8eccc91dea9558c0b4855b5bf95",
"globalAliases": [
"alex"
],
"localAliases": [],
"permissions": {
"read": true,
"write": true,
"owner": true
}
}
]
}
```
#### DeleteKey `DELETE /v0/key?id=<acces key id>`
Deletes an API access key.
#### UpdateKey `POST /v0/key?id=<acces key id>`
Updates information about the specified API access key.
Request body format:
```json
{
"name": "NameOfMyKey",
"allow": {
"createBucket": true,
},
"deny": {}
}
```
All fields (`name`, `allow` and `deny`) are optionnal.
If they are present, the corresponding modifications are applied to the key, otherwise nothing is changed.
The possible flags in `allow` and `deny` are: `createBucket`.
### Bucket operations
#### ListBuckets `GET /v0/bucket`
Returns all storage buckets in the cluster.
Example response:
```json
[
{
"id": "70dc3bed7fe83a75e46b66e7ddef7d56e65f3c02f9f80b6749fb97eccb5e1033",
"globalAliases": [
"test2"
],
"localAliases": []
},
{
"id": "96470e0df00ec28807138daf01915cfda2bee8eccc91dea9558c0b4855b5bf95",
"globalAliases": [
"alex"
],
"localAliases": []
},
{
"id": "d7452a935e663fc1914f3a5515163a6d3724010ce8dfd9e4743ca8be5974f995",
"globalAliases": [
"test3"
],
"localAliases": []
},
{
"id": "e6a14cd6a27f48684579ec6b381c078ab11697e6bc8513b72b2f5307e25fff9b",
"globalAliases": [],
"localAliases": [
{
"accessKeyId": "GK31c2f218a2e44f485b94239e",
"alias": "test"
}
]
}
]
```
#### GetBucketInfo `GET /v0/bucket?id=<bucket id>`
#### GetBucketInfo `GET /v0/bucket?globalAlias=<alias>`
Returns information about the requested storage bucket.
If `id` is set, the bucket is looked up using its exact identifier.
If `globalAlias` is set, the bucket is looked up using its global alias.
(both are fast)
Example response:
```json
{
"id": "afa8f0a22b40b1247ccd0affb869b0af5cff980924a20e4b5e0720a44deb8d39",
"globalAliases": [],
"websiteAccess": false,
"websiteConfig": null,
"keys": [
{
"accessKeyId": "GK31c2f218a2e44f485b94239e",
"name": "Imported key",
"permissions": {
"read": true,
"write": true,
"owner": true
},
"bucketLocalAliases": [
"debug"
]
}
],
"objects": 14827,
"bytes": 13189855625,
"unfinshedUploads": 0,
"quotas": {
"maxSize": null,
"maxObjects": null
}
}
```
#### CreateBucket `POST /v0/bucket`
Creates a new storage bucket.
Request body format:
```json
{
"globalAlias": "NameOfMyBucket"
}
```
OR
```json
{
"localAlias": {
"accessKeyId": "GK31c2f218a2e44f485b94239e",
"alias": "NameOfMyBucket",
"allow": {
"read": true,
"write": true,
"owner": false
}
}
}
```
OR
```json
{}
```
Creates a new bucket, either with a global alias, a local one,
or no alias at all.
Technically, you can also specify both `globalAlias` and `localAlias` and that would create
two aliases, but I don't see why you would want to do that.
#### DeleteBucket `DELETE /v0/bucket?id=<bucket id>`
Deletes a storage bucket. A bucket cannot be deleted if it is not empty.
Warning: this will delete all aliases associated with the bucket!
#### UpdateBucket `PUT /v0/bucket?id=<bucket id>`
Updates configuration of the given bucket.
Request body format:
```json
{
"websiteAccess": {
"enabled": true,
"indexDocument": "index.html",
"errorDocument": "404.html"
},
"quotas": {
"maxSize": 19029801,
"maxObjects": null,
}
}
```
All fields (`websiteAccess` and `quotas`) are optionnal.
If they are present, the corresponding modifications are applied to the bucket, otherwise nothing is changed.
In `websiteAccess`: if `enabled` is `true`, `indexDocument` must be specified.
The field `errorDocument` is optional, if no error document is set a generic
error message is displayed when errors happen. Conversely, if `enabled` is
`false`, neither `indexDocument` nor `errorDocument` must be specified.
In `quotas`: new values of `maxSize` and `maxObjects` must both be specified, or set to `null`
to remove the quotas. An absent value will be considered the same as a `null`. It is not possible
to change only one of the two quotas.
### Operations on permissions for keys on buckets
#### BucketAllowKey `POST /v0/bucket/allow`
Allows a key to do read/write/owner operations on a bucket.
Request body format:
```json
{
"bucketId": "e6a14cd6a27f48684579ec6b381c078ab11697e6bc8513b72b2f5307e25fff9b",
"accessKeyId": "GK31c2f218a2e44f485b94239e",
"permissions": {
"read": true,
"write": true,
"owner": true
},
}
```
Flags in `permissions` which have the value `true` will be activated.
Other flags will remain unchanged.
#### BucketDenyKey `POST /v0/bucket/deny`
Denies a key from doing read/write/owner operations on a bucket.
Request body format:
```json
{
"bucketId": "e6a14cd6a27f48684579ec6b381c078ab11697e6bc8513b72b2f5307e25fff9b",
"accessKeyId": "GK31c2f218a2e44f485b94239e",
"permissions": {
"read": false,
"write": false,
"owner": true
},
}
```
Flags in `permissions` which have the value `true` will be deactivated.
Other flags will remain unchanged.
### Operations on bucket aliases
#### GlobalAliasBucket `PUT /v0/bucket/alias/global?id=<bucket id>&alias=<global alias>`
Empty body. Creates a global alias for a bucket.
#### GlobalUnaliasBucket `DELETE /v0/bucket/alias/global?id=<bucket id>&alias=<global alias>`
Removes a global alias for a bucket.
#### LocalAliasBucket `PUT /v0/bucket/alias/local?id=<bucket id>&accessKeyId=<access key ID>&alias=<local alias>`
Empty body. Creates a local alias for a bucket in the namespace of a specific access key.
#### LocalUnaliasBucket `DELETE /v0/bucket/alias/local?id=<bucket id>&accessKeyId<access key ID>&alias=<local alias>`
Removes a local alias for a bucket in the namespace of a specific access key.

View file

@ -10,7 +10,6 @@ metadata_dir = "/var/lib/garage/meta"
data_dir = "/var/lib/garage/data" data_dir = "/var/lib/garage/data"
block_size = 1048576 block_size = 1048576
block_manager_background_tranquility = 2
replication_mode = "3" replication_mode = "3"
@ -30,10 +29,6 @@ bootstrap_peers = [
consul_host = "consul.service" consul_host = "consul.service"
consul_service_name = "garage-daemon" consul_service_name = "garage-daemon"
kubernetes_namespace = "garage"
kubernetes_service_name = "garage-daemon"
kubernetes_skip_crd = false
sled_cache_capacity = 134217728 sled_cache_capacity = 134217728
sled_flush_every_ms = 2000 sled_flush_every_ms = 2000
@ -45,12 +40,6 @@ root_domain = ".s3.garage"
[s3_web] [s3_web]
bind_addr = "[::]:3902" bind_addr = "[::]:3902"
root_domain = ".web.garage" root_domain = ".web.garage"
[admin]
api_bind_addr = "0.0.0.0:3903"
metrics_token = "cacce0b2de4bc2d9f5b5fdff551e01ac1496055aed248202d415398987e35f81"
admin_token = "ae8cb40ea7368bbdbb6430af11cca7da833d3458a5f52086f4e805a570fb5c2a"
trace_sink = "http://localhost:4317"
``` ```
The following gives details about each available configuration option. The following gives details about each available configuration option.
@ -87,62 +76,24 @@ files will remain available. This however means that chunks from existing files
will not be deduplicated with chunks from newly uploaded files, meaning you will not be deduplicated with chunks from newly uploaded files, meaning you
might use more storage space that is optimally possible. might use more storage space that is optimally possible.
### `block_manager_background_tranquility`
This parameter tunes the activity of the background worker responsible for
resyncing data blocks between nodes. The higher the tranquility value is set,
the more the background worker will wait between iterations, meaning the load
on the system (including network usage between nodes) will be reduced. The
minimal value for this parameter is `0`, where the background worker will
allways work at maximal throughput to resynchronize blocks. The default value
is `2`, where the background worker will try to spend at most 1/3 of its time
working, and 2/3 sleeping in order to reduce system load.
### `replication_mode` ### `replication_mode`
Garage supports the following replication modes: Garage supports the following replication modes:
- `none` or `1`: data stored on Garage is stored on a single node. There is no - `none` or `1`: data stored on Garage is stored on a single node. There is no redundancy,
redundancy, and data will be unavailable as soon as one node fails or its and data will be unavailable as soon as one node fails or its network is disconnected.
network is disconnected. Do not use this for anything else than test Do not use this for anything else than test deployments.
deployments.
- `2`: data stored on Garage will be stored on two different nodes, if possible - `2`: data stored on Garage will be stored on two different nodes, if possible in different
in different zones. Garage tolerates one node failure, or several nodes zones. Garage tolerates one node failure before losing data. Data should be available
failing but all in a single zone (in a deployment with at least two zones), read-only when one node is down, but write operations will fail.
before losing data. Data remains available in read-only mode when one node is Use this only if you really have to.
down, but write operations will fail.
- `2-dangerous`: a variant of mode `2`, where written objects are written to - `3`: data stored on Garage will be stored on three different nodes, if possible each in
the second replica asynchronously. This means that Garage will return `200 a different zones.
OK` to a PutObject request before the second copy is fully written (or even Garage tolerates two node failure before losing data. Data should be available
before it even starts being written). This means that data can more easily read-only when two nodes are down, and writes should be possible if only a single node
be lost if the node crashes before a second copy can be completed. This is down.
also means that written objects might not be visible immediately in read
operations. In other words, this mode severely breaks the consistency and
durability guarantees of standard Garage cluster operation. Benefits of
this mode: you can still write to your cluster when one node is
unavailable.
- `3`: data stored on Garage will be stored on three different nodes, if
possible each in a different zones. Garage tolerates two node failure, or
several node failures but in no more than two zones (in a deployment with at
least three zones), before losing data. As long as only a single node fails,
or node failures are only in a single zone, reading and writing data to
Garage can continue normally.
- `3-degraded`: a variant of replication mode `3`, that lowers the read
quorum to `1`, to allow you to read data from your cluster when several
nodes (or nodes in several zones) are unavailable. In this mode, Garage
does not provide read-after-write consistency anymore. The write quorum is
still 2, ensuring that data successfully written to Garage is stored on at
least two nodes.
- `3-dangerous`: a variant of replication mode `3` that lowers both the read
and write quorums to `1`, to allow you to both read and write to your
cluster when several nodes (or nodes in several zones) are unavailable. It
is the least consistent mode of operation proposed by Garage, and also one
that should probably never be used.
Note that in modes `2` and `3`, Note that in modes `2` and `3`,
if at least the same number of zones are available, an arbitrary number of failures in if at least the same number of zones are available, an arbitrary number of failures in
@ -151,35 +102,8 @@ any given zone is tolerated as copies of data will be spread over several zones.
**Make sure `replication_mode` is the same in the configuration files of all nodes. **Make sure `replication_mode` is the same in the configuration files of all nodes.
Never run a Garage cluster where that is not the case.** Never run a Garage cluster where that is not the case.**
The quorums associated with each replication mode are described below: Changing the `replication_mode` of a cluster might work (make sure to shut down all nodes
and changing it everywhere at the time), but is not officially supported.
| `replication_mode` | Number of replicas | Write quorum | Read quorum | Read-after-write consistency? |
| ------------------ | ------------------ | ------------ | ----------- | ----------------------------- |
| `none` or `1` | 1 | 1 | 1 | yes |
| `2` | 2 | 2 | 1 | yes |
| `2-dangerous` | 2 | 1 | 1 | NO |
| `3` | 3 | 2 | 2 | yes |
| `3-degraded` | 3 | 2 | 1 | NO |
| `3-dangerous` | 3 | 1 | 1 | NO |
Changing the `replication_mode` between modes with the same number of replicas
(e.g. from `3` to `3-degraded`, or from `2-dangerous` to `2`), can be done easily by
just changing the `replication_mode` parameter in your config files and restarting all your
Garage nodes.
It is also technically possible to change the replication mode to a mode with a
different numbers of replicas, although it's a dangerous operation that is not
officially supported. This requires you to delete the existing cluster layout
and create a new layout from scratch, meaning that a full rebalancing of your
cluster's data will be needed. To do it, shut down your cluster entirely,
delete the `custer_layout` files in the meta directories of all your nodes,
update all your configuration files with the new `replication_mode` parameter,
restart your cluster, and then create a new layout with all the nodes you want
to keep. Rebalancing data will take some time, and data might temporarily
appear unavailable to your users. It is recommended to shut down public access
to the cluster while rebalancing is in progress. In theory, no data should be
lost as rebalancing is a routine operation for Garage, although we cannot
guarantee you that everything will go right in such an extreme scenario.
### `compression_level` ### `compression_level`
@ -257,20 +181,6 @@ RPC ports are announced.
Garage does not yet support talking to Consul over TLS. Garage does not yet support talking to Consul over TLS.
### `kubernetes_namespace`, `kubernetes_service_name` and `kubernetes_skip_crd`
Garage supports discovering other nodes of the cluster using kubernetes custom
resources. For this to work `kubernetes_namespace` and `kubernetes_service_name`
need to be configured.
`kubernetes_namespace` sets the namespace in which the custom resources are
configured. `kubernetes_service_name` is added as a label to these resources to
filter them, to allow for multiple deployments in a single namespace.
`kubernetes_skip_crd` can be set to true to disable the automatic creation and
patching of the `garagenodes.deuxfleurs.fr` CRD. You will need to create the CRD
manually.
### `sled_cache_capacity` ### `sled_cache_capacity`
This parameter can be used to tune the capacity of the cache used by This parameter can be used to tune the capacity of the cache used by
@ -332,35 +242,3 @@ For instance, if `root_domain` is `web.garage.eu`, a bucket called `deuxfleurs.f
will be accessible either with hostname `deuxfleurs.fr.web.garage.eu` will be accessible either with hostname `deuxfleurs.fr.web.garage.eu`
or with hostname `deuxfleurs.fr`. or with hostname `deuxfleurs.fr`.
## The `[admin]` section
Garage has a few administration capabilities, in particular to allow remote monitoring. These features are detailed below.
### `api_bind_addr`
If specified, Garage will bind an HTTP server to this port and address, on
which it will listen to requests for administration features.
See [administration API reference](@/documentation/reference-manual/admin-api.md) to learn more about these features.
### `metrics_token` (since version 0.7.2)
The token for accessing the Metrics endpoint. If this token is not set in
the config file, the Metrics endpoint can be accessed without access
control.
You can use any random string for this value. We recommend generating a random token with `openssl rand -hex 32`.
### `admin_token` (since version 0.7.2)
The token for accessing all of the other administration endpoints. If this
token is not set in the config file, access to these endpoints is disabled
entirely.
You can use any random string for this value. We recommend generating a random token with `openssl rand -hex 32`.
### `trace_sink`
Optionnally, the address of an Opentelemetry collector. If specified,
Garage will send traces in the Opentelemetry format to this endpoint. These
trace allow to inspect Garage's operation when it handles S3 API requests.

View file

@ -1,58 +0,0 @@
+++
title = "K2V"
weight = 30
+++
Starting with version 0.7.2, Garage introduces an optionnal feature, K2V,
which is an alternative storage API designed to help efficiently store
many small values in buckets (in opposition to S3 which is more designed
to store large blobs).
K2V is currently disabled at compile time in all builds, as the
specification is still subject to changes. To build a Garage version with
K2V, the Cargo feature flag `k2v` must be activated. Special builds with
the `k2v` feature flag enabled can be obtained from our download page under
"Extra builds": such builds can be identified easily as their tag name ends
with `-k2v` (example: `v0.7.2-k2v`).
The specification of the K2V API can be found
[here](https://git.deuxfleurs.fr/Deuxfleurs/garage/src/branch/k2v/doc/drafts/k2v-spec.md).
This document also includes a high-level overview of K2V's design.
The K2V API uses AWSv4 signatures for authentification, same as the S3 API.
The AWS region used for signature calculation is always the same as the one
defined for the S3 API in the config file.
## Enabling and using K2V
To enable K2V, download and run a build that has the `k2v` feature flag
enabled, or produce one yourself. Then, add the following section to your
configuration file:
```toml
[k2v_api]
api_bind_addr = "<ip>:<port>"
```
Please select a port number that is not already in use by another API
endpoint (S3 api, admin API) or by the RPC server.
We provide an early-stage K2V client library for Rust which can be imported by adding the following to your `Cargo.toml` file:
```toml
k2v-client = { git = "https://git.deuxfleurs.fr/Deuxfleurs/garage.git" }
```
There is also a simple CLI utility which can be built from source in the
following way:
```sh
git clone https://git.deuxfleurs.fr/Deuxfleurs/garage.git
cd garage/src/k2v-client
cargo build --features cli --bin k2v-cli
```
The CLI utility is self-documented, run `k2v-cli --help` to learn how to use
it. There is also a short README.md in the `src/k2v-client` folder with some
instructions.

View file

@ -1,45 +0,0 @@
+++
title = "Request routing logic"
weight = 10
+++
Data retrieval requests to Garage endpoints (S3 API and websites) are resolved
to an individual object in a bucket. Since objects are replicated to multiple nodes
Garage must ensure consistency before answering the request.
## Using quorum to ensure consistency
Garage ensures consistency by attempting to establish a quorum with the
data nodes responsible for the object. When a majority of the data nodes
have provided metadata on a object Garage can then answer the request.
When a request arrives Garage will, assuming the recommended 3 replicas, perform the following actions:
- Make a request to the two preferred nodes for object metadata
- Try the third node if one of the two initial requests fail
- Check that the metadata from at least 2 nodes match
- Check that the object hasn't been marked deleted
- Answer the request with inline data from metadata if object is small enough
- Or get data blocks from the preferred nodes and answer using the assembled object
Garage dynamically determines which nodes to query based on health, preference, and
which nodes actually host a given data. Garage has no concept of "primary" so any
healthy node with the data can be used as long as a quorum is reached for the metadata.
## Node health
Garage keeps a TCP session open to each node in the cluster and periodically pings them. If a connection
cannot be established, or a node fails to answer a number of pings, the target node is marked as failed.
Failed nodes are not used for quorum or other internal requests.
## Node preference
Garage prioritizes which nodes to query according to a few criteria:
- A node always prefers itself if it can answer the request
- Then the node prioritizes nodes in the same zone
- Finally the nodes with the lowest latency are prioritized
For further reading on the cluster structure look at the [gateway](@/documentation/cookbook/gateways.md)
and [cluster layout management](@/documentation/reference-manual/layout.md) pages.

View file

@ -3,46 +3,24 @@ title = "S3 Compatibility status"
weight = 20 weight = 20
+++ +++
## DISCLAIMER ## Endpoint implementation
**The compatibility list for other platforms is given only for informational All APIs that are missing on Garage will return a 501 Not Implemented.
purposes and based on available documentation.** They are sometimes completed, Some `x-amz-` headers are not implemented.
in a best effort approach, with the source code and inputs from maintainers
when documentation is lacking. We are not proactively monitoring new versions
of each software: check the modification history to know when the page has been
updated for the last time. Some entries will be inexact or outdated. For any
serious decision, you must make your own tests.
**The official documentation of each project can be accessed by clicking on the
project name in the column header.**
Feel free to open a PR to suggest fixes this table. Minio is missing because they do not provide a public S3 compatibility list. *The compatibility list for other platforms is given only for information purposes and based on available documentation. Some entries might be inexact. Feel free to open a PR to fix this table. Minio is missing because they do not provide a public S3 compatibility list.*
## Update history ### Features
- 2022-02-07 - First version of this page
- 2022-05-25 - Many Ceph S3 endpoints are not documented but implemented. Following a notification from the Ceph community, we added them.
## High-level features
| Feature | Garage | [Openstack Swift](https://docs.openstack.org/swift/latest/s3_compat.html) | [Ceph Object Gateway](https://docs.ceph.com/en/latest/radosgw/s3/) | [Riak CS](https://docs.riak.com/riak/cs/2.1.1/references/apis/storage/s3/index.html) | [OpenIO](https://docs.openio.io/latest/source/arch-design/s3_compliancy.html) | | Feature | Garage | [Openstack Swift](https://docs.openstack.org/swift/latest/s3_compat.html) | [Ceph Object Gateway](https://docs.ceph.com/en/latest/radosgw/s3/) | [Riak CS](https://docs.riak.com/riak/cs/2.1.1/references/apis/storage/s3/index.html) | [OpenIO](https://docs.openio.io/latest/source/arch-design/s3_compliancy.html) |
|------------------------------|----------------------------------|-----------------|---------------|---------|-----| |------------------------------|----------------------------------|-----------------|---------------|---------|-----|
| [signature v2](https://docs.aws.amazon.com/general/latest/gr/signature-version-2.html) (deprecated) | ❌ Missing | ✅ | | ✅ | ✅ | | [signature v2](https://docs.aws.amazon.com/general/latest/gr/signature-version-2.html) (deprecated) | ❌ Missing | ✅ | ❌ | ✅ | ✅ |
| [signature v4](https://docs.aws.amazon.com/AmazonS3/latest/API/sig-v4-authenticating-requests.html) | ✅ Implemented | ✅ | ✅ | ❌ | ✅ | | [signature v4](https://docs.aws.amazon.com/AmazonS3/latest/API/sig-v4-authenticating-requests.html) | ✅ Implemented | ✅ | ✅ | ❌ | ✅ |
| [URL path-style](https://docs.aws.amazon.com/AmazonS3/latest/userguide/VirtualHosting.html#path-style-access) (eg. `host.tld/bucket/key`) | ✅ Implemented | ✅ | ✅ | ❓| ✅ | | [URL path-style](https://docs.aws.amazon.com/AmazonS3/latest/userguide/VirtualHosting.html#path-style-access) (eg. `host.tld/bucket/key`) | ✅ Implemented | ✅ | ✅ | ❓| ✅ |
| [URL vhost-style](https://docs.aws.amazon.com/AmazonS3/latest/userguide/VirtualHosting.html#virtual-hosted-style-access) URL (eg. `bucket.host.tld/key`) | ✅ Implemented | ❌| ✅| ✅ | ✅ | | [URL vhost-style](https://docs.aws.amazon.com/AmazonS3/latest/userguide/VirtualHosting.html#virtual-hosted-style-access) URL (eg. `bucket.host.tld/key`) | ✅ Implemented | ❌| ✅| ✅ | ✅ |
| [Presigned URLs](https://docs.aws.amazon.com/AmazonS3/latest/userguide/ShareObjectPreSignedURL.html) | ✅ Implemented | ❌| ✅ | ✅ | ✅(❓) | | [Presigned URLs](https://docs.aws.amazon.com/AmazonS3/latest/userguide/ShareObjectPreSignedURL.html) | ✅ Implemented | ❌| ✅ | ✅ | ✅(❓) |
*Note:* OpenIO does not says if it supports presigned URLs. Because it is part *Note:* OpenIO does not says if it supports presigned URLs. Because it is part of signature v4 and they claim they support it without additional precisions, we suppose that OpenIO supports presigned URLs.
of signature v4 and they claim they support it without additional precisions,
we suppose that OpenIO supports presigned URLs.
## Endpoint implementation
All endpoints that are missing on Garage will return a 501 Not Implemented.
Some `x-amz-` headers are not implemented.
### Core endoints ### Core endoints
@ -59,17 +37,13 @@ Some `x-amz-` headers are not implemented.
| [DeleteObjects](https://docs.aws.amazon.com/AmazonS3/latest/API/API_DeleteObjects.html) | ✅ Implemented | ✅ | ✅ | ✅ | ✅ | | [DeleteObjects](https://docs.aws.amazon.com/AmazonS3/latest/API/API_DeleteObjects.html) | ✅ Implemented | ✅ | ✅ | ✅ | ✅ |
| [GetObject](https://docs.aws.amazon.com/AmazonS3/latest/API/API_GetObject.html) | ✅ Implemented | ✅ | ✅ | ✅ | ✅ | | [GetObject](https://docs.aws.amazon.com/AmazonS3/latest/API/API_GetObject.html) | ✅ Implemented | ✅ | ✅ | ✅ | ✅ |
| [ListObjects](https://docs.aws.amazon.com/AmazonS3/latest/API/API_ListObjects.html) | ✅ Implemented (see details below) | ✅ | ✅ | ✅ | ❌| | [ListObjects](https://docs.aws.amazon.com/AmazonS3/latest/API/API_ListObjects.html) | ✅ Implemented (see details below) | ✅ | ✅ | ✅ | ❌|
| [ListObjectsV2](https://docs.aws.amazon.com/AmazonS3/latest/API/API_ListObjectsV2.html) | ✅ Implemented | ❌| | ❌| ✅ | | [ListObjectsV2](https://docs.aws.amazon.com/AmazonS3/latest/API/API_ListObjectsV2.html) | ✅ Implemented | ❌| | ❌| ✅ |
| [PostObject](https://docs.aws.amazon.com/AmazonS3/latest/API/RESTObjectPOST.html) | ✅ Implemented | ❌| ✅ | ❌| ❌| | [PostObject](https://docs.aws.amazon.com/AmazonS3/latest/API/RESTObjectPOST.html) (compatibility API) | ❌ Missing | ❌| ✅ | ❌| ❌|
| [PutObject](https://docs.aws.amazon.com/AmazonS3/latest/API/API_PutObject.html) | ✅ Implemented | ✅ | ✅ | ✅ | ✅ | | [PutObject](https://docs.aws.amazon.com/AmazonS3/latest/API/API_PutObject.html) | ✅ Implemented | ✅ | ✅ | ✅ | ✅ |
**ListObjects:** Implemented, but there isn't a very good specification of what **ListObjects:** Implemented, but there isn't a very good specification of what `encoding-type=url` covers so there might be some encoding bugs. In our implementation the url-encoded fields are in the same in ListObjects as they are in ListObjectsV2.
`encoding-type=url` covers so there might be some encoding bugs. In our
implementation the url-encoded fields are in the same in ListObjects as they
are in ListObjectsV2.
*Note: Ceph API documentation is incomplete and lacks at least HeadBucket and UploadPartCopy, *Note: Ceph API documentation is incomplete and miss at least HeadBucket and UploadPartCopy, but these endpoints are documented in [Red Hat Ceph Storage - Chapter 2. Ceph Object Gateway and the S3 API](https://access.redhat.com/documentation/en-us/red_hat_ceph_storage/4/html/developer_guide/ceph-object-gateway-and-the-s3-api)*
but these endpoints are documented in [Red Hat Ceph Storage - Chapter 2. Ceph Object Gateway and the S3 API](https://access.redhat.com/documentation/en-us/red_hat_ceph_storage/4/html/developer_guide/ceph-object-gateway-and-the-s3-api)*
### Multipart Upload endpoints ### Multipart Upload endpoints
@ -93,13 +67,13 @@ For more information, please refer to our [issue tracker](https://git.deuxfleurs
| [DeleteBucketWebsite](https://docs.aws.amazon.com/AmazonS3/latest/API/API_DeleteBucketWebsite.html) | ✅ Implemented | ❌| ❌| ❌| ❌| | [DeleteBucketWebsite](https://docs.aws.amazon.com/AmazonS3/latest/API/API_DeleteBucketWebsite.html) | ✅ Implemented | ❌| ❌| ❌| ❌|
| [GetBucketWebsite](https://docs.aws.amazon.com/AmazonS3/latest/API/API_GetBucketWebsite.html) | ✅ Implemented | ❌ | ❌| ❌| ❌| | [GetBucketWebsite](https://docs.aws.amazon.com/AmazonS3/latest/API/API_GetBucketWebsite.html) | ✅ Implemented | ❌ | ❌| ❌| ❌|
| [PutBucketWebsite](https://docs.aws.amazon.com/AmazonS3/latest/API/API_PutBucketWebsite.html) | ⚠ Partially implemented (see below)| ❌| ❌| ❌| ❌| | [PutBucketWebsite](https://docs.aws.amazon.com/AmazonS3/latest/API/API_PutBucketWebsite.html) | ⚠ Partially implemented (see below)| ❌| ❌| ❌| ❌|
| [DeleteBucketCors](https://docs.aws.amazon.com/AmazonS3/latest/API/API_DeleteBucketCors.html) | ✅ Implemented | ❌| | ❌| ✅ | | [DeleteBucketCors](https://docs.aws.amazon.com/AmazonS3/latest/API/API_DeleteBucketCors.html) | ✅ Implemented | ❌| | ❌| ✅ |
| [GetBucketCors](https://docs.aws.amazon.com/AmazonS3/latest/API/API_GetBucketCors.html) | ✅ Implemented | ❌ | | ❌| ✅ | | [GetBucketCors](https://docs.aws.amazon.com/AmazonS3/latest/API/API_GetBucketCors.html) | ✅ Implemented | ❌ | | ❌| ✅ |
| [PutBucketCors](https://docs.aws.amazon.com/AmazonS3/latest/API/API_PutBucketCors.html) | ✅ Implemented | ❌| | ❌| ✅ | | [PutBucketCors](https://docs.aws.amazon.com/AmazonS3/latest/API/API_PutBucketCors.html) | ✅ Implemented | ❌| | ❌| ✅ |
**PutBucketWebsite:** Implemented, but only stores the index document suffix and the error document path. Redirects are not supported. **PutBucketWebsite:** Implemented, but only stores the index document suffix and the error document path. Redirects are not supported.
*Note: Ceph radosgw has some support for static websites but it is different from the Amazon one. It also does not implement its configuration endpoints.* *Note: Ceph radosgw has some support for static websites but it is different from Amazon one plus it does not implement its configuration endpoints.*
### ACL, Policies endpoints ### ACL, Policies endpoints
@ -109,27 +83,27 @@ See Garage CLI reference manual to learn how to use Garage's permission system.
| Endpoint | Garage | [Openstack Swift](https://docs.openstack.org/swift/latest/s3_compat.html) | [Ceph Object Gateway](https://docs.ceph.com/en/latest/radosgw/s3/) | [Riak CS](https://docs.riak.com/riak/cs/2.1.1/references/apis/storage/s3/index.html) | [OpenIO](https://docs.openio.io/latest/source/arch-design/s3_compliancy.html) | | Endpoint | Garage | [Openstack Swift](https://docs.openstack.org/swift/latest/s3_compat.html) | [Ceph Object Gateway](https://docs.ceph.com/en/latest/radosgw/s3/) | [Riak CS](https://docs.riak.com/riak/cs/2.1.1/references/apis/storage/s3/index.html) | [OpenIO](https://docs.openio.io/latest/source/arch-design/s3_compliancy.html) |
|------------------------------|----------------------------------|-----------------|---------------|---------|-----| |------------------------------|----------------------------------|-----------------|---------------|---------|-----|
| [DeleteBucketPolicy](https://docs.aws.amazon.com/AmazonS3/latest/API/API_DeleteBucketPolicy.html) | ❌ Missing | ❌| | ✅ | ❌| | [DeleteBucketPolicy](https://docs.aws.amazon.com/AmazonS3/latest/API/API_DeleteBucketPolicy.html) | ❌ Missing | ❌| | ✅ | ❌|
| [GetBucketPolicy](https://docs.aws.amazon.com/AmazonS3/latest/API/API_GetBucketPolicy.html) | ❌ Missing | ❌| | ⚠ | ❌| | [GetBucketPolicy](https://docs.aws.amazon.com/AmazonS3/latest/API/API_GetBucketPolicy.html) | ❌ Missing | ❌| | ⚠ | ❌|
| [GetBucketPolicyStatus](https://docs.aws.amazon.com/AmazonS3/latest/API/API_GetBucketPolicyStatus.html) | ❌ Missing | ❌| | ❌| ❌| | [GetBucketPolicyStatus](https://docs.aws.amazon.com/AmazonS3/latest/API/API_GetBucketPolicyStatus.html) | ❌ Missing | ❌| | ❌| ❌|
| [PutBucketPolicy](https://docs.aws.amazon.com/AmazonS3/latest/API/API_PutBucketPolicy.html) | ❌ Missing | ❌| | ⚠ | ❌| | [PutBucketPolicy](https://docs.aws.amazon.com/AmazonS3/latest/API/API_PutBucketPolicy.html) | ❌ Missing | ❌| | ⚠ | ❌|
| [GetBucketAcl](https://docs.aws.amazon.com/AmazonS3/latest/API/API_GetBucketAcl.html) | ❌ Missing | ✅ | ✅ | ✅ | ✅ | | [GetBucketAcl](https://docs.aws.amazon.com/AmazonS3/latest/API/API_GetBucketAcl.html) | ❌ Missing | ✅ | ✅ | ✅ | ✅ |
| [PutBucketAcl](https://docs.aws.amazon.com/AmazonS3/latest/API/API_PutBucketAcl.html) | ❌ Missing | ✅ | ✅ | ✅ | ✅ | | [PutBucketAcl](https://docs.aws.amazon.com/AmazonS3/latest/API/API_PutBucketAcl.html) | ❌ Missing | ✅ | ✅ | ✅ | ✅ |
| [GetObjectAcl](https://docs.aws.amazon.com/AmazonS3/latest/API/API_GetObjectAcl.html) | ❌ Missing | ✅ | ✅ | ✅ | ✅ | | [GetObjectAcl](https://docs.aws.amazon.com/AmazonS3/latest/API/API_GetObjectAcl.html) | ❌ Missing | ✅ | ✅ | ✅ | ✅ |
| [PutObjectAcl](https://docs.aws.amazon.com/AmazonS3/latest/API/API_PutObjectAcl.html) | ❌ Missing | ✅ | ✅ | ✅ | ✅ | | [PutObjectAcl](https://docs.aws.amazon.com/AmazonS3/latest/API/API_PutObjectAcl.html) | ❌ Missing | ✅ | ✅ | ✅ | ✅ |
*Notes:* Riak CS only supports a subset of the policy configuration. *Notes:* Ceph claims that it supports bucket policies but does not implement any Policy endpoints. They probably refer to their own permission system. Riak CS only supports a subset of the policy configuration.
### Versioning, Lifecycle endpoints ### Versioning, Lifecycle endpoints
Garage does not (yet) support object versioning. Garage does not support (yet) object versioning.
If you need this feature, please [share your use case in our dedicated issue](https://git.deuxfleurs.fr/Deuxfleurs/garage/issues/166). If you need this feature, please [share your use case in our dedicated issue](https://git.deuxfleurs.fr/Deuxfleurs/garage/issues/166).
| Endpoint | Garage | [Openstack Swift](https://docs.openstack.org/swift/latest/s3_compat.html) | [Ceph Object Gateway](https://docs.ceph.com/en/latest/radosgw/s3/) | [Riak CS](https://docs.riak.com/riak/cs/2.1.1/references/apis/storage/s3/index.html) | [OpenIO](https://docs.openio.io/latest/source/arch-design/s3_compliancy.html) | | Endpoint | Garage | [Openstack Swift](https://docs.openstack.org/swift/latest/s3_compat.html) | [Ceph Object Gateway](https://docs.ceph.com/en/latest/radosgw/s3/) | [Riak CS](https://docs.riak.com/riak/cs/2.1.1/references/apis/storage/s3/index.html) | [OpenIO](https://docs.openio.io/latest/source/arch-design/s3_compliancy.html) |
|------------------------------|----------------------------------|-----------------|---------------|---------|-----| |------------------------------|----------------------------------|-----------------|---------------|---------|-----|
| [DeleteBucketLifecycle](https://docs.aws.amazon.com/AmazonS3/latest/API/API_DeleteBucketLifecycle.html) | ❌ Missing | ❌| ✅| ❌| ✅| | [DeleteBucketLifecycle](https://docs.aws.amazon.com/AmazonS3/latest/API/API_DeleteBucketLifecycle.html) | ❌ Missing | ❌| ✅| ❌| ✅|
| [GetBucketLifecycleConfiguration](https://docs.aws.amazon.com/AmazonS3/latest/API/API_GetBucketLifecycleConfiguration.html) | ❌ Missing | ❌| | ❌| ✅| | [GetBucketLifecycleConfiguration](https://docs.aws.amazon.com/AmazonS3/latest/API/API_GetBucketLifecycleConfiguration.html) | ❌ Missing | ❌| | ❌| ✅|
| [PutBucketLifecycleConfiguration](https://docs.aws.amazon.com/AmazonS3/latest/API/API_PutBucketLifecycleConfiguration.html) | ❌ Missing | ❌| | ❌| ✅| | [PutBucketLifecycleConfiguration](https://docs.aws.amazon.com/AmazonS3/latest/API/API_PutBucketLifecycleConfiguration.html) | ❌ Missing | ❌| | ❌| ✅|
| [GetBucketVersioning](https://docs.aws.amazon.com/AmazonS3/latest/API/API_GetBucketVersioning.html) | ❌ Stub (see below) | ✅| ✅ | ❌| ✅| | [GetBucketVersioning](https://docs.aws.amazon.com/AmazonS3/latest/API/API_GetBucketVersioning.html) | ❌ Stub (see below) | ✅| ✅ | ❌| ✅|
| [ListObjectVersions](https://docs.aws.amazon.com/AmazonS3/latest/API/API_ListObjectVersions.html) | ❌ Missing | ❌| ✅ | ❌| ✅| | [ListObjectVersions](https://docs.aws.amazon.com/AmazonS3/latest/API/API_ListObjectVersions.html) | ❌ Missing | ❌| ✅ | ❌| ✅|
| [PutBucketVersioning](https://docs.aws.amazon.com/AmazonS3/latest/API/API_PutBucketVersioning.html) | ❌ Missing | ❌| ✅| ❌| ✅| | [PutBucketVersioning](https://docs.aws.amazon.com/AmazonS3/latest/API/API_PutBucketVersioning.html) | ❌ Missing | ❌| ✅| ❌| ✅|
@ -137,6 +111,8 @@ If you need this feature, please [share your use case in our dedicated issue](ht
**GetBucketVersioning:** Stub implementation (Garage does not yet support versionning so this always returns "versionning not enabled"). **GetBucketVersioning:** Stub implementation (Garage does not yet support versionning so this always returns "versionning not enabled").
*Note: Ceph only supports `Expiration`, `NoncurrentVersionExpiration` and `AbortIncompleteMultipartUpload` on its Lifecycle endpoints.*
### Replication endpoints ### Replication endpoints
Please open an issue if you have a use case for replication. Please open an issue if you have a use case for replication.
@ -147,10 +123,7 @@ Please open an issue if you have a use case for replication.
| [GetBucketReplication](https://docs.aws.amazon.com/AmazonS3/latest/API/API_GetBucketReplication.html) | ❌ Missing | ❌| ✅ | ❌| ❌| | [GetBucketReplication](https://docs.aws.amazon.com/AmazonS3/latest/API/API_GetBucketReplication.html) | ❌ Missing | ❌| ✅ | ❌| ❌|
| [PutBucketReplication](https://docs.aws.amazon.com/AmazonS3/latest/API/API_PutBucketReplication.html) | ❌ Missing | ❌| ⚠ | ❌| ❌| | [PutBucketReplication](https://docs.aws.amazon.com/AmazonS3/latest/API/API_PutBucketReplication.html) | ❌ Missing | ❌| ⚠ | ❌| ❌|
*Note: Ceph documentation briefly says that Ceph supports *Note: Ceph documentation briefly says that Ceph supports [replication though the S3 API](https://docs.ceph.com/en/latest/radosgw/multisite-sync-policy/#s3-replication-api) but with some limitations. Additionaly, replication endpoints are not documented in the S3 compatibility page so I don't know what kind of support we can expect.*
[replication through the S3 API](https://docs.ceph.com/en/latest/radosgw/multisite-sync-policy/#s3-replication-api)
but with some limitations.
Additionaly, replication endpoints are not documented in the S3 compatibility page so I don't know what kind of support we can expect.*
### Locking objects ### Locking objects
@ -162,8 +135,8 @@ Amazon defines a concept of [object locking](https://docs.aws.amazon.com/AmazonS
| [PutObjectLegalHold](https://docs.aws.amazon.com/AmazonS3/latest/API/API_PutObjectLegalHold.html) | ❌ Missing | ❌| ✅ | ❌| ❌| | [PutObjectLegalHold](https://docs.aws.amazon.com/AmazonS3/latest/API/API_PutObjectLegalHold.html) | ❌ Missing | ❌| ✅ | ❌| ❌|
| [GetObjectRetention](https://docs.aws.amazon.com/AmazonS3/latest/API/API_GetObjectRetention.html) | ❌ Missing | ❌| ✅ | ❌| ❌| | [GetObjectRetention](https://docs.aws.amazon.com/AmazonS3/latest/API/API_GetObjectRetention.html) | ❌ Missing | ❌| ✅ | ❌| ❌|
| [PutObjectRetention](https://docs.aws.amazon.com/AmazonS3/latest/API/API_PutObjectRetention.html) | ❌ Missing | ❌| ✅ | ❌| ❌| | [PutObjectRetention](https://docs.aws.amazon.com/AmazonS3/latest/API/API_PutObjectRetention.html) | ❌ Missing | ❌| ✅ | ❌| ❌|
| [GetObjectLockConfiguration](https://docs.aws.amazon.com/AmazonS3/latest/API/API_GetObjectLockConfiguration.html) | ❌ Missing | ❌| | ❌| ❌| | [GetObjectLockConfiguration](https://docs.aws.amazon.com/AmazonS3/latest/API/API_GetObjectLockConfiguration.html) | ❌ Missing | ❌| | ❌| ❌|
| [PutObjectLockConfiguration](https://docs.aws.amazon.com/AmazonS3/latest/API/API_PutObjectLockConfiguration.html) | ❌ Missing | ❌| | ❌| ❌| | [PutObjectLockConfiguration](https://docs.aws.amazon.com/AmazonS3/latest/API/API_PutObjectLockConfiguration.html) | ❌ Missing | ❌| | ❌| ❌|
### (Server-side) encryption ### (Server-side) encryption
@ -172,9 +145,9 @@ Please open an issue if you have a use case.
| Endpoint | Garage | [Openstack Swift](https://docs.openstack.org/swift/latest/s3_compat.html) | [Ceph Object Gateway](https://docs.ceph.com/en/latest/radosgw/s3/) | [Riak CS](https://docs.riak.com/riak/cs/2.1.1/references/apis/storage/s3/index.html) | [OpenIO](https://docs.openio.io/latest/source/arch-design/s3_compliancy.html) | | Endpoint | Garage | [Openstack Swift](https://docs.openstack.org/swift/latest/s3_compat.html) | [Ceph Object Gateway](https://docs.ceph.com/en/latest/radosgw/s3/) | [Riak CS](https://docs.riak.com/riak/cs/2.1.1/references/apis/storage/s3/index.html) | [OpenIO](https://docs.openio.io/latest/source/arch-design/s3_compliancy.html) |
|------------------------------|----------------------------------|-----------------|---------------|---------|-----| |------------------------------|----------------------------------|-----------------|---------------|---------|-----|
| [DeleteBucketEncryption](https://docs.aws.amazon.com/AmazonS3/latest/API/API_DeleteBucketEncryption.html) | ❌ Missing | ❌| | ❌| ❌| | [DeleteBucketEncryption](https://docs.aws.amazon.com/AmazonS3/latest/API/API_DeleteBucketEncryption.html) | ❌ Missing | ❌| | ❌| ❌|
| [GetBucketEncryption](https://docs.aws.amazon.com/AmazonS3/latest/API/API_GetBucketEncryption.html) | ❌ Missing | ❌| | ❌| ❌| | [GetBucketEncryption](https://docs.aws.amazon.com/AmazonS3/latest/API/API_GetBucketEncryption.html) | ❌ Missing | ❌| | ❌| ❌|
| [PutBucketEncryption](https://docs.aws.amazon.com/AmazonS3/latest/API/API_PutBucketEncryption.html) | ❌ Missing | ❌| | ❌| ❌| | [PutBucketEncryption](https://docs.aws.amazon.com/AmazonS3/latest/API/API_PutBucketEncryption.html) | ❌ Missing | ❌| | ❌| ❌|
### Misc endpoints ### Misc endpoints
@ -182,13 +155,13 @@ Please open an issue if you have a use case.
|------------------------------|----------------------------------|-----------------|---------------|---------|-----| |------------------------------|----------------------------------|-----------------|---------------|---------|-----|
| [GetBucketNotificationConfiguration](https://docs.aws.amazon.com/AmazonS3/latest/API/API_GetBucketNotificationConfiguration.html) | ❌ Missing | ❌| ✅ | ❌| ❌| | [GetBucketNotificationConfiguration](https://docs.aws.amazon.com/AmazonS3/latest/API/API_GetBucketNotificationConfiguration.html) | ❌ Missing | ❌| ✅ | ❌| ❌|
| [PutBucketNotificationConfiguration](https://docs.aws.amazon.com/AmazonS3/latest/API/API_PutBucketNotificationConfiguration.html) | ❌ Missing | ❌| ✅ | ❌| ❌| | [PutBucketNotificationConfiguration](https://docs.aws.amazon.com/AmazonS3/latest/API/API_PutBucketNotificationConfiguration.html) | ❌ Missing | ❌| ✅ | ❌| ❌|
| [DeleteBucketTagging](https://docs.aws.amazon.com/AmazonS3/latest/API/API_DeleteBucketTagging.html) | ❌ Missing | ❌| | ❌| ✅ | | [DeleteBucketTagging](https://docs.aws.amazon.com/AmazonS3/latest/API/API_DeleteBucketTagging.html) | ❌ Missing | ❌| | ❌| ✅ |
| [GetBucketTagging](https://docs.aws.amazon.com/AmazonS3/latest/API/API_GetBucketTagging.html) | ❌ Missing | ❌| | ❌| ✅ | | [GetBucketTagging](https://docs.aws.amazon.com/AmazonS3/latest/API/API_GetBucketTagging.html) | ❌ Missing | ❌| | ❌| ✅ |
| [PutBucketTagging](https://docs.aws.amazon.com/AmazonS3/latest/API/API_PutBucketTagging.html) | ❌ Missing | ❌| | ❌| ✅ | | [PutBucketTagging](https://docs.aws.amazon.com/AmazonS3/latest/API/API_PutBucketTagging.html) | ❌ Missing | ❌| | ❌| ✅ |
| [DeleteObjectTagging](https://docs.aws.amazon.com/AmazonS3/latest/API/API_DeleteObjectTagging.html) | ❌ Missing | ❌| | ❌| ✅ | | [DeleteObjectTagging](https://docs.aws.amazon.com/AmazonS3/latest/API/API_DeleteObjectTagging.html) | ❌ Missing | ❌| | ❌| ✅ |
| [GetObjectTagging](https://docs.aws.amazon.com/AmazonS3/latest/API/API_GetObjectTagging.html) | ❌ Missing | ❌| | ❌| ✅ | | [GetObjectTagging](https://docs.aws.amazon.com/AmazonS3/latest/API/API_GetObjectTagging.html) | ❌ Missing | ❌| | ❌| ✅ |
| [PutObjectTagging](https://docs.aws.amazon.com/AmazonS3/latest/API/API_PutObjectTagging.html) | ❌ Missing | ❌| | ❌| ✅ | | [PutObjectTagging](https://docs.aws.amazon.com/AmazonS3/latest/API/API_PutObjectTagging.html) | ❌ Missing | ❌| | ❌| ✅ |
| [GetObjectTorrent](https://docs.aws.amazon.com/AmazonS3/latest/API/API_GetObjectTorrent.html) | ❌ Missing | ❌| | ❌| ❌| | [GetObjectTorrent](https://docs.aws.amazon.com/AmazonS3/latest/API/API_GetObjectTorrent.html) | ❌ Missing | ❌| | ❌| ❌|
### Vendor specific endpoints ### Vendor specific endpoints

View file

@ -4,12 +4,12 @@ weight = 15
+++ +++
**This guide explains how to migrate to 0.6 if you have an existing 0.5 cluster. **This guide explains how to migrate to 0.6 if you have an existing 0.5 cluster.
We don't recommend trying to migrate to 0.6 directly from 0.4 or older.** We don't recommend trying to migrate directly from 0.4 or older to 0.6.**
**We make no guarantee that this migration will work perfectly: **We make no guarantee that this migration will work perfectly:
back up all your data before attempting it!** back up all your data before attempting it!**
Garage v0.6 introduces a new data model for buckets, Garage v0.6 (not yet released) introduces a new data model for buckets,
that allows buckets to have many names (aliases). that allows buckets to have many names (aliases).
Buckets can also have "private" aliases (called local aliases), Buckets can also have "private" aliases (called local aliases),
which are only visible when using a certain access key. which are only visible when using a certain access key.

View file

@ -1,31 +0,0 @@
+++
title = "Migrating from 0.6 to 0.7"
weight = 14
+++
**This guide explains how to migrate to 0.7 if you have an existing 0.6 cluster.
We don't recommend trying to migrate to 0.7 directly from 0.5 or older.**
**We make no guarantee that this migration will work perfectly:
back up all your data before attempting it!**
Garage v0.7 introduces a cluster protocol change to support request tracing through OpenTelemetry.
No data structure is changed, so no data migration is required.
The migration steps are as follows:
1. Do `garage repair --all-nodes --yes tables` and `garage repair --all-nodes --yes blocks`,
check the logs and check that all data seems to be synced correctly between
nodes. If you have time, do additional checks (`scrub`, `block_refs`, etc.)
2. Disable api and web access. Garage does not support disabling
these endpoints but you can change the port number or stop your reverse
proxy for instance.
3. Check once again that your cluster is healty. Run again `garage repair --all-nodes --yes tables` which is quick.
Also check your queues are empty, run `garage stats` to query them.
4. Turn off Garage v0.6
5. Backup the metadata folder of all your nodes: `cd /var/lib/garage ; tar -acf meta-v0.6.tar.zst meta/`
6. Install Garage v0.7, edit the configuration if you plan to use OpenTelemetry or the Kubernetes integration
7. Turn on Garage v0.7
8. Do `garage repair --all-nodes --yes tables` and `garage repair --all-nodes --yes blocks`
9. Your upgraded cluster should be in a working state. Re-enable API and Web
access and check that everything went well.
10. Monitor your cluster in the next hours to see if it works well under your production load, report any issue.

View file

@ -1,717 +0,0 @@
# Specification of the Garage K2V API (K2V = Key/Key/Value)
- We are storing triplets of the form `(partition key, sort key, value)` -> no
user-defined fields, the client is responsible of writing whatever he wants
in the value (typically an encrypted blob). Values are binary blobs, which
are always represented as their base64 encoding in the JSON API. Partition
keys and sort keys are utf8 strings.
- Triplets are stored in buckets; each bucket stores a separate set of triplets
- Bucket names and access keys are the same as for accessing the S3 API
- K2V triplets exist separately from S3 objects. K2V triplets don't exist for
the S3 API, and S3 objects don't exist for the K2V API.
- Values stored for triplets have associated causality information, that enables
Garage to detect concurrent writes. In case of concurrent writes, Garage
keeps the concurrent values until a further write supersedes the concurrent
values. This is the same method as Riak KV implements. The method used is
based on DVVS (dotted version vector sets), described in the paper "Scalable
and Accurate Causality Tracking for Eventually Consistent Data Stores", as
well as [here](https://github.com/ricardobcl/Dotted-Version-Vectors)
## Data format
### Triple format
Triples in K2V are constituted of three fields:
- a partition key (`pk`), an utf8 string that defines in what partition the
triplet is stored; triplets in different partitions cannot be listed together
in a ReadBatch command, or deleted together in a DeleteBatch command: a
separate command must be included in the ReadBatch/DeleteBatch call for each
partition key in which the client wants to read/delete lists of items
- a sort key (`sk`), an utf8 string that defines the index of the triplet inside its
partition; triplets are uniquely idendified by their partition key + sort key
- a value (`v`), an opaque binary blob associated to the partition key + sort key;
they are transmitted as binary when possible but in most case in the JSON API
they will be represented as strings using base64 encoding; a value can also
be `null` to indicate a deleted triplet (a `null` value is called a tombstone)
### Causality information
K2V supports storing several concurrent values associated to a pk+sk, in the
case where insertion or deletion operations are detected to be concurrent (i.e.
there is not one that was aware of the other, they are not causally dependant
one on the other). In practice, it even looks more like the opposite: to
overwrite a previously existing value, the client must give a "causality token"
that "proves" (not in a cryptographic sense) that it had seen a previous value.
Otherwise, the value written will not overwrite an existing value, it will just
create a new concurrent value.
The causality token is a binary/b64-encoded representation of a context,
specified below.
A set of concurrent values looks like this:
```
(node1, tdiscard1, (v1, t1), (v2, t2)) ; tdiscard1 < t1 < t2
(node2, tdiscard2, (v3, t3) ; tdiscard2 < t3
```
`tdiscard` for a node `i` means that all values inserted by node `i` with times
`<= tdiscard` are obsoleted, i.e. have been read by a client that overwrote it
afterwards.
The associated context would be the following: `[(node1, t2), (node2, t3)]`,
i.e. if a node reads this set of values and inserts a new values, we will now
have `tdiscard1 = t2` and `tdiscard2 = t3`, to indicate that values v1, v2 and v3
are obsoleted by the new write.
**Basic insertion.** To insert a new value `v4` with context `[(node1, t2), (node2, t3)]`, in a
simple case where there was no insertion in-between reading the value
mentionned above and writing `v4`, and supposing that node2 receives the
InsertItem query:
- `node2` generates a timestamp `t4` such that `t4 > t3`.
- the new state is as follows:
```
(node1, tdiscard1', ()) ; tdiscard1' = t2
(node2, tdiscard2', (v4, t4)) ; tdiscard2' = t3
```
**A more complex insertion example.** In the general case, other intermediate values could have
been written before `v4` with context `[(node1, t2), (node2, t3)]` is sent to the system.
For instance, here is a possible sequence of events:
1. First we have the set of values v1, v2 and v3 described above.
A node reads it, it obtains values v1, v2 and v3 with context `[(node1, t2), (node2, t3)]`.
2. A node writes a value `v5` with context `[(node1, t1)]`, i.e. `v5` is only a
successor of v1 but not of v2 or v3. Suppose node1 receives the write, it
will generate a new timestamp `t5` larger than all of the timestamps it
knows of, i.e. `t5 > t2`. We will now have:
```
(node1, tdiscard1'', (v2, t2), (v5, t5)) ; tdiscard1'' = t1 < t2 < t5
(node2, tdiscard2, (v3, t3) ; tdiscard2 < t3
```
3. Now `v4` is written with context `[(node1, t2), (node2, t3)]`, and node2
processes the query. It will generate `t4 > t3` and the state will become:
```
(node1, tdiscard1', (v5, t5)) ; tdiscard1' = t2 < t5
(node2, tdiscard2', (v4, t4)) ; tdiscard2' = t3
```
**Generic algorithm for handling insertions:** A certain node n handles the
InsertItem and is responsible for the correctness of this procedure.
1. Lock the key (or the whole table?) at this node to prevent concurrent updates of the value that would mess things up
2. Read current set of values
3. Generate a new timestamp that is larger than the largest timestamp for node n
4. Add the inserted value in the list of values of node n
5. Update the discard times to be the times set in the context, and accordingly discard overwritten values
6. Release lock
7. Propagate updated value to other nodes
8. Return to user when propagation achieved the write quorum (propagation to other nodes continues asynchronously)
**Encoding of contexts:**
Contexts consist in a list of (node id, timestamp) pairs.
They are encoded in binary as follows:
```
checksum: u64, [ node: u64, timestamp: u64 ]*
```
The checksum is just the XOR of all of the node IDs and timestamps.
Once encoded in binary, contexts are written and transmitted in base64.
### Indexing
K2V keeps an index, a secondary data structure that is updated asynchronously,
that keeps tracks of the number of triplets stored for each partition key.
This allows easy listing of all of the partition keys for which triplets exist
in a bucket, as the partition key becomes the sort key in the index.
How indexing works:
- Each node keeps a local count of how many items it stores for each partition,
in a local Sled tree that is updated atomically when an item is modified.
- These local counters are asynchronously stored in the index table which is
a regular Garage table spread in the network. Counters are stored as LWW values,
so basically the final table will have the following structure:
```
- pk: bucket
- sk: partition key for which we are counting
- v: lwwmap (node id -> number of items)
```
The final number of items present in the partition can be estimated by taking
the maximum of the values (i.e. the value for the node that announces having
the most items for that partition). In most cases the values for different node
IDs should all be the same; more precisely, three node IDs should map to the
same non-zero value, and all other node IDs that are present are tombstones
that map to zeroes. Note that we need to filter out values from nodes that are
no longer part of the cluster layout, as when nodes are removed they won't
necessarily have had the time to set their counters to zero.
## Important details
**THIS SECTION CONTAINS A FEW WARNINGS ON THE K2V API WHICH ARE IMPORTANT
TO UNDERSTAND IN ORDER TO USE IT CORRECTLY.**
- **Internal server errors on updates do not mean that the update isn't stored.**
K2V will return an internal server error when it cannot reach a quorum of nodes on
which to save an updated value. However the value may still be stored on just one
node, which will then propagate it to other nodes asynchronously via anti-entropy.
- **Batch operations are not transactions.** When calling InsertBatch or DeleteBatch,
items may appear partially inserted/deleted while the operation is being processed.
More importantly, if InsertBatch or DeleteBatch returns an internal server error,
some of the items to be inserted/deleted might end up inserted/deleted on the server,
while others may still have their old value.
- **Concurrent values are deduplicated.** When inserting a value for a key,
Garage might internally end up
storing the value several times if there are network errors. These values will end up as
concurrent values for a key, with the same byte string (or `null` for a deletion).
Garage fixes this by deduplicating concurrent values when they are returned to the
user on read operations. Importantly, *Garage does not differentiate between duplicate
concurrent values due to the user making the same call twice, or Garage having to
do an internal retry*. This means that all duplicate concurrent values are deduplicated
when an item is read: if the user inserts twice concurrently the same value, they will
only read it once.
## API Endpoints
**Remark.** Example queries and responses here are given in JSON5 format
for clarity. However the actual K2V API uses basic JSON so all examples
and responses need to be translated.
### Operations on single items
**ReadItem: `GET /<bucket>/<partition key>?sort_key=<sort key>`**
Query parameters:
| name | default value | meaning |
| - | - | - |
| `sort_key` | **mandatory** | The sort key of the item to read |
Returns the item with specified partition key and sort key. Values can be
returned in either of two ways:
1. a JSON array of base64-encoded values, or `null`'s for tombstones, with
header `Content-Type: application/json`
2. in the case where there are no concurrent values, the single present value
can be returned directly as the response body (or an HTTP 204 NO CONTENT for
a tombstone), with header `Content-Type: application/octet-stream`
The choice between return formats 1 and 2 is directed by the `Accept` HTTP header:
- if the `Accept` header is not present, format 1 is always used
- if `Accept` contains `application/json` but not `application/octet-stream`,
format 1 is always used
- if `Accept` contains `application/octet-stream` but not `application/json`,
format 2 is used when there is a single value, and an HTTP error 409 (HTTP
409 CONFLICT) is returned in the case of multiple concurrent values
(including concurrent tombstones)
- if `Accept` contains both, format 2 is used when there is a single value, and
format 1 is used as a fallback in case of concurrent values
- if `Accept` contains none, HTTP 406 NOT ACCEPTABLE is raised
Example query:
```
GET /my_bucket/mailboxes?sort_key=INBOX HTTP/1.1
```
Example response:
```json
HTTP/1.1 200 OK
X-Garage-Causality-Token: opaquetoken123
Content-Type: application/json
[
"b64cryptoblob123",
"b64cryptoblob'123"
]
```
Example response in case the item is a tombstone:
```
HTTP/1.1 200 OK
X-Garage-Causality-Token: opaquetoken999
Content-Type: application/json
[
null
]
```
Example query 2:
```
GET /my_bucket/mailboxes?sort_key=INBOX HTTP/1.1
Accept: application/octet-stream
```
Example response if multiple concurrent versions exist:
```
HTTP/1.1 409 CONFLICT
X-Garage-Causality-Token: opaquetoken123
Content-Type: application/octet-stream
```
Example response in case of single value:
```
HTTP/1.1 200 OK
X-Garage-Causality-Token: opaquetoken123
Content-Type: application/octet-stream
cryptoblob123
```
Example response in case of a single value that is a tombstone:
```
HTTP/1.1 204 NO CONTENT
X-Garage-Causality-Token: opaquetoken123
Content-Type: application/octet-stream
```
**PollItem: `GET /<bucket>/<partition key>?sort_key=<sort key>&causality_token=<causality token>`**
This endpoint will block until a new value is written to a key.
The GET parameter `causality_token` should be set to the causality
token returned with the last read of the key, so that K2V knows
what values are concurrent or newer than the ones that the
client previously knew.
This endpoint returns the new value in the same format as ReadItem.
If no new value is written and the timeout elapses,
an HTTP 304 NOT MODIFIED is returned.
Query parameters:
| name | default value | meaning |
| - | - | - |
| `sort_key` | **mandatory** | The sort key of the item to read |
| `causality_token` | **mandatory** | The causality token of the last known value or set of values |
| `timeout` | 300 | The timeout before 304 NOT MODIFIED is returned if the value isn't updated |
The timeout can be set to any number of seconds, with a maximum of 600 seconds (10 minutes).
**InsertItem: `PUT /<bucket>/<partition key>?sort_key=<sort_key>`**
Inserts a single item. This request does not use JSON, the body is sent directly as a binary blob.
To supersede previous values, the HTTP header `X-Garage-Causality-Token` should
be set to the causality token returned by a previous read on this key. This
header can be ommitted for the first writes to the key.
Example query:
```
PUT /my_bucket/mailboxes?sort_key=INBOX HTTP/1.1
X-Garage-Causality-Token: opaquetoken123
myblobblahblahblah
```
Example response:
```
HTTP/1.1 200 OK
```
**DeleteItem: `DELETE /<bucket>/<partition key>?sort_key=<sort_key>`**
Deletes a single item. The HTTP header `X-Garage-Causality-Token` must be set
to the causality token returned by a previous read on this key, to indicate
which versions of the value should be deleted. The request will not process if
`X-Garage-Causality-Token` is not set.
Example query:
```
DELETE /my_bucket/mailboxes?sort_key=INBOX HTTP/1.1
X-Garage-Causality-Token: opaquetoken123
```
Example response:
```
HTTP/1.1 204 NO CONTENT
```
### Operations on index
**ReadIndex: `GET /<bucket>?start=<start>&end=<end>&limit=<limit>`**
Lists all partition keys in the bucket for which some triplets exist, and gives
for each the number of triplets, total number of values (which might be bigger
than the number of triplets in case of conflicts), total number of bytes of
these values, and number of triplets that are in a state of conflict.
The values returned are an approximation of the true counts in the bucket,
as these values are asynchronously updated, and thus eventually consistent.
Query parameters:
| name | default value | meaning |
| - | - | - |
| `prefix` | `null` | Restrict listing to partition keys that start with this prefix |
| `start` | `null` | First partition key to list, in lexicographical order |
| `end` | `null` | Last partition key to list (excluded) |
| `limit` | `null` | Maximum number of partition keys to list |
| `reverse` | `false` | Iterate in reverse lexicographical order |
The response consists in a JSON object that repeats the parameters of the query and gives the result (see below).
The listing starts at partition key `start`, or if not specified at the
smallest partition key that exists. It returns partition keys in increasing
order, or decreasing order if `reverse` is set to `true`,
and stops when either of the following conditions is met:
1. if `end` is specfied, the partition key `end` is reached or surpassed (if it
is reached exactly, it is not included in the result)
2. if `limit` is specified, `limit` partition keys have been listed
3. no more partition keys are available to list
In case 2, and if there are more partition keys to list before condition 1
triggers, then in the result `more` is set to `true` and `nextStart` is set to
the first partition key that couldn't be listed due to the limit. In the first
case (if the listing stopped because of the `end` parameter), `more` is not set
and the `nextStart` key is not specified.
Note that if `reverse` is set to `true`, `start` is the highest key
(in lexicographical order) for which values are returned.
This means that if an `end` is specified, it must be smaller than `start`,
otherwise no values will be returned.
Example query:
```
GET /my_bucket HTTP/1.1
```
Example response:
```json
HTTP/1.1 200 OK
{
prefix: null,
start: null,
end: null,
limit: null,
reverse: false,
partitionKeys: [
{
pk: "keys",
entries: 3043,
conflicts: 0,
values: 3043,
bytes: 121720,
},
{
pk: "mailbox:INBOX",
entries: 42,
conflicts: 1,
values: 43,
bytes: 142029,
},
{
pk: "mailbox:Junk",
entries: 2991
conflicts: 0,
values: 2991,
bytes: 12019322,
},
{
pk: "mailbox:Trash",
entries: 10,
conflicts: 0,
values: 10,
bytes: 32401,
},
{
pk: "mailboxes",
entries: 3,
conflicts: 0,
values: 3,
bytes: 3019,
},
],
more: false,
nextStart: null,
}
```
### Operations on batches of items
**InsertBatch: `POST /<bucket>`**
Simple insertion and deletion of triplets. The body is just a list of items to
insert in the following format:
`{ pk: "<partition key>", sk: "<sort key>", ct: "<causality token>"|null, v: "<value>"|null }`.
The causality token should be the one returned in a previous read request (e.g.
by ReadItem or ReadBatch), to indicate that this write takes into account the
values that were returned from these reads, and supersedes them causally. If
the triplet is inserted for the first time, the causality token should be set to
`null`.
The value is expected to be a base64-encoded binary blob. The value `null` can
also be used to delete the triplet while preserving causality information: this
allows to know if a delete has happenned concurrently with an insert, in which
case both are preserved and returned on reads (see below).
Partition keys and sort keys are utf8 strings which are stored sorted by
lexicographical ordering of their binary representation.
Example query:
```json
POST /my_bucket HTTP/1.1
[
{ pk: "mailbox:INBOX", sk: "001892831", ct: "opaquetoken321", v: "b64cryptoblob321updated" },
{ pk: "mailbox:INBOX", sk: "001892912", ct: null, v: "b64cryptoblob444" },
{ pk: "mailbox:INBOX", sk: "001892932", ct: "opaquetoken654", v: null },
]
```
Example response:
```
HTTP/1.1 200 OK
```
**ReadBatch: `POST /<bucket>?search`**, or alternatively<br/>
**ReadBatch: `SEARCH /<bucket>`**
Batch read of triplets in a bucket.
The request body is a JSON list of searches, that each specify a range of
items to get (to get single items, set `singleItem` to `true`). A search is a
JSON struct with the following fields:
| name | default value | meaning |
| - | - | - |
| `partitionKey` | **mandatory** | The partition key in which to search |
| `prefix` | `null` | Restrict items to list to those whose sort keys start with this prefix |
| `start` | `null` | The sort key of the first item to read |
| `end` | `null` | The sort key of the last item to read (excluded) |
| `limit` | `null` | The maximum number of items to return |
| `reverse` | `false` | Iterate in reverse lexicographical order on sort keys |
| `singleItem` | `false` | Whether to return only the item with sort key `start` |
| `conflictsOnly` | `false` | Whether to return only items that have several concurrent values |
| `tombstones` | `false` | Whether or not to return tombstone lines to indicate the presence of old deleted items |
For each of the searches, triplets are listed and returned separately. The
semantics of `prefix`, `start`, `end`, `limit` and `reverse` are the same as for ReadIndex. The
additionnal parameter `singleItem` allows to get a single item, whose sort key
is the one given in `start`. Parameters `conflictsOnly` and `tombstones`
control additional filters on the items that are returned.
The result is a list of length the number of searches, that consists in for
each search a JSON object specified similarly to the result of ReadIndex, but
that lists triplets within a partition key.
The format of returned tuples is as follows: `{ sk: "<sort key>", ct: "<causality
token>", v: ["<value1>", ...] }`, with the following fields:
- `sk` (sort key): any unicode string used as a sort key
- `ct` (causality token): an opaque token served by the server (generally
base64-encoded) to be used in subsequent writes to this key
- `v` (list of values): each value is a binary blob, always base64-encoded;
contains multiple items when concurrent values exists
- in case of concurrent update and deletion, a `null` is added to the list of concurrent values
- if the `tombstones` query parameter is set to `true`, tombstones are returned
for items that have been deleted (this can be usefull for inserting after an
item that has been deleted, so that the insert is not considered
concurrent with the delete). Tombstones are returned as tuples in the
same format with only `null` values
Example query:
```json
POST /my_bucket?search HTTP/1.1
[
{
partitionKey: "mailboxes",
},
{
partitionKey: "mailbox:INBOX",
start: "001892831",
limit: 3,
},
{
partitionKey: "keys",
start: "0",
singleItem: true,
},
]
```
Example associated response body:
```json
HTTP/1.1 200 OK
[
{
partitionKey: "mailboxes",
prefix: null,
start: null,
end: null,
limit: null,
reverse: false,
conflictsOnly: false,
tombstones: false,
singleItem: false,
items: [
{ sk: "INBOX", ct: "opaquetoken123", v: ["b64cryptoblob123", "b64cryptoblob'123"] },
{ sk: "Trash", ct: "opaquetoken456", v: ["b64cryptoblob456"] },
{ sk: "Junk", ct: "opaquetoken789", v: ["b64cryptoblob789"] },
],
more: false,
nextStart: null,
},
{
partitionKey: "mailbox::INBOX",
prefix: null,
start: "001892831",
end: null,
limit: 3,
reverse: false,
conflictsOnly: false,
tombstones: false,
singleItem: false,
items: [
{ sk: "001892831", ct: "opaquetoken321", v: ["b64cryptoblob321"] },
{ sk: "001892832", ct: "opaquetoken654", v: ["b64cryptoblob654"] },
{ sk: "001892874", ct: "opaquetoken987", v: ["b64cryptoblob987"] },
],
more: true,
nextStart: "001892898",
},
{
partitionKey: "keys",
prefix: null,
start: "0",
end: null,
conflictsOnly: false,
tombstones: false,
limit: null,
reverse: false,
singleItem: true,
items: [
{ sk: "0", ct: "opaquetoken999", v: ["b64binarystuff999"] },
],
more: false,
nextStart: null,
},
]
```
**DeleteBatch: `POST /<bucket>?delete`**
Batch deletion of triplets. The request format is the same for `POST
/<bucket>?search` to indicate items or range of items, except that here they
are deleted instead of returned, but only the fields `partitionKey`, `prefix`, `start`,
`end`, and `singleItem` are supported. Causality information is not given by
the user: this request will internally list all triplets and write deletion
markers that supersede all of the versions that have been read.
This request returns for each series of items to be deleted, the number of
matching items that have been found and deleted.
Example query:
```json
POST /my_bucket?delete HTTP/1.1
[
{
partitionKey: "mailbox:OldMailbox",
},
{
partitionKey: "mailbox:INBOX",
start: "0018928321",
singleItem: true,
},
]
```
Example response:
```
HTTP/1.1 200 OK
[
{
partitionKey: "mailbox:OldMailbox",
prefix: null,
start: null,
end: null,
singleItem: false,
deletedItems: 35,
},
{
partitionKey: "mailbox:INBOX",
prefix: null,
start: "0018928321",
end: null,
singleItem: true,
deletedItems: 1,
},
]
```
## Internals: causality tokens
The method used is based on DVVS (dotted version vector sets). See:
- the paper "Scalable and Accurate Causality Tracking for Eventually Consistent Data Stores"
- <https://github.com/ricardobcl/Dotted-Version-Vectors>
For DVVS to work, write operations (at each node) must take a lock on the data table.

View file

@ -1,14 +0,0 @@
*
!assets
!.gitignore
!*.svg
!*.png
!*.jpg
!*.tex
!Makefile
!.gitignore
!assets/*.drawio.pdf
!talk.pdf

View file

@ -1,5 +0,0 @@
talk.pdf: talk.tex assets/consistent_hashing_1.pdf assets/consistent_hashing_2.pdf assets/consistent_hashing_3.pdf assets/consistent_hashing_4.pdf assets/garage_tables.pdf assets/deuxfleurs.pdf
pdflatex talk.tex
assets/%.pdf: assets/%.svg
inkscape -D -z --file=$^ --export-pdf=$@

Binary file not shown.

Before

Width:  |  Height:  |  Size: 32 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 115 KiB

File diff suppressed because one or more lines are too long

Before

Width:  |  Height:  |  Size: 184 KiB

Binary file not shown.

Binary file not shown.

Before

Width:  |  Height:  |  Size: 26 KiB

Binary file not shown.

Binary file not shown.

Before

Width:  |  Height:  |  Size: 27 KiB

Binary file not shown.

Binary file not shown.

Before

Width:  |  Height:  |  Size: 8.9 KiB

Binary file not shown.

Binary file not shown.

Before

Width:  |  Height:  |  Size: 18 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 4.8 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 263 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 82 KiB

File diff suppressed because one or more lines are too long

Before

Width:  |  Height:  |  Size: 53 KiB

File diff suppressed because one or more lines are too long

Before

Width:  |  Height:  |  Size: 54 KiB

File diff suppressed because one or more lines are too long

Before

Width:  |  Height:  |  Size: 56 KiB

File diff suppressed because one or more lines are too long

Before

Width:  |  Height:  |  Size: 57 KiB

View file

@ -1,91 +0,0 @@
<?xml version="1.0" encoding="UTF-8" standalone="no"?>
<svg
viewBox="0 0 70.424515 70.300102"
version="1.1"
id="svg8"
sodipodi:docname="logo.svg"
inkscape:version="1.1 (c68e22c387, 2021-05-23)"
inkscape:export-filename="/home/quentin/Documents/dev/deuxfleurs/site/src/img/logo.png"
inkscape:export-xdpi="699.30194"
inkscape:export-ydpi="699.30194"
width="70.424515"
height="70.300102"
xmlns:inkscape="http://www.inkscape.org/namespaces/inkscape"
xmlns:sodipodi="http://sodipodi.sourceforge.net/DTD/sodipodi-0.dtd"
xmlns="http://www.w3.org/2000/svg"
xmlns:svg="http://www.w3.org/2000/svg">
<defs
id="defs12" />
<sodipodi:namedview
id="namedview10"
pagecolor="#ffffff"
bordercolor="#666666"
borderopacity="1.0"
inkscape:pageshadow="2"
inkscape:pageopacity="0.0"
inkscape:pagecheckerboard="0"
showgrid="false"
inkscape:zoom="12.125"
inkscape:cx="43.092783"
inkscape:cy="48.082474"
inkscape:window-width="3072"
inkscape:window-height="1659"
inkscape:window-x="0"
inkscape:window-y="0"
inkscape:window-maximized="1"
inkscape:current-layer="svg8" />
<g
id="g79969"
transform="translate(-0.827,34.992103)">
<path
fill="#ffffff"
d="m 15.632,34.661 c -0.799,-0.597 -1.498,-1.484 -2.035,-2.592 l -0.228,-0.47 -0.46,0.249 c -0.975,0.528 -1.913,0.858 -2.744,0.969 L 9.963,29.061 6.327,30.029 C 6.17,29.175 6.202,28.142 6.423,27.007 L 6.526,26.482 5.994,26.416 C 4.752,26.262 3.688,25.891 2.89,25.336 L 4.411,22.419 1.423,20.896 C 1.742,19.952 2.371,19.014 3.257,18.161 L 3.634,17.798 3.255,17.438 C 2.452,16.674 1.847,15.884 1.485,15.127 L 4.995,13.774 2.95,10.615 C 3.69,10.213 4.643,9.929 5.739,9.783 L 6.258,9.715 6.167,9.201 C 5.952,7.99 5.995,6.863 6.291,5.913 l 3.308,0.523 0.524,-3.308 c 0.988,0.013 2.08,0.326 3.164,0.907 L 13.749,4.283 13.975,3.81 C 14.454,2.807 15.019,1.986 15.628,1.406 L 18,4.326 20.372,1.406 c 0.609,0.58 1.175,1.401 1.653,2.404 l 0.226,0.473 0.462,-0.247 C 23.798,3.455 24.891,3.142 25.877,3.13 L 26.4,6.438 29.71,5.913 c 0.296,0.951 0.34,2.078 0.124,3.288 l -0.092,0.515 0.518,0.069 c 1.095,0.145 2.048,0.43 2.788,0.832 l -2.046,3.156 3.511,1.355 c -0.361,0.757 -0.966,1.547 -1.77,2.311 l -0.379,0.36 0.377,0.363 c 0.888,0.854 1.516,1.793 1.835,2.736 l -2.984,1.52 1.521,2.984 c -0.812,0.574 -1.871,0.964 -3.094,1.134 l -0.518,0.072 0.096,0.514 c 0.201,1.089 0.226,2.083 0.073,2.909 l -3.634,-0.97 -0.204,3.757 c -0.83,-0.11 -1.768,-0.44 -2.742,-0.968 l -0.459,-0.249 -0.228,0.47 c -0.539,1.107 -1.237,1.994 -2.036,2.591 L 18,32.293 Z"
id="path2" />
<path
d="M 7.092,10.678 C 6.562,9.189 6.394,7.708 6.66,6.478 l 2.368,0.375 0.987,0.156 0.157,-0.988 0.375,-2.368 C 11.808,3.78 13.16,4.396 14.409,5.359 14.527,5.022 14.653,4.696 14.791,4.392 13.24,3.257 11.568,2.629 10.061,2.629 9.938,2.629 9.816,2.633 9.695,2.642 L 9.184,5.865 5.96,5.354 C 5.36,6.841 5.395,8.769 6.045,10.747 6.38,10.71 6.729,10.686 7.092,10.678 Z M 21.593,5.359 c 1.248,-0.962 2.6,-1.578 3.86,-1.705 l 0.376,2.368 0.156,0.988 0.987,-0.157 2.369,-0.376 c 0.266,1.23 0.098,2.71 -0.432,4.2 0.361,0.009 0.711,0.032 1.046,0.07 C 30.606,8.769 30.64,6.841 30.04,5.353 L 26.815,5.865 26.304,2.641 c -0.12,-0.008 -0.242,-0.012 -0.365,-0.012 -1.507,0 -3.179,0.628 -4.73,1.762 0.14,0.306 0.266,0.631 0.384,0.968 z M 7.368,27 h 0.035 c 0.067,0 0.157,-0.604 0.26,-0.947 -0.098,0.004 -0.197,0.046 -0.294,0.046 -1.496,0 -2.826,-0.303 -3.83,-0.89 L 4.628,23.081 5.082,22.194 4.191,21.742 2.055,20.654 C 2.563,19.503 3.57,18.404 4.873,17.511 4.586,17.292 4.312,17.07 4.063,16.842 2.376,18.059 1.217,19.597 0.828,21.152 l 2.908,1.483 -1.482,2.843 C 3.475,26.501 5.303,27 7.368,27 Z m 27.806,-5.846 c -0.39,-1.555 -1.548,-3.093 -3.234,-4.311 -0.25,0.228 -0.523,0.451 -0.81,0.669 1.304,0.893 2.31,1.992 2.817,3.145 l -2.136,1.088 -0.891,0.453 0.454,0.892 1.089,2.137 c -1.004,0.587 -2.332,0.904 -3.828,0.904 -0.099,0 -0.199,-0.01 -0.299,-0.013 0.103,0.344 0.192,0.683 0.26,1.011 l 0.039,0.002 c 2.066,0 3.892,-0.563 5.112,-1.587 l -1.482,-2.908 z m -12.653,9.182 c -0.447,1.517 -1.181,2.812 -2.119,3.651 L 18.707,32.293 18,31.586 l -0.707,0.707 -1.695,1.694 c -0.938,-0.839 -1.673,-2.136 -2.12,-3.652 -0.296,0.206 -0.593,0.397 -0.886,0.563 0.636,1.98 1.741,3.559 3.1,4.409 L 18,33 l 2.308,2.308 c 1.358,-0.851 2.464,-2.428 3.101,-4.408 -0.295,-0.168 -0.591,-0.359 -0.888,-0.564 z"
fill="#ea596e"
id="path4" />
<path
fill="#ea596e"
d="m 20.118,5.683 c 0.426,1.146 0.748,2.596 0.841,4.284 l 0.2,3.683 3.564,-0.946 c 1.32,-0.351 2.655,-0.536 3.86,-0.536 0.16,0 0.318,0.003 0.474,0.01 l -1.827,2.819 3.139,1.211 c -0.958,0.759 -2.237,1.514 -3.814,2.123 l -3.441,1.328 2.001,3.099 c 0.918,1.42 1.509,2.782 1.838,3.96 L 23.709,25.853 23.527,29.21 C 22.508,28.533 21.395,27.55 20.329,26.237 L 18,23.374 15.672,26.236 c -1.066,1.312 -2.179,2.295 -3.198,2.972 l -0.18,-3.354 -3.248,0.864 c 0.329,-1.178 0.921,-2.54 1.839,-3.961 L 12.889,19.658 9.447,18.33 C 7.87,17.721 6.591,16.967 5.633,16.208 L 8.768,15 6.941,12.177 c 0.155,-0.006 0.313,-0.01 0.473,-0.01 1.206,0 2.541,0.185 3.861,0.536 l 3.564,0.947 0.202,-3.683 c 0.092,-1.688 0.415,-3.138 0.84,-4.284 L 18,8.292 20.118,5.683 M 20.308,0.692 18,3.533 15.692,0.692 C 13.703,2.224 12.271,5.684 12.046,9.804 10.429,9.374 8.854,9.167 7.414,9.167 c -2.11,0 -3.929,0.445 -5.161,1.289 l 1.989,3.073 -3.415,1.316 c 0.842,2.366 3.69,4.797 7.54,6.283 -2.241,3.465 -3.116,7.106 -2.407,9.516 l 3.537,-0.941 0.196,3.654 c 2.512,-0.07 5.703,-2.027 8.307,-5.228 2.603,3.201 5.796,5.158 8.306,5.228 l 0.198,-3.655 3.535,0.943 c 0.71,-2.411 -0.165,-6.05 -2.404,-9.517 3.849,-1.485 6.696,-3.918 7.538,-6.283 l -3.415,-1.318 1.99,-3.07 c -1.233,-0.844 -3.053,-1.29 -5.164,-1.29 -1.438,0 -3.013,0.207 -4.63,0.636 C 23.729,5.684 22.297,2.224 20.308,0.692 Z"
id="path6" />
</g>
<g
id="g79964"
transform="translate(-1.043816,35.993714)">
<path
fill="#ffffff"
d="m 51.92633,-2.0247139 c -0.799,-0.597 -1.498,-1.484 -2.035,-2.592 l -0.228,-0.47 -0.46,0.249 c -0.975,0.528 -1.913,0.858 -2.744,0.969 l -0.202,-3.7560001 -3.636,0.968 c -0.157,-0.854 -0.125,-1.887 0.096,-3.022 l 0.103,-0.525 -0.532,-0.066 c -1.242,-0.154 -2.306,-0.525 -3.104,-1.08 l 1.521,-2.917 -2.988,-1.523 c 0.319,-0.944 0.948,-1.882 1.834,-2.735 l 0.377,-0.363 -0.379,-0.36 c -0.803,-0.764 -1.408,-1.554 -1.77,-2.311 l 3.51,-1.353 -2.045,-3.159 c 0.74,-0.402 1.693,-0.686 2.789,-0.832 l 0.519,-0.068 -0.091,-0.514 c -0.215,-1.211 -0.172,-2.338 0.124,-3.288 l 3.308,0.523 0.524,-3.308 c 0.988,0.013 2.08,0.326 3.164,0.907 l 0.462,0.248 0.226,-0.473 c 0.479,-1.003 1.044,-1.824 1.653,-2.404 l 2.372,2.92 2.372,-2.92 c 0.609,0.58 1.175,1.401 1.653,2.404 l 0.226,0.473 0.462,-0.247 c 1.085,-0.581 2.178,-0.894 3.164,-0.906 l 0.523,3.308 3.31,-0.525 c 0.296,0.951 0.34,2.078 0.124,3.288 l -0.092,0.515 0.518,0.069 c 1.095,0.145 2.048,0.43 2.788,0.832 l -2.046,3.156 3.511,1.355 c -0.361,0.757 -0.966,1.547 -1.77,2.311 l -0.379,0.36 0.377,0.363 c 0.888,0.854 1.516,1.793 1.835,2.736 l -2.984,1.52 1.521,2.984 c -0.812,0.574 -1.871,0.964 -3.094,1.134 l -0.518,0.072 0.096,0.514 c 0.201,1.089 0.226,2.083 0.073,2.909 l -3.634,-0.97 -0.204,3.7570001 c -0.83,-0.11 -1.768,-0.44 -2.742,-0.968 l -0.459,-0.249 -0.228,0.47 c -0.539,1.107 -1.237,1.994 -2.036,2.591 l -2.367,-2.369 z"
id="path2-9" />
<path
d="m 43.38633,-26.007714 c -0.53,-1.489 -0.698,-2.97 -0.432,-4.2 l 2.368,0.375 0.987,0.156 0.157,-0.988 0.375,-2.368 c 1.261,0.127 2.613,0.743 3.862,1.706 0.118,-0.337 0.244,-0.663 0.382,-0.967 -1.551,-1.135 -3.223,-1.763 -4.73,-1.763 -0.123,0 -0.245,0.004 -0.366,0.013 l -0.511,3.223 -3.224,-0.511 c -0.6,1.487 -0.565,3.415 0.085,5.393 0.335,-0.037 0.684,-0.061 1.047,-0.069 z m 14.501,-5.319 c 1.248,-0.962 2.6,-1.578 3.86,-1.705 l 0.376,2.368 0.156,0.988 0.987,-0.157 2.369,-0.376 c 0.266,1.23 0.098,2.71 -0.432,4.2 0.361,0.009 0.711,0.032 1.046,0.07 0.651,-1.978 0.685,-3.906 0.085,-5.394 l -3.225,0.512 -0.511,-3.224 c -0.12,-0.008 -0.242,-0.012 -0.365,-0.012 -1.507,0 -3.179,0.628 -4.73,1.762 0.14,0.306 0.266,0.631 0.384,0.968 z m -14.225,21.641 h 0.035 c 0.067,0 0.157,-0.604 0.26,-0.947 -0.098,0.004 -0.197,0.046 -0.294,0.046 -1.496,0 -2.826,-0.303 -3.83,-0.89 l 1.089,-2.128 0.454,-0.887 -0.891,-0.452 -2.136,-1.088 c 0.508,-1.151 1.515,-2.25 2.818,-3.143 -0.287,-0.219 -0.561,-0.441 -0.81,-0.669 -1.687,1.217 -2.846,2.755 -3.235,4.31 l 2.908,1.483 -1.482,2.843 c 1.221,1.023 3.049,1.522 5.114,1.522 z m 27.806,-5.846 c -0.39,-1.555 -1.548,-3.093 -3.234,-4.311 -0.25,0.228 -0.523,0.451 -0.81,0.669 1.304,0.893 2.31,1.992 2.817,3.145 l -2.136,1.088 -0.891,0.453 0.454,0.892 1.089,2.137 c -1.004,0.587 -2.332,0.904 -3.828,0.904 -0.099,0 -0.199,-0.01 -0.299,-0.013 0.103,0.344 0.192,0.683 0.26,1.011 l 0.039,0.002 c 2.066,0 3.892,-0.563 5.112,-1.587 l -1.482,-2.908 z m -12.653,9.182 c -0.447,1.5170001 -1.181,2.8120001 -2.119,3.6510001 l -1.695,-1.694 -0.707,-0.707 -0.707,0.707 -1.695,1.694 c -0.938,-0.839 -1.673,-2.136 -2.12,-3.6520001 -0.296,0.2060001 -0.593,0.3970001 -0.886,0.5630001 0.636,1.98 1.741,3.559 3.1,4.409 l 2.308,-2.307 2.308,2.308 c 1.358,-0.851 2.464,-2.428 3.101,-4.408 -0.295,-0.168 -0.591,-0.359 -0.888,-0.5640001 z"
fill="#ea596e"
id="path4-3" />
<path
fill="#ea596e"
d="m 56.41233,-31.002714 c 0.426,1.146 0.748,2.596 0.841,4.284 l 0.2,3.683 3.564,-0.946 c 1.32,-0.351 2.655,-0.536 3.86,-0.536 0.16,0 0.318,0.003 0.474,0.01 l -1.827,2.819 3.139,1.211 c -0.958,0.759 -2.237,1.514 -3.814,2.123 l -3.441,1.328 2.001,3.099 c 0.918,1.42 1.509,2.782 1.838,3.96 l -3.244,-0.865 -0.182,3.357 c -1.019,-0.677 -2.132,-1.66 -3.198,-2.973 l -2.329,-2.863 -2.328,2.862 c -1.066,1.312 -2.179,2.295 -3.198,2.972 l -0.18,-3.354 -3.248,0.864 c 0.329,-1.178 0.921,-2.54 1.839,-3.961 l 2.004,-3.099 -3.442,-1.328 c -1.577,-0.609 -2.856,-1.363 -3.814,-2.122 l 3.135,-1.208 -1.827,-2.823 c 0.155,-0.006 0.313,-0.01 0.473,-0.01 1.206,0 2.541,0.185 3.861,0.536 l 3.564,0.947 0.202,-3.683 c 0.092,-1.688 0.415,-3.138 0.84,-4.284 l 2.119,2.609 2.118,-2.609 m 0.19,-4.991 -2.308,2.841 -2.308,-2.841 c -1.989,1.532 -3.421,4.992 -3.646,9.112 -1.617,-0.43 -3.192,-0.637 -4.632,-0.637 -2.11,0 -3.929,0.445 -5.161,1.289 l 1.989,3.073 -3.415,1.316 c 0.842,2.366 3.69,4.797 7.54,6.283 -2.241,3.465 -3.116,7.106 -2.407,9.5160001 l 3.537,-0.9410001 0.196,3.6540001 c 2.512,-0.07 5.703,-2.027 8.307,-5.2280001 2.603,3.2010001 5.796,5.1580001 8.306,5.2280001 l 0.198,-3.6550001 3.535,0.9430001 c 0.71,-2.4110001 -0.165,-6.0500001 -2.404,-9.5170001 3.849,-1.485 6.696,-3.918 7.538,-6.283 l -3.415,-1.318 1.99,-3.07 c -1.233,-0.844 -3.053,-1.29 -5.164,-1.29 -1.438,0 -3.013,0.207 -4.63,0.636 -0.225,-4.119 -1.657,-7.579 -3.646,-9.111 z"
id="path6-6" />
</g>
<text
xml:space="preserve"
style="font-style:normal;font-weight:normal;font-size:42.6667px;line-height:1.25;font-family:sans-serif;fill:#ea596e;fill-opacity:1;stroke:none"
x="2.2188232"
y="31.430677"
id="text46212"><tspan
sodipodi:role="line"
id="tspan46210"
x="2.2188232"
y="31.430677"
style="font-style:normal;font-variant:normal;font-weight:normal;font-stretch:normal;font-size:42.6667px;font-family:'TeX Gyre Termes';-inkscape-font-specification:'TeX Gyre Termes'">D</tspan></text>
<text
xml:space="preserve"
style="font-style:normal;font-weight:normal;font-size:42.6667px;line-height:1.25;font-family:sans-serif;fill:#ea596e;fill-opacity:1;stroke:none"
x="41.347008"
y="67.114784"
id="text46212-1"><tspan
sodipodi:role="line"
id="tspan46210-5"
x="41.347008"
y="67.114784"
style="font-style:normal;font-variant:normal;font-weight:normal;font-stretch:normal;font-size:42.6667px;font-family:'TeX Gyre Termes';-inkscape-font-specification:'TeX Gyre Termes'">F</tspan></text>
</svg>

Before

Width:  |  Height:  |  Size: 12 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 129 KiB

Binary file not shown.

Binary file not shown.

Before

Width:  |  Height:  |  Size: 13 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 88 KiB

Binary file not shown.

Binary file not shown.

View file

@ -1,537 +0,0 @@
<?xml version="1.0" encoding="UTF-8" standalone="no"?>
<svg
width="850"
height="480"
viewBox="0 0 224.89584 127"
version="1.1"
id="svg8"
inkscape:version="1.2 (dc2aedaf03, 2022-05-15)"
sodipodi:docname="garage_tables.svg"
xmlns:inkscape="http://www.inkscape.org/namespaces/inkscape"
xmlns:sodipodi="http://sodipodi.sourceforge.net/DTD/sodipodi-0.dtd"
xmlns="http://www.w3.org/2000/svg"
xmlns:svg="http://www.w3.org/2000/svg"
xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#"
xmlns:cc="http://creativecommons.org/ns#"
xmlns:dc="http://purl.org/dc/elements/1.1/">
<defs
id="defs2">
<marker
style="overflow:visible"
id="marker1262"
refX="0"
refY="0"
orient="auto"
inkscape:stockid="Arrow1Mend"
inkscape:isstock="true">
<path
transform="matrix(-0.4,0,0,-0.4,-4,0)"
style="fill:#000000;fill-opacity:1;fill-rule:evenodd;stroke:#000000;stroke-width:1pt;stroke-opacity:1"
d="M 0,0 5,-5 -12.5,0 5,5 Z"
id="path1260" />
</marker>
<marker
style="overflow:visible"
id="Arrow1Mend"
refX="0"
refY="0"
orient="auto"
inkscape:stockid="Arrow1Mend"
inkscape:isstock="true"
inkscape:collect="always">
<path
transform="matrix(-0.4,0,0,-0.4,-4,0)"
style="fill:#000000;fill-opacity:1;fill-rule:evenodd;stroke:#000000;stroke-width:1pt;stroke-opacity:1"
d="M 0,0 5,-5 -12.5,0 5,5 Z"
id="path965" />
</marker>
<marker
style="overflow:visible"
id="Arrow1Lend"
refX="0"
refY="0"
orient="auto"
inkscape:stockid="Arrow1Lend"
inkscape:isstock="true">
<path
transform="matrix(-0.8,0,0,-0.8,-10,0)"
style="fill:#000000;fill-opacity:1;fill-rule:evenodd;stroke:#000000;stroke-width:1pt;stroke-opacity:1"
d="M 0,0 5,-5 -12.5,0 5,5 Z"
id="path959" />
</marker>
</defs>
<sodipodi:namedview
id="base"
pagecolor="#ffffff"
bordercolor="#666666"
borderopacity="1.0"
inkscape:pageopacity="0.0"
inkscape:pageshadow="2"
inkscape:zoom="0.98994949"
inkscape:cx="429.31483"
inkscape:cy="289.40871"
inkscape:document-units="mm"
inkscape:current-layer="layer1"
inkscape:document-rotation="0"
showgrid="false"
units="px"
inkscape:window-width="1678"
inkscape:window-height="993"
inkscape:window-x="0"
inkscape:window-y="0"
inkscape:window-maximized="1"
inkscape:showpageshadow="2"
inkscape:pagecheckerboard="0"
inkscape:deskcolor="#d1d1d1" />
<metadata
id="metadata5">
<rdf:RDF>
<cc:Work
rdf:about="">
<dc:format>image/svg+xml</dc:format>
<dc:type
rdf:resource="http://purl.org/dc/dcmitype/StillImage" />
<dc:title />
</cc:Work>
</rdf:RDF>
</metadata>
<g
inkscape:label="Layer 1"
inkscape:groupmode="layer"
id="layer1">
<text
xml:space="preserve"
style="font-style:normal;font-weight:normal;font-size:5.64444px;line-height:1.25;font-family:sans-serif;fill:#000000;fill-opacity:1;stroke:none;stroke-width:0.264583"
x="39.570904"
y="38.452755"
id="text2025"><tspan
sodipodi:role="line"
id="tspan2023"
x="39.570904"
y="38.452755"
style="font-size:5.64444px;stroke-width:0.264583" /></text>
<text
xml:space="preserve"
style="font-style:normal;font-weight:normal;font-size:10.5833px;line-height:1.25;font-family:sans-serif;fill:#000000;fill-opacity:1;stroke:none;stroke-width:0.264583"
x="101.95796"
y="92.835831"
id="text2139"><tspan
sodipodi:role="line"
id="tspan2137"
x="101.95796"
y="92.835831"
style="stroke-width:0.264583"> </tspan></text>
<g
id="g2316"
transform="translate(-11.455511,1.5722486)">
<g
id="g2277">
<rect
style="fill:none;stroke:#000000;stroke-width:0.8;stroke-miterlimit:4;stroke-dasharray:none;stroke-dashoffset:0;stroke-opacity:1"
id="rect833"
width="47.419891"
height="95.353409"
x="18.534418"
y="24.42766" />
<rect
style="fill:none;stroke:#000000;stroke-width:0.799999;stroke-miterlimit:4;stroke-dasharray:none;stroke-dashoffset:0;stroke-opacity:1"
id="rect833-3"
width="47.419891"
height="86.973076"
x="18.534418"
y="32.807987" />
<text
xml:space="preserve"
style="font-style:normal;font-weight:normal;font-size:5.64444px;line-height:1.25;font-family:sans-serif;fill:#000000;fill-opacity:1;stroke:none;stroke-width:0.264583"
x="32.250839"
y="29.894743"
id="text852"><tspan
sodipodi:role="line"
id="tspan850"
x="32.250839"
y="29.894743"
style="font-style:normal;font-variant:normal;font-weight:bold;font-stretch:normal;font-size:5.64444px;font-family:'Liberation Mono';-inkscape-font-specification:'Liberation Mono Bold';stroke-width:0.264583">Object</tspan></text>
</g>
<g
id="g2066"
transform="translate(-2.1807817,-3.0621439)">
<g
id="g1969"
transform="matrix(0.12763631,0,0,0.12763631,0.7215051,24.717273)"
style="fill:#ff6600;fill-opacity:1;stroke:none;stroke-opacity:1">
<path
style="fill:#ff6600;fill-opacity:1;stroke:none;stroke-width:0.264583;stroke-opacity:1"
d="m 203.71837,154.80038 c -1.11451,3.75057 -2.45288,5.84095 -5.11132,7.98327 -2.2735,1.83211 -4.66721,2.65982 -8.09339,2.79857 -2.59227,0.10498 -2.92868,0.0577 -5.02863,-0.70611 -3.99215,-1.45212 -7.1627,-4.65496 -8.48408,-8.57046 -1.28374,-3.80398 -0.61478,-8.68216 1.64793,-12.01698 0.87317,-1.28689 3.15089,-3.48326 4.18771,-4.03815 l 0.53332,-28.51234 5.78454,-5.09197 6.95158,6.16704 -3.21112,3.49026 3.17616,3.45499 -3.17616,3.40822 2.98973,3.28645 -3.24843,3.3829 4.49203,4.58395 0.0516,5.69106 c 1.06874,0.64848 3.81974,3.24046 4.69548,4.56257 0.452,0.68241 1.06834,2.0197 1.36962,2.97176 0.62932,1.98864 0.88051,5.785 0.47342,7.15497 z m -10.0406,2.32604 c -0.88184,-3.17515 -4.92402,-3.78864 -6.75297,-1.02492 -0.58328,0.8814 -0.6898,1.28852 -0.58362,2.23056 0.26492,2.35041 2.45434,3.95262 4.60856,3.37255 1.19644,-0.32217 2.39435,-1.44872 2.72875,-2.56621 0.30682,-1.02529 0.30686,-0.9045 -7.9e-4,-2.01198 z"
id="path1971"
sodipodi:nodetypes="ssscsscccccccccccssscsssscc" />
</g>
<text
xml:space="preserve"
style="font-style:normal;font-weight:normal;font-size:5.64444px;line-height:1.25;font-family:sans-serif;fill:#000000;fill-opacity:1;stroke:none;stroke-width:0.264583"
x="28.809687"
y="44.070885"
id="text852-9"><tspan
sodipodi:role="line"
id="tspan850-4"
x="28.809687"
y="44.070885"
style="font-style:normal;font-variant:normal;font-weight:normal;font-stretch:normal;font-size:5.64444px;font-family:'Liberation Mono';-inkscape-font-specification:'Liberation Mono Bold';stroke-width:0.264583">bucket </tspan></text>
</g>
<g
id="g2066-7"
transform="translate(-2.1807817,6.2627616)">
<g
id="g1969-8"
transform="matrix(0.12763631,0,0,0.12763631,0.7215051,24.717273)"
style="fill:#ff6600;fill-opacity:1;stroke:none;stroke-opacity:1">
<path
style="fill:#4040ff;fill-opacity:1;stroke:none;stroke-width:0.264583;stroke-opacity:1"
d="m 203.71837,154.80038 c -1.11451,3.75057 -2.45288,5.84095 -5.11132,7.98327 -2.2735,1.83211 -4.66721,2.65982 -8.09339,2.79857 -2.59227,0.10498 -2.92868,0.0577 -5.02863,-0.70611 -3.99215,-1.45212 -7.1627,-4.65496 -8.48408,-8.57046 -1.28374,-3.80398 -0.61478,-8.68216 1.64793,-12.01698 0.87317,-1.28689 3.15089,-3.48326 4.18771,-4.03815 l 0.53332,-28.51234 5.78454,-5.09197 6.95158,6.16704 -3.21112,3.49026 3.17616,3.45499 -3.17616,3.40822 2.98973,3.28645 -3.24843,3.3829 4.49203,4.58395 0.0516,5.69106 c 1.06874,0.64848 3.81974,3.24046 4.69548,4.56257 0.452,0.68241 1.06834,2.0197 1.36962,2.97176 0.62932,1.98864 0.88051,5.785 0.47342,7.15497 z m -10.0406,2.32604 c -0.88184,-3.17515 -4.92402,-3.78864 -6.75297,-1.02492 -0.58328,0.8814 -0.6898,1.28852 -0.58362,2.23056 0.26492,2.35041 2.45434,3.95262 4.60856,3.37255 1.19644,-0.32217 2.39435,-1.44872 2.72875,-2.56621 0.30682,-1.02529 0.30686,-0.9045 -7.9e-4,-2.01198 z"
id="path1971-4"
sodipodi:nodetypes="ssscsscccccccccccssscsssscc" />
</g>
<text
xml:space="preserve"
style="font-style:normal;font-weight:normal;font-size:5.64444px;line-height:1.25;font-family:sans-serif;fill:#000000;fill-opacity:1;stroke:none;stroke-width:0.264583"
x="28.809687"
y="44.070885"
id="text852-9-5"><tspan
sodipodi:role="line"
id="tspan850-4-0"
x="28.809687"
y="44.070885"
style="font-style:normal;font-variant:normal;font-weight:normal;font-stretch:normal;font-size:5.64444px;font-family:'Liberation Mono';-inkscape-font-specification:'Liberation Mono Bold';stroke-width:0.264583">file path </tspan></text>
<path
style="fill:#ff6600;fill-opacity:1;stroke:none;stroke-width:0.0337704;stroke-opacity:1"
d="m 174.20027,104.45585 c -0.14225,0.47871 -0.31308,0.74552 -0.65239,1.01896 -0.29018,0.23384 -0.5957,0.33949 -1.03301,0.3572 -0.33087,0.0134 -0.37381,0.007 -0.64184,-0.0901 -0.50954,-0.18534 -0.91422,-0.59414 -1.08287,-1.0939 -0.16385,-0.48552 -0.0785,-1.10816 0.21033,-1.5338 0.11145,-0.16426 0.40217,-0.44459 0.53451,-0.51542 l 0.0681,-3.639207 0.73832,-0.64992 0.88727,0.787138 -0.40986,0.445484 0.4054,0.440982 -0.4054,0.435013 0.3816,0.41947 -0.41461,0.43178 0.57334,0.58508 0.007,0.72639 c 0.13641,0.0828 0.48753,0.4136 0.59931,0.58235 0.0577,0.0871 0.13636,0.25778 0.17481,0.3793 0.0803,0.25382 0.11239,0.73838 0.0604,0.91323 z m -1.28154,0.29689 c -0.11256,-0.40526 -0.62849,-0.48357 -0.86193,-0.13082 -0.0745,0.1125 -0.088,0.16447 -0.0745,0.2847 0.0338,0.3 0.31326,0.5045 0.58822,0.43046 0.15271,-0.0411 0.30561,-0.1849 0.34829,-0.32754 0.0392,-0.13086 0.0392,-0.11544 -1e-4,-0.2568 z"
id="path1971-3"
sodipodi:nodetypes="ssscsscccccccccccssscsssscc" />
<text
xml:space="preserve"
style="font-style:normal;font-weight:normal;font-size:5.64444px;line-height:1.25;font-family:sans-serif;fill:#000000;fill-opacity:1;stroke:none;stroke-width:0.264583"
x="177.8474"
y="104.05132"
id="text852-9-6"><tspan
sodipodi:role="line"
id="tspan850-4-7"
x="177.8474"
y="104.05132"
style="font-style:normal;font-variant:normal;font-weight:normal;font-stretch:normal;font-size:5.64444px;font-family:'Liberation Mono';-inkscape-font-specification:'Liberation Mono Bold';stroke-width:0.264583">= partition key </tspan></text>
<path
style="fill:#4040ff;fill-opacity:1;stroke:none;stroke-width:0.0337704;stroke-opacity:1"
d="m 174.20027,113.78076 c -0.14225,0.47871 -0.31308,0.74552 -0.65239,1.01895 -0.29018,0.23385 -0.5957,0.33949 -1.03301,0.3572 -0.33087,0.0134 -0.37381,0.007 -0.64184,-0.0901 -0.50954,-0.18534 -0.91422,-0.59414 -1.08287,-1.0939 -0.16385,-0.48553 -0.0785,-1.10816 0.21033,-1.53381 0.11145,-0.16425 0.40217,-0.44459 0.53451,-0.51541 l 0.0681,-3.63921 0.73832,-0.64992 0.88727,0.78714 -0.40986,0.44548 0.4054,0.44098 -0.4054,0.43502 0.3816,0.41947 -0.41461,0.43178 0.57334,0.58508 0.007,0.72638 c 0.13641,0.0828 0.48753,0.4136 0.59931,0.58235 0.0577,0.0871 0.13636,0.25779 0.17481,0.37931 0.0803,0.25382 0.11239,0.73837 0.0604,0.91323 z m -1.28154,0.29689 c -0.11256,-0.40527 -0.62849,-0.48357 -0.86193,-0.13082 -0.0745,0.1125 -0.088,0.16446 -0.0745,0.2847 0.0338,0.3 0.31326,0.5045 0.58822,0.43046 0.15271,-0.0411 0.30561,-0.18491 0.34829,-0.32754 0.0392,-0.13087 0.0392,-0.11545 -1e-4,-0.2568 z"
id="path1971-4-5"
sodipodi:nodetypes="ssscsscccccccccccssscsssscc" />
<text
xml:space="preserve"
style="font-style:normal;font-weight:normal;font-size:5.64444px;line-height:1.25;font-family:sans-serif;fill:#000000;fill-opacity:1;stroke:none;stroke-width:0.264583"
x="177.8474"
y="113.37622"
id="text852-9-5-3"><tspan
sodipodi:role="line"
id="tspan850-4-0-5"
x="177.8474"
y="113.37622"
style="font-style:normal;font-variant:normal;font-weight:normal;font-stretch:normal;font-size:5.64444px;font-family:'Liberation Mono';-inkscape-font-specification:'Liberation Mono Bold';stroke-width:0.264583">= sort key </tspan></text>
</g>
<g
id="g2161"
transform="translate(-62.264403,-59.333115)">
<g
id="g2271"
transform="translate(0,67.042823)">
<rect
style="fill:none;stroke:#000000;stroke-width:0.799999;stroke-miterlimit:4;stroke-dasharray:none;stroke-dashoffset:0;stroke-opacity:1"
id="rect833-6"
width="39.008453"
height="16.775949"
x="84.896881"
y="90.266838" />
<rect
style="fill:none;stroke:#000000;stroke-width:0.799999;stroke-miterlimit:4;stroke-dasharray:none;stroke-dashoffset:0;stroke-opacity:1"
id="rect833-3-1"
width="39.008453"
height="8.673645"
x="84.896881"
y="98.369141" />
<text
xml:space="preserve"
style="font-style:normal;font-weight:normal;font-size:5.64444px;line-height:1.25;font-family:sans-serif;fill:#000000;fill-opacity:1;stroke:none;stroke-width:0.264583"
x="89.826942"
y="96.212921"
id="text852-0"><tspan
sodipodi:role="line"
id="tspan850-6"
x="89.826942"
y="96.212921"
style="font-style:normal;font-variant:normal;font-weight:bold;font-stretch:normal;font-size:5.64444px;font-family:'Liberation Mono';-inkscape-font-specification:'Liberation Mono Bold';stroke-width:0.264583">Version 1</tspan></text>
<text
xml:space="preserve"
style="font-style:normal;font-weight:normal;font-size:5.64444px;line-height:1.25;font-family:sans-serif;fill:#000000;fill-opacity:1;stroke:none;stroke-width:0.264583"
x="89.826942"
y="104.71013"
id="text852-0-3"><tspan
sodipodi:role="line"
id="tspan850-6-2"
x="89.826942"
y="104.71013"
style="font-style:italic;font-variant:normal;font-weight:normal;font-stretch:normal;font-size:5.64444px;font-family:'Liberation Mono';-inkscape-font-specification:'Liberation Mono Bold';fill:#4d4d4d;stroke-width:0.264583">deleted</tspan></text>
</g>
</g>
<g
id="g2263"
transform="translate(0,-22.791204)">
<g
id="g2161-1"
transform="translate(-62.264403,-10.910843)">
<rect
style="fill:none;stroke:#000000;stroke-width:0.799999;stroke-miterlimit:4;stroke-dasharray:none;stroke-dashoffset:0;stroke-opacity:1"
id="rect833-6-5"
width="39.008453"
height="36.749603"
x="84.896881"
y="90.266838" />
<rect
style="fill:none;stroke:#000000;stroke-width:0.799999;stroke-miterlimit:4;stroke-dasharray:none;stroke-dashoffset:0;stroke-opacity:1"
id="rect833-3-1-5"
width="39.008453"
height="28.647301"
x="84.896881"
y="98.369141" />
<text
xml:space="preserve"
style="font-style:normal;font-weight:normal;font-size:5.64444px;line-height:1.25;font-family:sans-serif;fill:#000000;fill-opacity:1;stroke:none;stroke-width:0.264583"
x="89.826942"
y="96.212921"
id="text852-0-4"><tspan
sodipodi:role="line"
id="tspan850-6-7"
x="89.826942"
y="96.212921"
style="font-style:normal;font-variant:normal;font-weight:bold;font-stretch:normal;font-size:5.64444px;font-family:'Liberation Mono';-inkscape-font-specification:'Liberation Mono Bold';stroke-width:0.264583">Version 2</tspan></text>
<text
xml:space="preserve"
style="font-style:normal;font-weight:normal;font-size:5.64444px;line-height:1.25;font-family:sans-serif;fill:#000000;fill-opacity:1;stroke:none;stroke-width:0.264583"
x="89.826942"
y="104.71013"
id="text852-0-3-6"><tspan
sodipodi:role="line"
id="tspan850-6-2-5"
x="89.826942"
y="104.71013"
style="font-style:normal;font-variant:normal;font-weight:normal;font-stretch:normal;font-size:5.64444px;font-family:'Liberation Mono';-inkscape-font-specification:'Liberation Mono Bold';fill:#000000;stroke-width:0.264583">id</tspan></text>
</g>
<text
xml:space="preserve"
style="font-style:normal;font-weight:normal;font-size:5.64444px;line-height:1.25;font-family:sans-serif;fill:#000000;fill-opacity:1;stroke:none;stroke-width:0.264583"
x="27.56254"
y="100.34132"
id="text852-0-3-6-6"><tspan
sodipodi:role="line"
id="tspan850-6-2-5-9"
x="27.56254"
y="100.34132"
style="font-style:normal;font-variant:normal;font-weight:normal;font-stretch:normal;font-size:5.64444px;font-family:'Liberation Mono';-inkscape-font-specification:'Liberation Mono Bold';fill:#000000;stroke-width:0.264583">size</tspan></text>
<text
xml:space="preserve"
style="font-style:normal;font-weight:normal;font-size:5.64444px;line-height:1.25;font-family:sans-serif;fill:#000000;fill-opacity:1;stroke:none;stroke-width:0.264583"
x="27.56254"
y="106.90263"
id="text852-0-3-6-6-3"><tspan
sodipodi:role="line"
id="tspan850-6-2-5-9-7"
x="27.56254"
y="106.90263"
style="font-style:normal;font-variant:normal;font-weight:normal;font-stretch:normal;font-size:5.64444px;font-family:'Liberation Mono';-inkscape-font-specification:'Liberation Mono Bold';fill:#000000;stroke-width:0.264583">MIME type</tspan></text>
<text
xml:space="preserve"
style="font-style:normal;font-weight:normal;font-size:5.64444px;line-height:1.25;font-family:sans-serif;fill:#000000;fill-opacity:1;stroke:none;stroke-width:0.264583"
x="27.56254"
y="111.92816"
id="text852-0-3-6-6-3-4"><tspan
sodipodi:role="line"
id="tspan850-6-2-5-9-7-5"
x="27.56254"
y="111.92816"
style="font-style:normal;font-variant:normal;font-weight:normal;font-stretch:normal;font-size:5.64444px;font-family:'Liberation Mono';-inkscape-font-specification:'Liberation Mono Bold';fill:#000000;stroke-width:0.264583">...</tspan></text>
</g>
</g>
<g
id="g898"
transform="translate(-6.2484318,29.95006)">
<rect
style="fill:none;stroke:#000000;stroke-width:0.799999;stroke-miterlimit:4;stroke-dasharray:none;stroke-dashoffset:0;stroke-opacity:1"
id="rect833-7"
width="47.419891"
height="44.007515"
x="95.443573"
y="24.42766" />
<rect
style="fill:none;stroke:#000000;stroke-width:0.799999;stroke-miterlimit:4;stroke-dasharray:none;stroke-dashoffset:0;stroke-opacity:1"
id="rect833-3-4"
width="47.419891"
height="35.627186"
x="95.443573"
y="32.807987" />
<text
xml:space="preserve"
style="font-style:normal;font-weight:normal;font-size:5.64444px;line-height:1.25;font-family:sans-serif;fill:#000000;fill-opacity:1;stroke:none;stroke-width:0.264583"
x="107.46638"
y="29.894743"
id="text852-4"><tspan
sodipodi:role="line"
id="tspan850-3"
x="107.46638"
y="29.894743"
style="font-style:normal;font-variant:normal;font-weight:bold;font-stretch:normal;font-size:5.64444px;font-family:'Liberation Mono';-inkscape-font-specification:'Liberation Mono Bold';stroke-width:0.264583">Version</tspan></text>
<path
style="fill:#ff6600;fill-opacity:1;stroke:none;stroke-width:0.0337704;stroke-opacity:1"
d="m 102.90563,41.413279 c -0.14226,0.478709 -0.31308,0.745518 -0.65239,1.018956 -0.29019,0.233843 -0.59571,0.339489 -1.03301,0.357199 -0.33087,0.0134 -0.37381,0.0074 -0.64184,-0.09013 -0.50954,-0.185343 -0.914221,-0.594142 -1.082877,-1.093901 -0.163852,-0.485526 -0.07847,-1.108159 0.210335,-1.533803 0.111448,-0.164254 0.402172,-0.444591 0.534502,-0.515415 l 0.0681,-3.63921 0.73832,-0.64992 0.88727,0.787138 -0.40985,0.445484 0.40539,0.440982 -0.40539,0.435013 0.3816,0.41947 -0.41462,0.431781 0.57335,0.585078 0.007,0.726386 c 0.13641,0.08277 0.48753,0.413601 0.59931,0.58235 0.0577,0.0871 0.13636,0.257787 0.17481,0.379304 0.0803,0.253823 0.11239,0.738377 0.0604,0.913234 z m -1.28155,0.296888 c -0.11255,-0.405265 -0.62848,-0.483569 -0.86192,-0.130817 -0.0744,0.112498 -0.088,0.164461 -0.0745,0.2847 0.0338,0.299998 0.31326,0.504498 0.58822,0.43046 0.15271,-0.04112 0.3056,-0.184909 0.34828,-0.327542 0.0392,-0.130864 0.0392,-0.115447 -1e-4,-0.256801 z"
id="path1971-0"
sodipodi:nodetypes="ssscsscccccccccccssscsssscc" />
<text
xml:space="preserve"
style="font-style:normal;font-weight:normal;font-size:5.64444px;line-height:1.25;font-family:sans-serif;fill:#000000;fill-opacity:1;stroke:none;stroke-width:0.264583"
x="104.99195"
y="41.008743"
id="text852-9-7"><tspan
sodipodi:role="line"
id="tspan850-4-8"
x="104.99195"
y="41.008743"
style="font-style:normal;font-variant:normal;font-weight:normal;font-stretch:normal;font-size:5.64444px;font-family:'Liberation Mono';-inkscape-font-specification:'Liberation Mono Bold';stroke-width:0.264583">id </tspan></text>
<text
xml:space="preserve"
style="font-style:normal;font-weight:normal;font-size:5.64444px;line-height:1.25;font-family:sans-serif;fill:#000000;fill-opacity:1;stroke:none;stroke-width:0.264583"
x="104.99195"
y="49.168018"
id="text852-9-7-6"><tspan
sodipodi:role="line"
id="tspan850-4-8-8"
x="104.99195"
y="49.168018"
style="font-style:normal;font-variant:normal;font-weight:normal;font-stretch:normal;font-size:5.64444px;font-family:'Liberation Mono';-inkscape-font-specification:'Liberation Mono Bold';stroke-width:0.264583">h(block 1)</tspan></text>
<text
xml:space="preserve"
style="font-style:normal;font-weight:normal;font-size:5.64444px;line-height:1.25;font-family:sans-serif;fill:#000000;fill-opacity:1;stroke:none;stroke-width:0.264583"
x="104.99195"
y="56.583336"
id="text852-9-7-6-8"><tspan
sodipodi:role="line"
id="tspan850-4-8-8-4"
x="104.99195"
y="56.583336"
style="font-style:normal;font-variant:normal;font-weight:normal;font-stretch:normal;font-size:5.64444px;font-family:'Liberation Mono';-inkscape-font-specification:'Liberation Mono Bold';stroke-width:0.264583">h(block 2)</tspan></text>
<text
xml:space="preserve"
style="font-style:normal;font-weight:normal;font-size:5.64444px;line-height:1.25;font-family:sans-serif;fill:#000000;fill-opacity:1;stroke:none;stroke-width:0.264583"
x="104.99195"
y="64.265732"
id="text852-9-7-6-3"><tspan
sodipodi:role="line"
id="tspan850-4-8-8-1"
x="104.99195"
y="64.265732"
style="font-style:normal;font-variant:normal;font-weight:normal;font-stretch:normal;font-size:5.64444px;font-family:'Liberation Mono';-inkscape-font-specification:'Liberation Mono Bold';stroke-width:0.264583">...</tspan></text>
</g>
<g
id="g898-3"
transform="translate(75.777779,38.888663)">
<rect
style="fill:none;stroke:#000000;stroke-width:0.799999;stroke-miterlimit:4;stroke-dasharray:none;stroke-dashoffset:0;stroke-opacity:1"
id="rect833-7-6"
width="47.419891"
height="29.989157"
x="95.443573"
y="24.42766" />
<rect
style="fill:none;stroke:#000000;stroke-width:0.799999;stroke-miterlimit:4;stroke-dasharray:none;stroke-dashoffset:0;stroke-opacity:1"
id="rect833-3-4-7"
width="47.419891"
height="21.608831"
x="95.443573"
y="32.807987" />
<text
xml:space="preserve"
style="font-style:normal;font-weight:normal;font-size:5.64444px;line-height:1.25;font-family:sans-serif;fill:#000000;fill-opacity:1;stroke:none;stroke-width:0.264583"
x="102.11134"
y="29.894743"
id="text852-4-5"><tspan
sodipodi:role="line"
id="tspan850-3-3"
x="102.11134"
y="29.894743"
style="font-style:normal;font-variant:normal;font-weight:bold;font-stretch:normal;font-size:5.64444px;font-family:'Liberation Mono';-inkscape-font-specification:'Liberation Mono Bold';stroke-width:0.264583">Data block</tspan></text>
<path
style="fill:#ff6600;fill-opacity:1;stroke:none;stroke-width:0.0337704;stroke-opacity:1"
d="m 102.90563,41.413279 c -0.14226,0.478709 -0.31308,0.745518 -0.65239,1.018956 -0.29019,0.233843 -0.59571,0.339489 -1.03301,0.357199 -0.33087,0.0134 -0.37381,0.0074 -0.64184,-0.09013 -0.50954,-0.185343 -0.914221,-0.594142 -1.082877,-1.093901 -0.163852,-0.485526 -0.07847,-1.108159 0.210335,-1.533803 0.111448,-0.164254 0.402172,-0.444591 0.534502,-0.515415 l 0.0681,-3.63921 0.73832,-0.64992 0.88727,0.787138 -0.40985,0.445484 0.40539,0.440982 -0.40539,0.435013 0.3816,0.41947 -0.41462,0.431781 0.57335,0.585078 0.007,0.726386 c 0.13641,0.08277 0.48753,0.413601 0.59931,0.58235 0.0577,0.0871 0.13636,0.257787 0.17481,0.379304 0.0803,0.253823 0.11239,0.738377 0.0604,0.913234 z m -1.28155,0.296888 c -0.11255,-0.405265 -0.62848,-0.483569 -0.86192,-0.130817 -0.0744,0.112498 -0.088,0.164461 -0.0745,0.2847 0.0338,0.299998 0.31326,0.504498 0.58822,0.43046 0.15271,-0.04112 0.3056,-0.184909 0.34828,-0.327542 0.0392,-0.130864 0.0392,-0.115447 -1e-4,-0.256801 z"
id="path1971-0-5"
sodipodi:nodetypes="ssscsscccccccccccssscsssscc" />
<text
xml:space="preserve"
style="font-style:normal;font-weight:normal;font-size:5.64444px;line-height:1.25;font-family:sans-serif;fill:#000000;fill-opacity:1;stroke:none;stroke-width:0.264583"
x="104.99195"
y="41.008743"
id="text852-9-7-62"><tspan
sodipodi:role="line"
id="tspan850-4-8-9"
x="104.99195"
y="41.008743"
style="font-style:normal;font-variant:normal;font-weight:normal;font-stretch:normal;font-size:5.64444px;font-family:'Liberation Mono';-inkscape-font-specification:'Liberation Mono Bold';stroke-width:0.264583">hash </tspan></text>
<text
xml:space="preserve"
style="font-style:normal;font-weight:normal;font-size:5.64444px;line-height:1.25;font-family:sans-serif;fill:#000000;fill-opacity:1;stroke:none;stroke-width:0.264583"
x="104.99195"
y="49.168018"
id="text852-9-7-6-1"><tspan
sodipodi:role="line"
id="tspan850-4-8-8-2"
x="104.99195"
y="49.168018"
style="font-style:normal;font-variant:normal;font-weight:normal;font-stretch:normal;font-size:5.64444px;font-family:'Liberation Mono';-inkscape-font-specification:'Liberation Mono Bold';stroke-width:0.264583">data</tspan></text>
</g>
<path
style="fill:none;stroke:#000000;stroke-width:1;stroke-linecap:butt;stroke-linejoin:miter;stroke-miterlimit:4;stroke-dasharray:none;stroke-opacity:1;marker-end:url(#Arrow1Mend)"
d="M 42.105292,69.455903 89.563703,69.317144"
id="path954"
sodipodi:nodetypes="cc" />
<path
style="fill:none;stroke:#000000;stroke-width:1;stroke-linecap:butt;stroke-linejoin:miter;stroke-miterlimit:4;stroke-dasharray:none;stroke-opacity:1;marker-end:url(#marker1262)"
d="m 134.32612,77.363197 38.12618,0.260865"
id="path1258"
sodipodi:nodetypes="cc" />
<text
xml:space="preserve"
style="font-style:normal;font-weight:normal;font-size:5.64444px;line-height:1.25;font-family:sans-serif;fill:#000000;fill-opacity:1;stroke:none;stroke-width:0.264583"
x="8.6727352"
y="16.687063"
id="text852-3"><tspan
sodipodi:role="line"
id="tspan850-67"
x="8.6727352"
y="16.687063"
style="font-style:normal;font-variant:normal;font-weight:bold;font-stretch:normal;font-size:5.64444px;font-family:'Liberation Mono';-inkscape-font-specification:'Liberation Mono Bold';stroke-width:0.264583">Objects table </tspan></text>
<text
xml:space="preserve"
style="font-style:normal;font-weight:normal;font-size:5.64444px;line-height:1.25;font-family:sans-serif;fill:#000000;fill-opacity:1;stroke:none;stroke-width:0.264583"
x="89.190445"
y="16.687063"
id="text852-3-5"><tspan
sodipodi:role="line"
id="tspan850-67-3"
x="89.190445"
y="16.687063"
style="font-style:normal;font-variant:normal;font-weight:bold;font-stretch:normal;font-size:5.64444px;font-family:'Liberation Mono';-inkscape-font-specification:'Liberation Mono Bold';stroke-width:0.264583">Versions table </tspan></text>
<text
xml:space="preserve"
style="font-style:normal;font-weight:normal;font-size:5.64444px;line-height:1.25;font-family:sans-serif;fill:#000000;fill-opacity:1;stroke:none;stroke-width:0.264583"
x="174.55702"
y="16.687063"
id="text852-3-56"><tspan
sodipodi:role="line"
id="tspan850-67-2"
x="174.55702"
y="16.687063"
style="font-style:normal;font-variant:normal;font-weight:bold;font-stretch:normal;font-size:5.64444px;font-family:'Liberation Mono';-inkscape-font-specification:'Liberation Mono Bold';stroke-width:0.264583">Blocks table</tspan></text>
</g>
</svg>

Before

Width:  |  Height:  |  Size: 30 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 52 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 37 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 97 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 199 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 145 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 13 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 174 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 38 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 14 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 87 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 81 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 124 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 84 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 81 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 81 KiB

File diff suppressed because it is too large Load diff

Before

Width:  |  Height:  |  Size: 315 KiB

File diff suppressed because one or more lines are too long

Before

Width:  |  Height:  |  Size: 286 KiB

BIN
doc/talks/2022-06-23-stack/talk.pdf (Stored with Git LFS)

Binary file not shown.

View file

@ -1,480 +0,0 @@
%\nonstopmode
\documentclass[aspectratio=169]{beamer}
\usepackage[utf8]{inputenc}
% \usepackage[frenchb]{babel}
\usepackage{amsmath}
\usepackage{mathtools}
\usepackage{breqn}
\usepackage{multirow}
\usetheme{boxes}
\usepackage{graphicx}
\usepackage{adjustbox}
%\useoutertheme[footline=authortitle,subsection=false]{miniframes}
%\useoutertheme[footline=authorinstitute,subsection=false]{miniframes}
\useoutertheme{infolines}
\setbeamertemplate{headline}{}
\beamertemplatenavigationsymbolsempty
\definecolor{TitleOrange}{RGB}{255,137,0}
\setbeamercolor{title}{fg=TitleOrange}
\setbeamercolor{frametitle}{fg=TitleOrange}
\definecolor{ListOrange}{RGB}{255,145,5}
\setbeamertemplate{itemize item}{\color{ListOrange}$\blacktriangleright$}
\definecolor{verygrey}{RGB}{70,70,70}
\setbeamercolor{normal text}{fg=verygrey}
\usepackage{tabu}
\usepackage{multicol}
\usepackage{vwcol}
\usepackage{stmaryrd}
\usepackage{graphicx}
\usepackage[normalem]{ulem}
\title{Introducing Garage}
\subtitle{a new storage platform for self-hosted geo-distributed clusters}
\author{Deuxfleurs Association}
\date{IMT Atlantique, 2022-06-23}
\begin{document}
\begin{frame}
\centering
\includegraphics[width=.3\linewidth]{../../sticker/Garage.pdf}
\vspace{1em}
{\large\bf Deuxfleurs Association}
\vspace{1em}
\url{https://garagehq.deuxfleurs.fr/}
Matrix channel: \texttt{\#garage:deuxfleurs.fr}
\end{frame}
\begin{frame}
\frametitle{Who we are}
\begin{columns}[t]
\begin{column}{.2\textwidth}
\centering
\adjincludegraphics[width=.4\linewidth, valign=t]{assets/alex.jpg}
\end{column}
\begin{column}{.6\textwidth}
\textbf{Alex Auvolat}\\
PhD at Inria, team WIDE; co-founder of Deuxfleurs
\end{column}
\begin{column}{.2\textwidth}
~
\end{column}
\end{columns}
\vspace{1em}
\begin{columns}[t]
\begin{column}{.2\textwidth}
~
\end{column}
\begin{column}{.6\textwidth}
\textbf{Quentin Dufour}\\
PhD at Inria, team WIDE; co-founder of Deuxfleurs
\end{column}
\begin{column}{.2\textwidth}
\centering
\adjincludegraphics[width=.5\linewidth, valign=t]{assets/quentin.jpg}
\end{column}
\end{columns}
\vspace{2em}
\begin{columns}[t]
\begin{column}{.2\textwidth}
\centering
\adjincludegraphics[width=.5\linewidth, valign=t]{assets/deuxfleurs.pdf}
\end{column}
\begin{column}{.6\textwidth}
\textbf{Deuxfleurs}\\
A non-profit self-hosting collective,\\
member of the CHATONS network
\end{column}
\begin{column}{.2\textwidth}
\centering
\adjincludegraphics[width=.7\linewidth, valign=t]{assets/logo_chatons.png}
\end{column}
\end{columns}
\end{frame}
\begin{frame}
\frametitle{Our objective at Deuxfleurs}
\begin{center}
\textbf{Promote self-hosting and small-scale hosting\\
as an alternative to large cloud providers}
\end{center}
\vspace{2em}
\visible<2->{
Why is it hard?
}
\visible<3->{
\vspace{2em}
\begin{center}
\textbf{\underline{Resilience}}\\
{\footnotesize (we want good uptime/availability with low supervision)}
\end{center}
}
\end{frame}
\begin{frame}
\frametitle{How to make a \underline{stable} system}
Enterprise-grade systems typically employ:
\vspace{1em}
\begin{itemize}
\item RAID
\item Redundant power grid + UPS
\item Redundant Internet connections
\item Low-latency links
\item ...
\end{itemize}
\vspace{1em}
$\to$ it's costly and only worth it at DC scale
\end{frame}
\begin{frame}
\frametitle{How to make a \underline{resilient} system}
\only<1,4-5>{
Instead, we use:
\vspace{1em}
\begin{itemize}
\item \textcolor<2->{gray}{Commodity hardware (e.g. old desktop PCs)}
\vspace{.5em}
\item<4-> \textcolor<5->{gray}{Commodity Internet (e.g. FTTB, FTTH) and power grid}
\vspace{.5em}
\item<5-> \textcolor<6->{gray}{\textbf{Geographical redundancy} (multi-site replication)}
\end{itemize}
}
\only<2>{
\begin{center}
\includegraphics[width=.8\linewidth]{assets/atuin.jpg}
\end{center}
}
\only<3>{
\begin{center}
\includegraphics[width=.8\linewidth]{assets/neptune.jpg}
\end{center}
}
\only<6>{
\begin{center}
\includegraphics[width=.5\linewidth]{assets/inframap.jpg}
\end{center}
}
\end{frame}
\begin{frame}
\frametitle{How to make this happen}
\begin{center}
\only<1>{\includegraphics[width=.8\linewidth]{assets/slide1.png}}%
\only<2>{\includegraphics[width=.8\linewidth]{assets/slide2.png}}%
\only<3>{\includegraphics[width=.8\linewidth]{assets/slide3.png}}%
\end{center}
\end{frame}
\begin{frame}
\frametitle{Distributed file systems are slow}
File systems are complex, for example:
\vspace{1em}
\begin{itemize}
\item Concurrent modification by several processes
\vspace{1em}
\item Folder hierarchies
\vspace{1em}
\item Other requirements of the POSIX spec
\end{itemize}
\vspace{1em}
Coordination in a distributed system is costly
\vspace{1em}
Costs explode with commodity hardware / Internet connections\\
{\small (we experienced this!)}
\end{frame}
\begin{frame}
\frametitle{A simpler solution: object storage}
Only two operations:
\vspace{1em}
\begin{itemize}
\item Put an object at a key
\vspace{1em}
\item Retrieve an object from its key
\end{itemize}
\vspace{1em}
{\footnotesize (and a few others)}
\vspace{1em}
Sufficient for many applications!
\end{frame}
\begin{frame}
\frametitle{A simpler solution: object storage}
\begin{center}
\includegraphics[width=.2\linewidth]{../2020-12-02_wide-team/img/Amazon-S3.jpg}
\hspace{5em}
\includegraphics[width=.2\linewidth]{assets/minio.png}
\end{center}
\vspace{1em}
S3: a de-facto standard, many compatible applications
\vspace{1em}
MinIO is self-hostable but not suited for geo-distributed deployments
\end{frame}
\begin{frame}
\frametitle{But what is Garage, exactly?}
\textbf{Garage is a self-hosted drop-in replacement for the Amazon S3 object store}\\
\vspace{.5em}
that implements resilience through geographical redundancy on commodity hardware
\begin{center}
\includegraphics[width=.8\linewidth]{assets/garageuses.png}
\end{center}
\end{frame}
\begin{frame}
\frametitle{Overview}
\begin{center}
\only<1>{\includegraphics[width=.45\linewidth]{assets/garage2a.drawio.pdf}}%
\only<2>{\includegraphics[width=.45\linewidth]{assets/garage2b.drawio.pdf}}%
\end{center}
\end{frame}
\begin{frame}
\frametitle{Garage is \emph{location-aware}}
\begin{center}
\includegraphics[width=\linewidth]{assets/location-aware.png}
\end{center}
\vspace{2em}
Garage replicates data on different zones when possible
\end{frame}
\begin{frame}
\frametitle{Garage is \emph{location-aware}}
\begin{center}
\includegraphics[width=.8\linewidth]{assets/map.png}
\end{center}
\end{frame}
\begin{frame}
\frametitle{How to spread files over different cluster nodes?}
\textbf{Consistent hashing (DynamoDB):}
\vspace{1em}
\begin{center}
\only<1>{\includegraphics[width=.45\columnwidth]{assets/consistent_hashing_1.pdf}}%
\only<2>{\includegraphics[width=.45\columnwidth]{assets/consistent_hashing_2.pdf}}%
\only<3>{\includegraphics[width=.45\columnwidth]{assets/consistent_hashing_3.pdf}}%
\only<4>{\includegraphics[width=.45\columnwidth]{assets/consistent_hashing_4.pdf}}%
\end{center}
\end{frame}
\begin{frame}
\frametitle{How to spread files over different cluster nodes?}
\textbf{Issues with consistent hashing:}
\vspace{1em}
\begin{itemize}
\item Doesn't dispatch data based on geographical location of nodes
\vspace{1em}
\item<2-> Geographically aware adaptation, try 1:\\
data quantities not well balanced between nodes
\vspace{1em}
\item<3-> Geographically aware adaptation, try 2:\\
too many reshuffles when adding/removing nodes
\end{itemize}
\end{frame}
\begin{frame}
\frametitle{How to spread files over different cluster nodes?}
\textbf{Garage's method: build an index table}
\vspace{1em}
Realization: we can actually precompute an optimal solution
\vspace{1em}
\visible<2->{
\begin{center}
\begin{tabular}{|l|l|l|l|}
\hline
\textbf{Partition} & \textbf{Node 1} & \textbf{Node 2} & \textbf{Node 3} \\
\hline
\hline
Partition 0 & Io (jupiter) & Drosera (atuin) & Courgette (neptune) \\
\hline
Partition 1 & Datura (atuin) & Courgette (neptune) & Io (jupiter) \\
\hline
Partition 2 & Io(jupiter) & Celeri (neptune) & Drosera (atuin) \\
\hline
\hspace{1em}$\vdots$ & \hspace{1em}$\vdots$ & \hspace{1em}$\vdots$ & \hspace{1em}$\vdots$ \\
\hline
Partition 255 & Concombre (neptune) & Io (jupiter) & Drosera (atuin) \\
\hline
\end{tabular}
\end{center}
}
\vspace{1em}
\visible<3->{
The index table is built centrally using an optimal* algorithm,\\
then propagated to all nodes\\
\hfill\footnotesize *not yet optimal but will be soon
}
\end{frame}
\begin{frame}
\frametitle{Garage's internal data structures}
\centering
\includegraphics[width=.75\columnwidth]{assets/garage_tables.pdf}
\end{frame}
%\begin{frame}
% \frametitle{Garage's architecture}
% \begin{center}
% \includegraphics[width=.35\linewidth]{assets/garage.drawio.pdf}
% \end{center}
%\end{frame}
\begin{frame}
\frametitle{Garage is \emph{coordination-free}:}
\begin{itemize}
\item No Raft or Paxos
\vspace{1em}
\item Internal data types are CRDTs
\vspace{1em}
\item All nodes are equivalent (no master/leader/index node)
\end{itemize}
\vspace{2em}
$\to$ less sensitive to higher latencies between nodes
\end{frame}
\begin{frame}
\frametitle{Consistency model}
\begin{itemize}
\item Not ACID (not required by S3 spec) / not linearizable
\vspace{1em}
\item \textbf{Read-after-write consistency}\\
{\footnotesize (stronger than eventual consistency)}
\end{itemize}
\end{frame}
\begin{frame}
\frametitle{Impact on performances}
\begin{center}
\includegraphics[width=.8\linewidth]{assets/endpoint-latency-dc.png}
\end{center}
\end{frame}
\begin{frame}
\frametitle{An ever-increasing compatibility list}
\begin{center}
\includegraphics[width=.7\linewidth]{assets/compatibility.png}
\end{center}
\end{frame}
\begin{frame}
\frametitle{Further plans for Garage}
\begin{center}
\only<1>{\includegraphics[width=.8\linewidth]{assets/slideB1.png}}%
\only<2>{\includegraphics[width=.8\linewidth]{assets/slideB2.png}}%
\only<3>{\includegraphics[width=.8\linewidth]{assets/slideB3.png}}%
\end{center}
\end{frame}
\begin{frame}
\frametitle{K2V Design}
\begin{itemize}
\item A new, custom, minimal API
\vspace{1em}
\item<2-> Exposes the partitoning mechanism of Garage\\
K2V = partition key / sort key / value (like Dynamo)
\vspace{1em}
\item<3-> Coordination-free, CRDT-friendly (inspired by Riak)\\
\vspace{1em}
\item<4-> Cryptography-friendly: values are binary blobs
\end{itemize}
\end{frame}
\begin{frame}
\frametitle{Application: an e-mail storage server}
\begin{center}
\only<1>{\includegraphics[width=.9\linewidth]{assets/aerogramme.png}}%
\end{center}
\end{frame}
\begin{frame}
\frametitle{Aerogramme data model}
\begin{center}
\only<1>{\includegraphics[width=.4\linewidth]{assets/aerogramme_datatype.drawio.pdf}}%
\only<2->{\includegraphics[width=.9\linewidth]{assets/aerogramme_keys.drawio.pdf}\vspace{1em}}%
\end{center}
\visible<3->{Aerogramme encrypts all stored values for privacy\\
(Garage server administrators can't read your mail)}
\end{frame}
\begin{frame}
\frametitle{Different deployment scenarios}
\begin{center}
\only<1>{\includegraphics[width=.9\linewidth]{assets/aerogramme_components1.drawio.pdf}}%
\only<2>{\includegraphics[width=.9\linewidth]{assets/aerogramme_components2.drawio.pdf}}%
\end{center}
\end{frame}
\begin{frame}
\frametitle{A new model for building resilient software}
\begin{itemize}
\item Design a data model suited to K2V\\
{\footnotesize (see Cassandra docs on porting SQL data models to Cassandra)}
\vspace{1em}
\begin{itemize}
\item Use CRDTs or other eventually consistent data types (see e.g. Bayou)
\vspace{1em}
\item Store opaque binary blobs to provide End-to-End Encryption\\
\end{itemize}
\vspace{1em}
\item Store big blobs (files) in S3
\vspace{1em}
\item Let Garage manage sharding, replication, failover, etc.
\end{itemize}
\end{frame}
\begin{frame}
\frametitle{Research perspectives}
\begin{itemize}
\item Write about Garage's global architecture \emph{(paper in progress)}
\vspace{1em}
\item Measure and improve Garage's performances
\vspace{1em}
\item Discuss the optimal layout algorithm, provide proofs
\vspace{1em}
\item Write about our proposed architecture for (E2EE) apps over K2V+S3
\end{itemize}
\end{frame}
\begin{frame}
\frametitle{Where to find us}
\begin{center}
\includegraphics[width=.25\linewidth]{../../logo/garage_hires.png}\\
\vspace{-1em}
\url{https://garagehq.deuxfleurs.fr/}\\
\url{mailto:garagehq@deuxfleurs.fr}\\
\texttt{\#garage:deuxfleurs.fr} on Matrix
\vspace{1.5em}
\includegraphics[width=.06\linewidth]{assets/rust_logo.png}
\includegraphics[width=.13\linewidth]{assets/AGPLv3_Logo.png}
\end{center}
\end{frame}
\end{document}
%% vim: set ts=4 sw=4 tw=0 noet spelllang=en :

View file

@ -1,158 +0,0 @@
#!/usr/bin/env python
import os
import requests
from datetime import datetime
# let's talk to our AWS Elasticsearch cluster
#from requests_aws4auth import AWS4Auth
#auth = AWS4Auth('GK31c2f218a2e44f485b94239e',
# 'b892c0665f0ada8a4755dae98baa3b133590e11dae3bcc1f9d769d67f16c3835',
# 'us-east-1',
# 's3')
from aws_requests_auth.aws_auth import AWSRequestsAuth
auth = AWSRequestsAuth(aws_access_key='GK31c2f218a2e44f485b94239e',
aws_secret_access_key='b892c0665f0ada8a4755dae98baa3b133590e11dae3bcc1f9d769d67f16c3835',
aws_host='localhost:3812',
aws_region='us-east-1',
aws_service='k2v')
print("-- ReadIndex")
response = requests.get('http://localhost:3812/alex',
auth=auth)
print(response.headers)
print(response.text)
sort_keys = ["a", "b", "c", "d"]
for sk in sort_keys:
print("-- (%s) Put initial (no CT)"%sk)
response = requests.put('http://localhost:3812/alex/root?sort_key=%s'%sk,
auth=auth,
data='{}: Hello, world!'.format(datetime.timestamp(datetime.now())))
print(response.headers)
print(response.text)
print("-- Get")
response = requests.get('http://localhost:3812/alex/root?sort_key=%s'%sk,
auth=auth)
print(response.headers)
print(response.text)
ct = response.headers["x-garage-causality-token"]
print("-- ReadIndex")
response = requests.get('http://localhost:3812/alex',
auth=auth)
print(response.headers)
print(response.text)
print("-- Put with CT")
response = requests.put('http://localhost:3812/alex/root?sort_key=%s'%sk,
auth=auth,
headers={'x-garage-causality-token': ct},
data='{}: Good bye, world!'.format(datetime.timestamp(datetime.now())))
print(response.headers)
print(response.text)
print("-- Get")
response = requests.get('http://localhost:3812/alex/root?sort_key=%s'%sk,
auth=auth)
print(response.headers)
print(response.text)
print("-- Put again with same CT (concurrent)")
response = requests.put('http://localhost:3812/alex/root?sort_key=%s'%sk,
auth=auth,
headers={'x-garage-causality-token': ct},
data='{}: Concurrent value, oops'.format(datetime.timestamp(datetime.now())))
print(response.headers)
print(response.text)
for sk in sort_keys:
print("-- (%s) Get"%sk)
response = requests.get('http://localhost:3812/alex/root?sort_key=%s'%sk,
auth=auth)
print(response.headers)
print(response.text)
ct = response.headers["x-garage-causality-token"]
print("-- Delete")
response = requests.delete('http://localhost:3812/alex/root?sort_key=%s'%sk,
headers={'x-garage-causality-token': ct},
auth=auth)
print(response.headers)
print(response.text)
print("-- ReadIndex")
response = requests.get('http://localhost:3812/alex',
auth=auth)
print(response.headers)
print(response.text)
print("-- InsertBatch")
response = requests.post('http://localhost:3812/alex',
auth=auth,
data='''
[
{"pk": "root", "sk": "a", "ct": null, "v": "aW5pdGlhbCB0ZXN0Cg=="},
{"pk": "root", "sk": "b", "ct": null, "v": "aW5pdGlhbCB0ZXN1Cg=="},
{"pk": "root", "sk": "c", "ct": null, "v": "aW5pdGlhbCB0ZXN2Cg=="}
]
''')
print(response.headers)
print(response.text)
print("-- ReadIndex")
response = requests.get('http://localhost:3812/alex',
auth=auth)
print(response.headers)
print(response.text)
for sk in sort_keys:
print("-- (%s) Get"%sk)
response = requests.get('http://localhost:3812/alex/root?sort_key=%s'%sk,
auth=auth)
print(response.headers)
print(response.text)
ct = response.headers["x-garage-causality-token"]
print("-- ReadBatch")
response = requests.post('http://localhost:3812/alex?search',
auth=auth,
data='''
[
{"partitionKey": "root"},
{"partitionKey": "root", "tombstones": true},
{"partitionKey": "root", "tombstones": true, "limit": 2},
{"partitionKey": "root", "start": "c", "singleItem": true},
{"partitionKey": "root", "start": "b", "end": "d", "tombstones": true}
]
''')
print(response.headers)
print(response.text)
print("-- DeleteBatch")
response = requests.post('http://localhost:3812/alex?delete',
auth=auth,
data='''
[
{"partitionKey": "root", "start": "b", "end": "c"}
]
''')
print(response.headers)
print(response.text)
print("-- ReadBatch")
response = requests.post('http://localhost:3812/alex?search',
auth=auth,
data='''
[
{"partitionKey": "root"}
]
''')
print(response.headers)
print(response.text)

View file

@ -8,10 +8,10 @@ rec {
sha256 = "1xy9zpypqfxs5gcq5dcla4bfkhxmh5nzn9dyqkr03lqycm9wg5cr"; sha256 = "1xy9zpypqfxs5gcq5dcla4bfkhxmh5nzn9dyqkr03lqycm9wg5cr";
}; };
cargo2nixSrc = fetchGit { cargo2nixSrc = fetchGit {
# As of 2022-03-17 # As of 2022-02-03
url = "https://github.com/superboum/cargo2nix"; url = "https://github.com/superboum/cargo2nix";
ref = "main"; ref = "backward-compat";
rev = "bcbf3ba99e9e01a61eb83a24624419c2dd9dec64"; rev = "08d963f32a774353ee8acf3f61749915875c1ec4";
}; };

View file

@ -18,7 +18,6 @@ let
pkgsHost = import pkgsSrc {}; pkgsHost = import pkgsSrc {};
lib = pkgsHost.lib; lib = pkgsHost.lib;
kaniko = (import ./kaniko.nix) pkgsHost; kaniko = (import ./kaniko.nix) pkgsHost;
winscp = (import ./winscp.nix) pkgsHost;
in in
lib.flatten (builtins.map (pkgs: [ lib.flatten (builtins.map (pkgs: [
pkgs.rustPlatform.rust.rustc pkgs.rustPlatform.rust.rustc
@ -26,6 +25,5 @@ in
pkgs.buildPackages.stdenv.cc pkgs.buildPackages.stdenv.cc
]) pkgsList) ++ [ ]) pkgsList) ++ [
kaniko kaniko
winscp
] ]

View file

@ -1,28 +0,0 @@
pkgs:
pkgs.stdenv.mkDerivation rec {
pname = "winscp";
version = "5.19.6";
src = pkgs.fetchzip {
url = "https://winscp.net/download/WinSCP-${version}-Portable.zip";
sha256 = "sha256-8+6JuT0b1fFQ6etaFTMSjIKvDGzmJoHAuByXiqCBzu0=";
stripRoot = false;
};
buildPhase = ''
cat > winscp <<EOF
#!${pkgs.bash}/bin/bash
WINEDEBUG=-all
${pkgs.winePackages.minimal}/bin/wine $out/opt/WinSCP.com
EOF
'';
installPhase = ''
mkdir -p $out/{bin,opt}
cp {WinSCP.com,WinSCP.exe} $out/opt
cp winscp $out/bin
chmod +x $out/bin/winscp
'';
}

View file

@ -44,9 +44,6 @@ root_domain = ".s3.garage.localhost"
bind_addr = "0.0.0.0:$((3920+$count))" bind_addr = "0.0.0.0:$((3920+$count))"
root_domain = ".web.garage.localhost" root_domain = ".web.garage.localhost"
index = "index.html" index = "index.html"
[admin]
api_bind_addr = "0.0.0.0:$((9900+$count))"
EOF EOF
echo -en "$LABEL configuration written to $CONF_PATH\n" echo -en "$LABEL configuration written to $CONF_PATH\n"

View file

@ -1,4 +0,0 @@
export AWS_ACCESS_KEY_ID=`cat /tmp/garage.s3 |cut -d' ' -f1`
export AWS_SECRET_ACCESS_KEY=`cat /tmp/garage.s3 |cut -d' ' -f2`
export AWS_DEFAULT_REGION='garage'
export WINSCP_URL="s3://${AWS_ACCESS_KEY_ID}:${AWS_SECRET_ACCESS_KEY}@127.0.0.1:4443 -certificate=* -rawsettings S3DefaultRegion=garage S3UrlStyle=1"

View file

@ -1,13 +0,0 @@
Spawn a cluster with minikube
```bash
minikube start
minikube kubectl -- apply -f config.yaml
minikube kubectl -- apply -f daemon.yaml
minikube dashboard
minikube kubectl -- exec -it garage-0 --container garage -- /garage status
# etc.
```

View file

@ -1,12 +0,0 @@
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRoleBinding
metadata:
name: garage-admin
roleRef:
apiGroup: rbac.authorization.k8s.io
kind: ClusterRole
name: cluster-admin
subjects:
- apiGroup: rbac.authorization.k8s.io
kind: User
name: system:serviceaccount:default:default

View file

@ -1,30 +0,0 @@
apiVersion: v1
kind: ConfigMap
metadata:
name: garage-config
namespace: default
data:
garage.toml: |-
metadata_dir = "/tmp/meta"
data_dir = "/tmp/data"
replication_mode = "3"
rpc_bind_addr = "[::]:3901"
rpc_secret = "1799bccfd7411eddcf9ebd316bc1f5287ad12a68094e1c6ac6abde7e6feae1ec"
bootstrap_peers = []
kubernetes_namespace = "default"
kubernetes_service_name = "garage-daemon"
kubernetes_skip_crd = false
[s3_api]
s3_region = "garage"
api_bind_addr = "[::]:3900"
root_domain = ".s3.garage.tld"
[s3_web]
bind_addr = "[::]:3902"
root_domain = ".web.garage.tld"
index = "index.html"

View file

@ -1,52 +0,0 @@
apiVersion: apps/v1
kind: StatefulSet
metadata:
name: garage
spec:
selector:
matchLabels:
app: garage
serviceName: "garage"
replicas: 3
template:
metadata:
labels:
app: garage
spec:
terminationGracePeriodSeconds: 10
containers:
- name: garage
image: dxflrs/amd64_garage:v0.7.0-rc1
ports:
- containerPort: 3900
name: s3-api
- containerPort: 3902
name: web-api
volumeMounts:
- name: fast
mountPath: /mnt/fast
- name: slow
mountPath: /mnt/slow
- name: etc
mountPath: /etc/garage.toml
subPath: garage.toml
volumes:
- name: etc
configMap:
name: garage-config
volumeClaimTemplates:
- metadata:
name: fast
spec:
accessModes: [ "ReadWriteOnce" ]
resources:
requests:
storage: 100Mi
- metadata:
name: slow
spec:
accessModes: [ "ReadWriteOnce" ]
resources:
requests:
storage: 100Mi

View file

@ -1,21 +0,0 @@
Configure your `[admin-api]` endpoint:
```
[admin]
api_bind_addr = "0.0.0.0:3903"
trace_sink = "http://localhost:4317"
```
Start the test stack:
```
cd telemetry
docker-compose up
```
Access the web interfaces:
- [Kibana](http://localhost:5601) - Click on the hamburger menu, in the Observability section, click APM
- [Grafana](http://localhost:3000) - Set a password, then on the left menu, click Dashboard -> Browse. On the new page click Import -> Choose the test dashboard we ship `grafana-garage-dashboard-elasticsearch.json`

View file

@ -1,3 +0,0 @@
COMPOSE_PROJECT_NAME=telemetry
OTEL_COLLECT_TAG=0.44.0
ELASTIC_BUNDLE_TAG=7.17.0

View file

@ -1,10 +0,0 @@
apm-server:
# Defines the host and port the server is listening on. Use "unix:/path/to.sock" to listen on a unix domain socket.
host: "0.0.0.0:8200"
#-------------------------- Elasticsearch output --------------------------
output.elasticsearch:
# Array of hosts to connect to.
# Scheme and port can be left out and will be set to the default (`http` and `9200`).
# In case you specify and additional path, the scheme is required: `http://localhost:9200/path`.
# IPv6 addresses should always be defined as: `https://[2001:db8::1]:9200`.
hosts: ["localhost:9200"]

View file

@ -1,69 +0,0 @@
version: "2"
services:
otel:
image: otel/opentelemetry-collector-contrib:${OTEL_COLLECT_TAG}
command: [ "--config=/etc/otel-config.yaml" ]
volumes:
- ./otel-config.yaml:/etc/otel-config.yaml
network_mode: "host"
elastic:
image: docker.elastic.co/elasticsearch/elasticsearch:${ELASTIC_BUNDLE_TAG}
container_name: elastic
environment:
- "node.name=elastic"
- "http.port=9200"
- "cluster.name=es-docker-cluster"
- "discovery.type=single-node"
- "bootstrap.memory_lock=true"
- "ES_JAVA_OPTS=-Xms512m -Xmx512m"
ulimits:
memlock:
soft: -1
hard: -1
nofile: 65536
volumes:
- "es_data:/usr/share/elasticsearch/data"
network_mode: "host"
# kibana instance and collectors
# see https://www.elastic.co/guide/en/elastic-stack-get-started/current/get-started-docker.html
kibana:
image: docker.elastic.co/kibana/kibana:${ELASTIC_BUNDLE_TAG}
container_name: kibana
environment:
SERVER_NAME: "kibana.local"
# ELASTICSEARCH_URL: "http://localhost:9700"
ELASTICSEARCH_HOSTS: "http://localhost:9200"
depends_on: [ 'elastic' ]
network_mode: "host"
apm:
image: docker.elastic.co/apm/apm-server:${ELASTIC_BUNDLE_TAG}
container_name: apm
volumes:
- "./apm-config.yaml:/usr/share/apm-server/apm-server.yml:ro"
depends_on: [ 'elastic' ]
network_mode: "host"
grafana:
# see https://grafana.com/docs/grafana/latest/installation/docker/
image: "grafana/grafana:8.3.5"
container_name: grafana
# restart: unless-stopped
environment:
- "GF_INSTALL_PLUGINS=grafana-clock-panel,grafana-simple-json-datasource,grafana-piechart-panel,grafana-worldmap-panel,grafana-polystat-panel"
network_mode: "host"
volumes:
# chown 472:472 if needed
- grafana:/var/lib/grafana
- ./grafana/provisioning/:/etc/grafana/provisioning/
volumes:
es_data:
driver: local
grafana:
driver: local
metricbeat:
driver: local

View file

@ -1,19 +0,0 @@
apiVersion: 1
datasources:
- name: DS_ELASTICSEARCH
type: elasticsearch
access: proxy
url: http://localhost:9200
password: ''
user: ''
database: apm-*
basicAuth: false
isDefault: true
jsonData:
esVersion: 7.10.0
logLevelField: ''
logMessageField: ''
maxConcurrentShardRequests: 5
timeField: "@timestamp"
readOnly: false

View file

@ -1,47 +0,0 @@
receivers:
# Data sources: metrics, traces
otlp:
protocols:
grpc:
endpoint: ":4317"
http:
endpoint: ":55681"
# Data sources: metrics
prometheus:
config:
scrape_configs:
- job_name: "garage"
scrape_interval: 5s
static_configs:
- targets: ["localhost:3903"]
exporters:
logging:
logLevel: info
# see https://www.elastic.co/guide/en/apm/get-started/current/open-telemetry-elastic.html#open-telemetry-collector
otlp/elastic:
endpoint: "localhost:8200"
tls:
insecure: true
processors:
batch:
extensions:
health_check:
pprof:
endpoint: :1888
zpages:
endpoint: :55679
service:
extensions: [pprof, zpages, health_check]
pipelines:
traces:
receivers: [otlp]
processors: [batch]
exporters: [logging, otlp/elastic]
metrics:
receivers: [otlp, prometheus]
processors: [batch]
exporters: [logging, otlp/elastic]

File diff suppressed because it is too large Load diff

View file

@ -116,33 +116,295 @@ if [ -z "$SKIP_DUCK" ]; then
done done
fi fi
if [ -z "$SKIP_WINSCP" ]; then # Advanced testing via S3API
echo "🛠️ Testing with winscp" if [ -z "$SKIP_AWS" ]; then
source ${SCRIPT_FOLDER}/dev-env-winscp.sh echo "🔌 Test S3API"
winscp <<EOF
open $WINSCP_URL echo "Test Objects"
ls aws s3api put-object --bucket eprouvette --key a
mkdir eprouvette/winscp aws s3api put-object --bucket eprouvette --key a/a
EOF aws s3api put-object --bucket eprouvette --key a/b
for idx in {1..3}.{rnd,b64}; do aws s3api put-object --bucket eprouvette --key a/c
winscp <<EOF aws s3api put-object --bucket eprouvette --key a/d/a
open $WINSCP_URL aws s3api put-object --bucket eprouvette --key a/é
put Z:\\tmp\\garage.$idx eprouvette/winscp/garage.$idx.winscp aws s3api put-object --bucket eprouvette --key b
ls eprouvette/winscp/ aws s3api put-object --bucket eprouvette --key c
get eprouvette/winscp/garage.$idx.winscp Z:\\tmp\\garage.$idx.dl
rm eprouvette/winscp/garage.$idx.winscp
EOF aws s3api list-objects-v2 --bucket eprouvette >$CMDOUT
diff /tmp/garage.$idx /tmp/garage.$idx.dl [ $(jq '.Contents | length' $CMDOUT) == 8 ]
rm /tmp/garage.$idx.dl [ $(jq '.CommonPrefixes | length' $CMDOUT) == 0 ]
aws s3api list-objects-v2 --bucket eprouvette --page-size 0 >$CMDOUT
[ $(jq '.Contents | length' $CMDOUT) == 8 ]
[ $(jq '.CommonPrefixes | length' $CMDOUT) == 0 ]
aws s3api list-objects-v2 --bucket eprouvette --page-size 999999999 >$CMDOUT
[ $(jq '.Contents | length' $CMDOUT) == 8 ]
[ $(jq '.CommonPrefixes | length' $CMDOUT) == 0 ]
aws s3api list-objects-v2 --bucket eprouvette --page-size 1 >$CMDOUT
[ $(jq '.Contents | length' $CMDOUT) == 8 ]
[ $(jq '.CommonPrefixes | length' $CMDOUT) == 0 ]
aws s3api list-objects-v2 --bucket eprouvette --delimiter '/' >$CMDOUT
[ $(jq '.Contents | length' $CMDOUT) == 3 ]
[ $(jq '.CommonPrefixes | length' $CMDOUT) == 1 ]
aws s3api list-objects-v2 --bucket eprouvette --delimiter '/' --page-size 1 >$CMDOUT
[ $(jq '.Contents | length' $CMDOUT) == 3 ]
[ $(jq '.CommonPrefixes | length' $CMDOUT) == 1 ]
aws s3api list-objects-v2 --bucket eprouvette --prefix 'a/' >$CMDOUT
[ $(jq '.Contents | length' $CMDOUT) == 5 ]
[ $(jq '.CommonPrefixes | length' $CMDOUT) == 0 ]
aws s3api list-objects-v2 --bucket eprouvette --prefix 'a/' --delimiter '/' >$CMDOUT
[ $(jq '.Contents | length' $CMDOUT) == 4 ]
[ $(jq '.CommonPrefixes | length' $CMDOUT) == 1 ]
aws s3api list-objects-v2 --bucket eprouvette --prefix 'a/' --page-size 1 >$CMDOUT
[ $(jq '.Contents | length' $CMDOUT) == 5 ]
[ $(jq '.CommonPrefixes | length' $CMDOUT) == 0 ]
aws s3api list-objects-v2 --bucket eprouvette --prefix 'a/' --delimiter '/' --page-size 1 >$CMDOUT
[ $(jq '.Contents | length' $CMDOUT) == 4 ]
[ $(jq '.CommonPrefixes | length' $CMDOUT) == 1 ]
aws s3api list-objects-v2 --bucket eprouvette --start-after 'Z' >$CMDOUT
[ $(jq '.Contents | length' $CMDOUT) == 8 ]
[ $(jq '.CommonPrefixes | length' $CMDOUT) == 0 ]
aws s3api list-objects-v2 --bucket eprouvette --start-after 'c' >$CMDOUT
! [ -s $CMDOUT ]
aws s3api list-objects --bucket eprouvette >$CMDOUT
[ $(jq '.Contents | length' $CMDOUT) == 8 ]
[ $(jq '.CommonPrefixes | length' $CMDOUT) == 0 ]
aws s3api list-objects --bucket eprouvette --page-size 1 >$CMDOUT
[ $(jq '.Contents | length' $CMDOUT) == 8 ]
[ $(jq '.CommonPrefixes | length' $CMDOUT) == 0 ]
aws s3api list-objects --bucket eprouvette --delimiter '/' >$CMDOUT
[ $(jq '.Contents | length' $CMDOUT) == 3 ]
[ $(jq '.CommonPrefixes | length' $CMDOUT) == 1 ]
# @FIXME it does not work as expected but might be a limitation of aws s3api
# The problem is the conjunction of a delimiter + pagination + v1 of listobjects
#aws s3api list-objects --bucket eprouvette --delimiter '/' --page-size 1 >$CMDOUT
#[ $(jq '.Contents | length' $CMDOUT) == 3 ]
#[ $(jq '.CommonPrefixes | length' $CMDOUT) == 1 ]
aws s3api list-objects --bucket eprouvette --prefix 'a/' >$CMDOUT
[ $(jq '.Contents | length' $CMDOUT) == 5 ]
[ $(jq '.CommonPrefixes | length' $CMDOUT) == 0 ]
aws s3api list-objects --bucket eprouvette --prefix 'a/' --delimiter '/' >$CMDOUT
[ $(jq '.Contents | length' $CMDOUT) == 4 ]
[ $(jq '.CommonPrefixes | length' $CMDOUT) == 1 ]
aws s3api list-objects --bucket eprouvette --prefix 'a/' --page-size 1 >$CMDOUT
[ $(jq '.Contents | length' $CMDOUT) == 5 ]
[ $(jq '.CommonPrefixes | length' $CMDOUT) == 0 ]
# @FIXME idem
#aws s3api list-objects --bucket eprouvette --prefix 'a/' --delimiter '/' --page-size 1 >$CMDOUT
#[ $(jq '.Contents | length' $CMDOUT) == 4 ]
#[ $(jq '.CommonPrefixes | length' $CMDOUT) == 1 ]
aws s3api list-objects --bucket eprouvette --starting-token 'Z' >$CMDOUT
[ $(jq '.Contents | length' $CMDOUT) == 8 ]
[ $(jq '.CommonPrefixes | length' $CMDOUT) == 0 ]
aws s3api list-objects --bucket eprouvette --starting-token 'c' >$CMDOUT
! [ -s $CMDOUT ]
aws s3api list-objects-v2 --bucket eprouvette | \
jq -c '. | {Objects: [.Contents[] | {Key: .Key}], Quiet: true}' | \
aws s3api delete-objects --bucket eprouvette --delete file:///dev/stdin
echo "Test Multipart Upload"
aws s3api create-multipart-upload --bucket eprouvette --key a
aws s3api create-multipart-upload --bucket eprouvette --key a
aws s3api create-multipart-upload --bucket eprouvette --key c
aws s3api create-multipart-upload --bucket eprouvette --key c/a
aws s3api create-multipart-upload --bucket eprouvette --key c/b
aws s3api list-multipart-uploads --bucket eprouvette >$CMDOUT
[ $(jq '.Uploads | length' $CMDOUT) == 5 ]
[ $(jq '.CommonPrefixes | length' $CMDOUT) == 0 ]
aws s3api list-multipart-uploads --bucket eprouvette --page-size 1 >$CMDOUT
[ $(jq '.Uploads | length' $CMDOUT) == 5 ]
[ $(jq '.CommonPrefixes | length' $CMDOUT) == 0 ]
aws s3api list-multipart-uploads --bucket eprouvette --delimiter '/' >$CMDOUT
[ $(jq '.Uploads | length' $CMDOUT) == 3 ]
[ $(jq '.CommonPrefixes | length' $CMDOUT) == 1 ]
aws s3api list-multipart-uploads --bucket eprouvette --delimiter '/' --page-size 1 >$CMDOUT
[ $(jq '.Uploads | length' $CMDOUT) == 3 ]
[ $(jq '.CommonPrefixes | length' $CMDOUT) == 1 ]
aws s3api list-multipart-uploads --bucket eprouvette --prefix 'c' >$CMDOUT
[ $(jq '.Uploads | length' $CMDOUT) == 3 ]
[ $(jq '.CommonPrefixes | length' $CMDOUT) == 0 ]
aws s3api list-multipart-uploads --bucket eprouvette --prefix 'c' --page-size 1 >$CMDOUT
[ $(jq '.Uploads | length' $CMDOUT) == 3 ]
[ $(jq '.CommonPrefixes | length' $CMDOUT) == 0 ]
aws s3api list-multipart-uploads --bucket eprouvette --prefix 'c' --delimiter '/' >$CMDOUT
[ $(jq '.Uploads | length' $CMDOUT) == 1 ]
[ $(jq '.CommonPrefixes | length' $CMDOUT) == 1 ]
aws s3api list-multipart-uploads --bucket eprouvette --prefix 'c' --delimiter '/' --page-size 1 >$CMDOUT
[ $(jq '.Uploads | length' $CMDOUT) == 1 ]
[ $(jq '.CommonPrefixes | length' $CMDOUT) == 1 ]
aws s3api list-multipart-uploads --bucket eprouvette --starting-token 'ZZZZZ' >$CMDOUT
[ $(jq '.Uploads | length' $CMDOUT) == 5 ]
[ $(jq '.CommonPrefixes | length' $CMDOUT) == 0 ]
aws s3api list-multipart-uploads --bucket eprouvette --starting-token 'd' >$CMDOUT
! [ -s $CMDOUT ]
aws s3api list-multipart-uploads --bucket eprouvette | \
jq -r '.Uploads[] | "\(.Key) \(.UploadId)"' | \
while read r; do
key=$(echo $r|cut -d' ' -f 1);
uid=$(echo $r|cut -d' ' -f 2);
aws s3api abort-multipart-upload --bucket eprouvette --key $key --upload-id $uid;
echo "Deleted ${key}:${uid}"
done done
winscp <<EOF
open $WINSCP_URL echo "Test for ListParts"
rm eprouvette/winscp UPLOAD_ID=$(aws s3api create-multipart-upload --bucket eprouvette --key list-parts | jq -r .UploadId)
aws s3api list-parts --bucket eprouvette --key list-parts --upload-id $UPLOAD_ID >$CMDOUT
[ $(jq '.Parts | length' $CMDOUT) == 0 ]
[ $(jq -r '.StorageClass' $CMDOUT) == 'STANDARD' ] # check that the result is not empty
ETAG1=$(aws s3api upload-part --bucket eprouvette --key list-parts --upload-id $UPLOAD_ID --part-number 1 --body /tmp/garage.2.rnd | jq .ETag)
aws s3api list-parts --bucket eprouvette --key list-parts --upload-id $UPLOAD_ID >$CMDOUT
[ $(jq '.Parts | length' $CMDOUT) == 1 ]
[ $(jq '.Parts[0].PartNumber' $CMDOUT) == 1 ]
[ $(jq '.Parts[0].Size' $CMDOUT) == 5242880 ]
[ $(jq '.Parts[0].ETag' $CMDOUT) == $ETAG1 ]
ETAG2=$(aws s3api upload-part --bucket eprouvette --key list-parts --upload-id $UPLOAD_ID --part-number 3 --body /tmp/garage.3.rnd | jq .ETag)
ETAG3=$(aws s3api upload-part --bucket eprouvette --key list-parts --upload-id $UPLOAD_ID --part-number 2 --body /tmp/garage.2.rnd | jq .ETag)
aws s3api list-parts --bucket eprouvette --key list-parts --upload-id $UPLOAD_ID >$CMDOUT
[ $(jq '.Parts | length' $CMDOUT) == 3 ]
[ $(jq '.Parts[1].ETag' $CMDOUT) == $ETAG3 ]
aws s3api list-parts --bucket eprouvette --key list-parts --upload-id $UPLOAD_ID --page-size 1 >$CMDOUT
[ $(jq '.Parts | length' $CMDOUT) == 3 ]
[ $(jq '.Parts[1].ETag' $CMDOUT) == $ETAG3 ]
cat >/tmp/garage.multipart_struct <<EOF
{
"Parts": [
{
"ETag": $ETAG1,
"PartNumber": 1
},
{
"ETag": $ETAG3,
"PartNumber": 2
},
{
"ETag": $ETAG2,
"PartNumber": 3
}
]
}
EOF EOF
aws s3api complete-multipart-upload \
--bucket eprouvette --key list-parts --upload-id $UPLOAD_ID \
--multipart-upload file:///tmp/garage.multipart_struct
! aws s3api list-parts --bucket eprouvette --key list-parts --upload-id $UPLOAD_ID >$CMDOUT
aws s3 rm "s3://eprouvette/list-parts"
# @FIXME We do not write tests with --starting-token due to a bug with awscli
# See here: https://github.com/aws/aws-cli/issues/6666
echo "Test for UploadPartCopy"
aws s3 cp "/tmp/garage.3.rnd" "s3://eprouvette/copy_part_source"
UPLOAD_ID=$(aws s3api create-multipart-upload --bucket eprouvette --key test_multipart | jq -r .UploadId)
PART1=$(aws s3api upload-part \
--bucket eprouvette --key test_multipart \
--upload-id $UPLOAD_ID --part-number 1 \
--body /tmp/garage.2.rnd | jq .ETag)
PART2=$(aws s3api upload-part-copy \
--bucket eprouvette --key test_multipart \
--upload-id $UPLOAD_ID --part-number 2 \
--copy-source "/eprouvette/copy_part_source" \
--copy-source-range "bytes=500-5000500" \
| jq .CopyPartResult.ETag)
PART3=$(aws s3api upload-part \
--bucket eprouvette --key test_multipart \
--upload-id $UPLOAD_ID --part-number 3 \
--body /tmp/garage.3.rnd | jq .ETag)
cat >/tmp/garage.multipart_struct <<EOF
{
"Parts": [
{
"ETag": $PART1,
"PartNumber": 1
},
{
"ETag": $PART2,
"PartNumber": 2
},
{
"ETag": $PART3,
"PartNumber": 3
}
]
}
EOF
aws s3api complete-multipart-upload \
--bucket eprouvette --key test_multipart --upload-id $UPLOAD_ID \
--multipart-upload file:///tmp/garage.multipart_struct
aws s3 cp "s3://eprouvette/test_multipart" /tmp/garage.test_multipart
cat /tmp/garage.2.rnd <(tail -c +501 /tmp/garage.3.rnd | head -c 5000001) /tmp/garage.3.rnd > /tmp/garage.test_multipart_reference
diff /tmp/garage.test_multipart /tmp/garage.test_multipart_reference >/tmp/garage.test_multipart_diff 2>&1
aws s3 rm "s3://eprouvette/copy_part_source"
aws s3 rm "s3://eprouvette/test_multipart"
rm /tmp/garage.multipart_struct
rm /tmp/garage.test_multipart
rm /tmp/garage.test_multipart_reference
rm /tmp/garage.test_multipart_diff
echo "Test CORS endpoints"
garage -c /tmp/config.1.toml bucket website --allow eprouvette
aws s3api put-object --bucket eprouvette --key index.html
CORS='{"CORSRules":[{"AllowedHeaders":["*"],"AllowedMethods":["GET","PUT"],"AllowedOrigins":["*"]}]}'
aws s3api put-bucket-cors --bucket eprouvette --cors-configuration $CORS
[ `aws s3api get-bucket-cors --bucket eprouvette | jq -c` == $CORS ]
curl -s -i -H 'Origin: http://example.com' --header "Host: eprouvette.web.garage.localhost" http://127.0.0.1:3921/ | grep access-control-allow-origin
curl -s -i -X OPTIONS -H 'Access-Control-Request-Method: PUT' -H 'Origin: http://example.com' --header "Host: eprouvette.web.garage.localhost" http://127.0.0.1:3921/ | grep access-control-allow-methods
curl -s -i -X OPTIONS -H 'Access-Control-Request-Method: DELETE' -H 'Origin: http://example.com' --header "Host: eprouvette.web.garage.localhost" http://127.0.0.1:3921/ | grep '403 Forbidden'
#@TODO we may want to test the S3 endpoint but we need to handle authentication, which is way more complex.
aws s3api delete-bucket-cors --bucket eprouvette
! [ -s `aws s3api get-bucket-cors --bucket eprouvette` ]
curl -s -i -X OPTIONS -H 'Access-Control-Request-Method: PUT' -H 'Origin: http://example.com' --header "Host: eprouvette.web.garage.localhost" http://127.0.0.1:3921/ | grep '403 Forbidden'
aws s3api delete-object --bucket eprouvette --key index.html
garage -c /tmp/config.1.toml bucket website --deny eprouvette
fi fi
rm /tmp/garage.{1..3}.{rnd,b64} rm /tmp/garage.{1..3}.{rnd,b64}
if [ -z "$SKIP_AWS" ]; then
echo "🪣 Test bucket logic "
AWS_ACCESS_KEY_ID=`cat /tmp/garage.s3 |cut -d' ' -f1`
[ $(aws s3 ls | wc -l) == 1 ]
garage -c /tmp/config.1.toml bucket create seau
garage -c /tmp/config.1.toml bucket allow --read seau --key $AWS_ACCESS_KEY_ID
[ $(aws s3 ls | wc -l) == 2 ]
garage -c /tmp/config.1.toml bucket deny --read seau --key $AWS_ACCESS_KEY_ID
[ $(aws s3 ls | wc -l) == 1 ]
garage -c /tmp/config.1.toml bucket allow --read seau --key $AWS_ACCESS_KEY_ID
[ $(aws s3 ls | wc -l) == 2 ]
garage -c /tmp/config.1.toml bucket delete --yes seau
[ $(aws s3 ls | wc -l) == 1 ]
fi
if [ -z "$SKIP_AWS" ]; then
echo "🧪 Website Testing"
echo "<h1>hello world</h1>" > /tmp/garage-index.html
aws s3 cp /tmp/garage-index.html s3://eprouvette/index.html
[ `curl -s -o /dev/null -w "%{http_code}" --header "Host: eprouvette.web.garage.localhost" http://127.0.0.1:3921/ ` == 404 ]
garage -c /tmp/config.1.toml bucket website --allow eprouvette
[ `curl -s -o /dev/null -w "%{http_code}" --header "Host: eprouvette.web.garage.localhost" http://127.0.0.1:3921/ ` == 200 ]
garage -c /tmp/config.1.toml bucket website --deny eprouvette
[ `curl -s -o /dev/null -w "%{http_code}" --header "Host: eprouvette.web.garage.localhost" http://127.0.0.1:3921/ ` == 404 ]
aws s3 rm s3://eprouvette/index.html
rm /tmp/garage-index.html
fi
echo "🏁 Teardown" echo "🏁 Teardown"
AWS_ACCESS_KEY_ID=`cat /tmp/garage.s3 |cut -d' ' -f1` AWS_ACCESS_KEY_ID=`cat /tmp/garage.s3 |cut -d' ' -f1`
AWS_SECRET_ACCESS_KEY=`cat /tmp/garage.s3 |cut -d' ' -f2` AWS_SECRET_ACCESS_KEY=`cat /tmp/garage.s3 |cut -d' ' -f2`

View file

@ -13,7 +13,6 @@ let
overlays = [ cargo2nixOverlay ]; overlays = [ cargo2nixOverlay ];
}; };
kaniko = (import ./nix/kaniko.nix) pkgs; kaniko = (import ./nix/kaniko.nix) pkgs;
winscp = (import ./nix/winscp.nix) pkgs;
in in
@ -77,15 +76,10 @@ function refresh_toolchain {
pkgs.rustPlatform.rust.cargo pkgs.rustPlatform.rust.cargo
pkgs.clippy pkgs.clippy
pkgs.rustfmt pkgs.rustfmt
pkgs.perl
pkgs.protobuf
pkgs.pkg-config
pkgs.openssl
cargo2nix.packages.x86_64-linux.cargo2nix cargo2nix.packages.x86_64-linux.cargo2nix
] else []) ] else [])
++ ++
(if integration then [ (if integration then [
winscp
pkgs.s3cmd pkgs.s3cmd
pkgs.awscli2 pkgs.awscli2
pkgs.minio-client pkgs.minio-client

View file

@ -1,6 +1,6 @@
[package] [package]
name = "garage_api" name = "garage_api"
version = "0.7.0" version = "0.6.0"
authors = ["Alex Auvolat <alex@adnab.me>"] authors = ["Alex Auvolat <alex@adnab.me>"]
edition = "2018" edition = "2018"
license = "AGPL-3.0" license = "AGPL-3.0"
@ -14,13 +14,10 @@ path = "lib.rs"
# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html # See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
[dependencies] [dependencies]
garage_model = { version = "0.7.0", path = "../model" } garage_model = { version = "0.6.0", path = "../model" }
garage_table = { version = "0.7.0", path = "../table" } garage_table = { version = "0.6.0", path = "../table" }
garage_block = { version = "0.7.0", path = "../block" } garage_util = { version = "0.6.0", path = "../util" }
garage_util = { version = "0.7.0", path = "../util" }
garage_rpc = { version = "0.7.0", path = "../rpc" }
async-trait = "0.1.7"
base64 = "0.13" base64 = "0.13"
bytes = "1.0" bytes = "1.0"
chrono = "0.4" chrono = "0.4"
@ -29,7 +26,7 @@ err-derive = "0.3"
hex = "0.4" hex = "0.4"
hmac = "0.10" hmac = "0.10"
idna = "0.2" idna = "0.2"
tracing = "0.1.30" log = "0.4"
md-5 = "0.9" md-5 = "0.9"
nom = "7.1" nom = "7.1"
sha2 = "0.9" sha2 = "0.9"
@ -44,7 +41,6 @@ http = "0.2"
httpdate = "0.3" httpdate = "0.3"
http-range = "0.1" http-range = "0.1"
hyper = { version = "0.14", features = ["server", "http1", "runtime", "tcp", "stream"] } hyper = { version = "0.14", features = ["server", "http1", "runtime", "tcp", "stream"] }
hyper-tls = {version = "0.5.0"}
multer = "2.0" multer = "2.0"
percent-encoding = "2.1.0" percent-encoding = "2.1.0"
roxmltree = "0.14" roxmltree = "0.14"
@ -53,11 +49,3 @@ serde_bytes = "0.11"
serde_json = "1.0" serde_json = "1.0"
quick-xml = { version = "0.21", features = [ "serialize" ] } quick-xml = { version = "0.21", features = [ "serialize" ] }
url = "2.1" url = "2.1"
opentelemetry = "0.17"
opentelemetry-prometheus = "0.10"
opentelemetry-otlp = "0.10"
prometheus = "0.13"
[features]
k2v = [ "garage_util/k2v", "garage_model/k2v" ]

View file

@ -1,194 +0,0 @@
use std::sync::Arc;
use async_trait::async_trait;
use futures::future::Future;
use http::header::{
ACCESS_CONTROL_ALLOW_METHODS, ACCESS_CONTROL_ALLOW_ORIGIN, ALLOW, CONTENT_TYPE,
};
use hyper::{Body, Request, Response};
use opentelemetry::trace::{SpanRef, Tracer};
use opentelemetry_prometheus::PrometheusExporter;
use prometheus::{Encoder, TextEncoder};
use garage_model::garage::Garage;
use garage_util::error::Error as GarageError;
use crate::generic_server::*;
use crate::admin::bucket::*;
use crate::admin::cluster::*;
use crate::admin::error::*;
use crate::admin::key::*;
use crate::admin::router::{Authorization, Endpoint};
pub struct AdminApiServer {
garage: Arc<Garage>,
exporter: PrometheusExporter,
metrics_token: Option<String>,
admin_token: Option<String>,
}
impl AdminApiServer {
pub fn new(garage: Arc<Garage>) -> Self {
let exporter = opentelemetry_prometheus::exporter().init();
let cfg = &garage.config.admin;
let metrics_token = cfg
.metrics_token
.as_ref()
.map(|tok| format!("Bearer {}", tok));
let admin_token = cfg
.admin_token
.as_ref()
.map(|tok| format!("Bearer {}", tok));
Self {
garage,
exporter,
metrics_token,
admin_token,
}
}
pub async fn run(self, shutdown_signal: impl Future<Output = ()>) -> Result<(), GarageError> {
if let Some(bind_addr) = self.garage.config.admin.api_bind_addr {
let region = self.garage.config.s3_api.s3_region.clone();
ApiServer::new(region, self)
.run_server(bind_addr, shutdown_signal)
.await
} else {
Ok(())
}
}
fn handle_options(&self, _req: &Request<Body>) -> Result<Response<Body>, Error> {
Ok(Response::builder()
.status(204)
.header(ALLOW, "OPTIONS, GET, POST")
.header(ACCESS_CONTROL_ALLOW_METHODS, "OPTIONS, GET, POST")
.header(ACCESS_CONTROL_ALLOW_ORIGIN, "*")
.body(Body::empty())?)
}
fn handle_metrics(&self) -> Result<Response<Body>, Error> {
let mut buffer = vec![];
let encoder = TextEncoder::new();
let tracer = opentelemetry::global::tracer("garage");
let metric_families = tracer.in_span("admin/gather_metrics", |_| {
self.exporter.registry().gather()
});
encoder
.encode(&metric_families, &mut buffer)
.ok_or_internal_error("Could not serialize metrics")?;
Ok(Response::builder()
.status(200)
.header(CONTENT_TYPE, encoder.format_type())
.body(Body::from(buffer))?)
}
}
#[async_trait]
impl ApiHandler for AdminApiServer {
const API_NAME: &'static str = "admin";
const API_NAME_DISPLAY: &'static str = "Admin";
type Endpoint = Endpoint;
type Error = Error;
fn parse_endpoint(&self, req: &Request<Body>) -> Result<Endpoint, Error> {
Endpoint::from_request(req)
}
async fn handle(
&self,
req: Request<Body>,
endpoint: Endpoint,
) -> Result<Response<Body>, Error> {
let expected_auth_header =
match endpoint.authorization_type() {
Authorization::MetricsToken => self.metrics_token.as_ref(),
Authorization::AdminToken => match &self.admin_token {
None => return Err(Error::forbidden(
"Admin token isn't configured, admin API access is disabled for security.",
)),
Some(t) => Some(t),
},
};
if let Some(h) = expected_auth_header {
match req.headers().get("Authorization") {
None => return Err(Error::forbidden("Authorization token must be provided")),
Some(v) => {
let authorized = v.to_str().map(|hv| hv.trim() == h).unwrap_or(false);
if !authorized {
return Err(Error::forbidden("Invalid authorization token provided"));
}
}
}
}
match endpoint {
Endpoint::Options => self.handle_options(&req),
Endpoint::Metrics => self.handle_metrics(),
Endpoint::GetClusterStatus => handle_get_cluster_status(&self.garage).await,
Endpoint::ConnectClusterNodes => handle_connect_cluster_nodes(&self.garage, req).await,
// Layout
Endpoint::GetClusterLayout => handle_get_cluster_layout(&self.garage).await,
Endpoint::UpdateClusterLayout => handle_update_cluster_layout(&self.garage, req).await,
Endpoint::ApplyClusterLayout => handle_apply_cluster_layout(&self.garage, req).await,
Endpoint::RevertClusterLayout => handle_revert_cluster_layout(&self.garage, req).await,
// Keys
Endpoint::ListKeys => handle_list_keys(&self.garage).await,
Endpoint::GetKeyInfo { id, search } => {
handle_get_key_info(&self.garage, id, search).await
}
Endpoint::CreateKey => handle_create_key(&self.garage, req).await,
Endpoint::ImportKey => handle_import_key(&self.garage, req).await,
Endpoint::UpdateKey { id } => handle_update_key(&self.garage, id, req).await,
Endpoint::DeleteKey { id } => handle_delete_key(&self.garage, id).await,
// Buckets
Endpoint::ListBuckets => handle_list_buckets(&self.garage).await,
Endpoint::GetBucketInfo { id, global_alias } => {
handle_get_bucket_info(&self.garage, id, global_alias).await
}
Endpoint::CreateBucket => handle_create_bucket(&self.garage, req).await,
Endpoint::DeleteBucket { id } => handle_delete_bucket(&self.garage, id).await,
Endpoint::UpdateBucket { id } => handle_update_bucket(&self.garage, id, req).await,
// Bucket-key permissions
Endpoint::BucketAllowKey => {
handle_bucket_change_key_perm(&self.garage, req, true).await
}
Endpoint::BucketDenyKey => {
handle_bucket_change_key_perm(&self.garage, req, false).await
}
// Bucket aliasing
Endpoint::GlobalAliasBucket { id, alias } => {
handle_global_alias_bucket(&self.garage, id, alias).await
}
Endpoint::GlobalUnaliasBucket { id, alias } => {
handle_global_unalias_bucket(&self.garage, id, alias).await
}
Endpoint::LocalAliasBucket {
id,
access_key_id,
alias,
} => handle_local_alias_bucket(&self.garage, id, access_key_id, alias).await,
Endpoint::LocalUnaliasBucket {
id,
access_key_id,
alias,
} => handle_local_unalias_bucket(&self.garage, id, access_key_id, alias).await,
}
}
}
impl ApiEndpoint for Endpoint {
fn name(&self) -> &'static str {
Endpoint::name(self)
}
fn add_span_attributes(&self, _span: SpanRef<'_>) {}
}

View file

@ -1,580 +0,0 @@
use std::collections::HashMap;
use std::sync::Arc;
use hyper::{Body, Request, Response, StatusCode};
use serde::{Deserialize, Serialize};
use garage_util::crdt::*;
use garage_util::data::*;
use garage_util::time::*;
use garage_table::*;
use garage_model::bucket_alias_table::*;
use garage_model::bucket_table::*;
use garage_model::garage::Garage;
use garage_model::permission::*;
use garage_model::s3::object_table::*;
use crate::admin::error::*;
use crate::admin::key::ApiBucketKeyPerm;
use crate::common_error::CommonError;
use crate::helpers::{json_ok_response, parse_json_body};
pub async fn handle_list_buckets(garage: &Arc<Garage>) -> Result<Response<Body>, Error> {
let buckets = garage
.bucket_table
.get_range(
&EmptyKey,
None,
Some(DeletedFilter::NotDeleted),
10000,
EnumerationOrder::Forward,
)
.await?;
let res = buckets
.into_iter()
.map(|b| {
let state = b.state.as_option().unwrap();
ListBucketResultItem {
id: hex::encode(b.id),
global_aliases: state
.aliases
.items()
.iter()
.filter(|(_, _, a)| *a)
.map(|(n, _, _)| n.to_string())
.collect::<Vec<_>>(),
local_aliases: state
.local_aliases
.items()
.iter()
.filter(|(_, _, a)| *a)
.map(|((k, n), _, _)| BucketLocalAlias {
access_key_id: k.to_string(),
alias: n.to_string(),
})
.collect::<Vec<_>>(),
}
})
.collect::<Vec<_>>();
Ok(json_ok_response(&res)?)
}
#[derive(Serialize)]
#[serde(rename_all = "camelCase")]
struct ListBucketResultItem {
id: String,
global_aliases: Vec<String>,
local_aliases: Vec<BucketLocalAlias>,
}
#[derive(Serialize)]
#[serde(rename_all = "camelCase")]
struct BucketLocalAlias {
access_key_id: String,
alias: String,
}
#[derive(Serialize, Deserialize)]
#[serde(rename_all = "camelCase")]
struct ApiBucketQuotas {
max_size: Option<u64>,
max_objects: Option<u64>,
}
pub async fn handle_get_bucket_info(
garage: &Arc<Garage>,
id: Option<String>,
global_alias: Option<String>,
) -> Result<Response<Body>, Error> {
let bucket_id = match (id, global_alias) {
(Some(id), None) => parse_bucket_id(&id)?,
(None, Some(ga)) => garage
.bucket_helper()
.resolve_global_bucket_name(&ga)
.await?
.ok_or_else(|| HelperError::NoSuchBucket(ga.to_string()))?,
_ => {
return Err(Error::bad_request(
"Either id or globalAlias must be provided (but not both)",
));
}
};
bucket_info_results(garage, bucket_id).await
}
async fn bucket_info_results(
garage: &Arc<Garage>,
bucket_id: Uuid,
) -> Result<Response<Body>, Error> {
let bucket = garage
.bucket_helper()
.get_existing_bucket(bucket_id)
.await?;
let counters = garage
.object_counter_table
.table
.get(&bucket_id, &EmptyKey)
.await?
.map(|x| x.filtered_values(&garage.system.ring.borrow()))
.unwrap_or_default();
let mut relevant_keys = HashMap::new();
for (k, _) in bucket
.state
.as_option()
.unwrap()
.authorized_keys
.items()
.iter()
{
if let Some(key) = garage
.key_table
.get(&EmptyKey, k)
.await?
.filter(|k| !k.is_deleted())
{
if !key.state.is_deleted() {
relevant_keys.insert(k.clone(), key);
}
}
}
for ((k, _), _, _) in bucket
.state
.as_option()
.unwrap()
.local_aliases
.items()
.iter()
{
if relevant_keys.contains_key(k) {
continue;
}
if let Some(key) = garage.key_table.get(&EmptyKey, k).await? {
if !key.state.is_deleted() {
relevant_keys.insert(k.clone(), key);
}
}
}
let state = bucket.state.as_option().unwrap();
let quotas = state.quotas.get();
let res =
GetBucketInfoResult {
id: hex::encode(&bucket.id),
global_aliases: state
.aliases
.items()
.iter()
.filter(|(_, _, a)| *a)
.map(|(n, _, _)| n.to_string())
.collect::<Vec<_>>(),
website_access: state.website_config.get().is_some(),
website_config: state.website_config.get().clone().map(|wsc| {
GetBucketInfoWebsiteResult {
index_document: wsc.index_document,
error_document: wsc.error_document,
}
}),
keys: relevant_keys
.into_iter()
.map(|(_, key)| {
let p = key.state.as_option().unwrap();
GetBucketInfoKey {
access_key_id: key.key_id,
name: p.name.get().to_string(),
permissions: p
.authorized_buckets
.get(&bucket.id)
.map(|p| ApiBucketKeyPerm {
read: p.allow_read,
write: p.allow_write,
owner: p.allow_owner,
})
.unwrap_or_default(),
bucket_local_aliases: p
.local_aliases
.items()
.iter()
.filter(|(_, _, b)| *b == Some(bucket.id))
.map(|(n, _, _)| n.to_string())
.collect::<Vec<_>>(),
}
})
.collect::<Vec<_>>(),
objects: counters.get(OBJECTS).cloned().unwrap_or_default(),
bytes: counters.get(BYTES).cloned().unwrap_or_default(),
unfinshed_uploads: counters
.get(UNFINISHED_UPLOADS)
.cloned()
.unwrap_or_default(),
quotas: ApiBucketQuotas {
max_size: quotas.max_size,
max_objects: quotas.max_objects,
},
};
Ok(json_ok_response(&res)?)
}
#[derive(Serialize)]
#[serde(rename_all = "camelCase")]
struct GetBucketInfoResult {
id: String,
global_aliases: Vec<String>,
website_access: bool,
#[serde(default)]
website_config: Option<GetBucketInfoWebsiteResult>,
keys: Vec<GetBucketInfoKey>,
objects: i64,
bytes: i64,
unfinshed_uploads: i64,
quotas: ApiBucketQuotas,
}
#[derive(Serialize)]
#[serde(rename_all = "camelCase")]
struct GetBucketInfoWebsiteResult {
index_document: String,
error_document: Option<String>,
}
#[derive(Serialize)]
#[serde(rename_all = "camelCase")]
struct GetBucketInfoKey {
access_key_id: String,
name: String,
permissions: ApiBucketKeyPerm,
bucket_local_aliases: Vec<String>,
}
pub async fn handle_create_bucket(
garage: &Arc<Garage>,
req: Request<Body>,
) -> Result<Response<Body>, Error> {
let req = parse_json_body::<CreateBucketRequest>(req).await?;
if let Some(ga) = &req.global_alias {
if !is_valid_bucket_name(ga) {
return Err(Error::bad_request(format!(
"{}: {}",
ga, INVALID_BUCKET_NAME_MESSAGE
)));
}
if let Some(alias) = garage.bucket_alias_table.get(&EmptyKey, ga).await? {
if alias.state.get().is_some() {
return Err(CommonError::BucketAlreadyExists.into());
}
}
}
if let Some(la) = &req.local_alias {
if !is_valid_bucket_name(&la.alias) {
return Err(Error::bad_request(format!(
"{}: {}",
la.alias, INVALID_BUCKET_NAME_MESSAGE
)));
}
let key = garage
.key_helper()
.get_existing_key(&la.access_key_id)
.await?;
let state = key.state.as_option().unwrap();
if matches!(state.local_aliases.get(&la.alias), Some(_)) {
return Err(Error::bad_request("Local alias already exists"));
}
}
let bucket = Bucket::new();
garage.bucket_table.insert(&bucket).await?;
if let Some(ga) = &req.global_alias {
garage
.bucket_helper()
.set_global_bucket_alias(bucket.id, ga)
.await?;
}
if let Some(la) = &req.local_alias {
garage
.bucket_helper()
.set_local_bucket_alias(bucket.id, &la.access_key_id, &la.alias)
.await?;
if la.allow.read || la.allow.write || la.allow.owner {
garage
.bucket_helper()
.set_bucket_key_permissions(
bucket.id,
&la.access_key_id,
BucketKeyPerm {
timestamp: now_msec(),
allow_read: la.allow.read,
allow_write: la.allow.write,
allow_owner: la.allow.owner,
},
)
.await?;
}
}
bucket_info_results(garage, bucket.id).await
}
#[derive(Deserialize)]
#[serde(rename_all = "camelCase")]
struct CreateBucketRequest {
global_alias: Option<String>,
local_alias: Option<CreateBucketLocalAlias>,
}
#[derive(Deserialize)]
#[serde(rename_all = "camelCase")]
struct CreateBucketLocalAlias {
access_key_id: String,
alias: String,
#[serde(default)]
allow: ApiBucketKeyPerm,
}
pub async fn handle_delete_bucket(
garage: &Arc<Garage>,
id: String,
) -> Result<Response<Body>, Error> {
let helper = garage.bucket_helper();
let bucket_id = parse_bucket_id(&id)?;
let mut bucket = helper.get_existing_bucket(bucket_id).await?;
let state = bucket.state.as_option().unwrap();
// Check bucket is empty
if !helper.is_bucket_empty(bucket_id).await? {
return Err(CommonError::BucketNotEmpty.into());
}
// --- done checking, now commit ---
// 1. delete authorization from keys that had access
for (key_id, perm) in bucket.authorized_keys() {
if perm.is_any() {
helper
.set_bucket_key_permissions(bucket.id, key_id, BucketKeyPerm::NO_PERMISSIONS)
.await?;
}
}
// 2. delete all local aliases
for ((key_id, alias), _, active) in state.local_aliases.items().iter() {
if *active {
helper
.unset_local_bucket_alias(bucket.id, key_id, alias)
.await?;
}
}
// 3. delete all global aliases
for (alias, _, active) in state.aliases.items().iter() {
if *active {
helper.purge_global_bucket_alias(bucket.id, alias).await?;
}
}
// 4. delete bucket
bucket.state = Deletable::delete();
garage.bucket_table.insert(&bucket).await?;
Ok(Response::builder()
.status(StatusCode::NO_CONTENT)
.body(Body::empty())?)
}
pub async fn handle_update_bucket(
garage: &Arc<Garage>,
id: String,
req: Request<Body>,
) -> Result<Response<Body>, Error> {
let req = parse_json_body::<UpdateBucketRequest>(req).await?;
let bucket_id = parse_bucket_id(&id)?;
let mut bucket = garage
.bucket_helper()
.get_existing_bucket(bucket_id)
.await?;
let state = bucket.state.as_option_mut().unwrap();
if let Some(wa) = req.website_access {
if wa.enabled {
state.website_config.update(Some(WebsiteConfig {
index_document: wa.index_document.ok_or_bad_request(
"Please specify indexDocument when enabling website access.",
)?,
error_document: wa.error_document,
}));
} else {
if wa.index_document.is_some() || wa.error_document.is_some() {
return Err(Error::bad_request(
"Cannot specify indexDocument or errorDocument when disabling website access.",
));
}
state.website_config.update(None);
}
}
if let Some(q) = req.quotas {
state.quotas.update(BucketQuotas {
max_size: q.max_size,
max_objects: q.max_objects,
});
}
garage.bucket_table.insert(&bucket).await?;
bucket_info_results(garage, bucket_id).await
}
#[derive(Deserialize)]
#[serde(rename_all = "camelCase")]
struct UpdateBucketRequest {
website_access: Option<UpdateBucketWebsiteAccess>,
quotas: Option<ApiBucketQuotas>,
}
#[derive(Deserialize)]
#[serde(rename_all = "camelCase")]
struct UpdateBucketWebsiteAccess {
enabled: bool,
index_document: Option<String>,
error_document: Option<String>,
}
// ---- BUCKET/KEY PERMISSIONS ----
pub async fn handle_bucket_change_key_perm(
garage: &Arc<Garage>,
req: Request<Body>,
new_perm_flag: bool,
) -> Result<Response<Body>, Error> {
let req = parse_json_body::<BucketKeyPermChangeRequest>(req).await?;
let bucket_id = parse_bucket_id(&req.bucket_id)?;
let bucket = garage
.bucket_helper()
.get_existing_bucket(bucket_id)
.await?;
let state = bucket.state.as_option().unwrap();
let key = garage
.key_helper()
.get_existing_key(&req.access_key_id)
.await?;
let mut perm = state
.authorized_keys
.get(&key.key_id)
.cloned()
.unwrap_or(BucketKeyPerm::NO_PERMISSIONS);
if req.permissions.read {
perm.allow_read = new_perm_flag;
}
if req.permissions.write {
perm.allow_write = new_perm_flag;
}
if req.permissions.owner {
perm.allow_owner = new_perm_flag;
}
garage
.bucket_helper()
.set_bucket_key_permissions(bucket.id, &key.key_id, perm)
.await?;
bucket_info_results(garage, bucket.id).await
}
#[derive(Deserialize)]
#[serde(rename_all = "camelCase")]
struct BucketKeyPermChangeRequest {
bucket_id: String,
access_key_id: String,
permissions: ApiBucketKeyPerm,
}
// ---- BUCKET ALIASES ----
pub async fn handle_global_alias_bucket(
garage: &Arc<Garage>,
bucket_id: String,
alias: String,
) -> Result<Response<Body>, Error> {
let bucket_id = parse_bucket_id(&bucket_id)?;
garage
.bucket_helper()
.set_global_bucket_alias(bucket_id, &alias)
.await?;
bucket_info_results(garage, bucket_id).await
}
pub async fn handle_global_unalias_bucket(
garage: &Arc<Garage>,
bucket_id: String,
alias: String,
) -> Result<Response<Body>, Error> {
let bucket_id = parse_bucket_id(&bucket_id)?;
garage
.bucket_helper()
.unset_global_bucket_alias(bucket_id, &alias)
.await?;
bucket_info_results(garage, bucket_id).await
}
pub async fn handle_local_alias_bucket(
garage: &Arc<Garage>,
bucket_id: String,
access_key_id: String,
alias: String,
) -> Result<Response<Body>, Error> {
let bucket_id = parse_bucket_id(&bucket_id)?;
garage
.bucket_helper()
.set_local_bucket_alias(bucket_id, &access_key_id, &alias)
.await?;
bucket_info_results(garage, bucket_id).await
}
pub async fn handle_local_unalias_bucket(
garage: &Arc<Garage>,
bucket_id: String,
access_key_id: String,
alias: String,
) -> Result<Response<Body>, Error> {
let bucket_id = parse_bucket_id(&bucket_id)?;
garage
.bucket_helper()
.unset_local_bucket_alias(bucket_id, &access_key_id, &alias)
.await?;
bucket_info_results(garage, bucket_id).await
}
// ---- HELPER ----
fn parse_bucket_id(id: &str) -> Result<Uuid, Error> {
let id_hex = hex::decode(&id).ok_or_bad_request("Invalid bucket id")?;
Ok(Uuid::try_from(&id_hex).ok_or_bad_request("Invalid bucket id")?)
}

View file

@ -1,191 +0,0 @@
use std::collections::HashMap;
use std::net::SocketAddr;
use std::sync::Arc;
use hyper::{Body, Request, Response, StatusCode};
use serde::{Deserialize, Serialize};
use garage_util::crdt::*;
use garage_util::data::*;
use garage_rpc::layout::*;
use garage_model::garage::Garage;
use crate::admin::error::*;
use crate::helpers::{json_ok_response, parse_json_body};
pub async fn handle_get_cluster_status(garage: &Arc<Garage>) -> Result<Response<Body>, Error> {
let res = GetClusterStatusResponse {
node: hex::encode(garage.system.id),
garage_version: garage.system.garage_version(),
db_engine: garage.db.engine(),
known_nodes: garage
.system
.get_known_nodes()
.into_iter()
.map(|i| {
(
hex::encode(i.id),
KnownNodeResp {
addr: i.addr,
is_up: i.is_up,
last_seen_secs_ago: i.last_seen_secs_ago,
hostname: i.status.hostname,
},
)
})
.collect(),
layout: get_cluster_layout(garage),
};
Ok(json_ok_response(&res)?)
}
pub async fn handle_connect_cluster_nodes(
garage: &Arc<Garage>,
req: Request<Body>,
) -> Result<Response<Body>, Error> {
let req = parse_json_body::<Vec<String>>(req).await?;
let res = futures::future::join_all(req.iter().map(|node| garage.system.connect(node)))
.await
.into_iter()
.map(|r| match r {
Ok(()) => ConnectClusterNodesResponse {
success: true,
error: None,
},
Err(e) => ConnectClusterNodesResponse {
success: false,
error: Some(format!("{}", e)),
},
})
.collect::<Vec<_>>();
Ok(json_ok_response(&res)?)
}
pub async fn handle_get_cluster_layout(garage: &Arc<Garage>) -> Result<Response<Body>, Error> {
let res = get_cluster_layout(garage);
Ok(json_ok_response(&res)?)
}
fn get_cluster_layout(garage: &Arc<Garage>) -> GetClusterLayoutResponse {
let layout = garage.system.get_cluster_layout();
GetClusterLayoutResponse {
version: layout.version,
roles: layout
.roles
.items()
.iter()
.filter(|(_, _, v)| v.0.is_some())
.map(|(k, _, v)| (hex::encode(k), v.0.clone()))
.collect(),
staged_role_changes: layout
.staging
.items()
.iter()
.filter(|(k, _, v)| layout.roles.get(k) != Some(v))
.map(|(k, _, v)| (hex::encode(k), v.0.clone()))
.collect(),
}
}
#[derive(Serialize)]
#[serde(rename_all = "camelCase")]
struct GetClusterStatusResponse {
node: String,
garage_version: &'static str,
db_engine: String,
known_nodes: HashMap<String, KnownNodeResp>,
layout: GetClusterLayoutResponse,
}
#[derive(Serialize)]
struct ConnectClusterNodesResponse {
success: bool,
error: Option<String>,
}
#[derive(Serialize)]
#[serde(rename_all = "camelCase")]
struct GetClusterLayoutResponse {
version: u64,
roles: HashMap<String, Option<NodeRole>>,
staged_role_changes: HashMap<String, Option<NodeRole>>,
}
#[derive(Serialize)]
struct KnownNodeResp {
addr: SocketAddr,
is_up: bool,
last_seen_secs_ago: Option<u64>,
hostname: String,
}
pub async fn handle_update_cluster_layout(
garage: &Arc<Garage>,
req: Request<Body>,
) -> Result<Response<Body>, Error> {
let updates = parse_json_body::<UpdateClusterLayoutRequest>(req).await?;
let mut layout = garage.system.get_cluster_layout();
let mut roles = layout.roles.clone();
roles.merge(&layout.staging);
for (node, role) in updates {
let node = hex::decode(node).ok_or_bad_request("Invalid node identifier")?;
let node = Uuid::try_from(&node).ok_or_bad_request("Invalid node identifier")?;
layout
.staging
.merge(&roles.update_mutator(node, NodeRoleV(role)));
}
garage.system.update_cluster_layout(&layout).await?;
Ok(Response::builder()
.status(StatusCode::OK)
.body(Body::empty())?)
}
pub async fn handle_apply_cluster_layout(
garage: &Arc<Garage>,
req: Request<Body>,
) -> Result<Response<Body>, Error> {
let param = parse_json_body::<ApplyRevertLayoutRequest>(req).await?;
let layout = garage.system.get_cluster_layout();
let layout = layout.apply_staged_changes(Some(param.version))?;
garage.system.update_cluster_layout(&layout).await?;
Ok(Response::builder()
.status(StatusCode::OK)
.body(Body::empty())?)
}
pub async fn handle_revert_cluster_layout(
garage: &Arc<Garage>,
req: Request<Body>,
) -> Result<Response<Body>, Error> {
let param = parse_json_body::<ApplyRevertLayoutRequest>(req).await?;
let layout = garage.system.get_cluster_layout();
let layout = layout.revert_staged_changes(Some(param.version))?;
garage.system.update_cluster_layout(&layout).await?;
Ok(Response::builder()
.status(StatusCode::OK)
.body(Body::empty())?)
}
type UpdateClusterLayoutRequest = HashMap<String, Option<NodeRole>>;
#[derive(Deserialize)]
struct ApplyRevertLayoutRequest {
version: u64,
}

View file

@ -1,97 +0,0 @@
use err_derive::Error;
use hyper::header::HeaderValue;
use hyper::{Body, HeaderMap, StatusCode};
pub use garage_model::helper::error::Error as HelperError;
use crate::common_error::CommonError;
pub use crate::common_error::{CommonErrorDerivative, OkOrBadRequest, OkOrInternalError};
use crate::generic_server::ApiError;
use crate::helpers::CustomApiErrorBody;
/// Errors of this crate
#[derive(Debug, Error)]
pub enum Error {
#[error(display = "{}", _0)]
/// Error from common error
Common(CommonError),
// Category: cannot process
/// The API access key does not exist
#[error(display = "Access key not found: {}", _0)]
NoSuchAccessKey(String),
/// In Import key, the key already exists
#[error(
display = "Key {} already exists in data store. Even if it is deleted, we can't let you create a new key with the same ID. Sorry.",
_0
)]
KeyAlreadyExists(String),
}
impl<T> From<T> for Error
where
CommonError: From<T>,
{
fn from(err: T) -> Self {
Error::Common(CommonError::from(err))
}
}
impl CommonErrorDerivative for Error {}
impl From<HelperError> for Error {
fn from(err: HelperError) -> Self {
match err {
HelperError::Internal(i) => Self::Common(CommonError::InternalError(i)),
HelperError::BadRequest(b) => Self::Common(CommonError::BadRequest(b)),
HelperError::InvalidBucketName(n) => Self::Common(CommonError::InvalidBucketName(n)),
HelperError::NoSuchBucket(n) => Self::Common(CommonError::NoSuchBucket(n)),
HelperError::NoSuchAccessKey(n) => Self::NoSuchAccessKey(n),
}
}
}
impl Error {
fn code(&self) -> &'static str {
match self {
Error::Common(c) => c.aws_code(),
Error::NoSuchAccessKey(_) => "NoSuchAccessKey",
Error::KeyAlreadyExists(_) => "KeyAlreadyExists",
}
}
}
impl ApiError for Error {
/// Get the HTTP status code that best represents the meaning of the error for the client
fn http_status_code(&self) -> StatusCode {
match self {
Error::Common(c) => c.http_status_code(),
Error::NoSuchAccessKey(_) => StatusCode::NOT_FOUND,
Error::KeyAlreadyExists(_) => StatusCode::CONFLICT,
}
}
fn add_http_headers(&self, header_map: &mut HeaderMap<HeaderValue>) {
use hyper::header;
header_map.append(header::CONTENT_TYPE, "application/json".parse().unwrap());
}
fn http_body(&self, garage_region: &str, path: &str) -> Body {
let error = CustomApiErrorBody {
code: self.code().to_string(),
message: format!("{}", self),
path: path.to_string(),
region: garage_region.to_string(),
};
Body::from(serde_json::to_string_pretty(&error).unwrap_or_else(|_| {
r#"
{
"code": "InternalError",
"message": "JSON encoding of error failed"
}
"#
.into()
}))
}
}

View file

@ -1,256 +0,0 @@
use std::collections::HashMap;
use std::sync::Arc;
use hyper::{Body, Request, Response, StatusCode};
use serde::{Deserialize, Serialize};
use garage_table::*;
use garage_model::garage::Garage;
use garage_model::key_table::*;
use crate::admin::error::*;
use crate::helpers::{json_ok_response, parse_json_body};
pub async fn handle_list_keys(garage: &Arc<Garage>) -> Result<Response<Body>, Error> {
let res = garage
.key_table
.get_range(
&EmptyKey,
None,
Some(KeyFilter::Deleted(DeletedFilter::NotDeleted)),
10000,
EnumerationOrder::Forward,
)
.await?
.iter()
.map(|k| ListKeyResultItem {
id: k.key_id.to_string(),
name: k.params().unwrap().name.get().clone(),
})
.collect::<Vec<_>>();
Ok(json_ok_response(&res)?)
}
#[derive(Serialize)]
struct ListKeyResultItem {
id: String,
name: String,
}
pub async fn handle_get_key_info(
garage: &Arc<Garage>,
id: Option<String>,
search: Option<String>,
) -> Result<Response<Body>, Error> {
let key = if let Some(id) = id {
garage.key_helper().get_existing_key(&id).await?
} else if let Some(search) = search {
garage
.key_helper()
.get_existing_matching_key(&search)
.await?
} else {
unreachable!();
};
key_info_results(garage, key).await
}
pub async fn handle_create_key(
garage: &Arc<Garage>,
req: Request<Body>,
) -> Result<Response<Body>, Error> {
let req = parse_json_body::<CreateKeyRequest>(req).await?;
let key = Key::new(&req.name);
garage.key_table.insert(&key).await?;
key_info_results(garage, key).await
}
#[derive(Deserialize)]
struct CreateKeyRequest {
name: String,
}
pub async fn handle_import_key(
garage: &Arc<Garage>,
req: Request<Body>,
) -> Result<Response<Body>, Error> {
let req = parse_json_body::<ImportKeyRequest>(req).await?;
let prev_key = garage.key_table.get(&EmptyKey, &req.access_key_id).await?;
if prev_key.is_some() {
return Err(Error::KeyAlreadyExists(req.access_key_id.to_string()));
}
let imported_key = Key::import(&req.access_key_id, &req.secret_access_key, &req.name);
garage.key_table.insert(&imported_key).await?;
key_info_results(garage, imported_key).await
}
#[derive(Deserialize)]
#[serde(rename_all = "camelCase")]
struct ImportKeyRequest {
access_key_id: String,
secret_access_key: String,
name: String,
}
pub async fn handle_update_key(
garage: &Arc<Garage>,
id: String,
req: Request<Body>,
) -> Result<Response<Body>, Error> {
let req = parse_json_body::<UpdateKeyRequest>(req).await?;
let mut key = garage.key_helper().get_existing_key(&id).await?;
let key_state = key.state.as_option_mut().unwrap();
if let Some(new_name) = req.name {
key_state.name.update(new_name);
}
if let Some(allow) = req.allow {
if allow.create_bucket {
key_state.allow_create_bucket.update(true);
}
}
if let Some(deny) = req.deny {
if deny.create_bucket {
key_state.allow_create_bucket.update(false);
}
}
garage.key_table.insert(&key).await?;
key_info_results(garage, key).await
}
#[derive(Deserialize)]
struct UpdateKeyRequest {
name: Option<String>,
allow: Option<KeyPerm>,
deny: Option<KeyPerm>,
}
pub async fn handle_delete_key(garage: &Arc<Garage>, id: String) -> Result<Response<Body>, Error> {
let mut key = garage.key_helper().get_existing_key(&id).await?;
key.state.as_option().unwrap();
garage.key_helper().delete_key(&mut key).await?;
Ok(Response::builder()
.status(StatusCode::NO_CONTENT)
.body(Body::empty())?)
}
async fn key_info_results(garage: &Arc<Garage>, key: Key) -> Result<Response<Body>, Error> {
let mut relevant_buckets = HashMap::new();
let key_state = key.state.as_option().unwrap();
for id in key_state
.authorized_buckets
.items()
.iter()
.map(|(id, _)| id)
.chain(
key_state
.local_aliases
.items()
.iter()
.filter_map(|(_, _, v)| v.as_ref()),
) {
if !relevant_buckets.contains_key(id) {
if let Some(b) = garage.bucket_table.get(&EmptyKey, id).await? {
if b.state.as_option().is_some() {
relevant_buckets.insert(*id, b);
}
}
}
}
let res = GetKeyInfoResult {
name: key_state.name.get().clone(),
access_key_id: key.key_id.clone(),
secret_access_key: key_state.secret_key.clone(),
permissions: KeyPerm {
create_bucket: *key_state.allow_create_bucket.get(),
},
buckets: relevant_buckets
.into_iter()
.map(|(_, bucket)| {
let state = bucket.state.as_option().unwrap();
KeyInfoBucketResult {
id: hex::encode(bucket.id),
global_aliases: state
.aliases
.items()
.iter()
.filter(|(_, _, a)| *a)
.map(|(n, _, _)| n.to_string())
.collect::<Vec<_>>(),
local_aliases: state
.local_aliases
.items()
.iter()
.filter(|((k, _), _, a)| *a && *k == key.key_id)
.map(|((_, n), _, _)| n.to_string())
.collect::<Vec<_>>(),
permissions: key_state
.authorized_buckets
.get(&bucket.id)
.map(|p| ApiBucketKeyPerm {
read: p.allow_read,
write: p.allow_write,
owner: p.allow_owner,
})
.unwrap_or_default(),
}
})
.collect::<Vec<_>>(),
};
Ok(json_ok_response(&res)?)
}
#[derive(Serialize)]
#[serde(rename_all = "camelCase")]
struct GetKeyInfoResult {
name: String,
access_key_id: String,
secret_access_key: String,
permissions: KeyPerm,
buckets: Vec<KeyInfoBucketResult>,
}
#[derive(Serialize, Deserialize)]
#[serde(rename_all = "camelCase")]
struct KeyPerm {
#[serde(default)]
create_bucket: bool,
}
#[derive(Serialize)]
#[serde(rename_all = "camelCase")]
struct KeyInfoBucketResult {
id: String,
global_aliases: Vec<String>,
local_aliases: Vec<String>,
permissions: ApiBucketKeyPerm,
}
#[derive(Serialize, Deserialize, Default)]
pub(crate) struct ApiBucketKeyPerm {
#[serde(default)]
pub(crate) read: bool,
#[serde(default)]
pub(crate) write: bool,
#[serde(default)]
pub(crate) owner: bool,
}

View file

@ -1,7 +0,0 @@
pub mod api_server;
mod error;
mod router;
mod bucket;
mod cluster;
mod key;

View file

@ -1,145 +0,0 @@
use std::borrow::Cow;
use hyper::{Method, Request};
use crate::admin::error::*;
use crate::router_macros::*;
pub enum Authorization {
MetricsToken,
AdminToken,
}
router_match! {@func
/// List of all Admin API endpoints.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum Endpoint {
Options,
Metrics,
GetClusterStatus,
ConnectClusterNodes,
// Layout
GetClusterLayout,
UpdateClusterLayout,
ApplyClusterLayout,
RevertClusterLayout,
// Keys
ListKeys,
CreateKey,
ImportKey,
GetKeyInfo {
id: Option<String>,
search: Option<String>,
},
DeleteKey {
id: String,
},
UpdateKey {
id: String,
},
// Buckets
ListBuckets,
CreateBucket,
GetBucketInfo {
id: Option<String>,
global_alias: Option<String>,
},
DeleteBucket {
id: String,
},
UpdateBucket {
id: String,
},
// Bucket-Key Permissions
BucketAllowKey,
BucketDenyKey,
// Bucket aliases
GlobalAliasBucket {
id: String,
alias: String,
},
GlobalUnaliasBucket {
id: String,
alias: String,
},
LocalAliasBucket {
id: String,
access_key_id: String,
alias: String,
},
LocalUnaliasBucket {
id: String,
access_key_id: String,
alias: String,
},
}}
impl Endpoint {
/// Determine which S3 endpoint a request is for using the request, and a bucket which was
/// possibly extracted from the Host header.
/// Returns Self plus bucket name, if endpoint is not Endpoint::ListBuckets
pub fn from_request<T>(req: &Request<T>) -> Result<Self, Error> {
let uri = req.uri();
let path = uri.path();
let query = uri.query();
let mut query = QueryParameters::from_query(query.unwrap_or_default())?;
let res = router_match!(@gen_path_parser (req.method(), path, query) [
OPTIONS _ => Options,
GET "/metrics" => Metrics,
GET "/v0/status" => GetClusterStatus,
POST "/v0/connect" => ConnectClusterNodes,
// Layout endpoints
GET "/v0/layout" => GetClusterLayout,
POST "/v0/layout" => UpdateClusterLayout,
POST "/v0/layout/apply" => ApplyClusterLayout,
POST "/v0/layout/revert" => RevertClusterLayout,
// API key endpoints
GET "/v0/key" if id => GetKeyInfo (query_opt::id, query_opt::search),
GET "/v0/key" if search => GetKeyInfo (query_opt::id, query_opt::search),
POST "/v0/key" if id => UpdateKey (query::id),
POST "/v0/key" => CreateKey,
POST "/v0/key/import" => ImportKey,
DELETE "/v0/key" if id => DeleteKey (query::id),
GET "/v0/key" => ListKeys,
// Bucket endpoints
GET "/v0/bucket" if id => GetBucketInfo (query_opt::id, query_opt::global_alias),
GET "/v0/bucket" if global_alias => GetBucketInfo (query_opt::id, query_opt::global_alias),
GET "/v0/bucket" => ListBuckets,
POST "/v0/bucket" => CreateBucket,
DELETE "/v0/bucket" if id => DeleteBucket (query::id),
PUT "/v0/bucket" if id => UpdateBucket (query::id),
// Bucket-key permissions
POST "/v0/bucket/allow" => BucketAllowKey,
POST "/v0/bucket/deny" => BucketDenyKey,
// Bucket aliases
PUT "/v0/bucket/alias/global" => GlobalAliasBucket (query::id, query::alias),
DELETE "/v0/bucket/alias/global" => GlobalUnaliasBucket (query::id, query::alias),
PUT "/v0/bucket/alias/local" => LocalAliasBucket (query::id, query::access_key_id, query::alias),
DELETE "/v0/bucket/alias/local" => LocalUnaliasBucket (query::id, query::access_key_id, query::alias),
]);
if let Some(message) = query.nonempty_message() {
debug!("Unused query parameter: {}", message)
}
Ok(res)
}
/// Get the kind of authorization which is required to perform the operation.
pub fn authorization_type(&self) -> Authorization {
match self {
Self::Metrics => Authorization::MetricsToken,
_ => Authorization::AdminToken,
}
}
}
generateQueryParameters! {
"id" => id,
"search" => search,
"globalAlias" => global_alias,
"alias" => alias,
"accessKeyId" => access_key_id
}

489
src/api/api_server.rs Normal file
View file

@ -0,0 +1,489 @@
use std::net::SocketAddr;
use std::sync::Arc;
use futures::future::Future;
use hyper::header;
use hyper::server::conn::AddrStream;
use hyper::service::{make_service_fn, service_fn};
use hyper::{Body, Method, Request, Response, Server};
use garage_util::data::*;
use garage_util::error::Error as GarageError;
use garage_model::garage::Garage;
use garage_model::key_table::Key;
use garage_table::util::*;
use crate::error::*;
use crate::signature::payload::check_payload_signature;
use crate::helpers::*;
use crate::s3_bucket::*;
use crate::s3_copy::*;
use crate::s3_cors::*;
use crate::s3_delete::*;
use crate::s3_get::*;
use crate::s3_list::*;
use crate::s3_post_object::handle_post_object;
use crate::s3_put::*;
use crate::s3_router::{Authorization, Endpoint};
use crate::s3_website::*;
/// Run the S3 API server
pub async fn run_api_server(
garage: Arc<Garage>,
shutdown_signal: impl Future<Output = ()>,
) -> Result<(), GarageError> {
let addr = &garage.config.s3_api.api_bind_addr;
let service = make_service_fn(|conn: &AddrStream| {
let garage = garage.clone();
let client_addr = conn.remote_addr();
async move {
Ok::<_, GarageError>(service_fn(move |req: Request<Body>| {
let garage = garage.clone();
handler(garage, req, client_addr)
}))
}
});
let server = Server::bind(addr).serve(service);
let graceful = server.with_graceful_shutdown(shutdown_signal);
info!("API server listening on http://{}", addr);
graceful.await?;
Ok(())
}
async fn handler(
garage: Arc<Garage>,
req: Request<Body>,
addr: SocketAddr,
) -> Result<Response<Body>, GarageError> {
let uri = req.uri().clone();
info!("{} {} {}", addr, req.method(), uri);
debug!("{:?}", req);
match handler_inner(garage.clone(), req).await {
Ok(x) => {
debug!("{} {:?}", x.status(), x.headers());
Ok(x)
}
Err(e) => {
let body: Body = Body::from(e.aws_xml(&garage.config.s3_api.s3_region, uri.path()));
let mut http_error_builder = Response::builder()
.status(e.http_status_code())
.header("Content-Type", "application/xml");
if let Some(header_map) = http_error_builder.headers_mut() {
e.add_headers(header_map)
}
let http_error = http_error_builder.body(body)?;
if e.http_status_code().is_server_error() {
warn!("Response: error {}, {}", e.http_status_code(), e);
} else {
info!("Response: error {}, {}", e.http_status_code(), e);
}
Ok(http_error)
}
}
}
async fn handler_inner(garage: Arc<Garage>, req: Request<Body>) -> Result<Response<Body>, Error> {
let authority = req
.headers()
.get(header::HOST)
.ok_or_else(|| Error::BadRequest("HOST header required".to_owned()))?
.to_str()?;
let host = authority_to_host(authority)?;
let bucket_name = garage
.config
.s3_api
.root_domain
.as_ref()
.and_then(|root_domain| host_to_bucket(&host, root_domain));
let (endpoint, bucket_name) = Endpoint::from_request(&req, bucket_name.map(ToOwned::to_owned))?;
debug!("Endpoint: {:?}", endpoint);
if let Endpoint::PostObject {} = endpoint {
return handle_post_object(garage, req, bucket_name.unwrap()).await;
}
let (api_key, content_sha256) = check_payload_signature(&garage, &req).await?;
let api_key = api_key.ok_or_else(|| {
Error::Forbidden("Garage does not support anonymous access yet".to_string())
})?;
let bucket_name = match bucket_name {
None => return handle_request_without_bucket(garage, req, api_key, endpoint).await,
Some(bucket) => bucket.to_string(),
};
// Special code path for CreateBucket API endpoint
if let Endpoint::CreateBucket {} = endpoint {
return handle_create_bucket(&garage, req, content_sha256, api_key, bucket_name).await;
}
let bucket_id = resolve_bucket(&garage, &bucket_name, &api_key).await?;
let bucket = garage
.bucket_table
.get(&EmptyKey, &bucket_id)
.await?
.filter(|b| !b.state.is_deleted())
.ok_or(Error::NoSuchBucket)?;
let allowed = match endpoint.authorization_type() {
Authorization::Read => api_key.allow_read(&bucket_id),
Authorization::Write => api_key.allow_write(&bucket_id),
Authorization::Owner => api_key.allow_owner(&bucket_id),
_ => unreachable!(),
};
if !allowed {
return Err(Error::Forbidden(
"Operation is not allowed for this key.".to_string(),
));
}
// Look up what CORS rule might apply to response.
// Requests for methods different than GET, HEAD or POST
// are always preflighted, i.e. the browser should make
// an OPTIONS call before to check it is allowed
let matching_cors_rule = match *req.method() {
Method::GET | Method::HEAD | Method::POST => find_matching_cors_rule(&bucket, &req)?,
_ => None,
};
let resp = match endpoint {
Endpoint::Options => handle_options(&req, &bucket).await,
Endpoint::HeadObject {
key, part_number, ..
} => handle_head(garage, &req, bucket_id, &key, part_number).await,
Endpoint::GetObject {
key, part_number, ..
} => handle_get(garage, &req, bucket_id, &key, part_number).await,
Endpoint::UploadPart {
key,
part_number,
upload_id,
} => {
handle_put_part(
garage,
req,
bucket_id,
&key,
part_number,
&upload_id,
content_sha256,
)
.await
}
Endpoint::CopyObject { key } => handle_copy(garage, &api_key, &req, bucket_id, &key).await,
Endpoint::UploadPartCopy {
key,
part_number,
upload_id,
} => {
handle_upload_part_copy(
garage,
&api_key,
&req,
bucket_id,
&key,
part_number,
&upload_id,
)
.await
}
Endpoint::PutObject { key } => {
handle_put(garage, req, bucket_id, &key, &api_key, content_sha256).await
}
Endpoint::AbortMultipartUpload { key, upload_id } => {
handle_abort_multipart_upload(garage, bucket_id, &key, &upload_id).await
}
Endpoint::DeleteObject { key, .. } => handle_delete(garage, bucket_id, &key).await,
Endpoint::CreateMultipartUpload { key } => {
handle_create_multipart_upload(garage, &req, &bucket_name, bucket_id, &key).await
}
Endpoint::CompleteMultipartUpload { key, upload_id } => {
handle_complete_multipart_upload(
garage,
req,
&bucket_name,
bucket_id,
&key,
&upload_id,
content_sha256,
)
.await
}
Endpoint::CreateBucket {} => unreachable!(),
Endpoint::HeadBucket {} => {
let empty_body: Body = Body::from(vec![]);
let response = Response::builder().body(empty_body).unwrap();
Ok(response)
}
Endpoint::DeleteBucket {} => {
handle_delete_bucket(&garage, bucket_id, bucket_name, api_key).await
}
Endpoint::GetBucketLocation {} => handle_get_bucket_location(garage),
Endpoint::GetBucketVersioning {} => handle_get_bucket_versioning(),
Endpoint::ListObjects {
delimiter,
encoding_type,
marker,
max_keys,
prefix,
} => {
handle_list(
garage,
&ListObjectsQuery {
common: ListQueryCommon {
bucket_name,
bucket_id,
delimiter: delimiter.map(|d| d.to_string()),
page_size: max_keys.map(|p| p.clamp(1, 1000)).unwrap_or(1000),
prefix: prefix.unwrap_or_default(),
urlencode_resp: encoding_type.map(|e| e == "url").unwrap_or(false),
},
is_v2: false,
marker,
continuation_token: None,
start_after: None,
},
)
.await
}
Endpoint::ListObjectsV2 {
delimiter,
encoding_type,
max_keys,
prefix,
continuation_token,
start_after,
list_type,
..
} => {
if list_type == "2" {
handle_list(
garage,
&ListObjectsQuery {
common: ListQueryCommon {
bucket_name,
bucket_id,
delimiter: delimiter.map(|d| d.to_string()),
page_size: max_keys.map(|p| p.clamp(1, 1000)).unwrap_or(1000),
urlencode_resp: encoding_type.map(|e| e == "url").unwrap_or(false),
prefix: prefix.unwrap_or_default(),
},
is_v2: true,
marker: None,
continuation_token,
start_after,
},
)
.await
} else {
Err(Error::BadRequest(format!(
"Invalid endpoint: list-type={}",
list_type
)))
}
}
Endpoint::ListMultipartUploads {
delimiter,
encoding_type,
key_marker,
max_uploads,
prefix,
upload_id_marker,
} => {
handle_list_multipart_upload(
garage,
&ListMultipartUploadsQuery {
common: ListQueryCommon {
bucket_name,
bucket_id,
delimiter: delimiter.map(|d| d.to_string()),
page_size: max_uploads.map(|p| p.clamp(1, 1000)).unwrap_or(1000),
prefix: prefix.unwrap_or_default(),
urlencode_resp: encoding_type.map(|e| e == "url").unwrap_or(false),
},
key_marker,
upload_id_marker,
},
)
.await
}
Endpoint::ListParts {
key,
max_parts,
part_number_marker,
upload_id,
} => {
handle_list_parts(
garage,
&ListPartsQuery {
bucket_name,
bucket_id,
key,
upload_id,
part_number_marker: part_number_marker.map(|p| p.clamp(1, 10000)),
max_parts: max_parts.map(|p| p.clamp(1, 1000)).unwrap_or(1000),
},
)
.await
}
Endpoint::DeleteObjects {} => {
handle_delete_objects(garage, bucket_id, req, content_sha256).await
}
Endpoint::GetBucketWebsite {} => handle_get_website(&bucket).await,
Endpoint::PutBucketWebsite {} => {
handle_put_website(garage, bucket_id, req, content_sha256).await
}
Endpoint::DeleteBucketWebsite {} => handle_delete_website(garage, bucket_id).await,
Endpoint::GetBucketCors {} => handle_get_cors(&bucket).await,
Endpoint::PutBucketCors {} => handle_put_cors(garage, bucket_id, req, content_sha256).await,
Endpoint::DeleteBucketCors {} => handle_delete_cors(garage, bucket_id).await,
endpoint => Err(Error::NotImplemented(endpoint.name().to_owned())),
};
// If request was a success and we have a CORS rule that applies to it,
// add the corresponding CORS headers to the response
let mut resp_ok = resp?;
if let Some(rule) = matching_cors_rule {
add_cors_headers(&mut resp_ok, rule)
.ok_or_internal_error("Invalid bucket CORS configuration")?;
}
Ok(resp_ok)
}
async fn handle_request_without_bucket(
garage: Arc<Garage>,
_req: Request<Body>,
api_key: Key,
endpoint: Endpoint,
) -> Result<Response<Body>, Error> {
match endpoint {
Endpoint::ListBuckets => handle_list_buckets(&garage, &api_key).await,
endpoint => Err(Error::NotImplemented(endpoint.name().to_owned())),
}
}
#[allow(clippy::ptr_arg)]
pub async fn resolve_bucket(
garage: &Garage,
bucket_name: &String,
api_key: &Key,
) -> Result<Uuid, Error> {
let api_key_params = api_key
.state
.as_option()
.ok_or_internal_error("Key should not be deleted at this point")?;
if let Some(Some(bucket_id)) = api_key_params.local_aliases.get(bucket_name) {
Ok(*bucket_id)
} else {
Ok(garage
.bucket_helper()
.resolve_global_bucket_name(bucket_name)
.await?
.ok_or(Error::NoSuchBucket)?)
}
}
/// Extract the bucket name and the key name from an HTTP path and possibly a bucket provided in
/// the host header of the request
///
/// S3 internally manages only buckets and keys. This function splits
/// an HTTP path to get the corresponding bucket name and key.
pub fn parse_bucket_key<'a>(
path: &'a str,
host_bucket: Option<&'a str>,
) -> Result<(&'a str, Option<&'a str>), Error> {
let path = path.trim_start_matches('/');
if let Some(bucket) = host_bucket {
if !path.is_empty() {
return Ok((bucket, Some(path)));
} else {
return Ok((bucket, None));
}
}
let (bucket, key) = match path.find('/') {
Some(i) => {
let key = &path[i + 1..];
if !key.is_empty() {
(&path[..i], Some(key))
} else {
(&path[..i], None)
}
}
None => (path, None),
};
if bucket.is_empty() {
return Err(Error::BadRequest("No bucket specified".to_string()));
}
Ok((bucket, key))
}
#[cfg(test)]
mod tests {
use super::*;
#[test]
fn parse_bucket_containing_a_key() -> Result<(), Error> {
let (bucket, key) = parse_bucket_key("/my_bucket/a/super/file.jpg", None)?;
assert_eq!(bucket, "my_bucket");
assert_eq!(key.expect("key must be set"), "a/super/file.jpg");
Ok(())
}
#[test]
fn parse_bucket_containing_no_key() -> Result<(), Error> {
let (bucket, key) = parse_bucket_key("/my_bucket/", None)?;
assert_eq!(bucket, "my_bucket");
assert!(key.is_none());
let (bucket, key) = parse_bucket_key("/my_bucket", None)?;
assert_eq!(bucket, "my_bucket");
assert!(key.is_none());
Ok(())
}
#[test]
fn parse_bucket_containing_no_bucket() {
let parsed = parse_bucket_key("", None);
assert!(parsed.is_err());
let parsed = parse_bucket_key("/", None);
assert!(parsed.is_err());
let parsed = parse_bucket_key("////", None);
assert!(parsed.is_err());
}
#[test]
fn parse_bucket_with_vhost_and_key() -> Result<(), Error> {
let (bucket, key) = parse_bucket_key("/a/super/file.jpg", Some("my-bucket"))?;
assert_eq!(bucket, "my-bucket");
assert_eq!(key.expect("key must be set"), "a/super/file.jpg");
Ok(())
}
#[test]
fn parse_bucket_with_vhost_no_key() -> Result<(), Error> {
let (bucket, key) = parse_bucket_key("", Some("my-bucket"))?;
assert_eq!(bucket, "my-bucket");
assert!(key.is_none());
let (bucket, key) = parse_bucket_key("/", Some("my-bucket"))?;
assert_eq!(bucket, "my-bucket");
assert!(key.is_none());
Ok(())
}
}

View file

@ -1,177 +0,0 @@
use err_derive::Error;
use hyper::StatusCode;
use garage_util::error::Error as GarageError;
/// Errors of this crate
#[derive(Debug, Error)]
pub enum CommonError {
// ---- INTERNAL ERRORS ----
/// Error related to deeper parts of Garage
#[error(display = "Internal error: {}", _0)]
InternalError(#[error(source)] GarageError),
/// Error related to Hyper
#[error(display = "Internal error (Hyper error): {}", _0)]
Hyper(#[error(source)] hyper::Error),
/// Error related to HTTP
#[error(display = "Internal error (HTTP error): {}", _0)]
Http(#[error(source)] http::Error),
// ---- GENERIC CLIENT ERRORS ----
/// Proper authentication was not provided
#[error(display = "Forbidden: {}", _0)]
Forbidden(String),
/// Generic bad request response with custom message
#[error(display = "Bad request: {}", _0)]
BadRequest(String),
// ---- SPECIFIC ERROR CONDITIONS ----
// These have to be error codes referenced in the S3 spec here:
// https://docs.aws.amazon.com/AmazonS3/latest/API/ErrorResponses.html#ErrorCodeList
/// The bucket requested don't exists
#[error(display = "Bucket not found: {}", _0)]
NoSuchBucket(String),
/// Tried to create a bucket that already exist
#[error(display = "Bucket already exists")]
BucketAlreadyExists,
/// Tried to delete a non-empty bucket
#[error(display = "Tried to delete a non-empty bucket")]
BucketNotEmpty,
// Category: bad request
/// Bucket name is not valid according to AWS S3 specs
#[error(display = "Invalid bucket name: {}", _0)]
InvalidBucketName(String),
}
impl CommonError {
pub fn http_status_code(&self) -> StatusCode {
match self {
CommonError::InternalError(
GarageError::Timeout
| GarageError::RemoteError(_)
| GarageError::Quorum(_, _, _, _),
) => StatusCode::SERVICE_UNAVAILABLE,
CommonError::InternalError(_) | CommonError::Hyper(_) | CommonError::Http(_) => {
StatusCode::INTERNAL_SERVER_ERROR
}
CommonError::BadRequest(_) => StatusCode::BAD_REQUEST,
CommonError::Forbidden(_) => StatusCode::FORBIDDEN,
CommonError::NoSuchBucket(_) => StatusCode::NOT_FOUND,
CommonError::BucketNotEmpty | CommonError::BucketAlreadyExists => StatusCode::CONFLICT,
CommonError::InvalidBucketName(_) => StatusCode::BAD_REQUEST,
}
}
pub fn aws_code(&self) -> &'static str {
match self {
CommonError::Forbidden(_) => "AccessDenied",
CommonError::InternalError(
GarageError::Timeout
| GarageError::RemoteError(_)
| GarageError::Quorum(_, _, _, _),
) => "ServiceUnavailable",
CommonError::InternalError(_) | CommonError::Hyper(_) | CommonError::Http(_) => {
"InternalError"
}
CommonError::BadRequest(_) => "InvalidRequest",
CommonError::NoSuchBucket(_) => "NoSuchBucket",
CommonError::BucketAlreadyExists => "BucketAlreadyExists",
CommonError::BucketNotEmpty => "BucketNotEmpty",
CommonError::InvalidBucketName(_) => "InvalidBucketName",
}
}
pub fn bad_request<M: ToString>(msg: M) -> Self {
CommonError::BadRequest(msg.to_string())
}
}
pub trait CommonErrorDerivative: From<CommonError> {
fn internal_error<M: ToString>(msg: M) -> Self {
Self::from(CommonError::InternalError(GarageError::Message(
msg.to_string(),
)))
}
fn bad_request<M: ToString>(msg: M) -> Self {
Self::from(CommonError::BadRequest(msg.to_string()))
}
fn forbidden<M: ToString>(msg: M) -> Self {
Self::from(CommonError::Forbidden(msg.to_string()))
}
}
/// Trait to map error to the Bad Request error code
pub trait OkOrBadRequest {
type S;
fn ok_or_bad_request<M: AsRef<str>>(self, reason: M) -> Result<Self::S, CommonError>;
}
impl<T, E> OkOrBadRequest for Result<T, E>
where
E: std::fmt::Display,
{
type S = T;
fn ok_or_bad_request<M: AsRef<str>>(self, reason: M) -> Result<T, CommonError> {
match self {
Ok(x) => Ok(x),
Err(e) => Err(CommonError::BadRequest(format!(
"{}: {}",
reason.as_ref(),
e
))),
}
}
}
impl<T> OkOrBadRequest for Option<T> {
type S = T;
fn ok_or_bad_request<M: AsRef<str>>(self, reason: M) -> Result<T, CommonError> {
match self {
Some(x) => Ok(x),
None => Err(CommonError::BadRequest(reason.as_ref().to_string())),
}
}
}
/// Trait to map an error to an Internal Error code
pub trait OkOrInternalError {
type S;
fn ok_or_internal_error<M: AsRef<str>>(self, reason: M) -> Result<Self::S, CommonError>;
}
impl<T, E> OkOrInternalError for Result<T, E>
where
E: std::fmt::Display,
{
type S = T;
fn ok_or_internal_error<M: AsRef<str>>(self, reason: M) -> Result<T, CommonError> {
match self {
Ok(x) => Ok(x),
Err(e) => Err(CommonError::InternalError(GarageError::Message(format!(
"{}: {}",
reason.as_ref(),
e
)))),
}
}
}
impl<T> OkOrInternalError for Option<T> {
type S = T;
fn ok_or_internal_error<M: AsRef<str>>(self, reason: M) -> Result<T, CommonError> {
match self {
Some(x) => Ok(x),
None => Err(CommonError::InternalError(GarageError::Message(
reason.as_ref().to_string(),
))),
}
}
}

View file

@ -2,24 +2,34 @@ use std::convert::TryInto;
use err_derive::Error; use err_derive::Error;
use hyper::header::HeaderValue; use hyper::header::HeaderValue;
use hyper::{Body, HeaderMap, StatusCode}; use hyper::{HeaderMap, StatusCode};
use garage_model::helper::error::Error as HelperError; use garage_model::helper::error::Error as HelperError;
use garage_util::error::Error as GarageError;
use crate::common_error::CommonError; use crate::s3_xml;
pub use crate::common_error::{CommonErrorDerivative, OkOrBadRequest, OkOrInternalError};
use crate::generic_server::ApiError;
use crate::s3::xml as s3_xml;
use crate::signature::error::Error as SignatureError;
/// Errors of this crate /// Errors of this crate
#[derive(Debug, Error)] #[derive(Debug, Error)]
pub enum Error { pub enum Error {
#[error(display = "{}", _0)] // Category: internal error
/// Error from common error /// Error related to deeper parts of Garage
Common(CommonError), #[error(display = "Internal error: {}", _0)]
InternalError(#[error(source)] GarageError),
/// Error related to Hyper
#[error(display = "Internal error (Hyper error): {}", _0)]
Hyper(#[error(source)] hyper::Error),
/// Error related to HTTP
#[error(display = "Internal error (HTTP error): {}", _0)]
Http(#[error(source)] http::Error),
// Category: cannot process // Category: cannot process
/// No proper api key was used, or the signature was invalid
#[error(display = "Forbidden: {}", _0)]
Forbidden(String),
/// Authorization Header Malformed /// Authorization Header Malformed
#[error(display = "Authorization header malformed, expected scope: {}", _0)] #[error(display = "Authorization header malformed, expected scope: {}", _0)]
AuthorizationHeaderMalformed(String), AuthorizationHeaderMalformed(String),
@ -28,10 +38,22 @@ pub enum Error {
#[error(display = "Key not found")] #[error(display = "Key not found")]
NoSuchKey, NoSuchKey,
/// The bucket requested don't exists
#[error(display = "Bucket not found")]
NoSuchBucket,
/// The multipart upload requested don't exists /// The multipart upload requested don't exists
#[error(display = "Upload not found")] #[error(display = "Upload not found")]
NoSuchUpload, NoSuchUpload,
/// Tried to create a bucket that already exist
#[error(display = "Bucket already exists")]
BucketAlreadyExists,
/// Tried to delete a non-empty bucket
#[error(display = "Tried to delete a non-empty bucket")]
BucketNotEmpty,
/// Precondition failed (e.g. x-amz-copy-source-if-match) /// Precondition failed (e.g. x-amz-copy-source-if-match)
#[error(display = "At least one of the preconditions you specified did not hold")] #[error(display = "At least one of the preconditions you specified did not hold")]
PreconditionFailed, PreconditionFailed,
@ -58,6 +80,10 @@ pub enum Error {
#[error(display = "Invalid UTF-8: {}", _0)] #[error(display = "Invalid UTF-8: {}", _0)]
InvalidUtf8String(#[error(source)] std::string::FromUtf8Error), InvalidUtf8String(#[error(source)] std::string::FromUtf8Error),
/// Some base64 encoded data was badly encoded
#[error(display = "Invalid base64: {}", _0)]
InvalidBase64(#[error(source)] base64::DecodeError),
/// The client sent invalid XML data /// The client sent invalid XML data
#[error(display = "Invalid XML: {}", _0)] #[error(display = "Invalid XML: {}", _0)]
InvalidXml(String), InvalidXml(String),
@ -70,34 +96,15 @@ pub enum Error {
#[error(display = "Invalid HTTP range: {:?}", _0)] #[error(display = "Invalid HTTP range: {:?}", _0)]
InvalidRange(#[error(from)] (http_range::HttpRangeParseError, u64)), InvalidRange(#[error(from)] (http_range::HttpRangeParseError, u64)),
/// The client sent an invalid request
#[error(display = "Bad request: {}", _0)]
BadRequest(String),
/// The client sent a request for an action not supported by garage /// The client sent a request for an action not supported by garage
#[error(display = "Unimplemented action: {}", _0)] #[error(display = "Unimplemented action: {}", _0)]
NotImplemented(String), NotImplemented(String),
} }
impl<T> From<T> for Error
where
CommonError: From<T>,
{
fn from(err: T) -> Self {
Error::Common(CommonError::from(err))
}
}
impl CommonErrorDerivative for Error {}
impl From<HelperError> for Error {
fn from(err: HelperError) -> Self {
match err {
HelperError::Internal(i) => Self::Common(CommonError::InternalError(i)),
HelperError::BadRequest(b) => Self::Common(CommonError::BadRequest(b)),
HelperError::InvalidBucketName(n) => Self::Common(CommonError::InvalidBucketName(n)),
HelperError::NoSuchBucket(n) => Self::Common(CommonError::NoSuchBucket(n)),
e => Self::bad_request(format!("{}", e)),
}
}
}
impl From<roxmltree::Error> for Error { impl From<roxmltree::Error> for Error {
fn from(err: roxmltree::Error) -> Self { fn from(err: roxmltree::Error) -> Self {
Self::InvalidXml(format!("{}", err)) Self::InvalidXml(format!("{}", err))
@ -110,71 +117,88 @@ impl From<quick_xml::de::DeError> for Error {
} }
} }
impl From<SignatureError> for Error { impl From<HelperError> for Error {
fn from(err: SignatureError) -> Self { fn from(err: HelperError) -> Self {
match err { match err {
SignatureError::Common(c) => Self::Common(c), HelperError::Internal(i) => Self::InternalError(i),
SignatureError::AuthorizationHeaderMalformed(c) => { HelperError::BadRequest(b) => Self::BadRequest(b),
Self::AuthorizationHeaderMalformed(c)
}
SignatureError::InvalidUtf8Str(i) => Self::InvalidUtf8Str(i),
SignatureError::InvalidHeader(h) => Self::InvalidHeader(h),
} }
} }
} }
impl From<multer::Error> for Error { impl From<multer::Error> for Error {
fn from(err: multer::Error) -> Self { fn from(err: multer::Error) -> Self {
Self::bad_request(err) Self::BadRequest(err.to_string())
} }
} }
impl Error { impl Error {
/// Get the HTTP status code that best represents the meaning of the error for the client
pub fn http_status_code(&self) -> StatusCode {
match self {
Error::NoSuchKey | Error::NoSuchBucket | Error::NoSuchUpload => StatusCode::NOT_FOUND,
Error::BucketNotEmpty | Error::BucketAlreadyExists => StatusCode::CONFLICT,
Error::PreconditionFailed => StatusCode::PRECONDITION_FAILED,
Error::Forbidden(_) => StatusCode::FORBIDDEN,
Error::InternalError(
GarageError::Timeout
| GarageError::RemoteError(_)
| GarageError::Quorum(_, _, _, _),
) => StatusCode::SERVICE_UNAVAILABLE,
Error::InternalError(_) | Error::Hyper(_) | Error::Http(_) => {
StatusCode::INTERNAL_SERVER_ERROR
}
Error::InvalidRange(_) => StatusCode::RANGE_NOT_SATISFIABLE,
Error::NotImplemented(_) => StatusCode::NOT_IMPLEMENTED,
_ => StatusCode::BAD_REQUEST,
}
}
pub fn aws_code(&self) -> &'static str { pub fn aws_code(&self) -> &'static str {
match self { match self {
Error::Common(c) => c.aws_code(),
Error::NoSuchKey => "NoSuchKey", Error::NoSuchKey => "NoSuchKey",
Error::NoSuchBucket => "NoSuchBucket",
Error::NoSuchUpload => "NoSuchUpload", Error::NoSuchUpload => "NoSuchUpload",
Error::BucketAlreadyExists => "BucketAlreadyExists",
Error::BucketNotEmpty => "BucketNotEmpty",
Error::PreconditionFailed => "PreconditionFailed", Error::PreconditionFailed => "PreconditionFailed",
Error::InvalidPart => "InvalidPart", Error::InvalidPart => "InvalidPart",
Error::InvalidPartOrder => "InvalidPartOrder", Error::InvalidPartOrder => "InvalidPartOrder",
Error::EntityTooSmall => "EntityTooSmall", Error::EntityTooSmall => "EntityTooSmall",
Error::Forbidden(_) => "AccessDenied",
Error::AuthorizationHeaderMalformed(_) => "AuthorizationHeaderMalformed", Error::AuthorizationHeaderMalformed(_) => "AuthorizationHeaderMalformed",
Error::NotImplemented(_) => "NotImplemented", Error::NotImplemented(_) => "NotImplemented",
Error::InvalidXml(_) => "MalformedXML", Error::InternalError(
Error::InvalidRange(_) => "InvalidRange", GarageError::Timeout
Error::InvalidUtf8Str(_) | Error::InvalidUtf8String(_) | Error::InvalidHeader(_) => { | GarageError::RemoteError(_)
"InvalidRequest" | GarageError::Quorum(_, _, _, _),
} ) => "ServiceUnavailable",
} Error::InternalError(_) | Error::Hyper(_) | Error::Http(_) => "InternalError",
_ => "InvalidRequest",
} }
} }
impl ApiError for Error { pub fn aws_xml(&self, garage_region: &str, path: &str) -> String {
/// Get the HTTP status code that best represents the meaning of the error for the client let error = s3_xml::Error {
fn http_status_code(&self) -> StatusCode { code: s3_xml::Value(self.aws_code().to_string()),
match self { message: s3_xml::Value(format!("{}", self)),
Error::Common(c) => c.http_status_code(), resource: Some(s3_xml::Value(path.to_string())),
Error::NoSuchKey | Error::NoSuchUpload => StatusCode::NOT_FOUND, region: Some(s3_xml::Value(garage_region.to_string())),
Error::PreconditionFailed => StatusCode::PRECONDITION_FAILED, };
Error::InvalidRange(_) => StatusCode::RANGE_NOT_SATISFIABLE, s3_xml::to_xml_with_header(&error).unwrap_or_else(|_| {
Error::NotImplemented(_) => StatusCode::NOT_IMPLEMENTED, r#"
Error::AuthorizationHeaderMalformed(_) <?xml version="1.0" encoding="UTF-8"?>
| Error::InvalidPart <Error>
| Error::InvalidPartOrder <Code>InternalError</Code>
| Error::EntityTooSmall <Message>XML encoding of error failed</Message>
| Error::InvalidXml(_) </Error>
| Error::InvalidUtf8Str(_) "#
| Error::InvalidUtf8String(_) .into()
| Error::InvalidHeader(_) => StatusCode::BAD_REQUEST, })
}
} }
fn add_http_headers(&self, header_map: &mut HeaderMap<HeaderValue>) { pub fn add_headers(&self, header_map: &mut HeaderMap<HeaderValue>) {
use hyper::header; use hyper::header;
header_map.append(header::CONTENT_TYPE, "application/xml".parse().unwrap());
#[allow(clippy::single_match)] #[allow(clippy::single_match)]
match self { match self {
Error::InvalidRange((_, len)) => { Error::InvalidRange((_, len)) => {
@ -188,23 +212,68 @@ impl ApiError for Error {
_ => (), _ => (),
} }
} }
}
fn http_body(&self, garage_region: &str, path: &str) -> Body { /// Trait to map error to the Bad Request error code
let error = s3_xml::Error { pub trait OkOrBadRequest {
code: s3_xml::Value(self.aws_code().to_string()), type S;
message: s3_xml::Value(format!("{}", self)), fn ok_or_bad_request<M: AsRef<str>>(self, reason: M) -> Result<Self::S, Error>;
resource: Some(s3_xml::Value(path.to_string())), }
region: Some(s3_xml::Value(garage_region.to_string())),
}; impl<T, E> OkOrBadRequest for Result<T, E>
Body::from(s3_xml::to_xml_with_header(&error).unwrap_or_else(|_| { where
r#" E: std::fmt::Display,
<?xml version="1.0" encoding="UTF-8"?> {
<Error> type S = T;
<Code>InternalError</Code> fn ok_or_bad_request<M: AsRef<str>>(self, reason: M) -> Result<T, Error> {
<Message>XML encoding of error failed</Message> match self {
</Error> Ok(x) => Ok(x),
"# Err(e) => Err(Error::BadRequest(format!("{}: {}", reason.as_ref(), e))),
.into() }
})) }
}
impl<T> OkOrBadRequest for Option<T> {
type S = T;
fn ok_or_bad_request<M: AsRef<str>>(self, reason: M) -> Result<T, Error> {
match self {
Some(x) => Ok(x),
None => Err(Error::BadRequest(reason.as_ref().to_string())),
}
}
}
/// Trait to map an error to an Internal Error code
pub trait OkOrInternalError {
type S;
fn ok_or_internal_error<M: AsRef<str>>(self, reason: M) -> Result<Self::S, Error>;
}
impl<T, E> OkOrInternalError for Result<T, E>
where
E: std::fmt::Display,
{
type S = T;
fn ok_or_internal_error<M: AsRef<str>>(self, reason: M) -> Result<T, Error> {
match self {
Ok(x) => Ok(x),
Err(e) => Err(Error::InternalError(GarageError::Message(format!(
"{}: {}",
reason.as_ref(),
e
)))),
}
}
}
impl<T> OkOrInternalError for Option<T> {
type S = T;
fn ok_or_internal_error<M: AsRef<str>>(self, reason: M) -> Result<T, Error> {
match self {
Some(x) => Ok(x),
None => Err(Error::InternalError(GarageError::Message(
reason.as_ref().to_string(),
))),
}
} }
} }

View file

@ -1,207 +0,0 @@
use std::net::SocketAddr;
use std::sync::Arc;
use async_trait::async_trait;
use futures::future::Future;
use hyper::header::HeaderValue;
use hyper::server::conn::AddrStream;
use hyper::service::{make_service_fn, service_fn};
use hyper::{Body, Request, Response, Server};
use hyper::{HeaderMap, StatusCode};
use opentelemetry::{
global,
metrics::{Counter, ValueRecorder},
trace::{FutureExt, SpanRef, TraceContextExt, Tracer},
Context, KeyValue,
};
use garage_util::error::Error as GarageError;
use garage_util::metrics::{gen_trace_id, RecordDuration};
pub(crate) trait ApiEndpoint: Send + Sync + 'static {
fn name(&self) -> &'static str;
fn add_span_attributes(&self, span: SpanRef<'_>);
}
pub trait ApiError: std::error::Error + Send + Sync + 'static {
fn http_status_code(&self) -> StatusCode;
fn add_http_headers(&self, header_map: &mut HeaderMap<HeaderValue>);
fn http_body(&self, garage_region: &str, path: &str) -> Body;
}
#[async_trait]
pub(crate) trait ApiHandler: Send + Sync + 'static {
const API_NAME: &'static str;
const API_NAME_DISPLAY: &'static str;
type Endpoint: ApiEndpoint;
type Error: ApiError;
fn parse_endpoint(&self, r: &Request<Body>) -> Result<Self::Endpoint, Self::Error>;
async fn handle(
&self,
req: Request<Body>,
endpoint: Self::Endpoint,
) -> Result<Response<Body>, Self::Error>;
}
pub(crate) struct ApiServer<A: ApiHandler> {
region: String,
api_handler: A,
// Metrics
request_counter: Counter<u64>,
error_counter: Counter<u64>,
request_duration: ValueRecorder<f64>,
}
impl<A: ApiHandler> ApiServer<A> {
pub fn new(region: String, api_handler: A) -> Arc<Self> {
let meter = global::meter("garage/api");
Arc::new(Self {
region,
api_handler,
request_counter: meter
.u64_counter(format!("api.{}.request_counter", A::API_NAME))
.with_description(format!(
"Number of API calls to the various {} API endpoints",
A::API_NAME_DISPLAY
))
.init(),
error_counter: meter
.u64_counter(format!("api.{}.error_counter", A::API_NAME))
.with_description(format!(
"Number of API calls to the various {} API endpoints that resulted in errors",
A::API_NAME_DISPLAY
))
.init(),
request_duration: meter
.f64_value_recorder(format!("api.{}.request_duration", A::API_NAME))
.with_description(format!(
"Duration of API calls to the various {} API endpoints",
A::API_NAME_DISPLAY
))
.init(),
})
}
pub async fn run_server(
self: Arc<Self>,
bind_addr: SocketAddr,
shutdown_signal: impl Future<Output = ()>,
) -> Result<(), GarageError> {
let service = make_service_fn(|conn: &AddrStream| {
let this = self.clone();
let client_addr = conn.remote_addr();
async move {
Ok::<_, GarageError>(service_fn(move |req: Request<Body>| {
let this = this.clone();
this.handler(req, client_addr)
}))
}
});
let server = Server::bind(&bind_addr).serve(service);
let graceful = server.with_graceful_shutdown(shutdown_signal);
info!(
"{} API server listening on http://{}",
A::API_NAME_DISPLAY,
bind_addr
);
graceful.await?;
Ok(())
}
async fn handler(
self: Arc<Self>,
req: Request<Body>,
addr: SocketAddr,
) -> Result<Response<Body>, GarageError> {
let uri = req.uri().clone();
info!("{} {} {}", addr, req.method(), uri);
debug!("{:?}", req);
let tracer = opentelemetry::global::tracer("garage");
let span = tracer
.span_builder(format!("{} API call (unknown)", A::API_NAME_DISPLAY))
.with_trace_id(gen_trace_id())
.with_attributes(vec![
KeyValue::new("method", format!("{}", req.method())),
KeyValue::new("uri", req.uri().to_string()),
])
.start(&tracer);
let res = self
.handler_stage2(req)
.with_context(Context::current_with_span(span))
.await;
match res {
Ok(x) => {
debug!("{} {:?}", x.status(), x.headers());
Ok(x)
}
Err(e) => {
let body: Body = e.http_body(&self.region, uri.path());
let mut http_error_builder = Response::builder().status(e.http_status_code());
if let Some(header_map) = http_error_builder.headers_mut() {
e.add_http_headers(header_map)
}
let http_error = http_error_builder.body(body)?;
if e.http_status_code().is_server_error() {
warn!("Response: error {}, {}", e.http_status_code(), e);
} else {
info!("Response: error {}, {}", e.http_status_code(), e);
}
Ok(http_error)
}
}
}
async fn handler_stage2(&self, req: Request<Body>) -> Result<Response<Body>, A::Error> {
let endpoint = self.api_handler.parse_endpoint(&req)?;
debug!("Endpoint: {}", endpoint.name());
let current_context = Context::current();
let current_span = current_context.span();
current_span.update_name::<String>(format!("S3 API {}", endpoint.name()));
current_span.set_attribute(KeyValue::new("endpoint", endpoint.name()));
endpoint.add_span_attributes(current_span);
let metrics_tags = &[KeyValue::new("api_endpoint", endpoint.name())];
let res = self
.api_handler
.handle(req, endpoint)
.record_duration(&self.request_duration, &metrics_tags[..])
.await;
self.request_counter.add(1, &metrics_tags[..]);
let status_code = match &res {
Ok(r) => r.status(),
Err(e) => e.http_status_code(),
};
if status_code.is_client_error() || status_code.is_server_error() {
self.error_counter.add(
1,
&[
metrics_tags[0].clone(),
KeyValue::new("status_code", status_code.as_str().to_string()),
],
);
}
res
}
}

View file

@ -1,21 +1,5 @@
use hyper::{Body, Request, Response}; use crate::Error;
use idna::domain_to_unicode; use idna::domain_to_unicode;
use serde::{Deserialize, Serialize};
use crate::common_error::{CommonError as Error, *};
/// What kind of authorization is required to perform a given action
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum Authorization {
/// No authorization is required
None,
/// Having Read permission on bucket
Read,
/// Having Write permission on bucket
Write,
/// Having Owner permission on bucket
Owner,
}
/// Host to bucket /// Host to bucket
/// ///
@ -47,7 +31,7 @@ pub fn authority_to_host(authority: &str) -> Result<String, Error> {
let mut iter = authority.chars().enumerate(); let mut iter = authority.chars().enumerate();
let (_, first_char) = iter let (_, first_char) = iter
.next() .next()
.ok_or_else(|| Error::bad_request("Authority is empty".to_string()))?; .ok_or_else(|| Error::BadRequest("Authority is empty".to_string()))?;
let split = match first_char { let split = match first_char {
'[' => { '[' => {
@ -55,7 +39,7 @@ pub fn authority_to_host(authority: &str) -> Result<String, Error> {
match iter.next() { match iter.next() {
Some((_, ']')) => iter.next(), Some((_, ']')) => iter.next(),
_ => { _ => {
return Err(Error::bad_request(format!( return Err(Error::BadRequest(format!(
"Authority {} has an illegal format", "Authority {} has an illegal format",
authority authority
))) )))
@ -68,7 +52,7 @@ pub fn authority_to_host(authority: &str) -> Result<String, Error> {
let authority = match split { let authority = match split {
Some((i, ':')) => Ok(&authority[..i]), Some((i, ':')) => Ok(&authority[..i]),
None => Ok(authority), None => Ok(authority),
Some((_, _)) => Err(Error::bad_request(format!( Some((_, _)) => Err(Error::BadRequest(format!(
"Authority {} has an illegal format", "Authority {} has an illegal format",
authority authority
))), ))),
@ -76,134 +60,10 @@ pub fn authority_to_host(authority: &str) -> Result<String, Error> {
authority.map(|h| domain_to_unicode(h).0) authority.map(|h| domain_to_unicode(h).0)
} }
/// Extract the bucket name and the key name from an HTTP path and possibly a bucket provided in
/// the host header of the request
///
/// S3 internally manages only buckets and keys. This function splits
/// an HTTP path to get the corresponding bucket name and key.
pub fn parse_bucket_key<'a>(
path: &'a str,
host_bucket: Option<&'a str>,
) -> Result<(&'a str, Option<&'a str>), Error> {
let path = path.trim_start_matches('/');
if let Some(bucket) = host_bucket {
if !path.is_empty() {
return Ok((bucket, Some(path)));
} else {
return Ok((bucket, None));
}
}
let (bucket, key) = match path.find('/') {
Some(i) => {
let key = &path[i + 1..];
if !key.is_empty() {
(&path[..i], Some(key))
} else {
(&path[..i], None)
}
}
None => (path, None),
};
if bucket.is_empty() {
return Err(Error::bad_request("No bucket specified"));
}
Ok((bucket, key))
}
const UTF8_BEFORE_LAST_CHAR: char = '\u{10FFFE}';
/// Compute the key after the prefix
pub fn key_after_prefix(pfx: &str) -> Option<String> {
let mut next = pfx.to_string();
while !next.is_empty() {
let tail = next.pop().unwrap();
if tail >= char::MAX {
continue;
}
// Circumvent a limitation of RangeFrom that overflow earlier than needed
// See: https://doc.rust-lang.org/core/ops/struct.RangeFrom.html
let new_tail = if tail == UTF8_BEFORE_LAST_CHAR {
char::MAX
} else {
(tail..).nth(1).unwrap()
};
next.push(new_tail);
return Some(next);
}
None
}
pub async fn parse_json_body<T: for<'de> Deserialize<'de>>(req: Request<Body>) -> Result<T, Error> {
let body = hyper::body::to_bytes(req.into_body()).await?;
let resp: T = serde_json::from_slice(&body).ok_or_bad_request("Invalid JSON")?;
Ok(resp)
}
pub fn json_ok_response<T: Serialize>(res: &T) -> Result<Response<Body>, Error> {
let resp_json = serde_json::to_string_pretty(res).map_err(garage_util::error::Error::from)?;
Ok(Response::builder()
.status(hyper::StatusCode::OK)
.header(http::header::CONTENT_TYPE, "application/json")
.body(Body::from(resp_json))?)
}
#[cfg(test)] #[cfg(test)]
mod tests { mod tests {
use super::*; use super::*;
#[test]
fn parse_bucket_containing_a_key() -> Result<(), Error> {
let (bucket, key) = parse_bucket_key("/my_bucket/a/super/file.jpg", None)?;
assert_eq!(bucket, "my_bucket");
assert_eq!(key.expect("key must be set"), "a/super/file.jpg");
Ok(())
}
#[test]
fn parse_bucket_containing_no_key() -> Result<(), Error> {
let (bucket, key) = parse_bucket_key("/my_bucket/", None)?;
assert_eq!(bucket, "my_bucket");
assert!(key.is_none());
let (bucket, key) = parse_bucket_key("/my_bucket", None)?;
assert_eq!(bucket, "my_bucket");
assert!(key.is_none());
Ok(())
}
#[test]
fn parse_bucket_containing_no_bucket() {
let parsed = parse_bucket_key("", None);
assert!(parsed.is_err());
let parsed = parse_bucket_key("/", None);
assert!(parsed.is_err());
let parsed = parse_bucket_key("////", None);
assert!(parsed.is_err());
}
#[test]
fn parse_bucket_with_vhost_and_key() -> Result<(), Error> {
let (bucket, key) = parse_bucket_key("/a/super/file.jpg", Some("my-bucket"))?;
assert_eq!(bucket, "my-bucket");
assert_eq!(key.expect("key must be set"), "a/super/file.jpg");
Ok(())
}
#[test]
fn parse_bucket_with_vhost_no_key() -> Result<(), Error> {
let (bucket, key) = parse_bucket_key("", Some("my-bucket"))?;
assert_eq!(bucket, "my-bucket");
assert!(key.is_none());
let (bucket, key) = parse_bucket_key("/", Some("my-bucket"))?;
assert_eq!(bucket, "my-bucket");
assert!(key.is_none());
Ok(())
}
#[test] #[test]
fn authority_to_host_with_port() -> Result<(), Error> { fn authority_to_host_with_port() -> Result<(), Error> {
let domain = authority_to_host("[::1]:3902")?; let domain = authority_to_host("[::1]:3902")?;
@ -251,47 +111,4 @@ mod tests {
assert_eq!(host_to_bucket("not-garage.tld", "garage.tld"), None); assert_eq!(host_to_bucket("not-garage.tld", "garage.tld"), None);
assert_eq!(host_to_bucket("not-garage.tld", ".garage.tld"), None); assert_eq!(host_to_bucket("not-garage.tld", ".garage.tld"), None);
} }
#[test]
fn test_key_after_prefix() {
use std::iter::FromIterator;
assert_eq!(UTF8_BEFORE_LAST_CHAR as u32, (char::MAX as u32) - 1);
assert_eq!(key_after_prefix("a/b/").unwrap().as_str(), "a/b0");
assert_eq!(key_after_prefix("").unwrap().as_str(), "");
assert_eq!(
key_after_prefix("􏿽").unwrap().as_str(),
String::from(char::from_u32(0x10FFFE).unwrap())
);
// When the last character is the biggest UTF8 char
let a = String::from_iter(['a', char::MAX].iter());
assert_eq!(key_after_prefix(a.as_str()).unwrap().as_str(), "b");
// When all characters are the biggest UTF8 char
let b = String::from_iter([char::MAX; 3].iter());
assert!(key_after_prefix(b.as_str()).is_none());
// Check utf8 surrogates
let c = String::from('\u{D7FF}');
assert_eq!(
key_after_prefix(c.as_str()).unwrap().as_str(),
String::from('\u{E000}')
);
// Check the character before the biggest one
let d = String::from('\u{10FFFE}');
assert_eq!(
key_after_prefix(d.as_str()).unwrap().as_str(),
String::from(char::MAX)
);
}
}
#[derive(Serialize)]
pub(crate) struct CustomApiErrorBody {
pub(crate) code: String,
pub(crate) message: String,
pub(crate) region: String,
pub(crate) path: String,
} }

View file

@ -1,196 +0,0 @@
use std::sync::Arc;
use async_trait::async_trait;
use futures::future::Future;
use hyper::{Body, Method, Request, Response};
use opentelemetry::{trace::SpanRef, KeyValue};
use garage_util::error::Error as GarageError;
use garage_model::garage::Garage;
use crate::generic_server::*;
use crate::k2v::error::*;
use crate::signature::payload::check_payload_signature;
use crate::signature::streaming::*;
use crate::helpers::*;
use crate::k2v::batch::*;
use crate::k2v::index::*;
use crate::k2v::item::*;
use crate::k2v::router::Endpoint;
use crate::s3::cors::*;
pub struct K2VApiServer {
garage: Arc<Garage>,
}
pub(crate) struct K2VApiEndpoint {
bucket_name: String,
endpoint: Endpoint,
}
impl K2VApiServer {
pub async fn run(
garage: Arc<Garage>,
shutdown_signal: impl Future<Output = ()>,
) -> Result<(), GarageError> {
if let Some(cfg) = &garage.config.k2v_api {
let bind_addr = cfg.api_bind_addr;
ApiServer::new(
garage.config.s3_api.s3_region.clone(),
K2VApiServer { garage },
)
.run_server(bind_addr, shutdown_signal)
.await
} else {
Ok(())
}
}
}
#[async_trait]
impl ApiHandler for K2VApiServer {
const API_NAME: &'static str = "k2v";
const API_NAME_DISPLAY: &'static str = "K2V";
type Endpoint = K2VApiEndpoint;
type Error = Error;
fn parse_endpoint(&self, req: &Request<Body>) -> Result<K2VApiEndpoint, Error> {
let (endpoint, bucket_name) = Endpoint::from_request(req)?;
Ok(K2VApiEndpoint {
bucket_name,
endpoint,
})
}
async fn handle(
&self,
req: Request<Body>,
endpoint: K2VApiEndpoint,
) -> Result<Response<Body>, Error> {
let K2VApiEndpoint {
bucket_name,
endpoint,
} = endpoint;
let garage = self.garage.clone();
// The OPTIONS method is procesed early, before we even check for an API key
if let Endpoint::Options = endpoint {
return Ok(handle_options_s3api(garage, &req, Some(bucket_name))
.await
.ok_or_bad_request("Error handling OPTIONS")?);
}
let (api_key, mut content_sha256) = check_payload_signature(&garage, "k2v", &req).await?;
let api_key = api_key
.ok_or_else(|| Error::forbidden("Garage does not support anonymous access yet"))?;
let req = parse_streaming_body(
&api_key,
req,
&mut content_sha256,
&garage.config.s3_api.s3_region,
"k2v",
)?;
let bucket_id = garage
.bucket_helper()
.resolve_bucket(&bucket_name, &api_key)
.await?;
let bucket = garage
.bucket_helper()
.get_existing_bucket(bucket_id)
.await?;
let allowed = match endpoint.authorization_type() {
Authorization::Read => api_key.allow_read(&bucket_id),
Authorization::Write => api_key.allow_write(&bucket_id),
Authorization::Owner => api_key.allow_owner(&bucket_id),
_ => unreachable!(),
};
if !allowed {
return Err(Error::forbidden("Operation is not allowed for this key."));
}
// Look up what CORS rule might apply to response.
// Requests for methods different than GET, HEAD or POST
// are always preflighted, i.e. the browser should make
// an OPTIONS call before to check it is allowed
let matching_cors_rule = match *req.method() {
Method::GET | Method::HEAD | Method::POST => find_matching_cors_rule(&bucket, &req)
.ok_or_internal_error("Error looking up CORS rule")?,
_ => None,
};
let resp = match endpoint {
Endpoint::DeleteItem {
partition_key,
sort_key,
} => handle_delete_item(garage, req, bucket_id, &partition_key, &sort_key).await,
Endpoint::InsertItem {
partition_key,
sort_key,
} => handle_insert_item(garage, req, bucket_id, &partition_key, &sort_key).await,
Endpoint::ReadItem {
partition_key,
sort_key,
} => handle_read_item(garage, &req, bucket_id, &partition_key, &sort_key).await,
Endpoint::PollItem {
partition_key,
sort_key,
causality_token,
timeout,
} => {
handle_poll_item(
garage,
&req,
bucket_id,
partition_key,
sort_key,
causality_token,
timeout,
)
.await
}
Endpoint::ReadIndex {
prefix,
start,
end,
limit,
reverse,
} => handle_read_index(garage, bucket_id, prefix, start, end, limit, reverse).await,
Endpoint::InsertBatch {} => handle_insert_batch(garage, bucket_id, req).await,
Endpoint::ReadBatch {} => handle_read_batch(garage, bucket_id, req).await,
Endpoint::DeleteBatch {} => handle_delete_batch(garage, bucket_id, req).await,
Endpoint::Options => unreachable!(),
};
// If request was a success and we have a CORS rule that applies to it,
// add the corresponding CORS headers to the response
let mut resp_ok = resp?;
if let Some(rule) = matching_cors_rule {
add_cors_headers(&mut resp_ok, rule)
.ok_or_internal_error("Invalid bucket CORS configuration")?;
}
Ok(resp_ok)
}
}
impl ApiEndpoint for K2VApiEndpoint {
fn name(&self) -> &'static str {
self.endpoint.name()
}
fn add_span_attributes(&self, span: SpanRef<'_>) {
span.set_attribute(KeyValue::new("bucket", self.bucket_name.clone()));
}
}

Some files were not shown because too many files have changed in this diff Show more