Compare commits

...

172 commits

Author SHA1 Message Date
76db422c64 Fix garage admin service 2022-11-23 22:51:09 +01:00
1dd5b96350 Local version bump until this is merged upstream 2022-11-23 22:47:39 +01:00
67102dd185 Add admin port 2022-11-23 22:45:28 +01:00
fd38b387a8 Local version bump until this is merged upstream 2022-11-20 10:26:58 +01:00
47bbe9f0b2 Merge branch 'feat-k8s-dbengine' 2022-11-18 20:19:32 +01:00
c2a2d70a59 Make db engine configurable through helm values 2022-11-18 20:17:58 +01:00
7bca6ccd0b Add documentation about setting db engine in helm 2022-11-18 20:06:32 +01:00
4787685912 Fix documentation based on new deployment values 2022-11-18 20:04:15 +01:00
17a0ba9f7c Set hostPath type for volumes 2022-11-18 20:04:15 +01:00
462655188c Fix volume handling and persistence flag 2022-11-18 20:04:15 +01:00
a53e6271bb Enable daemonset deployment using the helm chart
DaemonSet is a k8s resource that schedules one instance per node,
which is useful for some garage deployment use cases, including
managing garage nodes using k8s node labels
2022-11-18 20:04:15 +01:00
7fafd14a25 Merge pull request 'Documentation updates' (#423) from doc-0.8 into main
Reviewed-on: Deuxfleurs/garage#423
2022-11-16 20:50:45 +00:00
555a54ec40
doc precisions and fixes 2022-11-16 13:40:49 +01:00
fc8f795bba
Rename subsections and add docker compose file 2022-11-16 13:33:33 +01:00
a7af0c8af9
Add best practices and doc of monitoring (fix #419) 2022-11-16 13:27:24 +01:00
bcc9772470 Merge pull request 'OpenAPI spec for admin API' (#379) from ecosystem/openapi into main
Reviewed-on: Deuxfleurs/garage#379
2022-11-16 10:51:04 +00:00
c4e4cc1156 Merge pull request 'Move testing strategy to a dedicated doc section (fix #114)' (#415) from doc-testing-strategy into main
Reviewed-on: Deuxfleurs/garage#415
2022-11-14 12:38:28 +00:00
05547f2ba6
Move testing strategy to a dedicated doc section (fix #114) 2022-11-14 13:34:00 +01:00
39ac295eb7 Merge pull request 'Improve Nginx reverse proxy example' (#413) from baptiste/garage:nginx_fix into main
Reviewed-on: Deuxfleurs/garage#413
2022-11-14 12:21:56 +00:00
cf23aee183
Add a "build" section, doc for SDK 2022-11-13 16:48:52 +01:00
74ea449f4b
Add missing parameter 2022-11-12 23:04:37 +01:00
eabb37b53f
openapi validate fix 2022-11-12 22:37:42 +01:00
e7824faa17
Finalize the specification of the admin API 2022-11-12 18:08:41 +01:00
Baptiste Jonglez
8dfc909759 Improve Nginx reverse proxy example
By default, Nginx does proxy buffering and it may store big replies to a
temporary file up to 1 GB.  It also means that Nginx will read data as
fast as possible from Garage, even if the client downloads slowly.  Both
behaviours are often not wanted, so disable this temporary file in the example.

Ref: https://nginx.org/en/docs/http/ngx_http_proxy_module.html#proxy_buffering

Also add an example of upstream with a "backup" server, which may be
useful to only use remote servers as fallback.
2022-11-11 21:50:08 +01:00
485109ea60
Bucket CRUD is defined 2022-11-11 18:32:35 +01:00
ebe8a41f2d
Bucket skeleton 2022-11-11 17:10:41 +01:00
dc50fa3b34
Fix typo in admin API on BucketInfo 2022-11-11 16:56:56 +01:00
a976c9190c
Use awscli in the getting started guide 2022-11-11 12:48:52 +01:00
72a0f90070
Make capacity nullable to allow gateway config 2022-11-11 09:22:37 +01:00
d814deb806
Error is nullable on AddNode 2022-11-11 09:22:37 +01:00
6a09f16da7
Set required fields in the spec 2022-11-11 09:22:36 +01:00
23207d18a0
Fix case of garage version 2022-11-11 09:22:36 +01:00
3024405a65
Add operationId to entrypoints 2022-11-11 09:22:36 +01:00
5f0928f89c
Declare Authorization scheme in OpenAPI 2022-11-11 09:22:36 +01:00
0a01b34e81
Partial OpenAPI spec for admin API with a viewer 2022-11-11 09:22:36 +01:00
6f60fe42c3 Set hostPath type for volumes 2022-11-06 21:54:09 +01:00
66f2daa025 Merge pull request 'Add documentation to run Mastodon on Garage' (#411) from baptiste/garage:doc_mastodon into main
Reviewed-on: Deuxfleurs/garage#411
2022-11-06 17:06:07 +00:00
bf5868a71d Fix volume handling and persistence flag 2022-11-06 17:50:06 +01:00
Baptiste Jonglez
26b3295aaa Add documentation to run Mastodon on Garage 2022-11-06 14:07:31 +01:00
0d279918b7 Merge pull request 'Improvements to CLI' (#410) from cleanup-uploads-command into main
Reviewed-on: Deuxfleurs/garage#410
2022-11-04 15:51:16 +00:00
e03d9062f7
Show a nice message and a backtrace when Garage panics 2022-11-04 16:39:02 +01:00
8d3bbf5703
Clearer error messsages 2022-11-04 16:07:33 +01:00
5b18fd8201
Add garage bucket cleanup-incomplete-uploads command 2022-11-04 11:55:59 +01:00
f285cb6ecf Enable daemonset deployment using the helm chart
DaemonSet is a k8s resource that schedules one instance per node,
which is useful for some garage deployment use cases, including
managing garage nodes using k8s node labels
2022-10-29 21:07:02 +02:00
043246c575 Merge pull request 'Fix helm chart with correct configuration syntax' (#406) from fix-helm-chart into main
Reviewed-on: Deuxfleurs/garage#406
2022-10-18 20:30:58 +00:00
d6c77ea327
Fix helm chart with correct configuration syntax 2022-10-18 22:30:05 +02:00
5254750658 Merge pull request 'Add TLS support for Consul discovery + refactoring' (#405) from consul-tls into main
Reviewed-on: Deuxfleurs/garage#405
2022-10-18 20:20:55 +00:00
57b5c2c754
Change reqwest rustls features 2022-10-18 22:11:27 +02:00
8bc5caf7aa
Fix issue with 'http(s)://' prefix 2022-10-18 21:17:11 +02:00
2da8786f54
move things around 2022-10-18 19:13:52 +02:00
5d8d393054
Load TLS certificates only once 2022-10-18 19:11:16 +02:00
002b9fc50c
Add TLS support for Consul discovery + refactoring 2022-10-18 18:38:20 +02:00
5670599372 Merge pull request 'Use status code 204 No Content for empty responses' (#403) from tobikris/garage:http-no-content into main
Reviewed-on: Deuxfleurs/garage#403
2022-10-18 14:20:44 +00:00
7bc9fd34b2 Merge pull request 'upgrade Nix toolchain' (#400) from upgrade-toolchain into main
Reviewed-on: Deuxfleurs/garage#400
2022-10-18 14:16:52 +00:00
a54a63c491
Add function to upload a build and its dependencies to the cache
to faster bootstrap new runner nodes
2022-10-18 14:19:19 +02:00
f1c96d108c
update k2v docs for status 204 changes 2022-10-18 13:50:56 +02:00
8fc93abc79
Some things are now in result-bin 2022-10-18 13:39:21 +02:00
667ca9d3e3
Cleanup nix scripts 2022-10-18 12:48:31 +02:00
6a5eba0b72
Add garage_db test to CI 2022-10-18 12:33:35 +02:00
00cf076412
Fix cargo2nix feature discovery 2022-10-18 12:15:45 +02:00
7c0c229934
move refresh_toolchain 2022-10-18 12:15:31 +02:00
7865003323
Use status code 204 No Content for empty responses 2022-10-17 10:55:26 +02:00
4582a8f34a Merge pull request 'Update 'doc/book/reference-manual/features.md'' (#402) from borgified/garage:borgified-patch-1 into main
Reviewed-on: Deuxfleurs/garage#402
2022-10-16 07:41:32 +00:00
8e442001b9 Update 'doc/book/reference-manual/features.md'
typo
2022-10-16 07:13:21 +00:00
c050a59fd0
Fix conditional testing in garage_db 2022-10-14 18:27:18 +02:00
fcaee3bea0
definitively expunge openssl from dependencies everywhere 2022-10-14 18:10:36 +02:00
e89e047c5a
Fix i386 build with custom toolchain (armv6 unknown state) 2022-10-14 18:10:24 +02:00
8d04ae7014
cargo2nix unstable (patched), rust 1.63.0, nixpkgs 22.05 (32-bit builds are broken) 2022-10-14 14:30:48 +02:00
a096ced355 Merge pull request 'Fix instant substractions that might have panicked' (#398) from fix-time into main
Reviewed-on: Deuxfleurs/garage#398
2022-10-02 16:41:06 +02:00
e21b672c96 Merge pull request 'Add helm chart' (#331) from chemicstry/garage:helm_chart into main
Reviewed-on: Deuxfleurs/garage#331
Reviewed-by: maximilien <me@mricher.fr>
2022-10-02 16:40:54 +02:00
db0c8b3980 Updates values.yml with some opinionated and untested defaults 2022-09-30 18:46:57 +02:00
6dba7dadf4 Add missing ClusterRole and bindings for CRDs 2022-09-30 18:46:57 +02:00
d2c937a931 Fix typo 2022-09-30 18:46:57 +02:00
744c3b4d94 Update docs 2022-09-30 18:46:57 +02:00
b71fa2ddf4 Generate random RPC secret if not provided 2022-09-30 18:46:57 +02:00
37a73d7d37 Move documentation to book 2022-09-30 18:46:57 +02:00
d0f08c254e Add secret to overrides 2022-09-30 18:46:57 +02:00
fa52558ca1 Add configuration instructions to README 2022-09-30 18:46:57 +02:00
131cc2532b Cleanup values.yaml 2022-09-30 18:46:57 +02:00
a93dcce841 Add helm chart 2022-09-30 18:46:57 +02:00
b17d59cfab Merge pull request 'Document db_engine' (#399) from doc-0.8 into main
Reviewed-on: Deuxfleurs/garage#399
2022-09-29 17:29:44 +02:00
ad917ffd3f
Fix instant substractions that might have panicked 2022-09-29 15:53:54 +02:00
497164d782 Merge pull request 'Shutdown properly on SIGTERM/SIGHUP and on Windows signals' (#397) from handle-sigterm into main
Reviewed-on: Deuxfleurs/garage#397
2022-09-28 12:16:55 +02:00
1f97ce37e6
Shutdown properly on SIGTERM/SIGHUP and on Windows signals 2022-09-28 10:41:59 +02:00
0ab0d3cc29
Document db_engine 2022-09-27 16:52:36 +02:00
2197753dfd Merge pull request 'Add step to generate multi-arch Docker container in CI' (#393) from multi-arch-container into main
Reviewed-on: Deuxfleurs/garage#393
2022-09-27 11:55:49 +02:00
3f95a0f717 Merge pull request 'Enable k2v feature flag by default in CI' (#302) from k2v into main
Reviewed-on: Deuxfleurs/garage#302
2022-09-27 11:38:23 +02:00
7291747a28 Merge pull request 'Documentation changes for v0.8' (#394) from doc-0.8 into main
Reviewed-on: Deuxfleurs/garage#394
2022-09-27 11:37:12 +02:00
d104ae8711
Add step to generate multi-arch Docker container in CI 2022-09-26 19:09:55 +02:00
194e8be1bb
Update docker image links 2022-09-26 18:01:17 +02:00
69bcc813de
Add garage v0.8 migration guide 2022-09-26 17:46:38 +02:00
ea7a571d88 Merge pull request 'Fix span name for api server requests' (#392) from fix-span-name into main
Reviewed-on: Deuxfleurs/garage#392
2022-09-26 16:57:37 +02:00
1778e4b318
Fix span name for api server requests 2022-09-26 16:21:30 +02:00
af2b2f26b4 Merge pull request 'Update README (fix #230)' (#391) from new-readme into main
Reviewed-on: Deuxfleurs/garage#391
2022-09-21 13:28:52 +02:00
a3758dc4c4
Update README 2022-09-21 12:53:02 +02:00
e89f880694
Enable k2v feature flag in CI 2022-09-20 17:54:41 +02:00
fc85508648 Merge pull request 'Initialize metrics exporter earlier (fix #389)' (#390) from fix-metrics into main
Reviewed-on: Deuxfleurs/garage#390
2022-09-20 17:53:46 +02:00
782630fc27
Initialize metrics exporter earlier (fix #389) 2022-09-20 17:50:22 +02:00
7a901f7aab Merge pull request 'RPC performance changes' (#387) from configurable-timeouts into main
Reviewed-on: Deuxfleurs/garage#387
2022-09-20 16:17:23 +02:00
ded444f6c9
Ability to have custom timeouts in request strategy (not used) 2022-09-20 16:01:41 +02:00
357b72f4ff
Merge branch 'main' into configurable-timeouts 2022-09-20 15:19:58 +02:00
2c312e9cbd Merge pull request 'Change a warn! into a debug!' (#388) from less-noise into main
Reviewed-on: Deuxfleurs/garage#388
2022-09-20 11:57:52 +02:00
1f7b050b7d
Change a warn! into a debug! 2022-09-20 11:49:48 +02:00
56592e1853
RPC performance changes
- configurable ping timeout
- single, much higher, configurable RPC timeout
- no more concurrency semaphore
2022-09-19 20:31:00 +02:00
fbd32933ea Merge pull request 'Faster GetObject workflow for getting entire objects' (#386) from faster-get into main
Reviewed-on: Deuxfleurs/garage#386
2022-09-19 15:24:06 +02:00
5d4b6f2173
Faster GetObject workflow for getting entire objects 2022-09-19 12:19:59 +02:00
4fba06d62e Merge pull request 'updates to documentation for v0.8' (#385) from doc-0.8 into main
Reviewed-on: Deuxfleurs/garage#385
2022-09-19 10:45:10 +02:00
1d0a610690
Finish writing about Garage features, and fix from-source instructions 2022-09-15 13:23:57 +02:00
f6aebefcc9
Some work on documentation towards v0.8 2022-09-14 19:31:13 +02:00
89b8087ba8 Merge pull request 'Properly return HTTP 204 when deleting non-existent object (fix #227)' (#384) from deleteobject-204 into main
Reviewed-on: Deuxfleurs/garage#384
2022-09-14 17:16:39 +02:00
76f42a1a2b
Properly return HTTP 204 when deleting non-existent object (fix #227) 2022-09-14 17:07:55 +02:00
82600acf77 Merge pull request 'Allow for hostnames in bootstrap_peers and rpc_public_addr (fix #353)' (#383) from resolve-peer-names into main
Reviewed-on: Deuxfleurs/garage#383
2022-09-14 16:37:18 +02:00
e46dc2a8ef
Allow for hostnames in bootstrap_peers and rpc_public_addr (fix #353) 2022-09-14 16:09:38 +02:00
80fdbfb0aa Merge pull request 'various fixes for v0.8.0' (#380) from various-fixes-for-0.8 into main
Reviewed-on: Deuxfleurs/garage#380
2022-09-13 16:49:05 +02:00
ab722cb40f
Add checks on replication_factor of layouts we use (fix #363, fix #364) 2022-09-13 16:22:23 +02:00
38be811b1c
Fix clippy lint that says we should implement Eq 2022-09-13 16:08:00 +02:00
44733474bb
Remove/change println! in server code (fix #358) 2022-09-13 16:01:55 +02:00
07febd3ecd
Ensure data dir is created immediately when Garage starts (fix #349) 2022-09-13 15:57:27 +02:00
11bdc971e2 Merge pull request 'use netapp streaming body' (#343) from netapp-stream-body into main
Reviewed-on: Deuxfleurs/garage#343
2022-09-13 15:26:08 +02:00
ff30891999
Use streaming block API for get with Range requests 2022-09-13 15:13:07 +02:00
28a4af73ca
Use netapp 0.5 published from crates.io 2022-09-13 13:11:44 +02:00
b823151a0b
improvements in block manager 2022-09-12 16:57:38 +02:00
309d7aef3f Merge pull request 'performance improvements' (#342) from lx-perf-improvements into main
Performance improvements included in this PR:

- [x] Use `Bytes` at a few places where appropriate, instead of `Vec<u8>`, to reduce the number of copies
  - [x] StreamChunker now accumulates incoming slices in a `Vec<Bytes>` instead of a `VecDeque<u8>`. Replaces calls to `.extend()` and `.drain()` that were quite costly by a simple `concat()` on a vec of slices which is much more optimized
- [x] Hashing (b2, sha256, md5) is now done on a Tokio thread dedicated to cpu-intensive tasks, using `spawn_blocking`
- [x] Block manager now uses 256 independant locks instead of one big lock for writing, reduces contention when writing several/many objects in parallel
- [x] Better LMDB defaults: we now put flags `NoSync` and `NoMetaSync` to avoid `fsync` at each transaction (extremely slow). Also increased number of LMDB readers to accomodate more intensive workloads

Other changes included in this PR:

- [x] Update to hashing and MAC crates: md5 and sha2 from 0.9 to 0.10, hmac from 0.10 to 0.12
- [x] switch to `tracing_subscriber` for logs, which allows to have timing of each event

Reviewed-on: Deuxfleurs/garage#342
2022-09-12 16:38:43 +02:00
f91fab8582
Simplify+improve async hasher by using bounded channel 2022-09-12 16:23:43 +02:00
7f54706b95
Merge branch 'lx-perf-improvements' into netapp-stream-body 2022-09-08 15:50:56 +02:00
d9d199a6c9
Merge branch 'main' into lx-perf-improvements 2022-09-08 15:49:17 +02:00
907054775d
Faster copy, better get error message 2022-09-06 22:25:23 +02:00
6b958979bd
Merge branch 'lx-perf-improvements' into netapp-stream-body 2022-09-06 22:13:01 +02:00
d23b3a14fc
Merge branch 'main' into lx-perf-improvements 2022-09-06 21:53:37 +02:00
4024822585
Update netapp to lastest git version with LAS scheduling 2022-09-06 19:45:00 +02:00
c2cc08852b
Reenable node ordering 2022-09-06 19:31:42 +02:00
07e6bcde85
Merge branch 'main' into lx-perf-improvements 2022-09-05 12:40:17 +02:00
6226f5ceca
Update to netapp 0.4.5 - fixed ping 2022-09-02 14:33:12 +02:00
13b5f28c7e
Make use of BytesBuf from new Netapp 2022-09-02 13:46:42 +02:00
1ef87ac4cb
cargo fmt 2022-09-02 13:38:29 +02:00
99b532b85b
Apply PRIO_SECONDARY to block data transfers 2022-09-01 16:35:43 +02:00
e648bf7b69
update cargo.nix 2022-09-01 16:31:04 +02:00
df094bd807
Less strict timeouts 2022-09-01 16:30:44 +02:00
f3bf34b6a1
update netapp: straming + fix-ping 2022-09-01 14:23:54 +02:00
bc977f9a7a
Update to Netapp with OrderTag support and exploit OrderTags 2022-09-01 12:58:20 +02:00
4b726b0941
netapp recv with unbounded channel removes deadlock 2022-09-01 09:47:28 +02:00
70231d68b2
Fix bytes_read counter 2022-08-31 19:44:27 +02:00
e598231ca4
update netapp git commit 2022-08-31 19:27:25 +02:00
c9bc9d89de
Merge branch 'lx-perf-improvements' into netapp-stream-body 2022-08-31 17:42:31 +02:00
eb97e13a6a
update cargo.nix 2022-08-31 17:42:00 +02:00
efbca67ce4
Add env filter to tracing subscriber 2022-08-31 14:39:12 +02:00
44cd98d2e4
Tracing-subscriber: write to stderr 2022-08-31 14:28:17 +02:00
dd5304f6fc
Replace logging crate pretty_env_logger by tracing_subscriber::fmt 2022-08-31 14:24:41 +02:00
322dafc761
Try to fix clippy 2022-08-29 17:32:45 +02:00
5d065b8a0f
cargo2nix fix to fetchCrateGit 2022-08-29 17:24:53 +02:00
52749e28f7
Merge branch 'lx-perf-improvements' into netapp-stream-body 2022-08-29 16:48:43 +02:00
4da67b0035
Update drone signature 2022-08-29 16:48:31 +02:00
1921f4f7e6
Merge branch 'lx-perf-improvements' into netapp-stream-body 2022-08-29 16:45:05 +02:00
ebc20a8798
Merge branch 'main' into lx-perf-improvements 2022-08-29 16:44:13 +02:00
e935861854
Factor out node request order selection logic & use in manager 2022-07-29 12:25:03 +02:00
f0ee3056d3
Update cargo.nix 2022-07-29 12:25:03 +02:00
126b037307
update netapp 2022-07-29 12:25:03 +02:00
33750c04ed
Update cargo.nix 2022-07-29 12:25:03 +02:00
68087ee13d
Fix clippy 2022-07-29 12:25:03 +02:00
605a630333
Use streaming in block manager 2022-07-29 12:25:02 +02:00
a35d4da721
update netapp to 0.5 2022-07-29 12:25:02 +02:00
8e7e680afe
First adaptation to WIP netapp with streaming body 2022-07-29 12:25:02 +02:00
16f6a1a65d
fix clippy 2022-07-29 12:24:49 +02:00
ad35b18bb1
Faster chunker 2022-07-29 12:24:49 +02:00
49154a78d8
Update cargo.nix 2022-07-29 12:24:48 +02:00
ff4771c36a
cargo fmt 2022-07-29 12:24:48 +02:00
381eb9a5a1
Fix tests 2022-07-29 12:24:48 +02:00
2cad656a03
More make clippy happy 2022-07-29 12:24:48 +02:00
0176da3ad2
Make clippy happy 2022-07-29 12:24:48 +02:00
40150527b8
Update cargo.nix 2022-07-29 12:24:48 +02:00
2f111e6b3d
Performance improvements:
- reduce contention on mutation_lock by having 256 of them
- better lmdb defaults
2022-07-29 12:24:48 +02:00
1b2e1296eb
Compute hashes on dedicated threads 2022-07-29 12:24:44 +02:00
158 changed files with 11203 additions and 4120 deletions

View file

@ -19,9 +19,11 @@ steps:
- name: unit + func tests
image: nixpkgs/nix:nixos-22.05
environment:
GARAGE_TEST_INTEGRATION_EXE: result/bin/garage
GARAGE_TEST_INTEGRATION_EXE: result-bin/bin/garage
commands:
- nix-build --no-build-output --attr clippy.amd64 --argstr git_version ${DRONE_TAG:-$DRONE_COMMIT}
- nix-build --no-build-output --attr test.amd64
- ./result/bin/garage_db-*
- ./result/bin/garage_api-*
- ./result/bin/garage_model-*
- ./result/bin/garage_rpc-*
@ -30,6 +32,7 @@ steps:
- ./result/bin/garage_web-*
- ./result/bin/garage-*
- ./result/bin/integration-*
- rm result
- name: integration tests
image: nixpkgs/nix:nixos-22.05
@ -58,7 +61,7 @@ steps:
image: nixpkgs/nix:nixos-22.05
commands:
- nix-build --no-build-output --attr pkgs.amd64.release --argstr git_version ${DRONE_TAG:-$DRONE_COMMIT}
- nix-shell --attr rust --run "./script/not-dynamic.sh result/bin/garage"
- nix-shell --attr rust --run "./script/not-dynamic.sh result-bin/bin/garage"
- name: integration
image: nixpkgs/nix:nixos-22.05
@ -109,7 +112,7 @@ steps:
image: nixpkgs/nix:nixos-22.05
commands:
- nix-build --no-build-output --attr pkgs.i386.release --argstr git_version ${DRONE_TAG:-$DRONE_COMMIT}
- nix-shell --attr rust --run "./script/not-dynamic.sh result/bin/garage"
- nix-shell --attr rust --run "./script/not-dynamic.sh result-bin/bin/garage"
- name: integration
image: nixpkgs/nix:nixos-22.05
@ -159,7 +162,7 @@ steps:
image: nixpkgs/nix:nixos-22.05
commands:
- nix-build --no-build-output --attr pkgs.arm64.release --argstr git_version ${DRONE_TAG:-$DRONE_COMMIT}
- nix-shell --attr rust --run "./script/not-dynamic.sh result/bin/garage"
- nix-shell --attr rust --run "./script/not-dynamic.sh result-bin/bin/garage"
- name: push static binary
image: nixpkgs/nix:nixos-22.05
@ -204,7 +207,7 @@ steps:
image: nixpkgs/nix:nixos-22.05
commands:
- nix-build --no-build-output --attr pkgs.arm.release --argstr git_version ${DRONE_TAG:-$DRONE_COMMIT}
- nix-shell --attr rust --run "./script/not-dynamic.sh result/bin/garage"
- nix-shell --attr rust --run "./script/not-dynamic.sh result-bin/bin/garage"
- name: push static binary
image: nixpkgs/nix:nixos-22.05
@ -245,6 +248,17 @@ node:
nix-daemon: 1
steps:
- name: multiarch-docker
image: nixpkgs/nix:nixos-22.05
environment:
DOCKER_AUTH:
from_secret: docker_auth
HOME: "/root"
commands:
- mkdir -p /root/.docker
- echo $DOCKER_AUTH > /root/.docker/config.json
- export CONTAINER_TAG=${DRONE_TAG:-$DRONE_COMMIT}
- nix-shell --attr release --run "multiarch_docker"
- name: refresh-index
image: nixpkgs/nix:nixos-22.05
environment:
@ -269,6 +283,6 @@ trigger:
---
kind: signature
hmac: 362639b4c9541ad9bd06ff7f72b5235b2b0216bcb16eececd25285b6fe94ba6f
hmac: ac09a5a8c82502f67271f93afa1e1e21ce66383b8e24a6deb26b285cc1c378ba
...

1090
Cargo.lock generated

File diff suppressed because it is too large Load diff

4392
Cargo.nix

File diff suppressed because it is too large Load diff

View file

@ -3,5 +3,5 @@ FROM scratch
ENV RUST_BACKTRACE=1
ENV RUST_LOG=garage=info
COPY result/bin/garage /
COPY result-bin/bin/garage /
CMD [ "/garage", "server"]

View file

@ -15,18 +15,24 @@ Garage [![Build Status](https://drone.deuxfleurs.fr/api/badges/Deuxfleurs/garage
]
</p>
Garage is a lightweight S3-compatible distributed object store, with the following goals:
Garage is an S3-compatible distributed object storage service
designed for self-hosting at a small-to-medium scale.
- As self-contained as possible
- Easy to set up
- Highly resilient to network failures, network latency, disk failures, sysadmin failures
- Relatively simple
- Made for multi-datacenter deployments
Garage is designed for storage clusters composed of nodes running
at different physical locations,
in order to easily provide a storage service that replicates data at these different
locations and stays available even when some servers are unreachable.
Garage also focuses on being lightweight, easy to operate, and highly resilient to
machine failures.
Non-goals include:
Garage is built by [Deuxfleurs](https://deuxfleurs.fr),
an experimental small-scale self hosted service provider,
which has been using it in production since its first release in 2020.
- Extremely high performance
- Complete implementation of the S3 API
- Erasure coding (our replication model is simply to copy the data as is on several nodes, in different datacenters if possible)
Learn more on our dedicated documentation pages:
Our main use case is to provide a distributed storage layer for small-scale self hosted services such as [Deuxfleurs](https://deuxfleurs.fr).
- [Goals and use cases](https://garagehq.deuxfleurs.fr/documentation/design/goals/)
- [Features](https://garagehq.deuxfleurs.fr/documentation/reference-manual/features/)
- [Quick start](https://garagehq.deuxfleurs.fr/documentation/quick-start/)
Garage is entirely free software released under the terms of the AGPLv3.

View file

@ -8,10 +8,23 @@ with import ./nix/common.nix;
let
pkgs = import pkgsSrc { };
compile = import ./nix/compile.nix;
build_debug_and_release = (target: {
debug = (compile { inherit target git_version; release = false; }).workspace.garage { compileMode = "build"; };
release = (compile { inherit target git_version; release = true; }).workspace.garage { compileMode = "build"; };
debug = (compile {
inherit target git_version;
release = false;
}).workspace.garage {
compileMode = "build";
};
release = (compile {
inherit target git_version;
release = true;
}).workspace.garage {
compileMode = "build";
};
});
test = (rustPkgs: pkgs.symlinkJoin {
name ="garage-tests";
paths = builtins.map (key: rustPkgs.workspace.${key} { compileMode = "test"; }) (builtins.attrNames rustPkgs.workspace);
@ -25,9 +38,25 @@ in {
arm = build_debug_and_release "armv6l-unknown-linux-musleabihf";
};
test = {
amd64 = test (compile { inherit git_version; target = "x86_64-unknown-linux-musl"; });
amd64 = test (compile {
inherit git_version;
target = "x86_64-unknown-linux-musl";
features = [
"garage/bundled-libs"
"garage/k2v"
"garage/sled"
"garage/lmdb"
"garage/sqlite"
];
});
};
clippy = {
amd64 = (compile { inherit git_version; compiler = "clippy"; }).workspace.garage { compileMode = "build"; } ;
amd64 = (compile {
inherit git_version;
target = "x86_64-unknown-linux-musl";
compiler = "clippy";
}).workspace.garage {
compileMode = "build";
};
};
}

17
doc/api/README.md Normal file
View file

@ -0,0 +1,17 @@
# Browse doc
Run in this directory:
```
python3 -m http.server
```
And open in your browser:
- http://localhost:8000/garage-admin-v0.html
# Validate doc
```
wget https://repo1.maven.org/maven2/org/openapitools/openapi-generator-cli/6.1.0/openapi-generator-cli-6.1.0.jar -O openapi-generator-cli.jar
java -jar openapi-generator-cli.jar validate -i garage-admin-v0.yml
```

59
doc/api/css/redoc.css Normal file
View file

@ -0,0 +1,59 @@
/* montserrat-300 - latin */
@font-face {
font-family: 'Montserrat';
font-style: normal;
font-weight: 300;
src: local(''),
url('../fonts/montserrat-v25-latin-300.woff2') format('woff2'), /* Chrome 26+, Opera 23+, Firefox 39+ */
url('../fonts/montserrat-v25-latin-300.woff') format('woff'); /* Chrome 6+, Firefox 3.6+, IE 9+, Safari 5.1+ */
}
/* montserrat-regular - latin */
@font-face {
font-family: 'Montserrat';
font-style: normal;
font-weight: 400;
src: local(''),
url('../fonts/montserrat-v25-latin-regular.woff2') format('woff2'), /* Chrome 26+, Opera 23+, Firefox 39+ */
url('../fonts/montserrat-v25-latin-regular.woff') format('woff'); /* Chrome 6+, Firefox 3.6+, IE 9+, Safari 5.1+ */
}
/* montserrat-700 - latin */
@font-face {
font-family: 'Montserrat';
font-style: normal;
font-weight: 700;
src: local(''),
url('../fonts/montserrat-v25-latin-700.woff2') format('woff2'), /* Chrome 26+, Opera 23+, Firefox 39+ */
url('../fonts/montserrat-v25-latin-700.woff') format('woff'); /* Chrome 6+, Firefox 3.6+, IE 9+, Safari 5.1+ */
}
/* roboto-300 - latin */
@font-face {
font-family: 'Roboto';
font-style: normal;
font-weight: 300;
src: local(''),
url('../fonts/roboto-v30-latin-300.woff2') format('woff2'), /* Chrome 26+, Opera 23+, Firefox 39+ */
url('../fonts/roboto-v30-latin-300.woff') format('woff'); /* Chrome 6+, Firefox 3.6+, IE 9+, Safari 5.1+ */
}
/* roboto-regular - latin */
@font-face {
font-family: 'Roboto';
font-style: normal;
font-weight: 400;
src: local(''),
url('../fonts/roboto-v30-latin-regular.woff2') format('woff2'), /* Chrome 26+, Opera 23+, Firefox 39+ */
url('../fonts/roboto-v30-latin-regular.woff') format('woff'); /* Chrome 6+, Firefox 3.6+, IE 9+, Safari 5.1+ */
}
/* roboto-700 - latin */
@font-face {
font-family: 'Roboto';
font-style: normal;
font-weight: 700;
src: local(''),
url('../fonts/roboto-v30-latin-700.woff2') format('woff2'), /* Chrome 26+, Opera 23+, Firefox 39+ */
url('../fonts/roboto-v30-latin-700.woff') format('woff'); /* Chrome 6+, Firefox 3.6+, IE 9+, Safari 5.1+ */
}

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

View file

@ -0,0 +1,24 @@
<!DOCTYPE html>
<html>
<head>
<title>Garage Adminstration API v0</title>
<!-- needed for adaptive design -->
<meta charset="utf-8"/>
<meta name="viewport" content="width=device-width, initial-scale=1">
<link href="./css/redoc.css" rel="stylesheet">
<!--
Redoc doesn't change outer page styles
-->
<style>
body {
margin: 0;
padding: 0;
}
</style>
</head>
<body>
<redoc spec-url='./garage-admin-v0.yml'></redoc>
<script src="./redoc.standalone.js"> </script>
</body>
</html>

1212
doc/api/garage-admin-v0.yml Normal file

File diff suppressed because it is too large Load diff

1806
doc/api/redoc.standalone.js Normal file

File diff suppressed because one or more lines are too long

54
doc/book/build/_index.md Normal file
View file

@ -0,0 +1,54 @@
+++
title = "Build your own app"
weight = 4
sort_by = "weight"
template = "documentation.html"
+++
Garage has many APIs that you can rely on to build complex applications.
In this section, we reference the existing SDKs and give some code examples.
## ⚠️ DISCLAIMER
**K2V AND ADMIN SDK ARE TECHNICAL PREVIEWS**. The following limitations apply:
- The API is not complete, some actions are possible only through the `garage` binary
- The underlying admin API is not yet stable nor complete, it can break at any time
- The generator configuration is currently tweaked, the library might break at any time due to a generator change
- Because the API and the library are not stable, none of them are published in a package manager (npm, pypi, etc.)
- This code has not been extensively tested, some things might not work (please report!)
To have the best experience possible, please consider:
- Make sure that the version of the library you are using is pinned (`go.sum`, `package-lock.json`, `requirements.txt`).
- Before upgrading your Garage cluster, make sure that you can find a version of this SDK that works with your targeted version and that you are able to update your own code to work with this new version of the library.
- Join our Matrix channel at `#garage:deuxfleurs.fr`, say that you are interested in this SDK, and report any friction.
- If stability is critical, mirror this repository on your own infrastructure, regenerate the SDKs and upgrade them at your own pace.
## About the APIs
Code can interact with Garage through 3 different APIs: S3, K2V, and Admin.
Each of them has a specific scope.
### S3
De-facto standard, introduced by Amazon, designed to store blobs of data.
### K2V
A simple database API similar to RiakKV or DynamoDB.
Think of it as a key-value store with some additional operations.
Its design is inspired by Distributed Hash Tables (DHT).
More information:
- [In the reference manual](@/documentation/reference-manual/k2v.md)
### Administration
Garage operations can also be automated through a REST API.
We are currently building this SDK for [Python](@/documentation/build/python.md#admin-api), [Javascript](@/documentation/build/javascript.md#administration) and [Golang](@/documentation/build/golang.md#administration).
More information:
- [In the reference manual](@/documentation/reference-manual/admin-api.md)
- [Full specification](https://garagehq.deuxfleurs.fr/api/garage-admin-v0.html)

69
doc/book/build/golang.md Normal file
View file

@ -0,0 +1,69 @@
+++
title = "Golang"
weight = 30
+++
## S3
*Coming soon*
Some refs:
- Minio minio-go-sdk
- [Reference](https://docs.min.io/docs/golang-client-api-reference.html)
- Amazon aws-sdk-go-v2
- [Installation](https://aws.github.io/aws-sdk-go-v2/docs/getting-started/)
- [Reference](https://pkg.go.dev/github.com/aws/aws-sdk-go-v2/service/s3)
- [Example](https://aws.github.io/aws-sdk-go-v2/docs/code-examples/s3/putobject/)
## K2V
*Coming soon*
## Administration
Install the SDK with:
```bash
go get git.deuxfleurs.fr/garage-sdk/garage-admin-sdk-golang
```
A short example:
```go
package main
import (
"context"
"fmt"
"os"
garage "git.deuxfleurs.fr/garage-sdk/garage-admin-sdk-golang"
)
func main() {
// Set Host and other parameters
configuration := garage.NewConfiguration()
configuration.Host = "127.0.0.1:3903"
// We can now generate a client
client := garage.NewAPIClient(configuration)
// Authentication is handled through the context pattern
ctx := context.WithValue(context.Background(), garage.ContextAccessToken, "s3cr3t")
// Send a request
resp, r, err := client.NodesApi.GetNodes(ctx).Execute()
if err != nil {
fmt.Fprintf(os.Stderr, "Error when calling `NodesApi.GetNodes``: %v\n", err)
fmt.Fprintf(os.Stderr, "Full HTTP response: %v\n", r)
}
// Process the response
fmt.Fprintf(os.Stdout, "Target hostname: %v\n", resp.KnownNodes[resp.Node].Hostname)
}
```
See also:
- [generated doc](https://git.deuxfleurs.fr/garage-sdk/garage-admin-sdk-golang)
- [examples](https://git.deuxfleurs.fr/garage-sdk/garage-admin-sdk-generator/src/branch/main/example/golang)

View file

@ -0,0 +1,55 @@
+++
title = "Javascript"
weight = 10
+++
## S3
*Coming soon*.
Some refs:
- Minio SDK
- [Reference](https://docs.min.io/docs/javascript-client-api-reference.html)
- Amazon aws-sdk-js
- [Installation](https://docs.aws.amazon.com/sdk-for-javascript/v3/developer-guide/getting-started.html)
- [Reference](https://docs.aws.amazon.com/AWSJavaScriptSDK/latest/AWS/S3.html)
- [Example](https://docs.aws.amazon.com/sdk-for-javascript/v3/developer-guide/s3-example-creating-buckets.html)
## K2V
*Coming soon*
## Administration
Install the SDK with:
```bash
npm install --save git+https://git.deuxfleurs.fr/garage-sdk/garage-admin-sdk-js.git
```
A short example:
```javascript
const garage = require('garage_administration_api_v0garage_v0_8_0');
const api = new garage.ApiClient("http://127.0.0.1:3903/v0");
api.authentications['bearerAuth'].accessToken = "s3cr3t";
const [node, layout, key, bucket] = [
new garage.NodesApi(api),
new garage.LayoutApi(api),
new garage.KeyApi(api),
new garage.BucketApi(api),
];
node.getNodes().then((data) => {
console.log(`nodes: ${Object.values(data.knownNodes).map(n => n.hostname)}`)
}, (error) => {
console.error(error);
});
```
See also:
- [sdk repository](https://git.deuxfleurs.fr/garage-sdk/garage-admin-sdk-js)
- [examples](https://git.deuxfleurs.fr/garage-sdk/garage-admin-sdk-generator/src/branch/main/example/javascript)

View file

@ -1,8 +1,10 @@
+++
title = "Your code (PHP, JS, Go...)"
weight = 30
title = "Others"
weight = 99
+++
## S3
If you are developing a new application, you may want to use Garage to store your users' media.
The S3 API that Garage uses is a standard REST API, so as long as you can make HTTP requests,
@ -13,44 +15,14 @@ Instead, there are some libraries already avalaible.
Some of them are maintained by Amazon, some by Minio, others by the community.
## PHP
### PHP
- Amazon aws-sdk-php
- [Installation](https://docs.aws.amazon.com/sdk-for-php/v3/developer-guide/getting-started_installation.html)
- [Reference](https://docs.aws.amazon.com/aws-sdk-php/v3/api/api-s3-2006-03-01.html)
- [Example](https://docs.aws.amazon.com/sdk-for-php/v3/developer-guide/s3-examples-creating-buckets.html)
## Javascript
- Minio SDK
- [Reference](https://docs.min.io/docs/javascript-client-api-reference.html)
- Amazon aws-sdk-js
- [Installation](https://docs.aws.amazon.com/sdk-for-javascript/v3/developer-guide/getting-started.html)
- [Reference](https://docs.aws.amazon.com/AWSJavaScriptSDK/latest/AWS/S3.html)
- [Example](https://docs.aws.amazon.com/sdk-for-javascript/v3/developer-guide/s3-example-creating-buckets.html)
## Golang
- Minio minio-go-sdk
- [Reference](https://docs.min.io/docs/golang-client-api-reference.html)
- Amazon aws-sdk-go-v2
- [Installation](https://aws.github.io/aws-sdk-go-v2/docs/getting-started/)
- [Reference](https://pkg.go.dev/github.com/aws/aws-sdk-go-v2/service/s3)
- [Example](https://aws.github.io/aws-sdk-go-v2/docs/code-examples/s3/putobject/)
## Python
- Minio SDK
- [Reference](https://docs.min.io/docs/python-client-api-reference.html)
- Amazon boto3
- [Installation](https://boto3.amazonaws.com/v1/documentation/api/latest/guide/quickstart.html)
- [Reference](https://boto3.amazonaws.com/v1/documentation/api/latest/reference/services/s3.html)
- [Example](https://boto3.amazonaws.com/v1/documentation/api/latest/guide/s3-uploading-files.html)
## Java
### Java
- Minio SDK
- [Reference](https://docs.min.io/docs/java-client-api-reference.html)
@ -60,23 +32,18 @@ Some of them are maintained by Amazon, some by Minio, others by the community.
- [Reference](https://sdk.amazonaws.com/java/api/latest/software/amazon/awssdk/services/s3/S3Client.html)
- [Example](https://docs.aws.amazon.com/sdk-for-java/latest/developer-guide/examples-s3-objects.html)
## Rust
- Amazon aws-rust-sdk
- [Github](https://github.com/awslabs/aws-sdk-rust)
## .NET
### .NET
- Minio SDK
- [Reference](https://docs.min.io/docs/dotnet-client-api-reference.html)
- Amazon aws-dotnet-sdk
## C++
### C++
- Amazon aws-cpp-sdk
## Haskell
### Haskell
- Minio SDK
- [Reference](https://docs.min.io/docs/haskell-client-api-reference.html)

95
doc/book/build/python.md Normal file
View file

@ -0,0 +1,95 @@
+++
title = "Python"
weight = 20
+++
## S3
*Coming soon*
Some refs:
- Minio SDK
- [Reference](https://docs.min.io/docs/python-client-api-reference.html)
- Amazon boto3
- [Installation](https://boto3.amazonaws.com/v1/documentation/api/latest/guide/quickstart.html)
- [Reference](https://boto3.amazonaws.com/v1/documentation/api/latest/reference/services/s3.html)
- [Example](https://boto3.amazonaws.com/v1/documentation/api/latest/guide/s3-uploading-files.html)
## K2V
*Coming soon*
## Admin API
You need at least Python 3.6, pip, and setuptools.
Because the python package is in a subfolder, the command is a bit more complicated than usual:
```bash
pip3 install --user 'git+https://git.deuxfleurs.fr/garage-sdk/garage-admin-sdk-python'
```
Now, let's imagine you have a fresh Garage instance running on localhost, with the admin API configured on port 3903 with the bearer token `s3cr3t`:
```python
import garage_admin_sdk
from garage_admin_sdk.apis import *
from garage_admin_sdk.models import *
configuration = garage_admin_sdk.Configuration(
host = "http://localhost:3903/v0",
access_token = "s3cr3t"
)
# Init APIs
api = garage_admin_sdk.ApiClient(configuration)
nodes, layout, keys, buckets = NodesApi(api), LayoutApi(api), KeyApi(api), BucketApi(api)
# Display some info on the node
status = nodes.get_nodes()
print(f"running garage {status.garage_version}, node_id {status.node}")
# Change layout of this node
current = layout.get_layout()
layout.add_layout({
status.node: NodeClusterInfo(
zone = "dc1",
capacity = 1,
tags = [ "dev" ],
)
})
layout.apply_layout(LayoutVersion(
version = current.version + 1
))
# Create key, allow it to create buckets
kinfo = keys.add_key(AddKeyRequest(name="openapi"))
allow_create = UpdateKeyRequestAllow(create_bucket=True)
keys.update_key(kinfo.access_key_id, UpdateKeyRequest(allow=allow_create))
# Create a bucket, allow key, set quotas
binfo = buckets.create_bucket(CreateBucketRequest(global_alias="documentation"))
binfo = buckets.allow_bucket_key(AllowBucketKeyRequest(
bucket_id=binfo.id,
access_key_id=kinfo.access_key_id,
permissions=AllowBucketKeyRequestPermissions(read=True, write=True, owner=True),
))
binfo = buckets.update_bucket(binfo.id, UpdateBucketRequest(
quotas=UpdateBucketRequestQuotas(max_size=19029801,max_objects=1500)))
# Display key
print(f"""
cluster ready
key id is {kinfo.access_key_id}
secret key is {kinfo.secret_access_key}
bucket {binfo.global_aliases[0]} contains {binfo.objects}/{binfo.quotas.max_objects} objects
""")
```
*This example is named `short.py` in the example folder. Other python examples are also available.*
See also:
- [sdk repo](https://git.deuxfleurs.fr/garage-sdk/garage-admin-sdk-python)
- [examples](https://git.deuxfleurs.fr/garage-sdk/garage-admin-sdk-generator/src/branch/main/example/python)

47
doc/book/build/rust.md Normal file
View file

@ -0,0 +1,47 @@
+++
title = "Rust"
weight = 40
+++
## S3
*Coming soon*
Some refs:
- Amazon aws-rust-sdk
- [Github](https://github.com/awslabs/aws-sdk-rust)
## K2V
*Coming soon*
Some refs: https://git.deuxfleurs.fr/Deuxfleurs/garage/src/branch/main/src/k2v-client
```bash
# all these values can be provided on the cli instead
export AWS_ACCESS_KEY_ID=GK123456
export AWS_SECRET_ACCESS_KEY=0123..789
export AWS_REGION=garage
export K2V_ENDPOINT=http://172.30.2.1:3903
export K2V_BUCKET=my-bucket
cargo run --features=cli -- read-range my-partition-key --all
cargo run --features=cli -- insert my-partition-key my-sort-key --text "my string1"
cargo run --features=cli -- insert my-partition-key my-sort-key --text "my string2"
cargo run --features=cli -- insert my-partition-key my-sort-key2 --text "my string"
cargo run --features=cli -- read-range my-partition-key --all
causality=$(cargo run --features=cli -- read my-partition-key my-sort-key2 -b | head -n1)
cargo run --features=cli -- delete my-partition-key my-sort-key2 -c $causality
causality=$(cargo run --features=cli -- read my-partition-key my-sort-key -b | head -n1)
cargo run --features=cli -- insert my-partition-key my-sort-key --text "my string3" -c $causality
cargo run --features=cli -- read-range my-partition-key --all
```
## Admin API
*Coming soon*

View file

@ -1,5 +1,5 @@
+++
title = "Integrations"
title = "Existing integrations"
weight = 3
sort_by = "weight"
template = "documentation.html"
@ -14,7 +14,6 @@ In particular, you will find here instructions to connect it with:
- [Applications](@/documentation/connect/apps/index.md)
- [Website hosting](@/documentation/connect/websites.md)
- [Software repositories](@/documentation/connect/repositories.md)
- [Your own code](@/documentation/connect/code.md)
- [FUSE](@/documentation/connect/fs.md)
### Generic instructions

View file

@ -9,7 +9,7 @@ In this section, we cover the following web applications:
|------|--------|------|
| [Nextcloud](#nextcloud) | ✅ | Both Primary Storage and External Storage are supported |
| [Peertube](#peertube) | ✅ | Must be configured with the website endpoint |
| [Mastodon](#mastodon) | ❓ | Not yet tested |
| [Mastodon](#mastodon) | ✅ | Natively supported |
| [Matrix](#matrix) | ✅ | Tested with `synapse-s3-storage-provider` |
| [Pixelfed](#pixelfed) | ❓ | Not yet tested |
| [Pleroma](#pleroma) | ❓ | Not yet tested |
@ -224,7 +224,135 @@ You can now reload the page and see in your browser console that data are fetche
## Mastodon
https://docs.joinmastodon.org/admin/config/#cdn
Mastodon natively supports the S3 protocol to store media files, and it works out-of-the-box with Garage.
You will need to expose your Garage bucket as a website: that way, media files will be served directly from Garage.
### Performance considerations
Mastodon tends to store many small objects over time: expect hundreds of thousands of objects,
with average object size ranging from 50 KB to 150 KB.
As such, your Garage cluster should be configured appropriately for good performance:
- use Garage v0.8.0 or higher with the [LMDB database engine](@/documentation/reference-manual/configuration.md#db-engine-since-v0-8-0).
With the default Sled database engine, your database could quickly end up taking tens of GB of disk space.
- the Garage database should be stored on a SSD
### Creating your bucket
This is the usual Garage setup:
```bash
garage key new --name mastodon-key
garage bucket create mastodon-data
garage bucket allow mastodon-data --read --write --key mastodon-key
```
Note the Key ID and Secret Key.
### Exposing your bucket as a website
Create a DNS name to serve your media files, such as `my-social-media.mydomain.tld`.
This name will be publicly exposed to the users of your Mastodon instance: they
will load images directly from this DNS name.
As [documented here](@/documentation/cookbook/exposing-websites.md),
add this DNS name as alias to your bucket, and expose it as a website:
```bash
garage bucket alias mastodon-data my-social-media.mydomain.tld
garage bucket website --allow mastodon-data
```
Then you will likely need to [setup a reverse proxy](@/documentation/cookbook/reverse-proxy.md)
in front of it to serve your media files over HTTPS.
### Cleaning up old media files before migration
Mastodon instances quickly accumulate a lot of media files from the federation.
Most of them are not strictly necessary because they can be fetched again from
other servers. As such, it is highly recommended to clean them up before
migration, as this will greatly reduce the migration time.
From the [official Mastodon documentation](https://docs.joinmastodon.org/admin/tootctl/#media):
```bash
$ RAILS_ENV=production bin/tootctl media remove --days 3
$ RAILS_ENV=production bin/tootctl media remove-orphans
$ RAILS_ENV=production bin/tootctl preview_cards remove --days 15
```
Here is a typical disk usage for a small but multi-year instance after cleanup:
```bash
$ RAILS_ENV=production bin/tootctl media usage
Attachments: 5.67 GB (1.14 GB local)
Custom emoji: 295 MB (0 Bytes local)
Preview cards: 154 MB
Avatars: 3.77 GB (127 KB local)
Headers: 8.72 GB (242 KB local)
Backups: 0 Bytes
Imports: 1.7 KB
Settings: 0 Bytes
```
Unfortunately, [old avatars and headers cannot currently be cleaned up](https://github.com/mastodon/mastodon/issues/9567).
### Migrating your data
Data migration should be done with an efficient S3 client.
The [minio client](@/documentation/connect/cli.md#minio-client) is a good choice
thanks to its mirror mode:
```bash
mc mirror ./public/system/ garage/mastodon-data
```
Here is a typical bucket usage after all data has been migrated:
```bash
$ garage bucket info mastodon-data
Size: 20.3 GiB (21.8 GB)
Objects: 175968
```
### Configuring Mastodon
In your `.env.production` configuration file:
```bash
S3_ENABLED=true
# Internal access to Garage
S3_ENDPOINT=http://my-garage-instance.mydomain.tld:3900
S3_REGION=garage
S3_BUCKET=mastodon-data
# Change this (Key ID and Secret Key of your Garage key)
AWS_ACCESS_KEY_ID=GKe88df__CHANGETHIS__c5145
AWS_SECRET_ACCESS_KEY=a2f7__CHANGETHIS__77fcfcf7a58f47a4aa4431f2e675c56da37821a1070000
# What name gets exposed to users (HTTPS is implicit)
S3_ALIAS_HOST=my-social-media.mydomain.tld
```
For more details, see the [reference Mastodon documentation](https://docs.joinmastodon.org/admin/config/#cdn).
Restart all Mastodon services and everything should now be using Garage!
You can check the URLs of images in the Mastodon web client, they should start
with `https://my-social-media.mydomain.tld`.
### Last migration sync
After Mastodon is successfully using Garage, you can run a last sync from the local filesystem to Garage:
```bash
mc mirror --newer-than "3h" ./public/system/ garage/mastodon-data
```
### References
[cybrespace's guide to migrate to S3](https://github.com/cybrespace/cybrespace-meta/blob/master/s3.md)
(the guide is for Amazon S3, so the configuration is a bit different, but the rest is similar)
## Matrix

View file

@ -5,12 +5,14 @@ weight = 25
## Configuring a bucket for website access
There are two methods to expose buckets as website:
There are three methods to expose buckets as website:
1. using the PutBucketWebsite S3 API call, which is allowed for access keys that have the owner permission bit set
2. from the Garage CLI, by an administrator of the cluster
3. using the Garage administration API
The `PutBucketWebsite` API endpoint [is documented](https://docs.aws.amazon.com/AmazonS3/latest/API/API_PutBucketWebsite.html) in the official AWS docs.
This endpoint can also be called [using `aws s3api`](https://docs.aws.amazon.com/cli/latest/reference/s3api/put-bucket-website.html) on the command line.
The website configuration supported by Garage is only a subset of the possibilities on Amazon S3: redirections are not supported, only the index document and error document can be specified.

View file

@ -20,57 +20,76 @@ sudo apt-get update
sudo apt-get install build-essential
```
## Using source from the Gitea repository (recommended)
## Building from source from the Gitea repository
The primary location for Garage's source code is the
[Gitea repository](https://git.deuxfleurs.fr/Deuxfleurs/garage).
[Gitea repository](https://git.deuxfleurs.fr/Deuxfleurs/garage),
which contains all of the released versions as well as the code
for the development of the next version.
Clone the repository and build Garage with the following commands:
Clone the repository and enter it as follows:
```bash
git clone https://git.deuxfleurs.fr/Deuxfleurs/garage.git
cd garage
cargo build
```
Be careful, as this will make a debug build of Garage, which will be extremely slow!
To make a release build, invoke `cargo build --release` (this takes much longer).
The binaries built this way are found in `target/{debug,release}/garage`.
## Using source from `crates.io`
Garage's source code is published on `crates.io`, Rust's official package repository.
This means you can simply ask `cargo` to download and build this source code for you:
If you wish to build a specific version of Garage, check out the corresponding tag. For instance:
```bash
cargo install garage
git tag # List available tags
git checkout v0.8.0 # Change v0.8.0 with the version you wish to build
```
That's all, `garage` should be in `$HOME/.cargo/bin`.
Otherwise you will be building a development build from the `main` branch
that includes all of the changes to be released in the next version.
Be careful that such a build might be unstable or contain bugs,
and could be incompatible with nodes that run stable versions of Garage.
You can add this folder to your `$PATH` or copy the binary somewhere else on your system.
For instance:
Finally, build Garage with the following command:
```bash
sudo cp $HOME/.cargo/bin/garage /usr/local/bin/garage
cargo build --release
```
The binary built this way can now be found in `target/release/garage`.
You may simply copy this binary to somewhere in your `$PATH` in order to
have the `garage` command available in your shell, for instance:
## Selecting features to activate in your build
```bash
sudo cp target/release/garage /usr/local/bin/garage
```
Garage supports a number of compilation options in the form of Cargo features,
If you are planning to develop Garage,
you might be interested in producing debug builds, which compile faster but run slower:
this can be done by removing the `--release` flag, and the resulting build can then
be found in `target/debug/garage`.
## List of available Cargo feature flags
Garage supports a number of compilation options in the form of Cargo feature flags,
which can be used to provide builds adapted to your system and your use case.
The following features are available:
To produce a build with a given set of features, invoke the `cargo build` command
as follows:
| Feature | Enabled | Description |
| ------- | ------- | ----------- |
| `bundled-libs` | BY DEFAULT | Use bundled version of sqlite3, zstd, lmdb and libsodium |
| `system-libs` | optional | Use system version of sqlite3, zstd, lmdb and libsodium if available (exclusive with `bundled-libs`, build using `cargo build --no-default-features --features system-libs`) |
| `k2v` | optional | Enable the experimental K2V API (if used, all nodes on your Garage cluster must have it enabled as well) |
| `kubernetes-discovery` | optional | Enable automatic registration and discovery of cluster nodes through the Kubernetes API |
| `metrics` | BY DEFAULT | Enable collection of metrics in Prometheus format on the admin API |
```bash
# This will build the default feature set plus feature1, feature2 and feature3
cargo build --release --features feature1,feature2,feature3
# This will build ONLY feature1, feature2 and feature3
cargo build --release --no-default-features \
--features feature1,feature2,feature3
```
The following feature flags are available in v0.8.0:
| Feature flag | Enabled | Description |
| ------------ | ------- | ----------- |
| `bundled-libs` | *by default* | Use bundled version of sqlite3, zstd, lmdb and libsodium |
| `system-libs` | optional | Use system version of sqlite3, zstd, lmdb and libsodium<br>if available (exclusive with `bundled-libs`, build using<br>`cargo build --no-default-features --features system-libs`) |
| `k2v` | optional | Enable the experimental K2V API (if used, all nodes on your<br>Garage cluster must have it enabled as well) |
| `kubernetes-discovery` | optional | Enable automatic registration and discovery<br>of cluster nodes through the Kubernetes API |
| `metrics` | *by default* | Enable collection of metrics in Prometheus format on the admin API |
| `telemetry-otlp` | optional | Enable collection of execution traces using OpenTelemetry |
| `sled` | BY DEFAULT | Enable using Sled to store Garage's metadata |
| `sled` | *by default* | Enable using Sled to store Garage's metadata |
| `lmdb` | optional | Enable using LMDB to store Garage's metadata |
| `sqlite` | optional | Enable using Sqlite3 to store Garage's metadata |

View file

@ -0,0 +1,91 @@
+++
title = "Deploying on Kubernetes"
weight = 32
+++
Garage can also be deployed on a Kubernetes cluster via a Helm chart.
## Deploying
Firstly clone the repository:
```bash
git clone https://git.deuxfleurs.fr/Deuxfleurs/garage
cd garage/script/helm
```
Deploy with default options:
```bash
helm install --create-namespace --namespace garage garage ./garage
```
Or deploy with custom values:
```bash
helm install --create-namespace --namespace garage garage ./garage -f values.override.yaml
```
After deploying, cluster layout must be configured manually as described in [Creating a cluster layout](@/documentation/quick-start/_index.md#creating-a-cluster-layout). Use the following command to access garage CLI:
```bash
kubectl exec --stdin --tty -n garage garage-0 -- ./garage status
```
## Overriding default values
All possible configuration values can be found with:
```bash
helm show values ./garage
```
This is an example `values.override.yaml` for deploying in a microk8s cluster with a https s3 api ingress route:
```yaml
garage:
# Use only 2 replicas per object
replicationMode: "2"
# Use recommended lmdb db engine
dbEngine: "lmdb"
# Start 4 instances (StatefulSets) of garage
deployment:
replicaCount: 4
# Override default storage class and size
persistence:
meta:
storageClass: "openebs-hostpath"
size: 100Mi
data:
storageClass: "openebs-hostpath"
size: 1Gi
ingress:
s3:
api:
enabled: true
className: "public"
annotations:
cert-manager.io/cluster-issuer: "letsencrypt-prod"
nginx.ingress.kubernetes.io/proxy-body-size: 500m
hosts:
- host: s3-api.my-domain.com
paths:
- path: /
pathType: Prefix
tls:
- secretName: garage-ingress-cert
hosts:
- s3-api.my-domain.com
```
## Removing
```bash
helm delete --namespace garage garage
```
Note that this will leave behind the custom CRD `garagenodes.deuxfleurs.fr`, which must be removed manually if desired.

View file

@ -0,0 +1,306 @@
+++
title = "Monitoring Garage"
weight = 40
+++
Garage exposes some internal metrics in the Prometheus data format.
This page explains how to exploit these metrics.
## Setting up monitoring
### Enabling the Admin API endpoint
If you have not already enabled the [administration API endpoint](@/documentation/reference-manual/admin-api.md), do so by adding the following lines to your configuration file:
```toml
[admin]
api_bind_addr = "0.0.0.0:3903"
```
This will allow anyone to scrape Prometheus metrics by fetching
`http://localhost:3903/metrics`. If you want to restrict access
to the exported metrics, set the `metrics_token` configuration value
to a bearer token to be used when fetching the metrics endpoint.
### Setting up Prometheus and Grafana
Add a scrape config to your Prometheus daemon to scrape metrics from
all of your nodes:
```yaml
scrape_configs:
- job_name: 'garage'
static_configs:
- targets:
- 'node1.mycluster:3903'
- 'node2.mycluster:3903'
- 'node3.mycluster:3903'
```
If you have set a metrics token in your Garage configuration file,
add the following lines in your Prometheus scrape config:
```yaml
authorization:
type: Bearer
credentials: 'your metrics token'
```
To visualize the scraped data in Grafana,
you can either import our [Grafana dashboard for Garage](https://git.deuxfleurs.fr/Deuxfleurs/garage/raw/branch/main/script/telemetry/grafana-garage-dashboard-prometheus.json)
or make your own.
We detail below the list of exposed metrics and their meaning.
## List of exported metrics
### Metrics of the API endpoints
#### `api_admin_request_counter` (counter)
Counts the number of requests to a given endpoint of the administration API. Example:
```
api_admin_request_counter{api_endpoint="Metrics"} 127041
```
#### `api_admin_request_duration` (histogram)
Evaluates the duration of API calls to the various administration API endpoints. Example:
```
api_admin_request_duration_bucket{api_endpoint="Metrics",le="0.5"} 127041
api_admin_request_duration_sum{api_endpoint="Metrics"} 605.250344830999
api_admin_request_duration_count{api_endpoint="Metrics"} 127041
```
#### `api_s3_request_counter` (counter)
Counts the number of requests to a given endpoint of the S3 API. Example:
```
api_s3_request_counter{api_endpoint="CreateMultipartUpload"} 1
```
#### `api_s3_error_counter` (counter)
Counts the number of requests to a given endpoint of the S3 API that returned an error. Example:
```
api_s3_error_counter{api_endpoint="GetObject",status_code="404"} 39
```
#### `api_s3_request_duration` (histogram)
Evaluates the duration of API calls to the various S3 API endpoints. Example:
```
api_s3_request_duration_bucket{api_endpoint="CreateMultipartUpload",le="0.5"} 1
api_s3_request_duration_sum{api_endpoint="CreateMultipartUpload"} 0.046340762
api_s3_request_duration_count{api_endpoint="CreateMultipartUpload"} 1
```
#### `api_k2v_request_counter` (counter), `api_k2v_error_counter` (counter), `api_k2v_request_duration` (histogram)
Same as for S3, for the K2V API.
### Metrics of the Web endpoint
#### `web_request_counter` (counter)
Number of requests to the web endpoint
```
web_request_counter{method="GET"} 80
```
#### `web_request_duration` (histogram)
Duration of requests to the web endpoint
```
web_request_duration_bucket{method="GET",le="0.5"} 80
web_request_duration_sum{method="GET"} 1.0528433229999998
web_request_duration_count{method="GET"} 80
```
#### `web_error_counter` (counter)
Number of requests to the web endpoint resulting in errors
```
web_error_counter{method="GET",status_code="404 Not Found"} 64
```
### Metrics of the data block manager
#### `block_bytes_read`, `block_bytes_written` (counter)
Number of bytes read from/written to disk in the data storage directory.
```
block_bytes_read 120586322022
block_bytes_written 3386618077
```
#### `block_read_duration`, `block_write_duration` (histograms)
Evaluates the duration of the reading/writing of individual data blocks in the data storage directory.
```
block_read_duration_bucket{le="0.5"} 169229
block_read_duration_sum 2761.6902550310056
block_read_duration_count 169240
block_write_duration_bucket{le="0.5"} 3559
block_write_duration_sum 195.59170078500006
block_write_duration_count 3571
```
#### `block_delete_counter` (counter)
Counts the number of data blocks that have been deleted from storage.
```
block_delete_counter 122
```
#### `block_resync_counter` (counter), `block_resync_duration` (histogram)
Counts the number of resync operations the node has executed, and evaluates their duration.
```
block_resync_counter 308897
block_resync_duration_bucket{le="0.5"} 308892
block_resync_duration_sum 139.64204196100016
block_resync_duration_count 308897
```
#### `block_resync_queue_length` (gauge)
The number of block hashes currently queued for a resync.
It is normal for this value to be nonzero for long periods of time.
```
block_resync_queue_length 0
```
#### `block_resync_errored_blocks` (gauge)
The number of block hashes that we were unable to resync last time we tried.
**THIS SHOULD BE ZERO, OR FALL BACK TO ZERO RAPIDLY, IN A HEALTHY CLUSTER.**
Persistent nonzero values indicate that some data is likely to be lost.
```
block_resync_errored_blocks 0
```
### Metrics related to RPCs (remote procedure calls) between nodes
#### `rpc_request_counter` (counter)
Number of RPC requests emitted
```
rpc_request_counter{from="<this node>",rpc_endpoint="garage_block/manager.rs/Rpc",to="<remote node>"} 176
```
#### `rpc_netapp_error_counter` (counter)
Number of communication errors (errors in the Netapp library, generally due to disconnected nodes)
```
rpc_netapp_error_counter{from="<this node>",rpc_endpoint="garage_block/manager.rs/Rpc",to="<remote node>"} 354
```
#### `rpc_timeout_counter` (counter)
Number of RPC timeouts, should be close to zero in a healthy cluster.
```
rpc_timeout_counter{from="<this node>",rpc_endpoint="garage_rpc/membership.rs/SystemRpc",to="<remote node>"} 1
```
#### `rpc_duration` (histogram)
The duration of internal RPC calls between Garage nodes.
```
rpc_duration_bucket{from="<this node>",rpc_endpoint="garage_block/manager.rs/Rpc",to="<remote node>",le="0.5"} 166
rpc_duration_sum{from="<this node>",rpc_endpoint="garage_block/manager.rs/Rpc",to="<remote node>"} 35.172253716
rpc_duration_count{from="<this node>",rpc_endpoint="garage_block/manager.rs/Rpc",to="<remote node>"} 174
```
### Metrics of the metadata table manager
#### `table_gc_todo_queue_length` (gauge)
Table garbage collector TODO queue length
```
table_gc_todo_queue_length{table_name="block_ref"} 0
```
#### `table_get_request_counter` (counter), `table_get_request_duration` (histogram)
Number of get/get_range requests internally made on each table, and their duration.
```
table_get_request_counter{table_name="bucket_alias"} 315
table_get_request_duration_bucket{table_name="bucket_alias",le="0.5"} 315
table_get_request_duration_sum{table_name="bucket_alias"} 0.048509778000000024
table_get_request_duration_count{table_name="bucket_alias"} 315
```
#### `table_put_request_counter` (counter), `table_put_request_duration` (histogram)
Number of insert/insert_many requests internally made on this table, and their duration
```
table_put_request_counter{table_name="block_ref"} 677
table_put_request_duration_bucket{table_name="block_ref",le="0.5"} 677
table_put_request_duration_sum{table_name="block_ref"} 61.617528636
table_put_request_duration_count{table_name="block_ref"} 677
```
#### `table_internal_delete_counter` (counter)
Number of value deletions in the tree (due to GC or repartitioning)
```
table_internal_delete_counter{table_name="block_ref"} 2296
```
#### `table_internal_update_counter` (counter)
Number of value updates where the value actually changes (includes creation of new key and update of existing key)
```
table_internal_update_counter{table_name="block_ref"} 5996
```
#### `table_merkle_updater_todo_queue_length` (gauge)
Merkle tree updater TODO queue length (should fall to zero rapidly)
```
table_merkle_updater_todo_queue_length{table_name="block_ref"} 0
```
#### `table_sync_items_received`, `table_sync_items_sent` (counters)
Number of data items sent to/received from other nodes during resync procedures
```
table_sync_items_received{from="<remote node>",table_name="bucket_v2"} 3
table_sync_items_sent{table_name="block_ref",to="<remote node>"} 2
```

View file

@ -11,8 +11,9 @@ We recommend first following the [quick start guide](@/documentation/quick-start
to get familiar with Garage's command line and usage patterns.
## Preparing your environment
## Prerequisites
### Prerequisites
To run a real-world deployment, make sure the following conditions are met:
@ -21,10 +22,6 @@ To run a real-world deployment, make sure the following conditions are met:
- Each machine has a public IP address which is reachable by other machines.
Running behind a NAT is likely to be possible but hasn't been tested for the latest version (TODO).
- Ideally, each machine should have a SSD available in addition to the HDD you are dedicating
to Garage. This will allow for faster access to metadata and has the potential
to significantly reduce Garage's response times.
- This guide will assume you are using Docker containers to deploy Garage on each node.
Garage can also be run independently, for instance as a [Systemd service](@/documentation/cookbook/systemd.md).
You can also use an orchestrator such as Nomad or Kubernetes to automatically manage
@ -49,17 +46,53 @@ available in the different locations of your cluster is roughly the same.
For instance, here, the Mercury node could be moved to Brussels; this would allow the cluster
to store 2 TB of data in total.
### Best practices
- If you have fast dedicated networking between all your nodes, and are planning to store
very large files, bump the `block_size` configuration parameter to 10 MB
(`block_size = 10485760`).
- Garage stores its files in two locations: it uses a metadata directory to store frequently-accessed
small metadata items, and a data directory to store data blocks of uploaded objects.
Ideally, the metadata directory would be stored on an SSD (smaller but faster),
and the data directory would be stored on an HDD (larger but slower).
- For the data directory, Garage already does checksumming and integrity verification,
so there is no need to use a filesystem such as BTRFS or ZFS that does it.
We recommend using XFS for the data partition, as it has the best performance.
EXT4 is not recommended as it has more strict limitations on the number of inodes,
which might cause issues with Garage when large numbers of objects are stored.
- If you only have an HDD and no SSD, it's fine to put your metadata alongside the data
on the same drive. Having lots of RAM for your kernel to cache the metadata will
help a lot with performance. Make sure to use the LMDB database engine,
instead of Sled, which suffers from quite bad performance degradation on HDDs.
Sled is still the default for legacy reasons, but is not recommended anymore.
- For the metadata storage, Garage does not do checksumming and integrity
verification on its own. If you are afraid of bitrot/data corruption,
put your metadata directory on a BTRFS partition. Otherwise, just use regular
EXT4 or XFS.
- Having a single server with several storage drives is currently not very well
supported in Garage ([#218](https://git.deuxfleurs.fr/Deuxfleurs/garage/issues/218)).
For an easy setup, just put all your drives in a RAID0 or a ZFS RAIDZ array.
If you're adventurous, you can try to format each of your disks as
a separate XFS partition, and then run one `garage` daemon per disk drive,
or use something like [`mergerfs`](https://github.com/trapexit/mergerfs) to merge
all your disks in a single union filesystem that spreads load over them.
## Get a Docker image
Our docker image is currently named `dxflrs/amd64_garage` and is stored on the [Docker Hub](https://hub.docker.com/r/dxflrs/amd64_garage/tags?page=1&ordering=last_updated).
We encourage you to use a fixed tag (eg. `v0.4.0`) and not the `latest` tag.
For this example, we will use the latest published version at the time of the writing which is `v0.4.0` but it's up to you
to check [the most recent versions on the Docker Hub](https://hub.docker.com/r/dxflrs/amd64_garage/tags?page=1&ordering=last_updated).
Our docker image is currently named `dxflrs/garage` and is stored on the [Docker Hub](https://hub.docker.com/r/dxflrs/garage/tags?page=1&ordering=last_updated).
We encourage you to use a fixed tag (eg. `v0.8.0`) and not the `latest` tag.
For this example, we will use the latest published version at the time of the writing which is `v0.8.0` but it's up to you
to check [the most recent versions on the Docker Hub](https://hub.docker.com/r/dxflrs/garage/tags?page=1&ordering=last_updated).
For example:
```
sudo docker pull dxflrs/amd64_garage:v0.4.0
sudo docker pull dxflrs/garage:v0.8.0
```
## Deploying and configuring Garage
@ -81,6 +114,7 @@ A valid `/etc/garage/garage.toml` for our cluster would look as follows:
```toml
metadata_dir = "/var/lib/garage/meta"
data_dir = "/var/lib/garage/data"
db_engine = "lmdb"
replication_mode = "3"
@ -90,8 +124,6 @@ rpc_bind_addr = "[::]:3901"
rpc_public_addr = "<this node's public IP>:3901"
rpc_secret = "<RPC secret>"
bootstrap_peers = []
[s3_api]
s3_region = "garage"
api_bind_addr = "[::]:3900"
@ -125,13 +157,28 @@ docker run \
-v /etc/garage.toml:/etc/garage.toml \
-v /var/lib/garage/meta:/var/lib/garage/meta \
-v /var/lib/garage/data:/var/lib/garage/data \
lxpz/garage_amd64:v0.4.0
dxflrs/garage:v0.8.0
```
It should be restarted automatically at each reboot.
Please note that we use host networking as otherwise Docker containers
can not communicate with IPv6.
If you want to use `docker-compose`, you may use the following `docker-compose.yml` file as a reference:
```yaml
version: "3"
services:
garage:
image: dxflrs/garage:v0.8.0
network_mode: "host"
restart: unless-stopped
volumes:
- /etc/garage.toml:/etc/garage.toml
- /var/lib/garage/meta:/var/lib/garage/meta
- /var/lib/garage/data:/var/lib/garage/data
```
Upgrading between Garage versions should be supported transparently,
but please check the release notes before doing so!
To upgrade, simply stop and remove this container and

View file

@ -1,6 +1,6 @@
+++
title = "Recovering from failures"
weight = 35
weight = 50
+++
Garage is meant to work on old, second-hand hardware.

View file

@ -70,14 +70,16 @@ A possible configuration:
```nginx
upstream s3_backend {
# if you have a garage instance locally
# If you have a garage instance locally.
server 127.0.0.1:3900;
# you can also put your other instances
# You can also put your other instances.
server 192.168.1.3:3900;
# domain names also work
# Domain names also work.
server garage1.example.com:3900;
# you can assign weights if you have some servers
# that are more powerful than others
# A "backup" server is only used if all others have failed.
server garage-remote.example.com:3900 backup;
# You can assign weights if you have some servers
# that can serve more requests than others.
server garage2.example.com:3900 weight=2;
}
@ -96,6 +98,8 @@ server {
proxy_pass http://s3_backend;
proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for;
proxy_set_header Host $host;
# Disable buffering to a temporary file.
proxy_max_temp_file_size 0;
}
}
```

View file

@ -1,6 +1,6 @@
+++
title = "Upgrading Garage"
weight = 40
weight = 60
+++
Garage is a stateful clustered application, where all nodes are communicating together and share data structures.

View file

@ -1,6 +1,6 @@
+++
title = "Design"
weight = 5
weight = 6
sort_by = "weight"
template = "documentation.html"
+++

View file

@ -1,6 +1,6 @@
+++
title = "Benchmarks"
weight = 10
weight = 40
+++
With Garage, we wanted to build a software defined storage service that follows the [KISS principle](https://en.wikipedia.org/wiki/KISS_principle),

View file

@ -1,23 +1,23 @@
+++
title = "Goals and use cases"
weight = 5
weight = 10
+++
## Goals and non-goals
Garage is a lightweight geo-distributed data store that implements the
[Amazon S3](https://docs.aws.amazon.com/AmazonS3/latest/API/Welcome.html)
object storage protocole. It enables applications to store large blobs such
object storage protocol. It enables applications to store large blobs such
as pictures, video, images, documents, etc., in a redundant multi-node
setting. S3 is versatile enough to also be used to publish a static
website.
Garage is an opinionated object storage solution, we focus on the following **desirable properties**:
- **Internet enabled**: made for multi-sites (eg. datacenters, offices, households, etc.) interconnected through regular Internet connections.
- **Self-contained & lightweight**: works everywhere and integrates well in existing environments to target [hyperconverged infrastructures](https://en.wikipedia.org/wiki/Hyper-converged_infrastructure).
- **Highly resilient**: highly resilient to network failures, network latency, disk failures, sysadmin failures.
- **Simple**: simple to understand, simple to operate, simple to debug.
- **Internet enabled**: made for multi-sites (eg. datacenters, offices, households, etc.) interconnected through regular Internet connections.
We also noted that the pursuit of some other goals is detrimental to our initial goals.
The following have been identified as **non-goals** (if these points matter to you, you should not use Garage):

View file

@ -20,6 +20,49 @@ In the meantime, you can find some information at the following links:
- [an old design draft](@/documentation/working-documents/design-draft.md)
## Request routing logic
Data retrieval requests to Garage endpoints (S3 API and websites) are resolved
to an individual object in a bucket. Since objects are replicated to multiple nodes
Garage must ensure consistency before answering the request.
### Using quorum to ensure consistency
Garage ensures consistency by attempting to establish a quorum with the
data nodes responsible for the object. When a majority of the data nodes
have provided metadata on an object, Garage can then answer the request.
When a request arrives Garage will, assuming the recommended 3 replicas, perform the following actions:
- Make a request to the two preferred nodes for object metadata
- Try the third node if one of the two initial requests fails
- Check that the metadata from at least 2 nodes match
- Check that the object hasn't been marked deleted
- Answer the request with inline data from metadata if object is small enough
- Or get data blocks from the preferred nodes and answer using the assembled object
Garage dynamically determines which nodes to query based on health, preference, and
which nodes actually host a given data. Garage has no concept of "primary" so any
healthy node with the data can be used as long as a quorum is reached for the metadata.
### Node health
Garage keeps a TCP session open to each node in the cluster and periodically pings them. If a connection
cannot be established, or a node fails to answer a number of pings, the target node is marked as failed.
Failed nodes are not used for quorum or other internal requests.
### Node preference
Garage prioritizes which nodes to query according to a few criteria:
- A node always prefers itself if it can answer the request
- Then the node prioritizes nodes in the same zone
- Finally the nodes with the lowest latency are prioritized
For further reading on the cluster structure look at the [gateway](@/documentation/cookbook/gateways.md)
and [cluster layout management](@/documentation/reference-manual/layout.md) pages.
## Garbage collection
A faulty garbage collection procedure has been the cause of

View file

@ -1,6 +1,6 @@
+++
title = "Related work"
weight = 15
weight = 50
+++
## Context

View file

@ -1,6 +1,6 @@
+++
title = "Development"
weight = 6
weight = 7
sort_by = "weight"
template = "documentation.html"
+++

View file

@ -9,6 +9,15 @@ Let's start your Garage journey!
In this chapter, we explain how to deploy Garage as a single-node server
and how to interact with it.
## What is Garage?
Before jumping in, you might be interested in reading the following pages:
- [Goals and use cases](@/documentation/design/goals.md)
- [List of features](@/documentation/reference-manual/features.md)
## Scope of this tutorial
Our goal is to introduce you to Garage's workflows.
Following this guide is recommended before moving on to
[configuring a multi-node cluster](@/documentation/cookbook/real-world.md).
@ -33,25 +42,25 @@ you can [build Garage from source](@/documentation/cookbook/from-source.md).
## Configuring and starting Garage
### Writing a first configuration file
### Generating a first configuration file
This first configuration file should allow you to get started easily with the simplest
possible Garage deployment.
**Save it as `/etc/garage.toml`.**
You can also store it somewhere else, but you will have to specify `-c path/to/garage.toml`
at each invocation of the `garage` binary (for example: `garage -c ./garage.toml server`, `garage -c ./garage.toml status`).
```toml
We will create it with the following command line
to generate unique and private secrets for security reasons:
```bash
cat > garage.toml <<EOF
metadata_dir = "/tmp/meta"
data_dir = "/tmp/data"
db_engine = "lmdb"
replication_mode = "none"
rpc_bind_addr = "[::]:3901"
rpc_public_addr = "127.0.0.1:3901"
rpc_secret = "1799bccfd7411eddcf9ebd316bc1f5287ad12a68094e1c6ac6abde7e6feae1ec"
bootstrap_peers = []
rpc_secret = "$(openssl rand -hex 32)"
[s3_api]
s3_region = "garage"
@ -62,12 +71,26 @@ root_domain = ".s3.garage.localhost"
bind_addr = "[::]:3902"
root_domain = ".web.garage.localhost"
index = "index.html"
[k2v_api]
api_bind_addr = "[::]:3904"
[admin]
api_bind_addr = "0.0.0.0:3903"
admin_token = "$(openssl rand -base64 32)"
EOF
```
The `rpc_secret` value provided above is just an example. It will work, but in
order to secure your cluster you will need to use another one. You can generate
such a value with `openssl rand -hex 32`.
Now that your configuration file has been created, you can put
it in the right place. By default, garage looks at **`/etc/garage.toml`.**
You can also store it somewhere else, but you will have to specify `-c path/to/garage.toml`
at each invocation of the `garage` binary (for example: `garage -c ./garage.toml server`, `garage -c ./garage.toml status`).
As you can see, the `rpc_secret` is a 32-byte hexadecimal string.
You can regenerate it with `openssl rand -hex 32`.
If you target a cluster deployment with multiple nodes, make sure that
you use the same value for all nodes.
As you can see in the `metadata_dir` and `data_dir` parameters, we are saving Garage's data
in `/tmp` which gets erased when your system reboots. This means that data stored on this
@ -210,6 +233,7 @@ Now that we have a bucket and a key, we need to give permissions to the key on t
garage bucket allow \
--read \
--write \
--owner \
nextcloud-bucket \
--key nextcloud-app-key
```
@ -223,54 +247,73 @@ garage bucket info nextcloud-bucket
## Uploading and downloading from Garage
We recommend the use of MinIO Client to interact with Garage files (`mc`).
Instructions to install it and use it are provided on the
[MinIO website](https://docs.min.io/docs/minio-client-quickstart-guide.html).
Before reading the following, you need a working `mc` command on your path.
To download and upload files on garage, we can use a third-party tool named `awscli`.
Note that on certain Linux distributions such as Arch Linux, the Minio client binary
is called `mcli` instead of `mc` (to avoid name clashes with the Midnight Commander).
### Configure `mc`
### Install and configure `awscli`
You need your access key and secret key created above.
We will assume you are invoking `mc` on the same machine as the Garage server,
your S3 API endpoint is therefore `http://127.0.0.1:3900`.
For this whole configuration, you must set an alias name: we chose `my-garage`, that you will use for all commands.
Adapt the following command accordingly and run it:
If you have python on your system, you can install it with:
```bash
mc alias set \
my-garage \
http://127.0.0.1:3900 \
<access key> \
<secret key> \
--api S3v4
python -m pip install --user awscli
```
### Use `mc`
You can not list buckets from `mc` currently.
But the following commands and many more should work:
Now that `awscli` is installed, you must configure it to talk to your Garage instance,
with your key. There are multiple ways to do that, the simplest one is to create a file
named `~/.awsrc` with this content:
```bash
mc cp image.png my-garage/nextcloud-bucket
mc cp my-garage/nextcloud-bucket/image.png .
mc ls my-garage/nextcloud-bucket
mc mirror localdir/ my-garage/another-bucket
export AWS_ACCESS_KEY_ID=xxxx # put your Key ID here
export AWS_SECRET_ACCESS_KEY=xxxx # put your Secret key here
export AWS_DEFAULT_REGION='garage'
export AWS_ENDPOINT='http://localhost:3900'
function aws { command aws --endpoint-url $AWS_ENDPOINT $@ ; }
aws --version
```
Now, each time you want to use `awscli` on this target, run:
```bash
source ~/.awsrc
```
*You can create multiple files with different names if you
have multiple Garage clusters or different keys.
Switching from one cluster to another is as simple as
sourcing the right file.*
### Example usage of `awscli`
```bash
# list buckets
aws s3 ls
# list objects of a bucket
aws s3 ls s3://my_files
# copy from your filesystem to garage
aws s3 cp /proc/cpuinfo s3://my_files/cpuinfo.txt
# copy from garage to your filesystem
aws s3 cp s3://my_files/cpuinfo.txt /tmp/cpuinfo.txt
```
Note that you can use `awscli` for more advanced operations like
creating a bucket, pre-signing a request or managing your website.
[Read the full documentation to know more](https://awscli.amazonaws.com/v2/documentation/api/latest/reference/s3/index.html).
Some features are however not implemented like ACL or policy.
Check [our s3 compatibility list](@/documentation/reference-manual/s3-compatibility.md).
### Other tools for interacting with Garage
The following tools can also be used to send and receive files from/to Garage:
- the [AWS CLI](https://aws.amazon.com/cli/)
- [`rclone`](https://rclone.org/)
- [Cyberduck](https://cyberduck.io/)
- [`s3cmd`](https://s3tools.org/s3cmd)
- [minio-client](@/documentation/connect/cli.md#minio-client)
- [s3cmd](@/documentation/connect/cli.md#s3cmd)
- [rclone](@/documentation/connect/cli.md#rclone)
- [Cyberduck](@/documentation/connect/cli.md#cyberduck)
- [WinSCP](@/documentation/connect/cli.md#winscp)
Refer to the ["Integrations" section](@/documentation/connect/_index.md) to learn how to
configure application and command line utilities to integrate with Garage.
An exhaustive list is maintained in the ["Integrations" > "Browsing tools" section](@/documentation/connect/_index.md).

View file

@ -1,6 +1,6 @@
+++
title = "Reference Manual"
weight = 4
weight = 5
sort_by = "weight"
template = "documentation.html"
+++

View file

@ -1,6 +1,6 @@
+++
title = "Administration API"
weight = 16
weight = 60
+++
The Garage administration API is accessible through a dedicated server whose
@ -47,598 +47,13 @@ Returns internal Garage metrics in Prometheus format.
### Cluster operations
#### GetClusterStatus `GET /v0/status`
These endpoints are defined on a dedicated [Redocly page](https://garagehq.deuxfleurs.fr/api/garage-admin-v0.html). You can also download its [OpenAPI specification](https://garagehq.deuxfleurs.fr/api/garage-admin-v0.yml).
Returns the cluster's current status in JSON, including:
Requesting the API from the command line can be as simple as running:
- ID of the node being queried and its version of the Garage daemon
- Live nodes
- Currently configured cluster layout
- Staged changes to the cluster layout
Example response body:
```json
{
"node": "ec79480e0ce52ae26fd00c9da684e4fa56658d9c64cdcecb094e936de0bfe71f",
"garage_version": "git:v0.8.0",
"knownNodes": {
"ec79480e0ce52ae26fd00c9da684e4fa56658d9c64cdcecb094e936de0bfe71f": {
"addr": "10.0.0.11:3901",
"is_up": true,
"last_seen_secs_ago": 9,
"hostname": "node1"
},
"4a6ae5a1d0d33bf895f5bb4f0a418b7dc94c47c0dd2eb108d1158f3c8f60b0ff": {
"addr": "10.0.0.12:3901",
"is_up": true,
"last_seen_secs_ago": 1,
"hostname": "node2"
},
"23ffd0cdd375ebff573b20cc5cef38996b51c1a7d6dbcf2c6e619876e507cf27": {
"addr": "10.0.0.21:3901",
"is_up": true,
"last_seen_secs_ago": 7,
"hostname": "node3"
},
"e2ee7984ee65b260682086ec70026165903c86e601a4a5a501c1900afe28d84b": {
"addr": "10.0.0.22:3901",
"is_up": true,
"last_seen_secs_ago": 1,
"hostname": "node4"
}
},
"layout": {
"version": 12,
"roles": {
"ec79480e0ce52ae26fd00c9da684e4fa56658d9c64cdcecb094e936de0bfe71f": {
"zone": "dc1",
"capacity": 4,
"tags": [
"node1"
]
},
"4a6ae5a1d0d33bf895f5bb4f0a418b7dc94c47c0dd2eb108d1158f3c8f60b0ff": {
"zone": "dc1",
"capacity": 6,
"tags": [
"node2"
]
},
"23ffd0cdd375ebff573b20cc5cef38996b51c1a7d6dbcf2c6e619876e507cf27": {
"zone": "dc2",
"capacity": 10,
"tags": [
"node3"
]
}
},
"stagedRoleChanges": {
"e2ee7984ee65b260682086ec70026165903c86e601a4a5a501c1900afe28d84b": {
"zone": "dc2",
"capacity": 5,
"tags": [
"node4"
]
}
}
}
}
```bash
curl -H 'Authorization: Bearer s3cr3t' http://localhost:3903/v0/status | jq
```
#### ConnectClusterNodes `POST /v0/connect`
Instructs this Garage node to connect to other Garage nodes at specified addresses.
Example request body:
```json
[
"ec79480e0ce52ae26fd00c9da684e4fa56658d9c64cdcecb094e936de0bfe71f@10.0.0.11:3901",
"4a6ae5a1d0d33bf895f5bb4f0a418b7dc94c47c0dd2eb108d1158f3c8f60b0ff@10.0.0.12:3901"
]
```
The format of the string for a node to connect to is: `<node ID>@<ip address>:<port>`, same as in the `garage node connect` CLI call.
Example response:
```json
[
{
"success": true,
"error": null
},
{
"success": false,
"error": "Handshake error"
}
]
```
#### GetClusterLayout `GET /v0/layout`
Returns the cluster's current layout in JSON, including:
- Currently configured cluster layout
- Staged changes to the cluster layout
(the info returned by this endpoint is a subset of the info returned by GetClusterStatus)
Example response body:
```json
{
"version": 12,
"roles": {
"ec79480e0ce52ae26fd00c9da684e4fa56658d9c64cdcecb094e936de0bfe71f": {
"zone": "dc1",
"capacity": 4,
"tags": [
"node1"
]
},
"4a6ae5a1d0d33bf895f5bb4f0a418b7dc94c47c0dd2eb108d1158f3c8f60b0ff": {
"zone": "dc1",
"capacity": 6,
"tags": [
"node2"
]
},
"23ffd0cdd375ebff573b20cc5cef38996b51c1a7d6dbcf2c6e619876e507cf27": {
"zone": "dc2",
"capacity": 10,
"tags": [
"node3"
]
}
},
"stagedRoleChanges": {
"e2ee7984ee65b260682086ec70026165903c86e601a4a5a501c1900afe28d84b": {
"zone": "dc2",
"capacity": 5,
"tags": [
"node4"
]
}
}
}
```
#### UpdateClusterLayout `POST /v0/layout`
Send modifications to the cluster layout. These modifications will
be included in the staged role changes, visible in subsequent calls
of `GetClusterLayout`. Once the set of staged changes is satisfactory,
the user may call `ApplyClusterLayout` to apply the staged changes,
or `RevertClusterLayout` to clear all of the staged changes in
the layout.
Request body format:
```json
{
<node_id>: {
"capacity": <new_capacity>,
"zone": <new_zone>,
"tags": [
<new_tag>,
...
]
},
<node_id_to_remove>: null,
...
}
```
Contrary to the CLI that may update only a subset of the fields
`capacity`, `zone` and `tags`, when calling this API all of these
values must be specified.
#### ApplyClusterLayout `POST /v0/layout/apply`
Applies to the cluster the layout changes currently registered as
staged layout changes.
Request body format:
```json
{
"version": 13
}
```
Similarly to the CLI, the body must include the version of the new layout
that will be created, which MUST be 1 + the value of the currently
existing layout in the cluster.
#### RevertClusterLayout `POST /v0/layout/revert`
Clears all of the staged layout changes.
Request body format:
```json
{
"version": 13
}
```
Reverting the staged changes is done by incrementing the version number
and clearing the contents of the staged change list.
Similarly to the CLI, the body must include the incremented
version number, which MUST be 1 + the value of the currently
existing layout in the cluster.
### Access key operations
#### ListKeys `GET /v0/key`
Returns all API access keys in the cluster.
Example response:
```json
[
{
"id": "GK31c2f218a2e44f485b94239e",
"name": "test"
},
{
"id": "GKe10061ac9c2921f09e4c5540",
"name": "test2"
}
]
```
#### CreateKey `POST /v0/key`
Creates a new API access key.
Request body format:
```json
{
"name": "NameOfMyKey"
}
```
#### ImportKey `POST /v0/key/import`
Imports an existing API key.
Request body format:
```json
{
"accessKeyId": "GK31c2f218a2e44f485b94239e",
"secretAccessKey": "b892c0665f0ada8a4755dae98baa3b133590e11dae3bcc1f9d769d67f16c3835",
"name": "NameOfMyKey"
}
```
#### GetKeyInfo `GET /v0/key?id=<access key id>`
#### GetKeyInfo `GET /v0/key?search=<pattern>`
Returns information about the requested API access key.
If `id` is set, the key is looked up using its exact identifier (faster).
If `search` is set, the key is looked up using its name or prefix
of identifier (slower, all keys are enumerated to do this).
Example response:
```json
{
"name": "test",
"accessKeyId": "GK31c2f218a2e44f485b94239e",
"secretAccessKey": "b892c0665f0ada8a4755dae98baa3b133590e11dae3bcc1f9d769d67f16c3835",
"permissions": {
"createBucket": false
},
"buckets": [
{
"id": "70dc3bed7fe83a75e46b66e7ddef7d56e65f3c02f9f80b6749fb97eccb5e1033",
"globalAliases": [
"test2"
],
"localAliases": [],
"permissions": {
"read": true,
"write": true,
"owner": false
}
},
{
"id": "d7452a935e663fc1914f3a5515163a6d3724010ce8dfd9e4743ca8be5974f995",
"globalAliases": [
"test3"
],
"localAliases": [],
"permissions": {
"read": true,
"write": true,
"owner": false
}
},
{
"id": "e6a14cd6a27f48684579ec6b381c078ab11697e6bc8513b72b2f5307e25fff9b",
"globalAliases": [],
"localAliases": [
"test"
],
"permissions": {
"read": true,
"write": true,
"owner": true
}
},
{
"id": "96470e0df00ec28807138daf01915cfda2bee8eccc91dea9558c0b4855b5bf95",
"globalAliases": [
"alex"
],
"localAliases": [],
"permissions": {
"read": true,
"write": true,
"owner": true
}
}
]
}
```
#### DeleteKey `DELETE /v0/key?id=<access key id>`
Deletes an API access key.
#### UpdateKey `POST /v0/key?id=<access key id>`
Updates information about the specified API access key.
Request body format:
```json
{
"name": "NameOfMyKey",
"allow": {
"createBucket": true
},
"deny": {}
}
```
All fields (`name`, `allow` and `deny`) are optional.
If they are present, the corresponding modifications are applied to the key, otherwise nothing is changed.
The possible flags in `allow` and `deny` are: `createBucket`.
### Bucket operations
#### ListBuckets `GET /v0/bucket`
Returns all storage buckets in the cluster.
Example response:
```json
[
{
"id": "70dc3bed7fe83a75e46b66e7ddef7d56e65f3c02f9f80b6749fb97eccb5e1033",
"globalAliases": [
"test2"
],
"localAliases": []
},
{
"id": "96470e0df00ec28807138daf01915cfda2bee8eccc91dea9558c0b4855b5bf95",
"globalAliases": [
"alex"
],
"localAliases": []
},
{
"id": "d7452a935e663fc1914f3a5515163a6d3724010ce8dfd9e4743ca8be5974f995",
"globalAliases": [
"test3"
],
"localAliases": []
},
{
"id": "e6a14cd6a27f48684579ec6b381c078ab11697e6bc8513b72b2f5307e25fff9b",
"globalAliases": [],
"localAliases": [
{
"accessKeyId": "GK31c2f218a2e44f485b94239e",
"alias": "test"
}
]
}
]
```
#### GetBucketInfo `GET /v0/bucket?id=<bucket id>`
#### GetBucketInfo `GET /v0/bucket?globalAlias=<alias>`
Returns information about the requested storage bucket.
If `id` is set, the bucket is looked up using its exact identifier.
If `globalAlias` is set, the bucket is looked up using its global alias.
(both are fast)
Example response:
```json
{
"id": "afa8f0a22b40b1247ccd0affb869b0af5cff980924a20e4b5e0720a44deb8d39",
"globalAliases": [],
"websiteAccess": false,
"websiteConfig": null,
"keys": [
{
"accessKeyId": "GK31c2f218a2e44f485b94239e",
"name": "Imported key",
"permissions": {
"read": true,
"write": true,
"owner": true
},
"bucketLocalAliases": [
"debug"
]
}
],
"objects": 14827,
"bytes": 13189855625,
"unfinshedUploads": 0,
"quotas": {
"maxSize": null,
"maxObjects": null
}
}
```
#### CreateBucket `POST /v0/bucket`
Creates a new storage bucket.
Request body format:
```json
{
"globalAlias": "NameOfMyBucket"
}
```
OR
```json
{
"localAlias": {
"accessKeyId": "GK31c2f218a2e44f485b94239e",
"alias": "NameOfMyBucket",
"allow": {
"read": true,
"write": true,
"owner": false
}
}
}
```
OR
```json
{}
```
Creates a new bucket, either with a global alias, a local one,
or no alias at all.
Technically, you can also specify both `globalAlias` and `localAlias` and that would create
two aliases, but I don't see why you would want to do that.
#### DeleteBucket `DELETE /v0/bucket?id=<bucket id>`
Deletes a storage bucket. A bucket cannot be deleted if it is not empty.
Warning: this will delete all aliases associated with the bucket!
#### UpdateBucket `PUT /v0/bucket?id=<bucket id>`
Updates configuration of the given bucket.
Request body format:
```json
{
"websiteAccess": {
"enabled": true,
"indexDocument": "index.html",
"errorDocument": "404.html"
},
"quotas": {
"maxSize": 19029801,
"maxObjects": null
}
}
```
All fields (`websiteAccess` and `quotas`) are optional.
If they are present, the corresponding modifications are applied to the bucket, otherwise nothing is changed.
In `websiteAccess`: if `enabled` is `true`, `indexDocument` must be specified.
The field `errorDocument` is optional, if no error document is set a generic
error message is displayed when errors happen. Conversely, if `enabled` is
`false`, neither `indexDocument` nor `errorDocument` must be specified.
In `quotas`: new values of `maxSize` and `maxObjects` must both be specified, or set to `null`
to remove the quotas. An absent value will be considered the same as a `null`. It is not possible
to change only one of the two quotas.
### Operations on permissions for keys on buckets
#### BucketAllowKey `POST /v0/bucket/allow`
Allows a key to do read/write/owner operations on a bucket.
Request body format:
```json
{
"bucketId": "e6a14cd6a27f48684579ec6b381c078ab11697e6bc8513b72b2f5307e25fff9b",
"accessKeyId": "GK31c2f218a2e44f485b94239e",
"permissions": {
"read": true,
"write": true,
"owner": true
}
}
```
Flags in `permissions` which have the value `true` will be activated.
Other flags will remain unchanged.
#### BucketDenyKey `POST /v0/bucket/deny`
Denies a key from doing read/write/owner operations on a bucket.
Request body format:
```json
{
"bucketId": "e6a14cd6a27f48684579ec6b381c078ab11697e6bc8513b72b2f5307e25fff9b",
"accessKeyId": "GK31c2f218a2e44f485b94239e",
"permissions": {
"read": false,
"write": false,
"owner": true
}
}
```
Flags in `permissions` which have the value `true` will be deactivated.
Other flags will remain unchanged.
### Operations on bucket aliases
#### GlobalAliasBucket `PUT /v0/bucket/alias/global?id=<bucket id>&alias=<global alias>`
Empty body. Creates a global alias for a bucket.
#### GlobalUnaliasBucket `DELETE /v0/bucket/alias/global?id=<bucket id>&alias=<global alias>`
Removes a global alias for a bucket.
#### LocalAliasBucket `PUT /v0/bucket/alias/local?id=<bucket id>&accessKeyId=<access key ID>&alias=<local alias>`
Empty body. Creates a local alias for a bucket in the namespace of a specific access key.
#### LocalUnaliasBucket `DELETE /v0/bucket/alias/local?id=<bucket id>&accessKeyId=<access key ID>&alias=<local alias>`
Removes a local alias for a bucket in the namespace of a specific access key.
For more advanced use cases, we recommend using a SDK.
[Go to the "Build your own app" section to know how to use our SDKs](@/documentation/build/_index.md)

View file

@ -1,6 +1,6 @@
+++
title = "Garage CLI"
weight = 15
weight = 30
+++
The Garage CLI is mostly self-documented. Make use of the `help` subcommand

View file

@ -1,6 +1,6 @@
+++
title = "Configuration file format"
weight = 5
weight = 20
+++
Here is an example `garage.toml` configuration file that illustrates all of the possible options:
@ -9,8 +9,12 @@ Here is an example `garage.toml` configuration file that illustrates all of the
metadata_dir = "/var/lib/garage/meta"
data_dir = "/var/lib/garage/data"
db_engine = "lmdb"
block_size = 1048576
block_manager_background_tranquility = 2
sled_cache_capacity = 134217728
sled_flush_every_ms = 2000
replication_mode = "3"
@ -27,15 +31,20 @@ bootstrap_peers = [
"212fd62eeaca72c122b45a7f4fa0f55e012aa5e24ac384a72a3016413fa724ff@[fc00:F::1]:3901",
]
consul_host = "consul.service"
consul_service_name = "garage-daemon"
kubernetes_namespace = "garage"
kubernetes_service_name = "garage-daemon"
kubernetes_skip_crd = false
[consul_discovery]
consul_http_addr = "http://127.0.0.1:8500"
service_name = "garage-daemon"
ca_cert = "/etc/consul/consul-ca.crt"
client_cert = "/etc/consul/consul-client.crt"
client_key = "/etc/consul/consul-key.crt"
tls_skip_verify = false
[kubernetes_discovery]
namespace = "garage"
service_name = "garage-daemon"
skip_crd = false
sled_cache_capacity = 134217728
sled_flush_every_ms = 2000
[s3_api]
api_bind_addr = "[::]:3900"
@ -72,6 +81,47 @@ This folder can be placed on an HDD. The space available for `data_dir`
should be counted to determine a node's capacity
when [adding it to the cluster layout](@/documentation/cookbook/real-world.md).
### `db_engine` (since `v0.8.0`)
By default, Garage uses the Sled embedded database library
to store its metadata on-disk. Since `v0.8.0`, Garage can use alternative storage backends as follows:
| DB engine | `db_engine` value | Database path |
| --------- | ----------------- | ------------- |
| [Sled](https://sled.rs) | `"sled"` | `<metadata_dir>/db/` |
| [LMDB](https://www.lmdb.tech) | `"lmdb"` | `<metadata_dir>/db.lmdb/` |
| [Sqlite](https://sqlite.org) | `"sqlite"` | `<metadata_dir>/db.sqlite` |
Performance characteristics of the different DB engines are as follows:
- Sled: the default database engine, which tends to produce
large data files and also has performance issues, especially when the metadata folder
is on a traditional HDD and not on SSD.
- LMDB: the recommended alternative on 64-bit systems,
much more space-efficient and slightly faster. Note that the data format of LMDB is not portable
between architectures, so for instance the Garage database of an x86-64
node cannot be moved to an ARM64 node. Also note that, while LMDB can technically be used on 32-bit systems,
this will limit your node to very small database sizes due to how LMDB works; it is therefore not recommended.
- Sqlite: Garage supports Sqlite as a storage backend for metadata,
however it may have issues and is also very slow in its current implementation,
so it is not recommended to be used for now.
It is possible to convert Garage's metadata directory from one format to another with a small utility named `convert_db`,
which can be downloaded at the following locations:
[for amd64](https://garagehq.deuxfleurs.fr/_releases/convert_db/amd64/convert_db),
[for i386](https://garagehq.deuxfleurs.fr/_releases/convert_db/i386/convert_db),
[for arm64](https://garagehq.deuxfleurs.fr/_releases/convert_db/arm64/convert_db),
[for arm](https://garagehq.deuxfleurs.fr/_releases/convert_db/arm/convert_db).
The `convert_db` utility is used as follows:
```
convert-db -a <input db engine> -i <input db path> \
-b <output db engine> -o <output db path>
```
Make sure to specify the full database path as presented in the table above,
and not just the path to the metadata directory.
### `block_size`
Garage splits stored objects in consecutive chunks of size `block_size`
@ -87,16 +137,20 @@ files will remain available. This however means that chunks from existing files
will not be deduplicated with chunks from newly uploaded files, meaning you
might use more storage space than is optimally possible.
### `block_manager_background_tranquility`
### `sled_cache_capacity`
This parameter tunes the activity of the background worker responsible for
resyncing data blocks between nodes. The higher the tranquility value is set,
the more the background worker will wait between iterations, meaning the load
on the system (including network usage between nodes) will be reduced. The
minimal value for this parameter is `0`, where the background worker will
always work at maximal throughput to resynchronize blocks. The default value
is `2`, where the background worker will try to spend at most 1/3 of its time
working, and 2/3 sleeping in order to reduce system load.
This parameter can be used to tune the capacity of the cache used by
[sled](https://sled.rs), the database Garage uses internally to store metadata.
Tune this to fit the RAM you wish to make available to your Garage instance.
This value has a conservative default (128MB) so that Garage doesn't use too much
RAM by default, but feel free to increase this for higher performance.
### `sled_flush_every_ms`
This parameter can be used to tune the flushing interval of sled.
Increase this if sled is thrashing your SSD, at the risk of losing more data in case
of a power outage (though this should not matter much as data is replicated on other
nodes). The default value, 2000ms, should be appropriate for most use cases.
### `replication_mode`
@ -245,48 +299,58 @@ be obtained by running `garage node id` and then included directly in the
key will be returned by `garage node id` and you will have to add the IP
yourself.
### `consul_host` and `consul_service_name`
## The `[consul_discovery]` section
Garage supports discovering other nodes of the cluster using Consul. For this
to work correctly, nodes need to know their IP address by which they can be
reached by other nodes of the cluster, which should be set in `rpc_public_addr`.
The `consul_host` parameter should be set to the hostname of the Consul server,
and `consul_service_name` should be set to the service name under which Garage's
### `consul_http_addr` and `service_name`
The `consul_http_addr` parameter should be set to the full HTTP(S) address of the Consul server.
### `service_name`
`service_name` should be set to the service name under which Garage's
RPC ports are announced.
Garage does not yet support talking to Consul over TLS.
### `client_cert`, `client_key`
### `kubernetes_namespace`, `kubernetes_service_name` and `kubernetes_skip_crd`
TLS client certificate and client key to use when communicating with Consul over TLS. Both are mandatory when doing so.
### `ca_cert`
TLS CA certificate to use when communicating with Consul over TLS.
### `tls_skip_verify`
Skip server hostname verification in TLS handshake.
`ca_cert` is ignored when this is set.
## The `[kubernetes_discovery]` section
Garage supports discovering other nodes of the cluster using kubernetes custom
resources. For this to work `kubernetes_namespace` and `kubernetes_service_name`
need to be configured.
resources. For this to work, a `[kubernetes_discovery]` section must be present
with at least the `namespace` and `service_name` parameters.
`kubernetes_namespace` sets the namespace in which the custom resources are
configured. `kubernetes_service_name` is added as a label to these resources to
### `namespace`
`namespace` sets the namespace in which the custom resources are
configured.
### `service_name`
`service_name` is added as a label to the advertised resources to
filter them, to allow for multiple deployments in a single namespace.
`kubernetes_skip_crd` can be set to true to disable the automatic creation and
### `skip_crd`
`skip_crd` can be set to true to disable the automatic creation and
patching of the `garagenodes.deuxfleurs.fr` CRD. You will need to create the CRD
manually.
### `sled_cache_capacity`
This parameter can be used to tune the capacity of the cache used by
[sled](https://sled.rs), the database Garage uses internally to store metadata.
Tune this to fit the RAM you wish to make available to your Garage instance.
This value has a conservative default (128MB) so that Garage doesn't use too much
RAM by default, but feel free to increase this for higher performance.
### `sled_flush_every_ms`
This parameter can be used to tune the flushing interval of sled.
Increase this if sled is thrashing your SSD, at the risk of losing more data in case
of a power outage (though this should not matter much as data is replicated on other
nodes). The default value, 2000ms, should be appropriate for most use cases.
## The `[s3_api]` section

View file

@ -0,0 +1,125 @@
+++
title = "List of Garage features"
weight = 10
+++
### S3 API
The main goal of Garage is to provide an object storage service that is compatible with the
[S3 API](https://docs.aws.amazon.com/AmazonS3/latest/API/Welcome.html) from Amazon Web Services.
We try to adhere as strictly as possible to the semantics of the API as implemented by Amazon
and other vendors such as Minio or CEPH.
Of course Garage does not implement the full span of API endpoints that AWS S3 does;
the exact list of S3 features implemented by Garage can be found [on our S3 compatibility page](@/documentation/reference-manual/s3-compatibility.md).
### Geo-distribution
Garage allows you to store copies of your data in multiple geographical locations in order to maximize resilience
to adverse events, such as network/power outages or hardware failures.
This allows Garage to run very well even at home, using consumer-grade Internet connectivity
(such as FTTH) and power, as long as cluster nodes can be spawned at several physical locations.
Garage exploits knowledge of the capacity and physical location of each storage node to design
a storage plan that best exploits the available storage capacity while satisfying the geo-distributed replication constraint.
To learn more about geo-distributed Garage clusters,
read our documentation on [setting up a real-world deployment](@/documentation/cookbook/real-world.md).
### Standalone/self-contained
Garage is extremely simple to deploy, and does not depend on any external service to run.
This makes setting up and administering storage clusters, we hope, as easy as it could be.
### Flexible topology
A Garage cluster can very easily evolve over time, as storage nodes are added or removed.
Garage will automatically rebalance data between nodes as needed to ensure the desired number of copies.
Read about cluster layout management [here](@/documentation/reference-manual/layout.md).
### No RAFT slowing you down
It might seem strange to tout the absence of something as a desirable feature,
but this is in fact a very important point! Garage does not use RAFT or another
consensus algorithm internally to order incoming requests: this means that all requests
directed to a Garage cluster can be handled independently of one another instead
of going through a central bottleneck (the leader node).
As a consequence, requests can be handled much faster, even in cases where latency
between cluster nodes is important (see our [benchmarks](@/documentation/design/benchmarks/index.md) for data on this).
This is particularly useful when nodes are far from one another and talk to one another through standard Internet connections.
### Several replication modes
Garage supports a variety of replication modes, with 1 copy, 2 copies or 3 copies of your data,
and with various levels of consistency, in order to adapt to a variety of usage scenarios.
Read our reference page on [supported replication modes](@/documentation/reference-manual/configuration.md#replication-mode)
to select the replication mode best suited to your use case (hint: in most cases, `replication_mode = "3"` is what you want).
### Web server for static websites
A storage bucket can easily be configured to be served directly by Garage as a static web site.
Domain names for multiple websites directly map to bucket names, making it easy to build
a platform for your users to autonomously build and host their websites over Garage.
Surprisingly, none of the other alternative S3 implementations we surveyed (such as Minio
or CEPH) support publishing static websites from S3 buckets, a feature that is however
directly inherited from S3 on AWS.
Read more on our [dedicated documentation page](@/documentation/cookbook/exposing-websites.md).
### Bucket names as aliases
In Garage, a bucket may have several names, known as aliases.
Aliases can easily be added and removed on demand:
this allows to easily rename buckets if needed
without having to copy all of their content, something that cannot be done on AWS.
For buckets served as static websites, having multiple aliases for a bucket can allow
exposing the same content under different domain names.
Garage also supports bucket aliases which are local to a single user:
this allows different users to have different buckets with the same name, thus avoiding naming collisions.
This can be helpful for instance if you want to write an application that creates per-user buckets with always the same name.
This feature is totally invisible to S3 clients and does not break compatibility with AWS.
### Cluster administration API
Garage provides a fully-fledged REST API to administer your cluster programmatically.
Functionality included in the admin API includes: setting up and monitoring
cluster nodes, managing access credentials, and managing storage buckets and bucket aliases.
A full reference of the administration API is available [here](@/documentation/reference-manual/admin-api.md).
### Metrics and traces
Garage makes some internal metrics available in the Prometheus data format,
which allows you to build interactive dashboards to visualize the load and internal state of your storage cluster.
For developers and performance-savvy administrators,
Garage also supports exporting traces of what it does internally in OpenTelemetry format.
This allows to monitor the time spent at various steps of the processing of requests,
in order to detect potential performance bottlenecks.
### Kubernetes and Nomad integrations
Garage can automatically discover other nodes in the cluster thanks to integration
with orchestrators such as Kubernetes and Nomad (when used with Consul).
This eases the configuration of your cluster as it removes one step where nodes need
to be manually connected to one another.
### Support for changing IP addresses
As long as all of your nodes don't change their IP address at the same time,
Garage should be able to tolerate nodes with changing/dynamic IP addresses,
as nodes will regularly exchange the IP addresses of their peers and try to
reconnect using newer addresses when existing connections are broken.
### K2V API (experimental)
As part of an ongoing research project, Garage can expose an experimental key/value storage API called K2V.
K2V is made for the storage and retrieval of many small key/value pairs that need to be processed in bulk.
This completes the S3 API with an alternative that can be used to easily store and access metadata
related to objects stored in an S3 bucket.
In the context of our research project, [Aérogramme](https://aerogramme.deuxfleurs.fr),
K2V is used to provide metadata and log storage for operations on encrypted e-mail storage.
Learn more on the specification of K2V [here](https://git.deuxfleurs.fr/Deuxfleurs/garage/src/branch/k2v/doc/drafts/k2v-spec.md)
and on how to enable it in Garage [here](@/documentation/reference-manual/k2v.md).

View file

@ -1,6 +1,6 @@
+++
title = "K2V"
weight = 30
weight = 70
+++
Starting with version 0.7.2, Garage introduces an optional feature, K2V,

View file

@ -1,6 +1,6 @@
+++
title = "Cluster layout management"
weight = 10
weight = 50
+++
The cluster layout in Garage is a table that assigns to each node a role in

View file

@ -1,45 +0,0 @@
+++
title = "Request routing logic"
weight = 10
+++
Data retrieval requests to Garage endpoints (S3 API and websites) are resolved
to an individual object in a bucket. Since objects are replicated to multiple nodes
Garage must ensure consistency before answering the request.
## Using quorum to ensure consistency
Garage ensures consistency by attempting to establish a quorum with the
data nodes responsible for the object. When a majority of the data nodes
have provided metadata on an object Garage can then answer the request.
When a request arrives Garage will, assuming the recommended 3 replicas, perform the following actions:
- Make a request to the two preferred nodes for object metadata
- Try the third node if one of the two initial requests fails
- Check that the metadata from at least 2 nodes match
- Check that the object hasn't been marked deleted
- Answer the request with inline data from metadata if object is small enough
- Or get data blocks from the preferred nodes and answer using the assembled object
Garage dynamically determines which nodes to query based on health, preference, and
which nodes actually host a given data. Garage has no concept of "primary" so any
healthy node with the data can be used as long as a quorum is reached for the metadata.
## Node health
Garage keeps a TCP session open to each node in the cluster and periodically pings them. If a connection
cannot be established, or a node fails to answer a number of pings, the target node is marked as failed.
Failed nodes are not used for quorum or other internal requests.
## Node preference
Garage prioritizes which nodes to query according to a few criteria:
- A node always prefers itself if it can answer the request
- Then the node prioritizes nodes in the same zone
- Finally the nodes with the lowest latency are prioritized
For further reading on the cluster structure look at the [gateway](@/documentation/cookbook/gateways.md)
and [cluster layout management](@/documentation/reference-manual/layout.md) pages.

View file

@ -1,6 +1,6 @@
+++
title = "S3 Compatibility status"
weight = 20
weight = 40
+++
## DISCLAIMER

View file

@ -1,6 +1,6 @@
+++
title = "Working Documents"
weight = 7
weight = 8
sort_by = "weight"
template = "documentation.html"
+++

View file

@ -1,6 +1,6 @@
+++
title = "Design draft"
weight = 25
title = "Design draft (obsolete)"
weight = 900
+++
**WARNING: this documentation is a design draft which was written before Garage's actual implementation.

View file

@ -1,6 +1,6 @@
+++
title = "Load balancing data"
weight = 10
title = "Load balancing data (obsolete)"
weight = 910
+++
**This is being yet improved in release 0.5. The working document has not been updated yet, it still only applies to Garage 0.2 through 0.4.**

View file

@ -16,7 +16,7 @@ The migration steps are as follows:
1. Do `garage repair --all-nodes --yes tables` and `garage repair --all-nodes --yes blocks`,
check the logs and check that all data seems to be synced correctly between
nodes. If you have time, do additional checks (`scrub`, `block_refs`, etc.)
2. Disable api and web access. Garage does not support disabling
2. Disable API and web access. Garage does not support disabling
these endpoints but you can change the port number or stop your reverse
proxy for instance.
3. Check once again that your cluster is healthy. Run again `garage repair --all-nodes --yes tables` which is quick.

View file

@ -0,0 +1,34 @@
+++
title = "Migrating from 0.7 to 0.8"
weight = 13
+++
**This guide explains how to migrate to 0.8 if you have an existing 0.7 cluster.
We don't recommend trying to migrate to 0.8 directly from 0.6 or older.**
**We make no guarantee that this migration will work perfectly:
back up all your data before attempting it!**
Garage v0.8 introduces new data tables that allow the counting of objects in buckets in order to implement bucket quotas.
A manual migration step is required to first count objects in Garage buckets and populate these tables with accurate data.
The migration steps are as follows:
1. Disable API and web access. Garage v0.7 does not support disabling
these endpoints but you can change the port number or stop your reverse proxy for instance.
2. Do `garage repair --all-nodes --yes tables` and `garage repair --all-nodes --yes blocks`,
check the logs and check that all data seems to be synced correctly between
nodes. If you have time, do additional checks (`scrub`, `block_refs`, etc.)
3. Check that queues are empty: run `garage stats` to query them or inspect metrics in the Grafana dashboard.
4. Turn off Garage v0.7
5. **Backup the metadata folder of all your nodes!** For instance, use the following command
if your metadata directory is `/var/lib/garage/meta`: `cd /var/lib/garage ; tar -acf meta-v0.7.tar.zst meta/`
6. Install Garage v0.8
7. **Before starting Garage v0.8**, run the offline migration step: `garage offline-repair --yes object_counters`.
This can take a while to run, depending on the number of objects stored in your cluster.
8. Turn on Garage v0.8
9. Do `garage repair --all-nodes --yes tables` and `garage repair --all-nodes --yes blocks`.
Wait for a full table sync to run.
10. Your upgraded cluster should be in a working state. Re-enable API and Web
access and check that everything went well.
11. Monitor your cluster in the next hours to see if it works well under your production load, report any issue.

View file

@ -0,0 +1,75 @@
+++
title = "Testing strategy"
weight = 30
+++
## Testing Garage
Currently, we have the following tests:
- some unit tests spread around the codebase
- integration tests written in Rust (`src/garage/test`) to check that Garage operations perform correctly
- integration test for compatibility with external tools (`script/test-smoke.sh`)
We have also tried `minio/mint` but it fails a lot and for now we haven't gotten a lot from it.
In the future:
1. We'd like to have a systematic way of testing with `minio/mint`,
it would add value to Garage by providing a compatibility score and reference that can be trusted.
2. We'd also like to do testing with Jepsen in some way.
## How to instrument Garage
We should try to test in least invasive ways, i.e. minimize the impact of the testing framework on Garage's source code. This means for example:
- Not abstracting IO/nondeterminism in the source code
- Not making `garage` a shared library (launch using `execve`, it's perfectly fine)
Instead, we should focus on building a clean outer interface for the `garage` binary,
for example loading configuration using environment variables instead of the configuration file if that's helpful for writing the tests.
There are two reasons for this:
- Keep the source code clean and focused
- Test something that is as close as possible to the true garage that will actually be running
Reminder: rules of simplicity, concerning changes to Garage's source code.
Always question what we are doing.
Never do anything just because it looks nice or because we "think" it might be useful at some later point but without knowing precisely why/when.
Only do things that make perfect sense in the context of what we currently know.
## References
Testing is a research field on its own.
About testing distributed systems:
- [Jepsen](https://jepsen.io/) is a testing framework designed to test distributed systems. It can mock some part of the system like the time and the network.
- [FoundationDB Testing Approach](https://www.micahlerner.com/2021/06/12/foundationdb-a-distributed-unbundled-transactional-key-value-store.html#what-is-unique-about-foundationdbs-testing-framework). They chose to abstract "all sources of nondeterminism and communication are abstracted, including network, disk, time, and pseudo random number generator" to be able to run tests by simulating faults.
- [Testing Distributed Systems](https://asatarin.github.io/testing-distributed-systems/) - Curated list of resources on testing distributed systems
About S3 compatibility:
- [ceph/s3-tests](https://github.com/ceph/s3-tests)
- (deprecated) [minio/s3verify](https://blog.min.io/s3verify-a-simple-tool-to-verify-aws-s3-api-compatibility/)
- [minio/mint](https://github.com/minio/mint)
About benchmarking S3 (I think it is not necessarily very relevant for this iteration):
- [minio/warp](https://github.com/minio/warp)
- [wasabi-tech/s3-benchmark](https://github.com/wasabi-tech/s3-benchmark)
- [dvassallo/s3-benchmark](https://github.com/dvassallo/s3-benchmark)
- [intel-cloud/cosbench](https://github.com/intel-cloud/cosbench) - used by Ceph
Engineering blog posts:
- [Quincy @ Scale: A Tale of Three Large-Scale Clusters](https://ceph.io/en/news/blog/2022/three-large-scale-clusters/)
Interesting blog posts on the blog of the Sled database:
- <https://sled.rs/simulation.html>
- <https://sled.rs/perf.html>
Misc:
- [mutagen](https://github.com/llogiq/mutagen) - mutation testing is a way to assert our test quality by mutating the code and see if the mutation makes the tests fail
- [fuzzing](https://rust-fuzz.github.io/book/) - cargo supports fuzzing, it could be a way to test our software reliability in presence of garbage data.

View file

@ -206,8 +206,8 @@ and responses need to be translated.
Query parameters:
| name | default value | meaning |
| - | - | - |
| name | default value | meaning |
|------------|---------------|----------------------------------|
| `sort_key` | **mandatory** | The sort key of the item to read |
Returns the item with specified partition key and sort key. Values can be
@ -317,11 +317,11 @@ an HTTP 304 NOT MODIFIED is returned.
Query parameters:
| name | default value | meaning |
| - | - | - |
| `sort_key` | **mandatory** | The sort key of the item to read |
| `causality_token` | **mandatory** | The causality token of the last known value or set of values |
| `timeout` | 300 | The timeout before 304 NOT MODIFIED is returned if the value isn't updated |
| name | default value | meaning |
|-------------------|---------------|----------------------------------------------------------------------------|
| `sort_key` | **mandatory** | The sort key of the item to read |
| `causality_token` | **mandatory** | The causality token of the last known value or set of values |
| `timeout` | 300 | The timeout before 304 NOT MODIFIED is returned if the value isn't updated |
The timeout can be set to any number of seconds, with a maximum of 600 seconds (10 minutes).
@ -346,7 +346,7 @@ myblobblahblahblah
Example response:
```
HTTP/1.1 200 OK
HTTP/1.1 204 No Content
```
**DeleteItem: `DELETE /<bucket>/<partition key>?sort_key=<sort_key>`**
@ -382,13 +382,13 @@ as these values are asynchronously updated, and thus eventually consistent.
Query parameters:
| name | default value | meaning |
| - | - | - |
| `prefix` | `null` | Restrict listing to partition keys that start with this prefix |
| `start` | `null` | First partition key to list, in lexicographical order |
| `end` | `null` | Last partition key to list (excluded) |
| `limit` | `null` | Maximum number of partition keys to list |
| `reverse` | `false` | Iterate in reverse lexicographical order |
| name | default value | meaning |
|-----------|---------------|----------------------------------------------------------------|
| `prefix` | `null` | Restrict listing to partition keys that start with this prefix |
| `start` | `null` | First partition key to list, in lexicographical order |
| `end` | `null` | Last partition key to list (excluded) |
| `limit` | `null` | Maximum number of partition keys to list |
| `reverse` | `false` | Iterate in reverse lexicographical order |
The response consists in a JSON object that repeats the parameters of the query and gives the result (see below).
@ -512,7 +512,7 @@ POST /my_bucket HTTP/1.1
Example response:
```
HTTP/1.1 200 OK
HTTP/1.1 204 NO CONTENT
```
@ -525,17 +525,17 @@ The request body is a JSON list of searches, that each specify a range of
items to get (to get single items, set `singleItem` to `true`). A search is a
JSON struct with the following fields:
| name | default value | meaning |
| - | - | - |
| `partitionKey` | **mandatory** | The partition key in which to search |
| `prefix` | `null` | Restrict items to list to those whose sort keys start with this prefix |
| `start` | `null` | The sort key of the first item to read |
| `end` | `null` | The sort key of the last item to read (excluded) |
| `limit` | `null` | The maximum number of items to return |
| `reverse` | `false` | Iterate in reverse lexicographical order on sort keys |
| `singleItem` | `false` | Whether to return only the item with sort key `start` |
| `conflictsOnly` | `false` | Whether to return only items that have several concurrent values |
| `tombstones` | `false` | Whether or not to return tombstone lines to indicate the presence of old deleted items |
| name | default value | meaning |
|-----------------|---------------|----------------------------------------------------------------------------------------|
| `partitionKey` | **mandatory** | The partition key in which to search |
| `prefix` | `null` | Restrict items to list to those whose sort keys start with this prefix |
| `start` | `null` | The sort key of the first item to read |
| `end` | `null` | The sort key of the last item to read (excluded) |
| `limit` | `null` | The maximum number of items to return |
| `reverse` | `false` | Iterate in reverse lexicographical order on sort keys |
| `singleItem` | `false` | Whether to return only the item with sort key `start` |
| `conflictsOnly` | `false` | Whether to return only items that have several concurrent values |
| `tombstones` | `false` | Whether or not to return tombstone lines to indicate the presence of old deleted items |
For each of the searches, triplets are listed and returned separately. The
@ -683,7 +683,7 @@ POST /my_bucket?delete HTTP/1.1
Example response:
```
```json
HTTP/1.1 200 OK
[

View file

@ -3,20 +3,20 @@ rec {
* Fixed dependencies
*/
pkgsSrc = fetchTarball {
# As of 2021-10-04
url = "https://github.com/NixOS/nixpkgs/archive/b27d18a412b071f5d7991d1648cfe78ee7afe68a.tar.gz";
sha256 = "1xy9zpypqfxs5gcq5dcla4bfkhxmh5nzn9dyqkr03lqycm9wg5cr";
# As of 2022-10-13
url = "https://github.com/NixOS/nixpkgs/archive/a3073c49bc0163fea6a121c276f526837672b555.zip";
sha256 = "1bz632psfbpmicyzjb8b4265y50shylccvfm6ry6mgnv5hvz324s";
};
cargo2nixSrc = fetchGit {
# As of 2022-03-17
url = "https://github.com/superboum/cargo2nix";
ref = "dedup_propagate";
rev = "486675c67249e735dd7eb68e1b9feac9db102be7";
# As of 2022-10-18: two small patches over unstable branch, one for clippy and one to fix feature detection
url = "https://github.com/Alexis211/cargo2nix";
ref = "custom_unstable";
rev = "a7a61179b66054904ef6a195d8da736eaaa06c36";
};
/*
* Shared objects
*/
cargo2nix = import cargo2nixSrc;
cargo2nixOverlay = import "${cargo2nixSrc}/overlay";
cargo2nixOverlay = cargo2nix.overlays.default;
}

View file

@ -1,9 +1,10 @@
{
system ? builtins.currentSystem,
target ? null,
target,
compiler ? "rustc",
release ? false,
git_version ? null,
features ? null,
}:
with import ./common.nix;
@ -13,70 +14,40 @@ let
pkgs = import pkgsSrc {
inherit system;
${ if target == null then null else "crossSystem" } = { config = target; };
crossSystem = {
config = target;
isStatic = true;
};
overlays = [ cargo2nixOverlay ];
};
/*
Rust and Nix triples are not the same. Cargo2nix has a dedicated library
to convert Nix triples to Rust ones. We need this conversion as we want to
set later options linked to our (rust) target in a generic way. Not only
the triple terminology is different, but also the "roles" are named differently.
Nix uses a build/host/target terminology where Nix's "host" maps to Cargo's "target".
*/
rustTarget = log (pkgs.rustBuilder.rustLib.rustTriple pkgs.stdenv.hostPlatform);
/*
Cargo2nix is built for rustOverlay which installs Rust from Mozilla releases.
We want our own Rust to avoid incompatibilities, like we had with musl 1.2.0.
rustc was built with musl < 1.2.0 and nix shipped musl >= 1.2.0 which lead to compilation breakage.
This is fine for 64-bit platforms, but for 32-bit platforms, we need our own Rust
to avoid incompatibilities with time_t between different versions of musl
(>= 1.2.0 shipped by NixOS, < 1.2.0 with which rustc was built), which lead to compilation breakage.
So we want a Rust release that is bound to our Nix repository to avoid these problems.
See here for more info: https://musl.libc.org/time64.html
Because Cargo2nix does not support the Rust environment shipped by NixOS,
we emulate the structure of the Rust object created by rustOverlay.
In practise, rustOverlay ships rustc+cargo in a single derivation while
NixOS ships them in separate ones. We reunite them with symlinkJoin.
*/
rustChannel = {
rustc = pkgs.symlinkJoin {
name = "rust-channel";
paths = [
pkgs.rustPlatform.rust.cargo
pkgs.rustPlatform.rust.rustc
];
*/
toolchainOptions =
if target == "x86_64-unknown-linux-musl" || target == "aarch64-unknown-linux-musl" then {
rustVersion = "1.63.0";
extraRustComponents = [ "clippy" ];
} else {
rustToolchain = pkgs.symlinkJoin {
name = "rust-static-toolchain-${target}";
paths = [
pkgs.rustPlatform.rust.cargo
pkgs.rustPlatform.rust.rustc
# clippy not needed, it only runs on amd64
];
};
};
clippy = pkgs.symlinkJoin {
name = "clippy-channel";
paths = [
pkgs.rustPlatform.rust.cargo
pkgs.rustPlatform.rust.rustc
pkgs.clippy
];
};
}.${compiler};
clippyBuilder = pkgs.writeScriptBin "clippy" ''
#!${pkgs.stdenv.shell}
. ${cargo2nixSrc + "/overlay/utils.sh"}
isBuildScript=
args=("$@")
for i in "''${!args[@]}"; do
if [ "xmetadata=" = "x''${args[$i]::9}" ]; then
args[$i]=metadata=$NIX_RUST_METADATA
elif [ "x--crate-name" = "x''${args[$i]}" ] && [ "xbuild_script_" = "x''${args[$i+1]::13}" ]; then
isBuildScript=1
fi
done
if [ "$isBuildScript" ]; then
args+=($NIX_RUST_BUILD_LINK_FLAGS)
else
args+=($NIX_RUST_LINK_FLAGS)
fi
touch invoke.log
echo "''${args[@]}" >>invoke.log
exec ${rustChannel}/bin/clippy-driver --deny warnings "''${args[@]}"
'';
buildEnv = (drv: {
rustc = drv.setBuildEnv;
@ -86,7 +57,8 @@ let
echo --- BUILDING WITH CLIPPY ---
echo
export RUSTC=${clippyBuilder}/bin/clippy
export NIX_RUST_BUILD_FLAGS="''${NIX_RUST_BUILD_FLAGS} --deny warnings"
export RUSTC="''${CLIPPY_DRIVER}"
'';
}.${compiler});
@ -97,7 +69,7 @@ let
You can have a complete list of the available options by looking at the overridden object, mkcrate:
https://github.com/cargo2nix/cargo2nix/blob/master/overlay/mkcrate.nix
*/
overrides = pkgs.rustBuilder.overrides.all ++ [
packageOverrides = pkgs: pkgs.rustBuilder.overrides.all ++ [
/*
[1] We add some logic to compile our crates with clippy, it provides us many additional lints
@ -113,12 +85,7 @@ let
As we do not want to consider the .git folder as part of the input source,
we ask the user (the CI often) to pass the value to Nix.
[4] We ship some parts of the code disabled by default by putting them behind a flag.
It speeds up the compilation (when the feature is not required) and released crates have fewer dependencies by default (smaller attack surface, less disk space, etc.).
But we want to ship these additional features when we release Garage.
In the end, we chose to exclude all features from debug builds while putting (all of) them in the release builds.
[5] We don't want libsodium-sys and zstd-sys to try to use pkgconfig to build against a system library.
[4] We don't want libsodium-sys and zstd-sys to try to use pkgconfig to build against a system library.
However the features to do so get activated for some reason (due to a bug in cargo2nix?),
so disable them manually here.
*/
@ -136,10 +103,6 @@ let
/* [1] */ setBuildEnv = (buildEnv drv);
/* [2] */ hardeningDisable = [ "pie" ];
};
overrideArgs = old: {
/* [4] */ features = [ "bundled-libs" "sled" ]
++ (if release then [ "kubernetes-discovery" "telemetry-otlp" "metrics" "lmdb" "sqlite" ] else []);
};
})
(pkgs.rustBuilder.rustLib.makeOverride {
@ -190,18 +153,39 @@ let
(pkgs.rustBuilder.rustLib.makeOverride {
name = "libsodium-sys";
overrideArgs = old: {
features = [ ]; /* [5] */
features = [ ]; /* [4] */
};
})
(pkgs.rustBuilder.rustLib.makeOverride {
name = "zstd-sys";
overrideArgs = old: {
features = [ ]; /* [5] */
features = [ ]; /* [4] */
};
})
];
/*
We ship some parts of the code disabled by default by putting them behind a flag.
It speeds up the compilation (when the feature is not required) and released crates have fewer dependencies by default (smaller attack surface, less disk space, etc.).
But we want to ship these additional features when we release Garage.
In the end, we chose to exclude all features from debug builds while putting (all of) them in the release builds.
*/
rootFeatures = if features != null then features else
([
"garage/bundled-libs"
"garage/sled"
"garage/k2v"
] ++ (if release then [
"garage/consul-discovery"
"garage/kubernetes-discovery"
"garage/metrics"
"garage/telemetry-otlp"
"garage/lmdb"
"garage/sqlite"
] else []));
packageFun = import ../Cargo.nix;
/*
@ -222,23 +206,15 @@ let
"x86_64-unknown-linux-musl" = [ "target-feature=+crt-static" "link-arg=-static-pie" ];
};
in
/*
The following definition is not elegant as we use a low level function of Cargo2nix
that enables us to pass our custom rustChannel object. We need this low level definition
to pass Nix's Rust toolchains instead of Mozilla's one.
target is mandatory but must be kept to null to allow cargo2nix to set it to the appropriate value
for each crate.
NixOS and Rust/Cargo triples do not match for ARM, fix it here.
*/
pkgs.rustBuilder.makePackageSet {
inherit packageFun rustChannel release codegenOpts;
packageOverrides = overrides;
target = null;
rustTarget = if target == "armv6l-unknown-linux-musleabihf"
then "arm-unknown-linux-musleabihf"
else target;
buildRustPackages = pkgs.buildPackages.rustBuilder.makePackageSet {
inherit rustChannel packageFun codegenOpts;
packageOverrides = overrides;
target = null;
};
}
in
pkgs.rustBuilder.makePackageSet ({
inherit release packageFun packageOverrides codegenOpts rootFeatures;
target = rustTarget;
} // toolchainOptions)

23
nix/manifest-tool.nix Normal file
View file

@ -0,0 +1,23 @@
# Nix derivation for manifest-tool, built with buildGoModule.
# Takes the package set `pkgs` as its only argument and returns the derivation.
pkgs:
pkgs.buildGoModule rec {
pname = "manifest-tool";
version = "2.0.5";
# The GitHub checkout for tag "v<version>" is narrowed to its "v2" subdirectory,
# which is what gets used as the build source.
src = pkgs.fetchFromGitHub {
owner = "estesp";
repo = "manifest-tool";
rev = "v${version}";
sha256 = "hjCGKnE0yrlnF/VIzOwcDzmQX3Wft+21KCny/opqdLg=";
} + "/v2";
# NOTE(review): a null vendorSha256 presumably makes buildGoModule rely on the
# vendor directory checked into the source tree — confirm against the nixpkgs manual.
vendorSha256 = null;
# The check phase is replaced by the no-op `true` command, so no tests run during the build.
checkPhase = "true";
meta = with pkgs.lib; {
description = "Command line tool to create and query container image manifest list/indexes";
homepage = "https://github.com/estesp/manifest-tool";
license = licenses.asl20;
platforms = platforms.linux;
};
}

View file

@ -6,19 +6,24 @@ with import ./common.nix;
let
platforms = [
"x86_64-unknown-linux-musl"
#"x86_64-unknown-linux-musl"
"i686-unknown-linux-musl"
"aarch64-unknown-linux-musl"
#"aarch64-unknown-linux-musl"
"armv6l-unknown-linux-musleabihf"
];
pkgsList = builtins.map (target: import pkgsSrc {
inherit system;
crossSystem = { config = target; };
crossSystem = {
config = target;
isStatic = true;
};
overlays = [ cargo2nixOverlay ];
}) platforms;
pkgsHost = import pkgsSrc {};
lib = pkgsHost.lib;
kaniko = (import ./kaniko.nix) pkgsHost;
winscp = (import ./winscp.nix) pkgsHost;
manifestTool = (import ./manifest-tool.nix) pkgsHost;
in
lib.flatten (builtins.map (pkgs: [
pkgs.rustPlatform.rust.rustc
@ -27,5 +32,6 @@ in
]) pkgsList) ++ [
kaniko
winscp
manifestTool
]

View file

@ -11,7 +11,7 @@ PATH="${GARAGE_DEBUG}:${GARAGE_RELEASE}:${NIX_RELEASE}:$PATH"
FANCYCOLORS=("41m" "42m" "44m" "45m" "100m" "104m")
export RUST_BACKTRACE=1
export RUST_LOG=garage=info,garage_api=debug
export RUST_LOG=garage=info,garage_api=debug,netapp=trace
MAIN_LABEL="\e[${FANCYCOLORS[0]}[main]\e[49m"
WHICH_GARAGE=$(which garage || exit 1)

3
script/helm/README.md Normal file
View file

@ -0,0 +1,3 @@
# Garage helm3 chart
Documentation is located [here](/doc/book/cookbook/kubernetes.md).

View file

@ -0,0 +1,23 @@
# Patterns to ignore when building packages.
# This supports shell glob matching, relative path matching, and
# negation (prefixed with !). Only one pattern per line.
.DS_Store
# Common VCS dirs
.git/
.gitignore
.bzr/
.bzrignore
.hg/
.hgignore
.svn/
# Common backup files
*.swp
*.bak
*.tmp
*.orig
*~
# Various IDEs
.project
.idea/
*.tmproj
.vscode/

View file

@ -0,0 +1,24 @@
apiVersion: v2
name: garage
description: S3-compatible object store for small self-hosted geo-distributed deployments
# A chart can be either an 'application' or a 'library' chart.
#
# Application charts are a collection of templates that can be packaged into versioned archives
# to be deployed.
#
# Library charts provide useful utilities or functions for the chart developer. They're included as
# a dependency of application charts to inject those utilities and functions into the rendering
# pipeline. Library charts do not define any templates and therefore cannot be deployed.
type: application
# This is the chart version. This version number should be incremented each time you make changes
# to the chart and its templates, including the app version.
# Versions are expected to follow Semantic Versioning (https://semver.org/)
version: 0.1.3
# This is the version number of the application being deployed. This version number should be
# incremented each time you make changes to the application. Versions are not expected to
# follow Semantic Versioning. They should reflect the version the application is using.
# It is recommended to use it with quotes.
appVersion: "v0.7.2.1"

View file

@ -0,0 +1,88 @@
{{/*
Expand the name of the chart.
*/}}
{{- define "garage.name" -}}
{{- default .Chart.Name .Values.nameOverride | trunc 63 | trimSuffix "-" }}
{{- end }}
{{/*
Create a default fully qualified app name.
We truncate at 63 chars because some Kubernetes name fields are limited to this (by the DNS naming spec).
If release name contains chart name it will be used as a full name.
*/}}
{{- define "garage.fullname" -}}
{{- if .Values.fullnameOverride }}
{{- .Values.fullnameOverride | trunc 63 | trimSuffix "-" }}
{{- else }}
{{- $name := default .Chart.Name .Values.nameOverride }}
{{- if contains $name .Release.Name }}
{{- .Release.Name | trunc 63 | trimSuffix "-" }}
{{- else }}
{{- printf "%s-%s" .Release.Name $name | trunc 63 | trimSuffix "-" }}
{{- end }}
{{- end }}
{{- end }}
{{/*
Create the name of the rpc secret
*/}}
{{- define "garage.rpcSecretName" -}}
{{- printf "%s-rpc-secret" (include "garage.fullname" .) -}}
{{- end }}
{{/*
Create chart name and version as used by the chart label.
*/}}
{{- define "garage.chart" -}}
{{- printf "%s-%s" .Chart.Name .Chart.Version | replace "+" "_" | trunc 63 | trimSuffix "-" }}
{{- end }}
{{/*
Common labels
*/}}
{{- define "garage.labels" -}}
helm.sh/chart: {{ include "garage.chart" . }}
{{ include "garage.selectorLabels" . }}
{{- if .Chart.AppVersion }}
app.kubernetes.io/version: {{ .Chart.AppVersion | quote }}
{{- end }}
app.kubernetes.io/managed-by: {{ .Release.Service }}
{{- end }}
{{/*
Selector labels
*/}}
{{- define "garage.selectorLabels" -}}
app.kubernetes.io/name: {{ include "garage.name" . }}
app.kubernetes.io/instance: {{ .Release.Name }}
{{- end }}
{{/*
Create the name of the service account to use
*/}}
{{- define "garage.serviceAccountName" -}}
{{- if .Values.serviceAccount.create }}
{{- default (include "garage.fullname" .) .Values.serviceAccount.name }}
{{- else }}
{{- default "default" .Values.serviceAccount.name }}
{{- end }}
{{- end }}
{{/*
Returns given number of random Hex characters.
In practice, it generates up to 100 randAlphaNum strings
that are stripped of non-hex characters and appended
to the resulting string, which is finally trimmed down.
*/}}
{{- define "jupyterhub.randHex" -}}
{{- $result := "" }}
{{- range $i := until 100 }}
{{- if lt (len $result) . }}
{{- $rand_list := randAlphaNum . | splitList "" -}}
{{- $reduced_list := without $rand_list "g" "h" "i" "j" "k" "l" "m" "n" "o" "p" "q" "r" "s" "t" "u" "v" "w" "x" "y" "z" "A" "B" "C" "D" "E" "F" "G" "H" "I" "J" "K" "L" "M" "N" "O" "P" "Q" "R" "S" "T" "U" "V" "W" "X" "Y" "Z" }}
{{- $rand_string := join "" $reduced_list }}
{{- $result = print $result $rand_string -}}
{{- end }}
{{- end }}
{{- $result | trunc . }}
{{- end }}

View file

@ -0,0 +1,28 @@
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRole
metadata:
name: manage-crds-{{ .Release.Namespace }}-{{ .Release.Name }}
labels:
{{- include "garage.labels" . | nindent 4 }}
rules:
- apiGroups: ["apiextensions.k8s.io"]
resources: ["customresourcedefinitions"]
verbs: ["get", "list", "watch", "create", "patch"]
- apiGroups: ["deuxfleurs.fr"]
resources: ["garagenodes"]
verbs: ["get", "list", "watch", "create", "update", "patch", "delete"]
---
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRoleBinding
metadata:
name: allow-crds-for-{{ .Release.Namespace }}-{{ .Release.Name }}
labels:
{{- include "garage.labels" . | nindent 4 }}
subjects:
- kind: ServiceAccount
name: {{ include "garage.serviceAccountName" . }}
namespace: {{ .Release.Namespace }}
roleRef:
kind: ClusterRole
name: manage-crds-{{ .Release.Namespace }}-{{ .Release.Name }}
apiGroup: rbac.authorization.k8s.io

View file

@ -0,0 +1,35 @@
# ConfigMap holding the garage.toml template for this release.
# The rpc_secret placeholder below is substituted by the init container of the
# workload, which copies the rendered file to the pod's etc volume.
apiVersion: v1
kind: ConfigMap
metadata:
  name: {{ include "garage.fullname" . }}-config
data:
  garage.toml: |-
    metadata_dir = "{{ .Values.garage.metadataDir }}"
    data_dir = "{{ .Values.garage.dataDir }}"
    # Falls back to "sled" when garage.dbEngine is not set in the values,
    # so that an empty engine name is never written to the configuration.
    db_engine = "{{ .Values.garage.dbEngine | default "sled" }}"

    replication_mode = "{{ .Values.garage.replicationMode }}"

    rpc_bind_addr = "{{ .Values.garage.rpcBindAddr }}"
    # rpc_secret will be populated by the init container from a k8s secret object
    rpc_secret = "__RPC_SECRET_REPLACE__"

    # Rendered as JSON so that a non-empty list becomes a valid TOML array of
    # quoted strings (the default Go formatting would yield an unquoted list).
    bootstrap_peers = {{ .Values.garage.bootstrapPeers | default list | toJson }}

    [kubernetes_discovery]
    namespace = "{{ .Release.Namespace }}"
    service_name = "{{ include "garage.fullname" . }}"
    skip_crd = {{ .Values.garage.kubernetesSkipCrd }}

    [s3_api]
    s3_region = "{{ .Values.garage.s3.api.region }}"
    api_bind_addr = "[::]:3900"
    root_domain = "{{ .Values.garage.s3.api.rootDomain }}"

    [s3_web]
    bind_addr = "[::]:3902"
    root_domain = "{{ .Values.garage.s3.web.rootDomain }}"
    index = "{{ .Values.garage.s3.web.index }}"

    [admin]
    api_bind_addr = "[::]:3903"

View file

@ -0,0 +1,123 @@
{{- if .Values.ingress.s3.api.enabled -}}
{{- $fullName := include "garage.fullname" . -}}
{{- $svcPort := .Values.service.s3.api.port -}}
{{- /*
On clusters older than 1.18 there is no spec.ingressClassName, so the class of
the S3 API ingress is conveyed through the legacy annotation instead.
The class name is read from ingress.s3.api.className (the same key used for
ingressClassName further down), and a missing or null annotations map is
replaced by an empty one before it is inspected.
*/}}
{{- if and .Values.ingress.s3.api.className (not (semverCompare ">=1.18-0" .Capabilities.KubeVersion.GitVersion)) }}
  {{- $apiAnnotations := .Values.ingress.s3.api.annotations | default dict }}
  {{- if not (hasKey $apiAnnotations "kubernetes.io/ingress.class") }}
  {{- $_ := set $apiAnnotations "kubernetes.io/ingress.class" .Values.ingress.s3.api.className }}
  {{- end }}
  {{- $_ := set .Values.ingress.s3.api "annotations" $apiAnnotations }}
{{- end }}
{{- if semverCompare ">=1.19-0" .Capabilities.KubeVersion.GitVersion -}}
apiVersion: networking.k8s.io/v1
{{- else if semverCompare ">=1.14-0" .Capabilities.KubeVersion.GitVersion -}}
apiVersion: networking.k8s.io/v1beta1
{{- else -}}
apiVersion: extensions/v1beta1
{{- end }}
kind: Ingress
metadata:
name: {{ $fullName }}-s3-api
labels:
{{- include "garage.labels" . | nindent 4 }}
{{- with .Values.ingress.s3.api.annotations }}
annotations:
{{- toYaml . | nindent 4 }}
{{- end }}
spec:
{{- if and .Values.ingress.s3.api.className (semverCompare ">=1.18-0" .Capabilities.KubeVersion.GitVersion) }}
ingressClassName: {{ .Values.ingress.s3.api.className }}
{{- end }}
{{- if .Values.ingress.s3.api.tls }}
tls:
{{- range .Values.ingress.s3.api.tls }}
- hosts:
{{- range .hosts }}
- {{ . | quote }}
{{- end }}
secretName: {{ .secretName }}
{{- end }}
{{- end }}
rules:
{{- range .Values.ingress.s3.api.hosts }}
- host: {{ .host | quote }}
http:
paths:
{{- range .paths }}
- path: {{ .path }}
{{- if and .pathType (semverCompare ">=1.18-0" $.Capabilities.KubeVersion.GitVersion) }}
pathType: {{ .pathType }}
{{- end }}
backend:
{{- if semverCompare ">=1.19-0" $.Capabilities.KubeVersion.GitVersion }}
service:
name: {{ $fullName }}
port:
number: {{ $svcPort }}
{{- else }}
serviceName: {{ $fullName }}
servicePort: {{ $svcPort }}
{{- end }}
{{- end }}
{{- end }}
{{- end }}
---
{{- if .Values.ingress.s3.web.enabled -}}
{{- $fullName := include "garage.fullname" . -}}
{{- $svcPort := .Values.service.s3.web.port -}}
{{- if and .Values.ingress.s3.web.className (not (semverCompare ">=1.18-0" .Capabilities.KubeVersion.GitVersion)) }}
{{- if not (hasKey .Values.ingress.s3.web.annotations "kubernetes.io/ingress.class") }}
{{- $_ := set .Values.ingress.s3.web.annotations "kubernetes.io/ingress.class" .Values.ingress.s3.web.className}}
{{- end }}
{{- end }}
{{- if semverCompare ">=1.19-0" .Capabilities.KubeVersion.GitVersion -}}
apiVersion: networking.k8s.io/v1
{{- else if semverCompare ">=1.14-0" .Capabilities.KubeVersion.GitVersion -}}
apiVersion: networking.k8s.io/v1beta1
{{- else -}}
apiVersion: extensions/v1beta1
{{- end }}
kind: Ingress
metadata:
name: {{ $fullName }}-s3-web
labels:
{{- include "garage.labels" . | nindent 4 }}
{{- with .Values.ingress.s3.web.annotations }}
annotations:
{{- toYaml . | nindent 4 }}
{{- end }}
spec:
{{- if and .Values.ingress.s3.web.className (semverCompare ">=1.18-0" .Capabilities.KubeVersion.GitVersion) }}
ingressClassName: {{ .Values.ingress.s3.web.className }}
{{- end }}
{{- if .Values.ingress.s3.web.tls }}
tls:
{{- range .Values.ingress.s3.web.tls }}
- hosts:
{{- range .hosts }}
- {{ . | quote }}
{{- end }}
secretName: {{ .secretName }}
{{- end }}
{{- end }}
rules:
{{- range .Values.ingress.s3.web.hosts }}
- host: {{ .host | quote }}
http:
paths:
{{- range .paths }}
- path: {{ .path }}
{{- if and .pathType (semverCompare ">=1.18-0" $.Capabilities.KubeVersion.GitVersion) }}
pathType: {{ .pathType }}
{{- end }}
backend:
{{- if semverCompare ">=1.19-0" $.Capabilities.KubeVersion.GitVersion }}
service:
name: {{ $fullName }}
port:
number: {{ $svcPort }}
{{- else }}
serviceName: {{ $fullName }}
servicePort: {{ $svcPort }}
{{- end }}
{{- end }}
{{- end }}
{{- end }}

View file

@ -0,0 +1,14 @@
apiVersion: v1
kind: Secret
metadata:
name: {{ include "garage.rpcSecretName" . }}
labels:
{{- include "garage.labels" . | nindent 4 }}
type: Opaque
data:
{{/* retrieve the secret data using lookup function and when not exists, return an empty dictionary / map as result */}}
{{- $prevSecret := (lookup "v1" "Secret" .Release.Namespace (include "garage.rpcSecretName" .)) | default dict }}
{{- $prevSecretData := $prevSecret.data | default dict }}
{{- $prevRpcSecret := $prevSecretData.rpcSecret | default "" | b64dec }}
{{/* Priority is: 1. from values, 2. previous value, 3. generate random */}}
rpcSecret: {{ .Values.garage.rpcSecret | default $prevRpcSecret | default (include "jupyterhub.randHex" 64) | b64enc | quote }}

View file

@ -0,0 +1,23 @@
apiVersion: v1
kind: Service
metadata:
name: {{ include "garage.fullname" . }}
labels:
{{- include "garage.labels" . | nindent 4 }}
spec:
type: {{ .Values.service.type }}
ports:
- port: {{ .Values.service.s3.api.port }}
targetPort: 3900
protocol: TCP
name: s3-api
- port: {{ .Values.service.s3.web.port }}
targetPort: 3902
protocol: TCP
name: s3-web
- port: 3903
targetPort: 3903
protocol: TCP
name: admin
selector:
{{- include "garage.selectorLabels" . | nindent 4 }}

View file

@ -0,0 +1,12 @@
{{- if .Values.serviceAccount.create -}}
apiVersion: v1
kind: ServiceAccount
metadata:
name: {{ include "garage.serviceAccountName" . }}
labels:
{{- include "garage.labels" . | nindent 4 }}
{{- with .Values.serviceAccount.annotations }}
annotations:
{{- toYaml . | nindent 4 }}
{{- end }}
{{- end }}

View file

@ -0,0 +1,137 @@
apiVersion: apps/v1
kind: {{ .Values.deployment.kind }}
metadata:
name: {{ include "garage.fullname" . }}
labels:
{{- include "garage.labels" . | nindent 4 }}
spec:
selector:
matchLabels:
{{- include "garage.selectorLabels" . | nindent 6 }}
{{- if eq .Values.deployment.kind "StatefulSet" }}
replicas: {{ .Values.deployment.replicaCount }}
serviceName: {{ include "garage.fullname" . }}
{{- end }}
template:
metadata:
{{- with .Values.podAnnotations }}
annotations:
{{- toYaml . | nindent 8 }}
{{- end }}
labels:
{{- include "garage.selectorLabels" . | nindent 8 }}
spec:
{{- with .Values.imagePullSecrets }}
imagePullSecrets:
{{- toYaml . | nindent 8 }}
{{- end }}
serviceAccountName: {{ include "garage.serviceAccountName" . }}
securityContext:
{{- toYaml .Values.podSecurityContext | nindent 8 }}
initContainers:
# Copies garage.toml from configmap to temporary etc volume and replaces RPC secret placeholder
- name: {{ .Chart.Name }}-init
image: busybox:1.28
command: ["sh", "-c", "sed \"s/__RPC_SECRET_REPLACE__/$RPC_SECRET/\" /mnt/garage.toml > /mnt/etc/garage.toml"]
env:
- name: RPC_SECRET
valueFrom:
secretKeyRef:
name: {{ include "garage.rpcSecretName" . }}
key: rpcSecret
volumeMounts:
- name: configmap
mountPath: /mnt/garage.toml
subPath: garage.toml
- name: etc
mountPath: /mnt/etc
containers:
- name: {{ .Chart.Name }}
securityContext:
{{- toYaml .Values.securityContext | nindent 12 }}
image: "{{ .Values.image.repository }}:{{ .Values.image.tag | default .Chart.AppVersion }}"
imagePullPolicy: {{ .Values.image.pullPolicy }}
ports:
- containerPort: 3900
name: s3-api
- containerPort: 3902
name: web-api
- containerPort: 3903
name: admin
volumeMounts:
- name: meta
mountPath: /mnt/meta
- name: data
mountPath: /mnt/data
- name: etc
mountPath: /etc/garage.toml
subPath: garage.toml
# TODO
# livenessProbe:
# httpGet:
# path: /
# port: 3900
# readinessProbe:
# httpGet:
# path: /
# port: 3900
resources:
{{- toYaml .Values.resources | nindent 12 }}
volumes:
- name: configmap
configMap:
name: {{ include "garage.fullname" . }}-config
- name: etc
emptyDir: {}
{{- if .Values.persistence.enabled }}
{{- if eq .Values.deployment.kind "DaemonSet" }}
- name: meta
hostPath:
path: {{ .Values.persistence.meta.hostPath }}
type: DirectoryOrCreate
- name: data
hostPath:
path: {{ .Values.persistence.data.hostPath }}
type: DirectoryOrCreate
{{- end }}
{{- else }}
- name: meta
emptyDir: {}
- name: data
emptyDir: {}
{{- end }}
{{- with .Values.nodeSelector }}
nodeSelector:
{{- toYaml . | nindent 8 }}
{{- end }}
{{- with .Values.affinity }}
affinity:
{{- toYaml . | nindent 8 }}
{{- end }}
{{- with .Values.tolerations }}
tolerations:
{{- toYaml . | nindent 8 }}
{{- end }}
{{- if and .Values.persistence.enabled (eq .Values.deployment.kind "StatefulSet") }}
volumeClaimTemplates:
- metadata:
name: meta
spec:
accessModes: [ "ReadWriteOnce" ]
{{- if hasKey .Values.persistence.meta "storageClass" }}
storageClassName: {{ .Values.persistence.meta.storageClass | quote }}
{{- end }}
resources:
requests:
storage: {{ .Values.persistence.meta.size | quote }}
- metadata:
name: data
spec:
accessModes: [ "ReadWriteOnce" ]
{{- if hasKey .Values.persistence.data "storageClass" }}
storageClassName: {{ .Values.persistence.data.storageClass | quote }}
{{- end }}
resources:
requests:
storage: {{ .Values.persistence.data.size | quote }}
{{- end }}

View file

@ -0,0 +1,150 @@
# Default values for garage.
# This is a YAML-formatted file.
# Declare variables to be passed into your templates.
# Garage configuration. These values go to garage.toml
garage:
metadataDir: "/mnt/meta"
dataDir: "/mnt/data"
# Default to 3 replicas, see the replication_mode section at
# https://garagehq.deuxfleurs.fr/documentation/reference-manual/configuration/
replicationMode: "3"
rpcBindAddr: "[::]:3901"
# If not given, a random secret will be generated and stored in a Secret object
rpcSecret: ""
# This is not required if you use the integrated kubernetes discovery
bootstrapPeers: []
kubernetesSkipCrd: false
s3:
api:
region: "garage"
rootDomain: ".s3.garage.tld"
web:
rootDomain: ".web.garage.tld"
index: "index.html"
# Data persistence
persistence:
enabled: true
meta:
# storageClass: "fast-storage-class"
size: 100Mi
# used only for daemon sets
hostPath: /var/lib/garage/meta
data:
# storageClass: "slow-storage-class"
size: 100Mi
# used only for daemon sets
hostPath: /var/lib/garage/data
# Deployment configuration
deployment:
# Switchable to DaemonSet
kind: StatefulSet
# Number of StatefulSet replicas/garage nodes to start
replicaCount: 3
image:
repository: dxflrs/amd64_garage
# please prefer using the chart version and not this tag
tag: ""
pullPolicy: IfNotPresent
imagePullSecrets: []
nameOverride: ""
fullnameOverride: ""
serviceAccount:
# Specifies whether a service account should be created
create: true
# Annotations to add to the service account
annotations: {}
# The name of the service account to use.
# If not set and create is true, a name is generated using the fullname template
name: ""
podAnnotations: {}
podSecurityContext: {}
# fsGroup: 2000
securityContext:
# The default security context is heavily restricted
# feel free to tune it to your requirements
capabilities:
drop:
- ALL
readOnlyRootFilesystem: true
runAsNonRoot: true
runAsUser: 1000
service:
# You can rely on any service to expose your cluster
# - ClusterIP (+ Ingress)
# - NodePort (+ Ingress)
# - LoadBalancer
type: ClusterIP
s3:
api:
port: 3900
web:
port: 3902
# NOTE: the admin API is excluded for now as it is not consistent across nodes
ingress:
s3:
api:
enabled: true
# Rely either on the className or the annotation below but not both
# replace "nginx" by an Ingress controller
# you can find examples here https://kubernetes.io/docs/concepts/services-networking/ingress-controllers
className: "nginx"
annotations:
# kubernetes.io/ingress.class: "nginx"
# kubernetes.io/tls-acme: "true"
hosts:
- host: "s3.garage.tld" # garage S3 API endpoint
paths:
- path: /
pathType: Prefix
- host: "*.s3.garage.tld" # garage S3 API endpoint, DNS style bucket access
paths:
- path: /
pathType: Prefix
tls: []
# - secretName: my-garage-cluster-tls
# hosts:
# - kubernetes.docker.internal
web:
enabled: true
className: "nginx"
annotations: {}
# kubernetes.io/ingress.class: nginx
# kubernetes.io/tls-acme: "true"
hosts:
- host: "*.web.garage.tld" # wildcard website access with bucket name prefix
paths:
- path: /
pathType: Prefix
- host: "mywebpage.example.com" # specific bucket access with FQDN bucket
paths:
- path: /
pathType: Prefix
tls: []
# - secretName: my-garage-cluster-tls
# hosts:
# - kubernetes.docker.internal
resources: {}
# The following are indicative for a small-size deployment, for anything serious double them.
# limits:
# cpu: 100m
# memory: 1024Mi
# requests:
# cpu: 100m
# memory: 512Mi
nodeSelector: {}
tolerations: []
affinity: {}

File diff suppressed because it is too large Load diff

View file

@ -8,7 +8,7 @@ SCRIPT_FOLDER="`dirname \"$0\"`"
REPO_FOLDER="${SCRIPT_FOLDER}/../"
GARAGE_DEBUG="${REPO_FOLDER}/target/debug/"
GARAGE_RELEASE="${REPO_FOLDER}/target/release/"
NIX_RELEASE="${REPO_FOLDER}/result/bin/"
NIX_RELEASE="${REPO_FOLDER}/result/bin/:${REPO_FOLDER}/result-bin/bin/"
PATH="${GARAGE_DEBUG}:${GARAGE_RELEASE}:${NIX_RELEASE}:$PATH"
CMDOUT=/tmp/garage.cmd.tmp

View file

@ -10,24 +10,15 @@ let
overlays = [ cargo2nixOverlay ];
};
kaniko = (import ./nix/kaniko.nix) pkgs;
manifest-tool = (import ./nix/manifest-tool.nix) pkgs;
winscp = (import ./nix/winscp.nix) pkgs;
in
{
/* --- Rust Shell ---
* Use it to compile Garage
*/
rust = pkgs.mkShell {
shellHook = ''
function refresh_toolchain {
nix copy \
--to 's3://nix?endpoint=garage.deuxfleurs.fr&region=garage&secret-key=/etc/nix/signing-key.sec' \
$(nix-store -qR \
$(nix-build --quiet --no-build-output --no-out-link nix/toolchain.nix))
}
'';
nativeBuildInputs = [
#pkgs.rustPlatform.rust.rustc
pkgs.rustPlatform.rust.cargo
@ -66,12 +57,33 @@ function refresh_toolchain {
*/
release = pkgs.mkShell {
shellHook = ''
# Builds nix/toolchain.nix and copies its whole closure to the S3 binary cache,
# signing it with the key fetched from the `pass` password store.
# Returns the exit status of the key export / copy step.
function refresh_toolchain {
  # Keep the private signing key in an unpredictable temp file that only the
  # current user can read, instead of a fixed path in /tmp.
  local keyfile rc
  keyfile=$(mktemp) || return 1
  pass show deuxfleurs/nix_priv_key > "$keyfile" \
    && nix copy \
      --to "s3://nix?endpoint=garage.deuxfleurs.fr&region=garage&secret-key=$keyfile" \
      $(nix-store -qR \
        $(nix-build --no-build-output --no-out-link nix/toolchain.nix))
  rc=$?
  # Always remove the key, whether or not the copy succeeded.
  rm -f "$keyfile"
  return $rc
}
# Instantiates each listed attribute (clippy, test, and the debug/release
# packages of every architecture) and copies the closure of the resulting
# derivation to the S3 binary cache, signed with the key from `pass`.
function refresh_cache {
  # Keep the private signing key in an unpredictable temp file that only the
  # current user can read, instead of a fixed path in /tmp.
  local keyfile
  keyfile=$(mktemp) || return 1
  pass show deuxfleurs/nix_priv_key > "$keyfile"
  for attr in clippy.amd64 test.amd64 pkgs.{amd64,i386,arm,arm64}.{debug,release}; do
    echo "Updating cache for ''${attr}"
    derivation=$(nix-instantiate --attr ''${attr})
    # Strip a trailing "!bin" output suffix from the instantiated path, if present.
    nix copy \
      --to "s3://nix?endpoint=garage.deuxfleurs.fr&region=garage&secret-key=$keyfile" \
      $(nix-store -qR ''${derivation%\!bin})
  done
  rm -f "$keyfile"
}
function to_s3 {
aws \
--endpoint-url https://garage.deuxfleurs.fr \
--region garage \
s3 cp \
./result/bin/garage \
./result-bin/bin/garage \
s3://garagehq.deuxfleurs.fr/_releases/''${DRONE_TAG:-$DRONE_COMMIT}/''${TARGET}/garage
}
@ -84,6 +96,34 @@ function to_docker {
--verbosity=debug
}
function multiarch_docker {
manifest-tool push from-spec <(cat <<EOF
image: dxflrs/garage:''${CONTAINER_TAG}
manifests:
-
image: dxflrs/arm64_garage:''${CONTAINER_TAG}
platform:
architecture: arm64
os: linux
-
image: dxflrs/amd64_garage:''${CONTAINER_TAG}
platform:
architecture: amd64
os: linux
-
image: dxflrs/386_garage:''${CONTAINER_TAG}
platform:
architecture: 386
os: linux
-
image: dxflrs/arm_garage:''${CONTAINER_TAG}
platform:
architecture: arm
os: linux
EOF
)
}
function refresh_index {
aws \
--endpoint-url https://garage.deuxfleurs.fr \
@ -113,6 +153,7 @@ function refresh_index {
nativeBuildInputs = [
pkgs.awscli2
kaniko
manifest-tool
];
};
}

View file

@ -24,20 +24,21 @@ async-trait = "0.1.7"
base64 = "0.13"
bytes = "1.0"
chrono = "0.4"
crypto-mac = "0.10"
crypto-common = "0.1"
err-derive = "0.3"
hex = "0.4"
hmac = "0.10"
hmac = "0.12"
idna = "0.2"
tracing = "0.1.30"
md-5 = "0.9"
md-5 = "0.10"
nom = "7.1"
sha2 = "0.9"
sha2 = "0.10"
futures = "0.3"
futures-util = "0.3"
pin-project = "1.0"
pin-project = "1.0.11"
tokio = { version = "1.0", default-features = false, features = ["rt", "rt-multi-thread", "io-util", "net", "time", "macros", "sync", "signal", "fs"] }
tokio-stream = "0.1"
form_urlencoded = "1.0.0"
http = "0.2"

View file

@ -5,7 +5,7 @@ use async_trait::async_trait;
use futures::future::Future;
use http::header::{ACCESS_CONTROL_ALLOW_METHODS, ACCESS_CONTROL_ALLOW_ORIGIN, ALLOW};
use hyper::{Body, Request, Response};
use hyper::{Body, Request, Response, StatusCode};
use opentelemetry::trace::SpanRef;
@ -34,7 +34,10 @@ pub struct AdminApiServer {
}
impl AdminApiServer {
pub fn new(garage: Arc<Garage>) -> Self {
pub fn new(
garage: Arc<Garage>,
#[cfg(feature = "metrics")] exporter: PrometheusExporter,
) -> Self {
let cfg = &garage.config.admin;
let metrics_token = cfg
.metrics_token
@ -47,7 +50,7 @@ impl AdminApiServer {
Self {
garage,
#[cfg(feature = "metrics")]
exporter: opentelemetry_prometheus::exporter().init(),
exporter,
metrics_token,
admin_token,
}
@ -66,7 +69,7 @@ impl AdminApiServer {
fn handle_options(&self, _req: &Request<Body>) -> Result<Response<Body>, Error> {
Ok(Response::builder()
.status(204)
.status(StatusCode::NO_CONTENT)
.header(ALLOW, "OPTIONS, GET, POST")
.header(ACCESS_CONTROL_ALLOW_METHODS, "OPTIONS, GET, POST")
.header(ACCESS_CONTROL_ALLOW_ORIGIN, "*")
@ -91,7 +94,7 @@ impl AdminApiServer {
.ok_or_internal_error("Could not serialize metrics")?;
Ok(Response::builder()
.status(200)
.status(StatusCode::OK)
.header(http::header::CONTENT_TYPE, encoder.format_type())
.body(Body::from(buffer))?)
}

View file

@ -210,7 +210,7 @@ async fn bucket_info_results(
.collect::<Vec<_>>(),
objects: counters.get(OBJECTS).cloned().unwrap_or_default(),
bytes: counters.get(BYTES).cloned().unwrap_or_default(),
unfinshed_uploads: counters
unfinished_uploads: counters
.get(UNFINISHED_UPLOADS)
.cloned()
.unwrap_or_default(),
@ -234,7 +234,7 @@ struct GetBucketInfoResult {
keys: Vec<GetBucketInfoKey>,
objects: i64,
bytes: i64,
unfinshed_uploads: i64,
unfinished_uploads: i64,
quotas: ApiBucketQuotas,
}

View file

@ -151,7 +151,7 @@ pub async fn handle_update_cluster_layout(
garage.system.update_cluster_layout(&layout).await?;
Ok(Response::builder()
.status(StatusCode::OK)
.status(StatusCode::NO_CONTENT)
.body(Body::empty())?)
}
@ -166,7 +166,7 @@ pub async fn handle_apply_cluster_layout(
garage.system.update_cluster_layout(&layout).await?;
Ok(Response::builder()
.status(StatusCode::OK)
.status(StatusCode::NO_CONTENT)
.body(Body::empty())?)
}
@ -181,7 +181,7 @@ pub async fn handle_revert_cluster_layout(
garage.system.update_cluster_layout(&layout).await?;
Ok(Response::builder()
.status(StatusCode::OK)
.status(StatusCode::NO_CONTENT)
.body(Body::empty())?)
}

View file

@ -174,7 +174,11 @@ impl<A: ApiHandler> ApiServer<A> {
let current_context = Context::current();
let current_span = current_context.span();
current_span.update_name::<String>(format!("S3 API {}", endpoint.name()));
current_span.update_name::<String>(format!(
"{} API {}",
A::API_NAME_DISPLAY,
endpoint.name()
));
current_span.set_attribute(KeyValue::new("endpoint", endpoint.name()));
endpoint.add_span_attributes(current_span);

View file

@ -42,7 +42,7 @@ pub async fn handle_insert_batch(
garage.k2v.rpc.insert_batch(bucket_id, items2).await?;
Ok(Response::builder()
.status(StatusCode::OK)
.status(StatusCode::NO_CONTENT)
.body(Body::empty())?)
}

View file

@ -153,7 +153,7 @@ pub async fn handle_insert_item(
.await?;
Ok(Response::builder()
.status(StatusCode::OK)
.status(StatusCode::NO_CONTENT)
.body(Body::empty())?)
}

View file

@ -295,7 +295,6 @@ fn parse_create_bucket_xml(xml_bytes: &[u8]) -> Option<Option<String>> {
let mut ret = None;
for item in cbc.children() {
println!("{:?}", item);
if item.has_tag_name("LocationConstraint") {
if ret != None {
return None;

View file

@ -5,9 +5,12 @@ use std::time::{Duration, SystemTime, UNIX_EPOCH};
use futures::{stream, stream::Stream, StreamExt, TryFutureExt};
use md5::{Digest as Md5Digest, Md5};
use bytes::Bytes;
use hyper::{Body, Request, Response};
use serde::Serialize;
use garage_rpc::netapp::bytes_buf::BytesBuf;
use garage_rpc::rpc_helper::OrderTag;
use garage_table::*;
use garage_util::data::*;
use garage_util::time::*;
@ -305,13 +308,18 @@ pub async fn handle_upload_part_copy(
// if and only if the block returned is a block that already existed
// in the Garage data store (thus we don't need to save it again).
let garage2 = garage.clone();
let order_stream = OrderTag::stream();
let source_blocks = stream::iter(blocks_to_copy)
.flat_map(|(block_hash, range_to_copy)| {
.enumerate()
.flat_map(|(i, (block_hash, range_to_copy))| {
let garage3 = garage2.clone();
stream::once(async move {
let data = garage3.block_manager.rpc_get_block(&block_hash).await?;
let data = garage3
.block_manager
.rpc_get_block(&block_hash, Some(order_stream.order(i as u64)))
.await?;
match range_to_copy {
Some(r) => Ok((data[r].to_vec(), None)),
Some(r) => Ok((data.slice(r), None)),
None => Ok((data, Some(block_hash))),
}
})
@ -553,13 +561,13 @@ impl CopyPreconditionHeaders {
}
}
type BlockStreamItemOk = (Vec<u8>, Option<Hash>);
type BlockStreamItemOk = (Bytes, Option<Hash>);
type BlockStreamItem = Result<BlockStreamItemOk, garage_util::error::Error>;
struct Defragmenter<S: Stream<Item = BlockStreamItem>> {
block_size: usize,
block_stream: Pin<Box<stream::Peekable<S>>>,
buffer: Vec<u8>,
buffer: BytesBuf,
hash: Option<Hash>,
}
@ -568,7 +576,7 @@ impl<S: Stream<Item = BlockStreamItem>> Defragmenter<S> {
Self {
block_size,
block_stream,
buffer: vec![],
buffer: BytesBuf::new(),
hash: None,
}
}
@ -586,7 +594,7 @@ impl<S: Stream<Item = BlockStreamItem>> Defragmenter<S> {
if self.buffer.is_empty() {
let (next_block, next_block_hash) = self.block_stream.next().await.unwrap()?;
self.buffer = next_block;
self.buffer.extend(next_block);
self.hash = next_block_hash;
} else if self.buffer.len() + peeked_next_block.len() > self.block_size {
break;
@ -597,11 +605,11 @@ impl<S: Stream<Item = BlockStreamItem>> Defragmenter<S> {
}
}
Ok((std::mem::take(&mut self.buffer), self.hash.take()))
Ok((self.buffer.take_all(), self.hash.take()))
}
}
#[derive(Debug, Serialize, PartialEq)]
#[derive(Debug, Serialize, PartialEq, Eq)]
pub struct CopyObjectResult {
#[serde(rename = "LastModified")]
pub last_modified: s3_xml::Value,
@ -609,7 +617,7 @@ pub struct CopyObjectResult {
pub etag: s3_xml::Value,
}
#[derive(Debug, Serialize, PartialEq)]
#[derive(Debug, Serialize, PartialEq, Eq)]
pub struct CopyPartResult {
#[serde(serialize_with = "xmlns_tag")]
pub xmlns: (),
@ -654,7 +662,6 @@ mod tests {
last_modified: s3_xml::Value("2011-04-11T20:34:56.000Z".into()),
etag: s3_xml::Value("\"9b2cf535f27731c974343645a3985328\"".into()),
};
println!("{}", to_xml_with_header(&v)?);
assert_eq!(to_xml_with_header(&v)?, expected_retval);

View file

@ -64,14 +64,13 @@ pub async fn handle_delete(
bucket_id: Uuid,
key: &str,
) -> Result<Response<Body>, Error> {
let (_deleted_version, delete_marker_version) =
handle_delete_internal(&garage, bucket_id, key).await?;
Ok(Response::builder()
.header("x-amz-version-id", hex::encode(delete_marker_version))
.status(StatusCode::NO_CONTENT)
.body(Body::from(vec![]))
.unwrap())
match handle_delete_internal(&garage, bucket_id, key).await {
Ok(_) | Err(Error::NoSuchKey) => Ok(Response::builder()
.status(StatusCode::NO_CONTENT)
.body(Body::from(vec![]))
.unwrap()),
Err(e) => Err(e),
}
}
pub async fn handle_delete_objects(

View file

@ -2,16 +2,19 @@
use std::sync::Arc;
use std::time::{Duration, UNIX_EPOCH};
use futures::stream::*;
use futures::future;
use futures::stream::{self, StreamExt};
use http::header::{
ACCEPT_RANGES, CONTENT_LENGTH, CONTENT_RANGE, CONTENT_TYPE, ETAG, IF_MODIFIED_SINCE,
IF_NONE_MATCH, LAST_MODIFIED, RANGE,
};
use hyper::body::Bytes;
use hyper::{Body, Request, Response, StatusCode};
use tokio::sync::mpsc;
use garage_rpc::rpc_helper::{netapp::stream::ByteStream, OrderTag};
use garage_table::EmptyKey;
use garage_util::data::*;
use garage_util::error::OkOrMessage;
use garage_model::garage::Garage;
use garage_model::s3::object_table::*;
@ -242,36 +245,56 @@ pub async fn handle_get(
Ok(resp_builder.body(body)?)
}
ObjectVersionData::FirstBlock(_, first_block_hash) => {
let read_first_block = garage.block_manager.rpc_get_block(first_block_hash);
let get_next_blocks = garage.version_table.get(&last_v.uuid, &EmptyKey);
let (tx, rx) = mpsc::channel(2);
let (first_block, version) = futures::try_join!(read_first_block, get_next_blocks)?;
let version = version.ok_or(Error::NoSuchKey)?;
let order_stream = OrderTag::stream();
let first_block_hash = *first_block_hash;
let version_uuid = last_v.uuid;
let mut blocks = version
.blocks
.items()
.iter()
.map(|(_, vb)| (vb.hash, None))
.collect::<Vec<_>>();
blocks[0].1 = Some(first_block);
tokio::spawn(async move {
match async {
let garage2 = garage.clone();
let version_fut = tokio::spawn(async move {
garage2.version_table.get(&version_uuid, &EmptyKey).await
});
let body_stream = futures::stream::iter(blocks)
.map(move |(hash, data_opt)| {
let garage = garage.clone();
async move {
if let Some(data) = data_opt {
Ok(Bytes::from(data))
} else {
garage
.block_manager
.rpc_get_block(&hash)
.await
.map(Bytes::from)
}
let stream_block_0 = garage
.block_manager
.rpc_get_block_streaming(&first_block_hash, Some(order_stream.order(0)))
.await?;
tx.send(stream_block_0)
.await
.ok_or_message("channel closed")?;
let version = version_fut.await.unwrap()?.ok_or(Error::NoSuchKey)?;
for (i, (_, vb)) in version.blocks.items().iter().enumerate().skip(1) {
let stream_block_i = garage
.block_manager
.rpc_get_block_streaming(&vb.hash, Some(order_stream.order(i as u64)))
.await?;
tx.send(stream_block_i)
.await
.ok_or_message("channel closed")?;
}
})
.buffered(2);
Ok::<(), Error>(())
}
.await
{
Ok(()) => (),
Err(e) => {
let err = std::io::Error::new(
std::io::ErrorKind::Other,
format!("Error while getting object data: {}", e),
);
let _ = tx
.send(Box::pin(stream::once(future::ready(Err(err)))))
.await;
}
}
});
let body_stream = tokio_stream::wrappers::ReceiverStream::new(rx).flatten();
let body = hyper::body::Body::wrap_stream(body_stream);
Ok(resp_builder.body(body)?)
@ -422,40 +445,79 @@ fn body_from_blocks_range(
all_blocks.len(),
4 + ((end - begin) / std::cmp::max(all_blocks[0].1.size as u64, 1024)) as usize,
));
let mut true_offset = 0;
let mut block_offset: u64 = 0;
for (_, b) in all_blocks.iter() {
if true_offset >= end {
if block_offset >= end {
break;
}
// Keep only blocks that have an intersection with the requested range
if true_offset < end && true_offset + b.size > begin {
blocks.push((*b, true_offset));
if block_offset < end && block_offset + b.size > begin {
blocks.push((*b, block_offset));
}
true_offset += b.size;
block_offset += b.size as u64;
}
let order_stream = OrderTag::stream();
let body_stream = futures::stream::iter(blocks)
.map(move |(block, true_offset)| {
.enumerate()
.map(move |(i, (block, block_offset))| {
let garage = garage.clone();
async move {
let data = garage.block_manager.rpc_get_block(&block.hash).await?;
let data = Bytes::from(data);
let start_in_block = if true_offset > begin {
0
} else {
begin - true_offset
};
let end_in_block = if true_offset + block.size < end {
block.size
} else {
end - true_offset
};
Result::<Bytes, Error>::Ok(
data.slice(start_in_block as usize..end_in_block as usize),
)
garage
.block_manager
.rpc_get_block_streaming(&block.hash, Some(order_stream.order(i as u64)))
.await
.unwrap_or_else(|e| error_stream(i, e))
.scan(block_offset, move |chunk_offset, chunk| {
let r = match chunk {
Ok(chunk_bytes) => {
let chunk_len = chunk_bytes.len() as u64;
let r = if *chunk_offset >= end {
// The current chunk is after the part we want to read.
// Returning None here will stop the scan, the rest of the
// stream will be ignored
None
} else if *chunk_offset + chunk_len <= begin {
// The current chunk is before the part we want to read.
// We return a None that will be removed by the filter_map
// below.
Some(None)
} else {
// The chunk has an intersection with the requested range
let start_in_chunk = if *chunk_offset > begin {
0
} else {
begin - *chunk_offset
};
let end_in_chunk = if *chunk_offset + chunk_len < end {
chunk_len
} else {
end - *chunk_offset
};
Some(Some(Ok(chunk_bytes
.slice(start_in_chunk as usize..end_in_chunk as usize))))
};
*chunk_offset += chunk_bytes.len() as u64;
r
}
Err(e) => Some(Some(Err(e))),
};
futures::future::ready(r)
})
.filter_map(futures::future::ready)
}
})
.buffered(2);
.buffered(2)
.flatten();
hyper::body::Body::wrap_stream(body_stream)
}
/// Builds a stream that yields exactly one item: an I/O error stating that
/// block number `i` could not be fetched, with `e` as the reported cause.
fn error_stream(i: usize, e: garage_util::error::Error) -> ByteStream {
    let failure = futures::stream::once(async move {
        let message = format!("Could not get block {}: {}", i, e);
        Err(std::io::Error::new(std::io::ErrorKind::Other, message))
    });
    Box::pin(failure)
}

View file

@ -1,4 +1,4 @@
use std::collections::{BTreeMap, BTreeSet, HashMap, VecDeque};
use std::collections::{BTreeMap, BTreeSet, HashMap};
use std::sync::Arc;
use futures::prelude::*;
@ -8,7 +8,14 @@ use hyper::{Request, Response};
use md5::{digest::generic_array::*, Digest as Md5Digest, Md5};
use sha2::Sha256;
use opentelemetry::{
trace::{FutureExt as OtelFutureExt, TraceContextExt, Tracer},
Context,
};
use garage_rpc::netapp::bytes_buf::BytesBuf;
use garage_table::*;
use garage_util::async_hash::*;
use garage_util::data::*;
use garage_util::error::Error as GarageError;
use garage_util::time::*;
@ -102,7 +109,7 @@ pub(crate) async fn save_stream<S: Stream<Item = Result<Bytes, Error>> + Unpin>(
size,
etag: data_md5sum_hex.clone(),
},
first_block,
first_block.to_vec(),
)),
};
@ -130,7 +137,7 @@ pub(crate) async fn save_stream<S: Stream<Item = Result<Bytes, Error>> + Unpin>(
garage.version_table.insert(&version).await?;
// Transfer data and verify checksum
let first_block_hash = blake2sum(&first_block[..]);
let first_block_hash = async_blake2sum(first_block.clone()).await;
let tx_result = (|| async {
let (total_size, data_md5sum, data_sha256sum) = read_and_put_blocks(
@ -273,14 +280,23 @@ async fn read_and_put_blocks<S: Stream<Item = Result<Bytes, Error>> + Unpin>(
garage: &Garage,
version: &Version,
part_number: u64,
first_block: Vec<u8>,
first_block: Bytes,
first_block_hash: Hash,
chunker: &mut StreamChunker<S>,
) -> Result<(u64, GenericArray<u8, typenum::U16>, Hash), Error> {
let mut md5hasher = Md5::new();
let mut sha256hasher = Sha256::new();
md5hasher.update(&first_block[..]);
sha256hasher.update(&first_block[..]);
let tracer = opentelemetry::global::tracer("garage");
let md5hasher = AsyncHasher::<Md5>::new();
let sha256hasher = AsyncHasher::<Sha256>::new();
futures::future::join(
md5hasher.update(first_block.clone()),
sha256hasher.update(first_block.clone()),
)
.with_context(Context::current_with_span(
tracer.start("Hash first block (md5, sha256)"),
))
.await;
let mut next_offset = first_block.len();
let mut put_curr_version_block = put_block_meta(
@ -302,9 +318,15 @@ async fn read_and_put_blocks<S: Stream<Item = Result<Bytes, Error>> + Unpin>(
chunker.next(),
)?;
if let Some(block) = next_block {
md5hasher.update(&block[..]);
sha256hasher.update(&block[..]);
let block_hash = blake2sum(&block[..]);
let (_, _, block_hash) = futures::future::join3(
md5hasher.update(block.clone()),
sha256hasher.update(block.clone()),
async_blake2sum(block.clone()),
)
.with_context(Context::current_with_span(
tracer.start("Hash block (md5, sha256, blake2)"),
))
.await;
let block_len = block.len();
put_curr_version_block = put_block_meta(
garage,
@ -322,9 +344,9 @@ async fn read_and_put_blocks<S: Stream<Item = Result<Bytes, Error>> + Unpin>(
}
let total_size = next_offset as u64;
let data_md5sum = md5hasher.finalize();
let data_md5sum = md5hasher.finalize().await;
let data_sha256sum = sha256hasher.finalize();
let data_sha256sum = sha256hasher.finalize().await;
let data_sha256sum = Hash::try_from(&data_sha256sum[..]).unwrap();
Ok((total_size, data_md5sum, data_sha256sum))
@ -364,7 +386,7 @@ struct StreamChunker<S: Stream<Item = Result<Bytes, Error>>> {
stream: S,
read_all: bool,
block_size: usize,
buf: VecDeque<u8>,
buf: BytesBuf,
}
impl<S: Stream<Item = Result<Bytes, Error>> + Unpin> StreamChunker<S> {
@ -373,11 +395,11 @@ impl<S: Stream<Item = Result<Bytes, Error>> + Unpin> StreamChunker<S> {
stream,
read_all: false,
block_size,
buf: VecDeque::with_capacity(2 * block_size),
buf: BytesBuf::new(),
}
}
async fn next(&mut self) -> Result<Option<Vec<u8>>, Error> {
async fn next(&mut self) -> Result<Option<Bytes>, Error> {
while !self.read_all && self.buf.len() < self.block_size {
if let Some(block) = self.stream.next().await {
let bytes = block?;
@ -390,12 +412,8 @@ impl<S: Stream<Item = Result<Bytes, Error>> + Unpin> StreamChunker<S> {
if self.buf.is_empty() {
Ok(None)
} else if self.buf.len() <= self.block_size {
let block = self.buf.drain(..).collect::<Vec<u8>>();
Ok(Some(block))
} else {
let block = self.buf.drain(..self.block_size).collect::<Vec<u8>>();
Ok(Some(block))
Ok(Some(self.buf.take_max(self.block_size)))
}
}
}
@ -504,7 +522,9 @@ pub async fn handle_put_part(
// Copy block to store
let version = Version::new(version_uuid, bucket_id, key, false);
let first_block_hash = blake2sum(&first_block[..]);
let first_block_hash = async_blake2sum(first_block.clone()).await;
let (_, data_md5sum, data_sha256sum) = read_and_put_blocks(
&garage,
&version,

View file

@ -25,7 +25,7 @@ impl From<&str> for Value {
#[derive(Debug, Serialize, Deserialize, PartialEq, Eq, PartialOrd, Ord)]
pub struct IntValue(#[serde(rename = "$value")] pub i64);
#[derive(Debug, Serialize, PartialEq)]
#[derive(Debug, Serialize, PartialEq, Eq)]
pub struct Bucket {
#[serde(rename = "CreationDate")]
pub creation_date: Value,
@ -33,7 +33,7 @@ pub struct Bucket {
pub name: Value,
}
#[derive(Debug, Serialize, PartialEq)]
#[derive(Debug, Serialize, PartialEq, Eq)]
pub struct Owner {
#[serde(rename = "DisplayName")]
pub display_name: Value,
@ -41,13 +41,13 @@ pub struct Owner {
pub id: Value,
}
#[derive(Debug, Serialize, PartialEq)]
#[derive(Debug, Serialize, PartialEq, Eq)]
pub struct BucketList {
#[serde(rename = "Bucket")]
pub entries: Vec<Bucket>,
}
#[derive(Debug, Serialize, PartialEq)]
#[derive(Debug, Serialize, PartialEq, Eq)]
pub struct ListAllMyBucketsResult {
#[serde(rename = "Buckets")]
pub buckets: BucketList,
@ -55,7 +55,7 @@ pub struct ListAllMyBucketsResult {
pub owner: Owner,
}
#[derive(Debug, Serialize, PartialEq)]
#[derive(Debug, Serialize, PartialEq, Eq)]
pub struct LocationConstraint {
#[serde(serialize_with = "xmlns_tag")]
pub xmlns: (),
@ -63,7 +63,7 @@ pub struct LocationConstraint {
pub region: String,
}
#[derive(Debug, Serialize, PartialEq)]
#[derive(Debug, Serialize, PartialEq, Eq)]
pub struct Deleted {
#[serde(rename = "Key")]
pub key: Value,
@ -73,7 +73,7 @@ pub struct Deleted {
pub delete_marker_version_id: Value,
}
#[derive(Debug, Serialize, PartialEq)]
#[derive(Debug, Serialize, PartialEq, Eq)]
pub struct Error {
#[serde(rename = "Code")]
pub code: Value,
@ -85,7 +85,7 @@ pub struct Error {
pub region: Option<Value>,
}
#[derive(Debug, Serialize, PartialEq)]
#[derive(Debug, Serialize, PartialEq, Eq)]
pub struct DeleteError {
#[serde(rename = "Code")]
pub code: Value,
@ -97,7 +97,7 @@ pub struct DeleteError {
pub version_id: Option<Value>,
}
#[derive(Debug, Serialize, PartialEq)]
#[derive(Debug, Serialize, PartialEq, Eq)]
pub struct DeleteResult {
#[serde(serialize_with = "xmlns_tag")]
pub xmlns: (),
@ -107,7 +107,7 @@ pub struct DeleteResult {
pub errors: Vec<DeleteError>,
}
#[derive(Debug, Serialize, PartialEq)]
#[derive(Debug, Serialize, PartialEq, Eq)]
pub struct InitiateMultipartUploadResult {
#[serde(serialize_with = "xmlns_tag")]
pub xmlns: (),
@ -119,7 +119,7 @@ pub struct InitiateMultipartUploadResult {
pub upload_id: Value,
}
#[derive(Debug, Serialize, PartialEq)]
#[derive(Debug, Serialize, PartialEq, Eq)]
pub struct CompleteMultipartUploadResult {
#[serde(serialize_with = "xmlns_tag")]
pub xmlns: (),
@ -133,7 +133,7 @@ pub struct CompleteMultipartUploadResult {
pub etag: Value,
}
#[derive(Debug, Serialize, PartialEq)]
#[derive(Debug, Serialize, PartialEq, Eq)]
pub struct Initiator {
#[serde(rename = "DisplayName")]
pub display_name: Value,
@ -141,7 +141,7 @@ pub struct Initiator {
pub id: Value,
}
#[derive(Debug, Serialize, PartialEq)]
#[derive(Debug, Serialize, PartialEq, Eq)]
pub struct ListMultipartItem {
#[serde(rename = "Initiated")]
pub initiated: Value,
@ -157,7 +157,7 @@ pub struct ListMultipartItem {
pub storage_class: Value,
}
#[derive(Debug, Serialize, PartialEq)]
#[derive(Debug, Serialize, PartialEq, Eq)]
pub struct ListMultipartUploadsResult {
#[serde(serialize_with = "xmlns_tag")]
pub xmlns: (),
@ -187,7 +187,7 @@ pub struct ListMultipartUploadsResult {
pub encoding_type: Option<Value>,
}
#[derive(Debug, Serialize, PartialEq)]
#[derive(Debug, Serialize, PartialEq, Eq)]
pub struct PartItem {
#[serde(rename = "ETag")]
pub etag: Value,
@ -199,7 +199,7 @@ pub struct PartItem {
pub size: IntValue,
}
#[derive(Debug, Serialize, PartialEq)]
#[derive(Debug, Serialize, PartialEq, Eq)]
pub struct ListPartsResult {
#[serde(serialize_with = "xmlns_tag")]
pub xmlns: (),
@ -227,7 +227,7 @@ pub struct ListPartsResult {
pub storage_class: Value,
}
#[derive(Debug, Serialize, PartialEq)]
#[derive(Debug, Serialize, PartialEq, Eq)]
pub struct ListBucketItem {
#[serde(rename = "Key")]
pub key: Value,
@ -241,13 +241,13 @@ pub struct ListBucketItem {
pub storage_class: Value,
}
#[derive(Debug, Serialize, PartialEq)]
#[derive(Debug, Serialize, PartialEq, Eq)]
pub struct CommonPrefix {
#[serde(rename = "Prefix")]
pub prefix: Value,
}
#[derive(Debug, Serialize, PartialEq)]
#[derive(Debug, Serialize, PartialEq, Eq)]
pub struct ListBucketResult {
#[serde(serialize_with = "xmlns_tag")]
pub xmlns: (),
@ -281,7 +281,7 @@ pub struct ListBucketResult {
pub common_prefixes: Vec<CommonPrefix>,
}
#[derive(Debug, Serialize, PartialEq)]
#[derive(Debug, Serialize, PartialEq, Eq)]
pub struct VersioningConfiguration {
#[serde(serialize_with = "xmlns_tag")]
pub xmlns: (),
@ -289,7 +289,7 @@ pub struct VersioningConfiguration {
pub status: Option<Value>,
}
#[derive(Debug, Serialize, PartialEq)]
#[derive(Debug, Serialize, PartialEq, Eq)]
pub struct PostObject {
#[serde(serialize_with = "xmlns_tag")]
pub xmlns: (),

View file

@ -1,5 +1,5 @@
use chrono::{DateTime, Utc};
use hmac::{Hmac, Mac, NewMac};
use hmac::{Hmac, Mac};
use sha2::Sha256;
use garage_util::data::{sha256sum, Hash};
@ -29,17 +29,17 @@ pub fn signing_hmac(
secret_key: &str,
region: &str,
service: &str,
) -> Result<HmacSha256, crypto_mac::InvalidKeyLength> {
) -> Result<HmacSha256, crypto_common::InvalidLength> {
let secret = String::from("AWS4") + secret_key;
let mut date_hmac = HmacSha256::new_varkey(secret.as_bytes())?;
let mut date_hmac = HmacSha256::new_from_slice(secret.as_bytes())?;
date_hmac.update(datetime.format(SHORT_DATE).to_string().as_bytes());
let mut region_hmac = HmacSha256::new_varkey(&date_hmac.finalize().into_bytes())?;
let mut region_hmac = HmacSha256::new_from_slice(&date_hmac.finalize().into_bytes())?;
region_hmac.update(region.as_bytes());
let mut service_hmac = HmacSha256::new_varkey(&region_hmac.finalize().into_bytes())?;
let mut service_hmac = HmacSha256::new_from_slice(&region_hmac.finalize().into_bytes())?;
service_hmac.update(service.as_bytes());
let mut signing_hmac = HmacSha256::new_varkey(&service_hmac.finalize().into_bytes())?;
let mut signing_hmac = HmacSha256::new_from_slice(&service_hmac.finalize().into_bytes())?;
signing_hmac.update(b"aws4_request");
let hmac = HmacSha256::new_varkey(&signing_hmac.finalize().into_bytes())?;
let hmac = HmacSha256::new_from_slice(&signing_hmac.finalize().into_bytes())?;
Ok(hmac)
}

View file

@ -27,6 +27,8 @@ bytes = "1.0"
hex = "0.4"
tracing = "0.1.30"
rand = "0.8"
async-compression = { version = "0.3", features = ["tokio", "zstd"] }
zstd = { version = "0.9", default-features = false }
rmp-serde = "0.15"
@ -36,7 +38,7 @@ serde_bytes = "0.11"
futures = "0.3"
futures-util = "0.3"
tokio = { version = "1.0", default-features = false, features = ["rt", "rt-multi-thread", "io-util", "net", "time", "macros", "sync", "signal", "fs"] }
tokio-util = { version = "0.6", features = ["io"] }
[features]
system-libs = [ "zstd/pkg-config" ]

View file

@ -1,16 +1,22 @@
use bytes::Bytes;
use serde::{Deserialize, Serialize};
use zstd::stream::{decode_all as zstd_decode, Encoder};
use garage_util::data::*;
use garage_util::error::*;
/// Tag describing which `DataBlock` variant a buffer of bytes belongs to.
/// It is produced by `DataBlock::into_parts` and consumed by
/// `DataBlock::from_parts`, so the bytes can travel separately from the tag.
#[derive(Debug, Serialize, Deserialize, Copy, Clone)]
pub enum DataBlockHeader {
/// The bytes are the content of a `DataBlock::Plain`
Plain,
/// The bytes are the content of a `DataBlock::Compressed`
Compressed,
}
/// A possibly compressed block of data
#[derive(Debug, Serialize, Deserialize)]
pub enum DataBlock {
/// Uncompressed data
Plain(#[serde(with = "serde_bytes")] Vec<u8>),
Plain(Bytes),
/// Data compressed with zstd
Compressed(#[serde(with = "serde_bytes")] Vec<u8>),
Compressed(Bytes),
}
impl DataBlock {
@ -30,7 +36,7 @@ impl DataBlock {
/// Get the buffer, possibly decompressing it, and verify its integrity.
/// For Plain block, data is compared to hash, for Compressed block, zstd checksumming system
/// is used instead.
pub fn verify_get(self, hash: Hash) -> Result<Vec<u8>, Error> {
pub fn verify_get(self, hash: Hash) -> Result<Bytes, Error> {
match self {
DataBlock::Plain(data) => {
if blake2sum(&data) == hash {
@ -39,9 +45,9 @@ impl DataBlock {
Err(Error::CorruptData(hash))
}
}
DataBlock::Compressed(data) => {
zstd_decode(&data[..]).map_err(|_| Error::CorruptData(hash))
}
DataBlock::Compressed(data) => zstd_decode(&data[..])
.map_err(|_| Error::CorruptData(hash))
.map(Bytes::from),
}
}
@ -61,13 +67,31 @@ impl DataBlock {
}
}
pub fn from_buffer(data: Vec<u8>, level: Option<i32>) -> DataBlock {
if let Some(level) = level {
if let Ok(data) = zstd_encode(&data[..], level) {
return DataBlock::Compressed(data);
pub async fn from_buffer(data: Bytes, level: Option<i32>) -> DataBlock {
tokio::task::spawn_blocking(move || {
if let Some(level) = level {
if let Ok(data) = zstd_encode(&data[..], level) {
return DataBlock::Compressed(data.into());
}
}
DataBlock::Plain(data)
})
.await
.unwrap()
}
pub fn into_parts(self) -> (DataBlockHeader, Bytes) {
match self {
DataBlock::Plain(data) => (DataBlockHeader::Plain, data),
DataBlock::Compressed(data) => (DataBlockHeader::Compressed, data),
}
}
pub fn from_parts(h: DataBlockHeader, bytes: Bytes) -> Self {
match h {
DataBlockHeader::Plain => DataBlock::Plain(bytes),
DataBlockHeader::Compressed => DataBlock::Compressed(bytes),
}
DataBlock::Plain(data)
}
}

View file

@ -1,13 +1,24 @@
use std::path::PathBuf;
use std::pin::Pin;
use std::sync::Arc;
use std::time::Duration;
use async_trait::async_trait;
use bytes::Bytes;
use serde::{Deserialize, Serialize};
use futures::Stream;
use futures_util::stream::StreamExt;
use tokio::fs;
use tokio::io::{AsyncReadExt, AsyncWriteExt};
use tokio::sync::{mpsc, Mutex};
use tokio::io::{AsyncReadExt, AsyncWriteExt, BufReader};
use tokio::sync::{mpsc, Mutex, MutexGuard};
use opentelemetry::{
trace::{FutureExt as OtelFutureExt, TraceContextExt, Tracer},
Context,
};
use garage_rpc::rpc_helper::netapp::stream::{stream_asyncread, ByteStream};
use garage_db as db;
@ -15,6 +26,7 @@ use garage_util::data::*;
use garage_util::error::*;
use garage_util::metrics::RecordDuration;
use garage_rpc::rpc_helper::OrderTag;
use garage_rpc::system::System;
use garage_rpc::*;
@ -29,9 +41,6 @@ use crate::resync::*;
/// Size under which data will be stored inlined in database instead of as files
pub const INLINE_THRESHOLD: usize = 3072;
// Timeout for RPCs that read and write blocks to remote nodes
pub(crate) const BLOCK_RW_TIMEOUT: Duration = Duration::from_secs(30);
// The delay between the moment when the reference counter
// drops to zero, and the moment where we allow ourselves
// to delete the block locally.
@ -42,12 +51,12 @@ pub(crate) const BLOCK_GC_DELAY: Duration = Duration::from_secs(600);
pub enum BlockRpc {
Ok,
/// Message to ask for a block of data, by hash
GetBlock(Hash),
GetBlock(Hash, Option<OrderTag>),
/// Message to send a block of data, either because requested, or for first delivery of new
/// block
PutBlock {
hash: Hash,
data: DataBlock,
header: DataBlockHeader,
},
/// Ask other node if they should have this block, but don't actually have it
NeedBlockQuery(Hash),
@ -68,7 +77,7 @@ pub struct BlockManager {
compression_level: Option<i32>,
mutation_lock: Mutex<BlockManagerLocked>,
mutation_lock: [Mutex<BlockManagerLocked>; 256],
pub(crate) rc: BlockRc,
pub resync: BlockResyncManager,
@ -105,8 +114,6 @@ impl BlockManager {
.netapp
.endpoint("garage_block/manager.rs/Rpc".to_string());
let manager_locked = BlockManagerLocked();
let metrics = BlockManagerMetrics::new(resync.queue.clone(), resync.errors.clone());
let (scrub_tx, scrub_rx) = mpsc::channel(1);
@ -115,7 +122,7 @@ impl BlockManager {
replication,
data_dir,
compression_level,
mutation_lock: Mutex::new(manager_locked),
mutation_lock: [(); 256].map(|_| Mutex::new(BlockManagerLocked())),
rc,
resync,
system,
@ -139,55 +146,161 @@ impl BlockManager {
}
/// Ask nodes that might have a (possibly compressed) block for it
pub(crate) async fn rpc_get_raw_block(&self, hash: &Hash) -> Result<DataBlock, Error> {
/// Return it as a stream with a header
async fn rpc_get_raw_block_streaming(
&self,
hash: &Hash,
order_tag: Option<OrderTag>,
) -> Result<(DataBlockHeader, ByteStream), Error> {
let who = self.replication.read_nodes(hash);
let resps = self
.system
.rpc
.try_call_many(
&self.endpoint,
&who[..],
BlockRpc::GetBlock(*hash),
RequestStrategy::with_priority(PRIO_NORMAL)
.with_quorum(1)
.with_timeout(BLOCK_RW_TIMEOUT)
.interrupt_after_quorum(true),
)
.await?;
let who = self.system.rpc.request_order(&who);
for resp in resps {
if let BlockRpc::PutBlock { data, .. } = resp {
return Ok(data);
}
for node in who.iter() {
let node_id = NodeID::from(*node);
let rpc = self.endpoint.call_streaming(
&node_id,
BlockRpc::GetBlock(*hash, order_tag),
PRIO_NORMAL | PRIO_SECONDARY,
);
tokio::select! {
res = rpc => {
let res = match res {
Ok(res) => res,
Err(e) => {
debug!("Node {:?} returned error: {}", node, e);
continue;
}
};
let (header, stream) = match res.into_parts() {
(Ok(BlockRpc::PutBlock { hash: _, header }), Some(stream)) => (header, stream),
_ => {
debug!("Node {:?} returned a malformed response", node);
continue;
}
};
return Ok((header, stream));
}
_ = tokio::time::sleep(self.system.rpc.rpc_timeout()) => {
debug!("Node {:?} didn't return block in time, trying next.", node);
}
};
}
Err(Error::Message(format!(
"Unable to read block {:?}: no valid blocks returned",
"Unable to read block {:?}: no node returned a valid block",
hash
)))
}
/// Ask nodes that might have a (possibly compressed) block for it
/// Return its entire body
///
/// Candidate nodes come from `replication.read_nodes(hash)`, reordered by
/// `system.rpc.request_order`, and are queried one at a time. The first node
/// whose response is a `PutBlock` with a stream that can be read to the end
/// wins; its header and bytes are reassembled with `DataBlock::from_parts`.
/// The block is returned as received: this function does not call
/// `verify_get` on it.
///
/// Returns `Error::Message` if every candidate node failed, answered with
/// something else, or timed out.
pub(crate) async fn rpc_get_raw_block(
&self,
hash: &Hash,
order_tag: Option<OrderTag>,
) -> Result<DataBlock, Error> {
let who = self.replication.read_nodes(hash);
let who = self.system.rpc.request_order(&who);
for node in who.iter() {
let node_id = NodeID::from(*node);
let rpc = self.endpoint.call_streaming(
&node_id,
BlockRpc::GetBlock(*hash, order_tag),
PRIO_NORMAL | PRIO_SECONDARY,
);
// Race the request against the configured RPC timeout.
// NOTE(review): the sleep only competes with `rpc` itself; once the
// `res = rpc` arm is selected, `read_stream_to_end` runs inside that arm's
// handler. Confirm whether reading the body is meant to be bounded too.
tokio::select! {
res = rpc => {
// Any failure below moves on to the next candidate node.
let res = match res {
Ok(res) => res,
Err(e) => {
debug!("Node {:?} returned error: {}", node, e);
continue;
}
};
// Only a `PutBlock` message accompanied by a stream is accepted.
let (header, stream) = match res.into_parts() {
(Ok(BlockRpc::PutBlock { hash: _, header }), Some(stream)) => (header, stream),
_ => {
debug!("Node {:?} returned a malformed response", node);
continue;
}
};
match read_stream_to_end(stream).await {
Ok(bytes) => return Ok(DataBlock::from_parts(header, bytes)),
Err(e) => {
// Falls through to the next loop iteration.
debug!("Error reading stream from node {:?}: {}", node, e);
}
}
}
_ = tokio::time::sleep(self.system.rpc.rpc_timeout()) => {
debug!("Node {:?} didn't return block in time, trying next.", node);
}
};
}
// Every candidate node was tried without success.
Err(Error::Message(format!(
"Unable to read block {:?}: no node returned a valid block",
hash
)))
}
// ---- Public interface ----
/// Ask nodes that might have a block for it,
/// return it as a stream
///
/// The raw stream and its header are obtained from
/// `rpc_get_raw_block_streaming`. A `Plain` block is handed back unchanged;
/// a `Compressed` block is wrapped so that the caller receives decompressed
/// bytes.
///
/// NOTE(review): no hash check is visible on this path, unlike
/// `rpc_get_block` which goes through `verify_get` — confirm this is intended.
pub async fn rpc_get_block_streaming(
&self,
hash: &Hash,
order_tag: Option<OrderTag>,
) -> Result<
Pin<Box<dyn Stream<Item = Result<Bytes, std::io::Error>> + Send + Sync + 'static>>,
Error,
> {
let (header, stream) = self.rpc_get_raw_block_streaming(hash, order_tag).await?;
match header {
DataBlockHeader::Plain => Ok(stream),
DataBlockHeader::Compressed => {
// Adapter chain: byte stream -> reader (`stream_asyncread`) -> `BufReader`
// (the `bufread` decoder takes a buffered reader) -> zstd decoder ->
// back to a byte stream (`ReaderStream`).
let reader = stream_asyncread(stream);
let reader = BufReader::new(reader);
let reader = async_compression::tokio::bufread::ZstdDecoder::new(reader);
Ok(Box::pin(tokio_util::io::ReaderStream::new(reader)))
}
}
}
/// Ask nodes that might have a block for it
pub async fn rpc_get_block(&self, hash: &Hash) -> Result<Vec<u8>, Error> {
self.rpc_get_raw_block(hash).await?.verify_get(*hash)
pub async fn rpc_get_block(
&self,
hash: &Hash,
order_tag: Option<OrderTag>,
) -> Result<Bytes, Error> {
self.rpc_get_raw_block(hash, order_tag)
.await?
.verify_get(*hash)
}
/// Send block to nodes that should have it
pub async fn rpc_put_block(&self, hash: Hash, data: Vec<u8>) -> Result<(), Error> {
pub async fn rpc_put_block(&self, hash: Hash, data: Bytes) -> Result<(), Error> {
let who = self.replication.write_nodes(&hash);
let data = DataBlock::from_buffer(data, self.compression_level);
let (header, bytes) = DataBlock::from_buffer(data, self.compression_level)
.await
.into_parts();
let put_block_rpc =
Req::new(BlockRpc::PutBlock { hash, header })?.with_stream_from_buffer(bytes);
self.system
.rpc
.try_call_many(
&self.endpoint,
&who[..],
BlockRpc::PutBlock { hash, data },
RequestStrategy::with_priority(PRIO_NORMAL)
.with_quorum(self.replication.write_quorum())
.with_timeout(BLOCK_RW_TIMEOUT),
put_block_rpc,
RequestStrategy::with_priority(PRIO_NORMAL | PRIO_SECONDARY)
.with_quorum(self.replication.write_quorum()),
)
.await?;
Ok(())
}
@ -219,7 +332,10 @@ impl BlockManager {
// we will fetch it from someone.
let this = self.clone();
tokio::spawn(async move {
if let Err(e) = this.resync.put_to_resync(&hash, 2 * BLOCK_RW_TIMEOUT) {
if let Err(e) = this
.resync
.put_to_resync(&hash, 2 * this.system.rpc.rpc_timeout())
{
error!("Block {:?} could not be put in resync queue: {}.", hash, e);
}
});
@ -254,29 +370,61 @@ impl BlockManager {
// ---- Reading and writing blocks locally ----
/// Write a block to disk
pub(crate) async fn write_block(
async fn handle_put_block(
&self,
hash: &Hash,
data: &DataBlock,
) -> Result<BlockRpc, Error> {
hash: Hash,
header: DataBlockHeader,
stream: Option<ByteStream>,
) -> Result<(), Error> {
let stream = stream.ok_or_message("missing stream")?;
let bytes = read_stream_to_end(stream).await?;
let data = DataBlock::from_parts(header, bytes);
self.write_block(&hash, &data).await
}
/// Write a block to disk
pub(crate) async fn write_block(&self, hash: &Hash, data: &DataBlock) -> Result<(), Error> {
let tracer = opentelemetry::global::tracer("garage");
let write_size = data.inner_buffer().len() as u64;
let res = self
.mutation_lock
.lock()
self.lock_mutate(hash)
.await
.write_block(hash, data, self)
.bound_record_duration(&self.metrics.block_write_duration)
.with_context(Context::current_with_span(
tracer.start("BlockManagerLocked::write_block"),
))
.await?;
self.metrics.bytes_written.add(write_size);
Ok(res)
Ok(())
}
/// Answers a `GetBlock` request: reads the block with `read_block` and
/// replies with a `PutBlock` message carrying the block's header, with the
/// block's bytes attached as the response stream. A read error is returned
/// as the response. When `order_tag` is given it is attached to the reply.
async fn handle_get_block(&self, hash: &Hash, order_tag: Option<OrderTag>) -> Resp<BlockRpc> {
    match self.read_block(hash).await {
        Err(e) => Resp::new(Err(e)),
        Ok(block) => {
            let (header, data) = block.into_parts();
            let reply = BlockRpc::PutBlock {
                hash: *hash,
                header,
            };
            let resp = Resp::new(Ok(reply)).with_stream_from_buffer(data);
            match order_tag {
                Some(tag) => resp.with_order_tag(tag),
                None => resp,
            }
        }
    }
}
/// Read block from disk, verifying its integrity
pub(crate) async fn read_block(&self, hash: &Hash) -> Result<BlockRpc, Error> {
pub(crate) async fn read_block(&self, hash: &Hash) -> Result<DataBlock, Error> {
let data = self
.read_block_internal(hash)
.bound_record_duration(&self.metrics.block_read_duration)
@ -286,7 +434,7 @@ impl BlockManager {
.bytes_read
.add(data.inner_buffer().len() as u64);
Ok(BlockRpc::PutBlock { hash: *hash, data })
Ok(data)
}
async fn read_block_internal(&self, hash: &Hash) -> Result<DataBlock, Error> {
@ -295,7 +443,8 @@ impl BlockManager {
Ok(c) => c,
Err(e) => {
// Not found but maybe we should have had it ??
self.resync.put_to_resync(hash, 2 * BLOCK_RW_TIMEOUT)?;
self.resync
.put_to_resync(hash, 2 * self.system.rpc.rpc_timeout())?;
return Err(Into::into(e));
}
};
@ -309,16 +458,15 @@ impl BlockManager {
drop(f);
let data = if compressed {
DataBlock::Compressed(data)
DataBlock::Compressed(data.into())
} else {
DataBlock::Plain(data)
DataBlock::Plain(data.into())
};
if data.verify(*hash).is_err() {
self.metrics.corruption_counter.add(1);
self.mutation_lock
.lock()
self.lock_mutate(hash)
.await
.move_block_to_corrupted(hash, self)
.await?;
@ -331,8 +479,7 @@ impl BlockManager {
/// Check if this node has a block and whether it needs it
pub(crate) async fn check_block_status(&self, hash: &Hash) -> Result<BlockStatus, Error> {
self.mutation_lock
.lock()
self.lock_mutate(hash)
.await
.check_block_status(hash, self)
.await
@ -346,8 +493,7 @@ impl BlockManager {
/// Delete block if it is not needed anymore
pub(crate) async fn delete_if_unneeded(&self, hash: &Hash) -> Result<(), Error> {
self.mutation_lock
.lock()
self.lock_mutate(hash)
.await
.delete_if_unneeded(hash, self)
.await
@ -379,20 +525,32 @@ impl BlockManager {
path.set_extension("");
fs::metadata(&path).await.map(|_| false).map_err(Into::into)
}
/// Acquire the mutation lock responsible for `hash`.
///
/// `mutation_lock` is indexed by the first byte of the hash, so the lock
/// that is taken depends only on that byte. The wait for the lock runs
/// inside a tracing span named "Acquire mutation_lock". The returned guard
/// gives access to the `BlockManagerLocked` operations.
async fn lock_mutate(&self, hash: &Hash) -> MutexGuard<'_, BlockManagerLocked> {
    let tracer = opentelemetry::global::tracer("garage");
    // Select the lock from the first byte of the hash.
    let shard = hash.as_slice()[0] as usize;
    let acquire = self.mutation_lock[shard].lock();
    let span = tracer.start("Acquire mutation_lock");
    acquire.with_context(Context::current_with_span(span)).await
}
}
#[async_trait]
impl EndpointHandler<BlockRpc> for BlockManager {
async fn handle(
self: &Arc<Self>,
message: &BlockRpc,
_from: NodeID,
) -> Result<BlockRpc, Error> {
match message {
BlockRpc::PutBlock { hash, data } => self.write_block(hash, data).await,
BlockRpc::GetBlock(h) => self.read_block(h).await,
BlockRpc::NeedBlockQuery(h) => self.need_block(h).await.map(BlockRpc::NeedBlockReply),
m => Err(Error::unexpected_rpc_message(m)),
impl StreamingEndpointHandler<BlockRpc> for BlockManager {
async fn handle(self: &Arc<Self>, mut message: Req<BlockRpc>, _from: NodeID) -> Resp<BlockRpc> {
match message.msg() {
BlockRpc::PutBlock { hash, header } => Resp::new(
self.handle_put_block(*hash, *header, message.take_stream())
.await
.map(|_| BlockRpc::Ok),
),
BlockRpc::GetBlock(h, order_tag) => self.handle_get_block(h, *order_tag).await,
BlockRpc::NeedBlockQuery(h) => {
Resp::new(self.need_block(h).await.map(BlockRpc::NeedBlockReply))
}
m => Resp::new(Err(Error::unexpected_rpc_message(m))),
}
}
}
@ -419,7 +577,7 @@ impl BlockManagerLocked {
hash: &Hash,
data: &DataBlock,
mgr: &BlockManager,
) -> Result<BlockRpc, Error> {
) -> Result<(), Error> {
let compressed = data.is_compressed();
let data = data.inner_buffer();
@ -430,8 +588,8 @@ impl BlockManagerLocked {
fs::create_dir_all(&directory).await?;
let to_delete = match (mgr.is_block_compressed(hash).await, compressed) {
(Ok(true), _) => return Ok(BlockRpc::Ok),
(Ok(false), false) => return Ok(BlockRpc::Ok),
(Ok(true), _) => return Ok(()),
(Ok(false), false) => return Ok(()),
(Ok(false), true) => {
let path_to_delete = path.clone();
path.set_extension("zst");
@ -470,7 +628,7 @@ impl BlockManagerLocked {
dir.sync_all().await?;
drop(dir);
Ok(BlockRpc::Ok)
Ok(())
}
async fn move_block_to_corrupted(&self, hash: &Hash, mgr: &BlockManager) -> Result<(), Error> {
@ -504,3 +662,17 @@ impl BlockManagerLocked {
Ok(())
}
}
/// Drain `stream` and return all of its chunks joined into one `Bytes` value.
///
/// Chunks are collected in arrival order. Each item goes through
/// `ok_or_message("error in stream")`, and a failure there is propagated
/// immediately, so nothing is returned for a partially read stream.
async fn read_stream_to_end(mut stream: ByteStream) -> Result<Bytes, Error> {
    let mut chunks: Vec<Bytes> = Vec::new();
    while let Some(item) = stream.next().await {
        let chunk = item.ok_or_message("error in stream")?;
        chunks.push(chunk);
    }

    // Copy every chunk, in order, into one contiguous buffer of the exact size.
    let total: usize = chunks.iter().map(|chunk| chunk.len()).sum();
    let mut joined: Vec<u8> = Vec::with_capacity(total);
    for chunk in &chunks {
        joined.extend_from_slice(&chunk[..]);
    }
    Ok(joined.into())
}

View file

@ -7,7 +7,6 @@ use arc_swap::ArcSwap;
use async_trait::async_trait;
use serde::{Deserialize, Serialize};
use futures::future::*;
use tokio::select;
use tokio::sync::{watch, Notify};
@ -34,10 +33,6 @@ use garage_table::replication::TableReplication;
use crate::manager::*;
// Timeout for RPCs that ask other nodes whether they need a copy
// of a given block before we delete it locally
pub(crate) const NEED_BLOCK_QUERY_TIMEOUT: Duration = Duration::from_secs(5);
// The delay between the time where a resync operation fails
// and the time when it is retried, with exponential backoff
// (multiplied by 2, 4, 8, 16, etc. for every consecutive failure).
@ -336,24 +331,23 @@ impl BlockResyncManager {
}
who.retain(|id| *id != manager.system.id);
let msg = Arc::new(BlockRpc::NeedBlockQuery(*hash));
let who_needs_fut = who.iter().map(|to| {
manager.system.rpc.call_arc(
let who_needs_resps = manager
.system
.rpc
.call_many(
&manager.endpoint,
*to,
msg.clone(),
RequestStrategy::with_priority(PRIO_BACKGROUND)
.with_timeout(NEED_BLOCK_QUERY_TIMEOUT),
&who,
BlockRpc::NeedBlockQuery(*hash),
RequestStrategy::with_priority(PRIO_BACKGROUND),
)
});
let who_needs_resps = join_all(who_needs_fut).await;
.await?;
let mut need_nodes = vec![];
for (node, needed) in who.iter().zip(who_needs_resps.into_iter()) {
for (node, needed) in who_needs_resps {
match needed.err_context("NeedBlockQuery RPC")? {
BlockRpc::NeedBlockReply(needed) => {
if needed {
need_nodes.push(*node);
need_nodes.push(node);
}
}
m => {
@ -376,7 +370,13 @@ impl BlockResyncManager {
.add(1, &[KeyValue::new("to", format!("{:?}", node))]);
}
let put_block_message = manager.read_block(hash).await?;
let block = manager.read_block(hash).await?;
let (header, bytes) = block.into_parts();
let put_block_message = Req::new(BlockRpc::PutBlock {
hash: *hash,
header,
})?
.with_stream_from_buffer(bytes);
manager
.system
.rpc
@ -385,8 +385,7 @@ impl BlockResyncManager {
&need_nodes[..],
put_block_message,
RequestStrategy::with_priority(PRIO_BACKGROUND)
.with_quorum(need_nodes.len())
.with_timeout(BLOCK_RW_TIMEOUT),
.with_quorum(need_nodes.len()),
)
.await
.err_context("PutBlock RPC")?;
@ -409,7 +408,7 @@ impl BlockResyncManager {
hash
);
let block_data = manager.rpc_get_raw_block(hash).await?;
let block_data = manager.rpc_get_raw_block(hash, None).await?;
manager.metrics.resync_recv_counter.add(1);

Some files were not shown because too many files have changed in this diff Show more