Merge branch 'dev-0.2'
All checks were successful
continuous-integration/drone/push Build is passing
36
.drone.yml
|
@ -2,20 +2,9 @@ kind: pipeline
|
||||||
name: default
|
name: default
|
||||||
|
|
||||||
workspace:
|
workspace:
|
||||||
base: /drone
|
base: /drone/garage
|
||||||
|
|
||||||
clone:
|
|
||||||
disable: true
|
|
||||||
|
|
||||||
steps:
|
steps:
|
||||||
- name: clone
|
|
||||||
image: alpine/git
|
|
||||||
commands:
|
|
||||||
- mkdir -p cargo
|
|
||||||
- git clone https://git.deuxfleurs.fr/Deuxfleurs/garage.git
|
|
||||||
- cd garage
|
|
||||||
- git checkout $DRONE_COMMIT
|
|
||||||
|
|
||||||
- name: restore-cache
|
- name: restore-cache
|
||||||
image: meltwater/drone-cache:dev
|
image: meltwater/drone-cache:dev
|
||||||
environment:
|
environment:
|
||||||
|
@ -31,11 +20,11 @@ steps:
|
||||||
cache_key: '{{ .Repo.Name }}_{{ checksum "garage/Cargo.lock" }}_{{ arch }}_{{ os }}_gzip'
|
cache_key: '{{ .Repo.Name }}_{{ checksum "garage/Cargo.lock" }}_{{ arch }}_{{ os }}_gzip'
|
||||||
region: garage
|
region: garage
|
||||||
mount:
|
mount:
|
||||||
- 'garage/target'
|
- 'target'
|
||||||
- 'cargo/registry/index'
|
- '/drone/cargo/registry/index'
|
||||||
- 'cargo/registry/cache'
|
- '/drone/cargo/registry/cache'
|
||||||
- 'cargo/git/db'
|
- '/drone/cargo/bin'
|
||||||
- 'cargo/bin'
|
- '/drone/cargo/git/db'
|
||||||
path_style: true
|
path_style: true
|
||||||
endpoint: https://garage.deuxfleurs.fr
|
endpoint: https://garage.deuxfleurs.fr
|
||||||
|
|
||||||
|
@ -47,7 +36,6 @@ steps:
|
||||||
- apt-get update
|
- apt-get update
|
||||||
- apt-get install --yes libsodium-dev
|
- apt-get install --yes libsodium-dev
|
||||||
- pwd
|
- pwd
|
||||||
- cd garage
|
|
||||||
- cargo build
|
- cargo build
|
||||||
|
|
||||||
- name: cargo-test
|
- name: cargo-test
|
||||||
|
@ -57,7 +45,6 @@ steps:
|
||||||
commands:
|
commands:
|
||||||
- apt-get update
|
- apt-get update
|
||||||
- apt-get install --yes libsodium-dev
|
- apt-get install --yes libsodium-dev
|
||||||
- cd garage
|
|
||||||
- cargo test
|
- cargo test
|
||||||
|
|
||||||
- name: rebuild-cache
|
- name: rebuild-cache
|
||||||
|
@ -75,11 +62,11 @@ steps:
|
||||||
cache_key: '{{ .Repo.Name }}_{{ checksum "garage/Cargo.lock" }}_{{ arch }}_{{ os }}_gzip'
|
cache_key: '{{ .Repo.Name }}_{{ checksum "garage/Cargo.lock" }}_{{ arch }}_{{ os }}_gzip'
|
||||||
region: garage
|
region: garage
|
||||||
mount:
|
mount:
|
||||||
- 'garage/target'
|
- 'target'
|
||||||
- 'cargo/registry/index'
|
- '/drone/cargo/registry/index'
|
||||||
- 'cargo/registry/cache'
|
- '/drone/cargo/registry/cache'
|
||||||
- 'cargo/git/db'
|
- '/drone/cargo/git/db'
|
||||||
- 'cargo/bin'
|
- '/drone/cargo/bin'
|
||||||
path_style: true
|
path_style: true
|
||||||
endpoint: https://garage.deuxfleurs.fr
|
endpoint: https://garage.deuxfleurs.fr
|
||||||
|
|
||||||
|
@ -91,5 +78,4 @@ steps:
|
||||||
- apt-get update
|
- apt-get update
|
||||||
- apt-get install --yes libsodium-dev awscli python-pip
|
- apt-get install --yes libsodium-dev awscli python-pip
|
||||||
- pip install s3cmd
|
- pip install s3cmd
|
||||||
- cd garage
|
|
||||||
- ./script/test-smoke.sh || (cat /tmp/garage.log; false)
|
- ./script/test-smoke.sh || (cat /tmp/garage.log; false)
|
||||||
|
|
785
Cargo.lock
generated
142
LICENSE
|
@ -1,5 +1,5 @@
|
||||||
GNU GENERAL PUBLIC LICENSE
|
GNU AFFERO GENERAL PUBLIC LICENSE
|
||||||
Version 3, 29 June 2007
|
Version 3, 19 November 2007
|
||||||
|
|
||||||
Copyright (C) 2007 Free Software Foundation, Inc. <https://fsf.org/>
|
Copyright (C) 2007 Free Software Foundation, Inc. <https://fsf.org/>
|
||||||
Everyone is permitted to copy and distribute verbatim copies
|
Everyone is permitted to copy and distribute verbatim copies
|
||||||
|
@ -7,17 +7,15 @@
|
||||||
|
|
||||||
Preamble
|
Preamble
|
||||||
|
|
||||||
The GNU General Public License is a free, copyleft license for
|
The GNU Affero General Public License is a free, copyleft license for
|
||||||
software and other kinds of works.
|
software and other kinds of works, specifically designed to ensure
|
||||||
|
cooperation with the community in the case of network server software.
|
||||||
|
|
||||||
The licenses for most software and other practical works are designed
|
The licenses for most software and other practical works are designed
|
||||||
to take away your freedom to share and change the works. By contrast,
|
to take away your freedom to share and change the works. By contrast,
|
||||||
the GNU General Public License is intended to guarantee your freedom to
|
our General Public Licenses are intended to guarantee your freedom to
|
||||||
share and change all versions of a program--to make sure it remains free
|
share and change all versions of a program--to make sure it remains free
|
||||||
software for all its users. We, the Free Software Foundation, use the
|
software for all its users.
|
||||||
GNU General Public License for most of our software; it applies also to
|
|
||||||
any other work released this way by its authors. You can apply it to
|
|
||||||
your programs, too.
|
|
||||||
|
|
||||||
When we speak of free software, we are referring to freedom, not
|
When we speak of free software, we are referring to freedom, not
|
||||||
price. Our General Public Licenses are designed to make sure that you
|
price. Our General Public Licenses are designed to make sure that you
|
||||||
|
@ -26,44 +24,34 @@ them if you wish), that you receive source code or can get it if you
|
||||||
want it, that you can change the software or use pieces of it in new
|
want it, that you can change the software or use pieces of it in new
|
||||||
free programs, and that you know you can do these things.
|
free programs, and that you know you can do these things.
|
||||||
|
|
||||||
To protect your rights, we need to prevent others from denying you
|
Developers that use our General Public Licenses protect your rights
|
||||||
these rights or asking you to surrender the rights. Therefore, you have
|
with two steps: (1) assert copyright on the software, and (2) offer
|
||||||
certain responsibilities if you distribute copies of the software, or if
|
you this License which gives you legal permission to copy, distribute
|
||||||
you modify it: responsibilities to respect the freedom of others.
|
and/or modify the software.
|
||||||
|
|
||||||
For example, if you distribute copies of such a program, whether
|
A secondary benefit of defending all users' freedom is that
|
||||||
gratis or for a fee, you must pass on to the recipients the same
|
improvements made in alternate versions of the program, if they
|
||||||
freedoms that you received. You must make sure that they, too, receive
|
receive widespread use, become available for other developers to
|
||||||
or can get the source code. And you must show them these terms so they
|
incorporate. Many developers of free software are heartened and
|
||||||
know their rights.
|
encouraged by the resulting cooperation. However, in the case of
|
||||||
|
software used on network servers, this result may fail to come about.
|
||||||
|
The GNU General Public License permits making a modified version and
|
||||||
|
letting the public access it on a server without ever releasing its
|
||||||
|
source code to the public.
|
||||||
|
|
||||||
Developers that use the GNU GPL protect your rights with two steps:
|
The GNU Affero General Public License is designed specifically to
|
||||||
(1) assert copyright on the software, and (2) offer you this License
|
ensure that, in such cases, the modified source code becomes available
|
||||||
giving you legal permission to copy, distribute and/or modify it.
|
to the community. It requires the operator of a network server to
|
||||||
|
provide the source code of the modified version running there to the
|
||||||
|
users of that server. Therefore, public use of a modified version, on
|
||||||
|
a publicly accessible server, gives the public access to the source
|
||||||
|
code of the modified version.
|
||||||
|
|
||||||
For the developers' and authors' protection, the GPL clearly explains
|
An older license, called the Affero General Public License and
|
||||||
that there is no warranty for this free software. For both users' and
|
published by Affero, was designed to accomplish similar goals. This is
|
||||||
authors' sake, the GPL requires that modified versions be marked as
|
a different license, not a version of the Affero GPL, but Affero has
|
||||||
changed, so that their problems will not be attributed erroneously to
|
released a new version of the Affero GPL which permits relicensing under
|
||||||
authors of previous versions.
|
this license.
|
||||||
|
|
||||||
Some devices are designed to deny users access to install or run
|
|
||||||
modified versions of the software inside them, although the manufacturer
|
|
||||||
can do so. This is fundamentally incompatible with the aim of
|
|
||||||
protecting users' freedom to change the software. The systematic
|
|
||||||
pattern of such abuse occurs in the area of products for individuals to
|
|
||||||
use, which is precisely where it is most unacceptable. Therefore, we
|
|
||||||
have designed this version of the GPL to prohibit the practice for those
|
|
||||||
products. If such problems arise substantially in other domains, we
|
|
||||||
stand ready to extend this provision to those domains in future versions
|
|
||||||
of the GPL, as needed to protect the freedom of users.
|
|
||||||
|
|
||||||
Finally, every program is threatened constantly by software patents.
|
|
||||||
States should not allow patents to restrict development and use of
|
|
||||||
software on general-purpose computers, but in those that do, we wish to
|
|
||||||
avoid the special danger that patents applied to a free program could
|
|
||||||
make it effectively proprietary. To prevent this, the GPL assures that
|
|
||||||
patents cannot be used to render the program non-free.
|
|
||||||
|
|
||||||
The precise terms and conditions for copying, distribution and
|
The precise terms and conditions for copying, distribution and
|
||||||
modification follow.
|
modification follow.
|
||||||
|
@ -72,7 +60,7 @@ modification follow.
|
||||||
|
|
||||||
0. Definitions.
|
0. Definitions.
|
||||||
|
|
||||||
"This License" refers to version 3 of the GNU General Public License.
|
"This License" refers to version 3 of the GNU Affero General Public License.
|
||||||
|
|
||||||
"Copyright" also means copyright-like laws that apply to other kinds of
|
"Copyright" also means copyright-like laws that apply to other kinds of
|
||||||
works, such as semiconductor masks.
|
works, such as semiconductor masks.
|
||||||
|
@ -549,35 +537,45 @@ to collect a royalty for further conveying from those to whom you convey
|
||||||
the Program, the only way you could satisfy both those terms and this
|
the Program, the only way you could satisfy both those terms and this
|
||||||
License would be to refrain entirely from conveying the Program.
|
License would be to refrain entirely from conveying the Program.
|
||||||
|
|
||||||
13. Use with the GNU Affero General Public License.
|
13. Remote Network Interaction; Use with the GNU General Public License.
|
||||||
|
|
||||||
|
Notwithstanding any other provision of this License, if you modify the
|
||||||
|
Program, your modified version must prominently offer all users
|
||||||
|
interacting with it remotely through a computer network (if your version
|
||||||
|
supports such interaction) an opportunity to receive the Corresponding
|
||||||
|
Source of your version by providing access to the Corresponding Source
|
||||||
|
from a network server at no charge, through some standard or customary
|
||||||
|
means of facilitating copying of software. This Corresponding Source
|
||||||
|
shall include the Corresponding Source for any work covered by version 3
|
||||||
|
of the GNU General Public License that is incorporated pursuant to the
|
||||||
|
following paragraph.
|
||||||
|
|
||||||
Notwithstanding any other provision of this License, you have
|
Notwithstanding any other provision of this License, you have
|
||||||
permission to link or combine any covered work with a work licensed
|
permission to link or combine any covered work with a work licensed
|
||||||
under version 3 of the GNU Affero General Public License into a single
|
under version 3 of the GNU General Public License into a single
|
||||||
combined work, and to convey the resulting work. The terms of this
|
combined work, and to convey the resulting work. The terms of this
|
||||||
License will continue to apply to the part which is the covered work,
|
License will continue to apply to the part which is the covered work,
|
||||||
but the special requirements of the GNU Affero General Public License,
|
but the work with which it is combined will remain governed by version
|
||||||
section 13, concerning interaction through a network will apply to the
|
3 of the GNU General Public License.
|
||||||
combination as such.
|
|
||||||
|
|
||||||
14. Revised Versions of this License.
|
14. Revised Versions of this License.
|
||||||
|
|
||||||
The Free Software Foundation may publish revised and/or new versions of
|
The Free Software Foundation may publish revised and/or new versions of
|
||||||
the GNU General Public License from time to time. Such new versions will
|
the GNU Affero General Public License from time to time. Such new versions
|
||||||
be similar in spirit to the present version, but may differ in detail to
|
will be similar in spirit to the present version, but may differ in detail to
|
||||||
address new problems or concerns.
|
address new problems or concerns.
|
||||||
|
|
||||||
Each version is given a distinguishing version number. If the
|
Each version is given a distinguishing version number. If the
|
||||||
Program specifies that a certain numbered version of the GNU General
|
Program specifies that a certain numbered version of the GNU Affero General
|
||||||
Public License "or any later version" applies to it, you have the
|
Public License "or any later version" applies to it, you have the
|
||||||
option of following the terms and conditions either of that numbered
|
option of following the terms and conditions either of that numbered
|
||||||
version or of any later version published by the Free Software
|
version or of any later version published by the Free Software
|
||||||
Foundation. If the Program does not specify a version number of the
|
Foundation. If the Program does not specify a version number of the
|
||||||
GNU General Public License, you may choose any version ever published
|
GNU Affero General Public License, you may choose any version ever published
|
||||||
by the Free Software Foundation.
|
by the Free Software Foundation.
|
||||||
|
|
||||||
If the Program specifies that a proxy can decide which future
|
If the Program specifies that a proxy can decide which future
|
||||||
versions of the GNU General Public License can be used, that proxy's
|
versions of the GNU Affero General Public License can be used, that proxy's
|
||||||
public statement of acceptance of a version permanently authorizes you
|
public statement of acceptance of a version permanently authorizes you
|
||||||
to choose that version for the Program.
|
to choose that version for the Program.
|
||||||
|
|
||||||
|
@ -635,41 +633,29 @@ the "copyright" line and a pointer to where the full notice is found.
|
||||||
Copyright (C) <year> <name of author>
|
Copyright (C) <year> <name of author>
|
||||||
|
|
||||||
This program is free software: you can redistribute it and/or modify
|
This program is free software: you can redistribute it and/or modify
|
||||||
it under the terms of the GNU General Public License as published by
|
it under the terms of the GNU Affero General Public License as published by
|
||||||
the Free Software Foundation, either version 3 of the License, or
|
the Free Software Foundation, either version 3 of the License, or
|
||||||
(at your option) any later version.
|
(at your option) any later version.
|
||||||
|
|
||||||
This program is distributed in the hope that it will be useful,
|
This program is distributed in the hope that it will be useful,
|
||||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||||
GNU General Public License for more details.
|
GNU Affero General Public License for more details.
|
||||||
|
|
||||||
You should have received a copy of the GNU General Public License
|
You should have received a copy of the GNU Affero General Public License
|
||||||
along with this program. If not, see <https://www.gnu.org/licenses/>.
|
along with this program. If not, see <https://www.gnu.org/licenses/>.
|
||||||
|
|
||||||
Also add information on how to contact you by electronic and paper mail.
|
Also add information on how to contact you by electronic and paper mail.
|
||||||
|
|
||||||
If the program does terminal interaction, make it output a short
|
If your software can interact with users remotely through a computer
|
||||||
notice like this when it starts in an interactive mode:
|
network, you should also make sure that it provides a way for users to
|
||||||
|
get its source. For example, if your program is a web application, its
|
||||||
<program> Copyright (C) <year> <name of author>
|
interface could display a "Source" link that leads users to an archive
|
||||||
This program comes with ABSOLUTELY NO WARRANTY; for details type `show w'.
|
of the code. There are many ways you could offer source, and different
|
||||||
This is free software, and you are welcome to redistribute it
|
solutions will be better for different programs; see section 13 for the
|
||||||
under certain conditions; type `show c' for details.
|
specific requirements.
|
||||||
|
|
||||||
The hypothetical commands `show w' and `show c' should show the appropriate
|
|
||||||
parts of the General Public License. Of course, your program's commands
|
|
||||||
might be different; for a GUI interface, you would use an "about box".
|
|
||||||
|
|
||||||
You should also get your employer (if you work as a programmer) or school,
|
You should also get your employer (if you work as a programmer) or school,
|
||||||
if any, to sign a "copyright disclaimer" for the program, if necessary.
|
if any, to sign a "copyright disclaimer" for the program, if necessary.
|
||||||
For more information on this, and how to apply and follow the GNU GPL, see
|
For more information on this, and how to apply and follow the GNU AGPL, see
|
||||||
<https://www.gnu.org/licenses/>.
|
<https://www.gnu.org/licenses/>.
|
||||||
|
|
||||||
The GNU General Public License does not permit incorporating your program
|
|
||||||
into proprietary programs. If your program is a subroutine library, you
|
|
||||||
may consider it more useful to permit linking proprietary applications with
|
|
||||||
the library. If this is what you want to do, use the GNU Lesser General
|
|
||||||
Public License instead of this License. But first, please read
|
|
||||||
<https://www.gnu.org/licenses/why-not-lgpl.html>.
|
|
||||||
|
|
||||||
|
|
2
Makefile
|
@ -4,7 +4,7 @@ DOCKER=lxpz/garage_amd64
|
||||||
all:
|
all:
|
||||||
#cargo fmt || true
|
#cargo fmt || true
|
||||||
#RUSTFLAGS="-C link-arg=-fuse-ld=lld" cargo build
|
#RUSTFLAGS="-C link-arg=-fuse-ld=lld" cargo build
|
||||||
cargo build
|
clear; cargo build
|
||||||
|
|
||||||
$(BIN):
|
$(BIN):
|
||||||
#RUSTFLAGS="-C link-arg=-fuse-ld=lld" cargo build --release
|
#RUSTFLAGS="-C link-arg=-fuse-ld=lld" cargo build --release
|
||||||
|
|
|
@ -1,6 +1,11 @@
|
||||||
# Garage
|
Garage [![Build Status](https://drone.deuxfleurs.fr/api/badges/Deuxfleurs/garage/status.svg)](https://drone.deuxfleurs.fr/Deuxfleurs/garage)
|
||||||
|
===
|
||||||
|
|
||||||
[![Build Status](https://drone.deuxfleurs.fr/api/badges/Deuxfleurs/garage/status.svg)](https://drone.deuxfleurs.fr/Deuxfleurs/garage)
|
<p align="center" style="text-align:center;">
|
||||||
|
<a href="https://git.deuxfleurs.fr/Deuxfleurs/garage">
|
||||||
|
<img alt="Garage logo" src="doc/logo/garage.png" height="200" />
|
||||||
|
</a>
|
||||||
|
</p>
|
||||||
|
|
||||||
Garage is a lightweight S3-compatible distributed object store, with the following goals:
|
Garage is a lightweight S3-compatible distributed object store, with the following goals:
|
||||||
|
|
||||||
|
|
BIN
doc/logo/garage-dark-notext.png
Normal file
After Width: | Height: | Size: 8.1 KiB |
113
doc/logo/garage-dark-notext.svg
Normal file
|
@ -0,0 +1,113 @@
|
||||||
|
<?xml version="1.0" encoding="UTF-8" standalone="no"?>
|
||||||
|
<svg
|
||||||
|
xmlns:dc="http://purl.org/dc/elements/1.1/"
|
||||||
|
xmlns:cc="http://creativecommons.org/ns#"
|
||||||
|
xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#"
|
||||||
|
xmlns:svg="http://www.w3.org/2000/svg"
|
||||||
|
xmlns="http://www.w3.org/2000/svg"
|
||||||
|
xmlns:sodipodi="http://sodipodi.sourceforge.net/DTD/sodipodi-0.dtd"
|
||||||
|
xmlns:inkscape="http://www.inkscape.org/namespaces/inkscape"
|
||||||
|
width="250"
|
||||||
|
height="250"
|
||||||
|
viewBox="0 0 66.145832 66.145831"
|
||||||
|
version="1.1"
|
||||||
|
id="svg916"
|
||||||
|
inkscape:version="1.0.2 (e86c870879, 2021-01-15)"
|
||||||
|
sodipodi:docname="garage-dark-notext.svg"
|
||||||
|
inkscape:export-filename="/home/lx/Deuxfleurs/garage/garage-dark-notext.png"
|
||||||
|
inkscape:export-xdpi="96"
|
||||||
|
inkscape:export-ydpi="96">
|
||||||
|
<defs
|
||||||
|
id="defs910" />
|
||||||
|
<sodipodi:namedview
|
||||||
|
id="base"
|
||||||
|
pagecolor="#ffffff"
|
||||||
|
bordercolor="#666666"
|
||||||
|
borderopacity="1.0"
|
||||||
|
inkscape:pageopacity="0.0"
|
||||||
|
inkscape:pageshadow="2"
|
||||||
|
inkscape:zoom="2.3640695"
|
||||||
|
inkscape:cx="127.28732"
|
||||||
|
inkscape:cy="150.37984"
|
||||||
|
inkscape:document-units="mm"
|
||||||
|
inkscape:current-layer="layer1"
|
||||||
|
inkscape:document-rotation="0"
|
||||||
|
showgrid="false"
|
||||||
|
fit-margin-top="0"
|
||||||
|
fit-margin-left="0"
|
||||||
|
fit-margin-right="0"
|
||||||
|
fit-margin-bottom="0"
|
||||||
|
units="px"
|
||||||
|
inkscape:window-width="1920"
|
||||||
|
inkscape:window-height="1039"
|
||||||
|
inkscape:window-x="0"
|
||||||
|
inkscape:window-y="20"
|
||||||
|
inkscape:window-maximized="0" />
|
||||||
|
<metadata
|
||||||
|
id="metadata913">
|
||||||
|
<rdf:RDF>
|
||||||
|
<cc:Work
|
||||||
|
rdf:about="">
|
||||||
|
<dc:format>image/svg+xml</dc:format>
|
||||||
|
<dc:type
|
||||||
|
rdf:resource="http://purl.org/dc/dcmitype/StillImage" />
|
||||||
|
<dc:title></dc:title>
|
||||||
|
</cc:Work>
|
||||||
|
</rdf:RDF>
|
||||||
|
</metadata>
|
||||||
|
<g
|
||||||
|
inkscape:label="Layer 1"
|
||||||
|
inkscape:groupmode="layer"
|
||||||
|
id="layer1"
|
||||||
|
transform="translate(-141.5009,-98.254059)">
|
||||||
|
<rect
|
||||||
|
style="fill:#4e4e4e;fill-opacity:1;stroke-width:1.01574"
|
||||||
|
id="rect858"
|
||||||
|
width="66.592186"
|
||||||
|
height="66.832306"
|
||||||
|
x="141.5009"
|
||||||
|
y="98.056282" />
|
||||||
|
<g
|
||||||
|
id="g1775"
|
||||||
|
transform="matrix(1.9019239,0,0,1.9019239,-157.45231,-108.13709)">
|
||||||
|
<path
|
||||||
|
class="cls-2"
|
||||||
|
d="m 187.70646,127.72029 a 0.39366647,0.39366647 0 0 1 -0.0176,0.1366 0.02790919,0.02790919 0 0 1 0,0.0117 l -0.0176,0.0455 v 0 l -0.0176,0.0338 -2.83058,5.59653 c -0.39367,0.77705 -1.11784,0.75355 -0.99592,-0.0323 l 0.56994,-3.18164 c 0.0191,-0.1043 0.18655,-0.83875 0.34666,-1.37049 l -5.46286,1.7054 c -0.85784,5.57155 -8.18914,5.66409 -9.38483,0 l -5.47461,-1.70981 c 0.16011,0.53174 0.32904,1.2706 0.34813,1.3749 l 0.56994,3.18164 c 0.12192,0.78587 -0.60225,0.80937 -0.99592,0.0323 l -2.84822,-5.63031 a 0.20417776,0.20417776 0 0 1 -0.0176,-0.047 0.42304456,0.42304456 0 0 1 0.22181,-0.56552 l 11.69689,-5.17495 a 2.9113691,2.9113691 0 0 1 2.35024,0 l 11.69689,5.17495 a 0.41863785,0.41863785 0 0 1 0.26293,0.41864 z"
|
||||||
|
id="path24-31"
|
||||||
|
style="stroke-width:0.146891" />
|
||||||
|
<path
|
||||||
|
class="cls-3"
|
||||||
|
d="m 178.30988,128.69564 5.05744,-2.03591 a 0.21446009,0.21446009 0 0 0 0,-0.39807 c -0.58756,-0.2453 -1.3132,-0.52733 -2.02415,-0.82259 -0.13073,-0.0543 -1.36902,0.83434 -1.48213,0.92542 l -2.17985,1.74212 c -0.52734,0.44214 -0.0705,0.86959 0.62869,0.58903 z"
|
||||||
|
id="path26-9"
|
||||||
|
style="stroke-width:0.146891" />
|
||||||
|
<circle
|
||||||
|
class="cls-3"
|
||||||
|
cx="174.64349"
|
||||||
|
cy="130.68452"
|
||||||
|
r="2.6366842"
|
||||||
|
id="circle28-4"
|
||||||
|
style="stroke-width:0.146891" />
|
||||||
|
<path
|
||||||
|
id="path24-3-6-9-0"
|
||||||
|
style="fill:#ff9329;fill-opacity:1;stroke-width:0.146891"
|
||||||
|
d="m 174.54269,116.93385 a 2.9113691,2.9113691 0 0 0 -1.14618,0.24753 l -11.69696,5.17488 a 0.42304456,0.42304456 0 0 0 -0.22169,0.56586 0.20417776,0.20417776 0 0 0 0.0176,0.047 l 0.79634,1.57355 11.10475,-4.91288 a 2.9113691,2.9113691 0 0 1 1.14618,-0.24753 2.9113691,2.9113691 0 0 1 1.20406,0.24753 l 11.12387,4.92115 0.7829,-1.54823 0.0176,-0.0336 0.0181,-0.0455 a 0.02790919,0.02790919 0 0 0 0,-0.0119 0.39366647,0.39366647 0 0 0 0.0176,-0.13642 0.41863785,0.41863785 0 0 0 -0.26303,-0.4191 l -11.69697,-5.17488 a 2.9113691,2.9113691 0 0 0 -1.20406,-0.24753 z m -10.12134,9.52449 c 0.0218,0.0723 0.0408,0.14674 0.0615,0.22066 h 0.51831 l -0.008,-0.0419 z m 20.32227,0.005 -0.57103,0.17828 -0.007,0.0377 h 0.5178 c 0.0202,-0.0723 0.0386,-0.14514 0.0599,-0.216 z" />
|
||||||
|
<path
|
||||||
|
class="cls-2"
|
||||||
|
d="m 187.70647,127.72029 a 0.39366647,0.39366647 0 0 1 -0.0176,0.13661 0.02790919,0.02790919 0 0 1 0,0.0117 l -0.0176,0.0455 v 0 l -0.0176,0.0338 -2.83058,5.59652 c -0.39366,0.77705 -1.11783,0.75355 -0.99591,-0.0323 l 0.56993,-3.18165 c 0.0191,-0.10429 0.18655,-0.83874 0.34666,-1.37049 l -5.46285,1.7054 c -0.85784,5.57156 -8.18915,5.6641 -9.38484,0 l -5.4746,-1.70981 c 0.16011,0.53175 0.32903,1.27061 0.34813,1.3749 l 0.56993,3.18165 c 0.12192,0.78586 -0.60225,0.80936 -0.99592,0.0323 l -2.84822,-5.63031 a 0.20417776,0.20417776 0 0 1 -0.0176,-0.047 0.42304456,0.42304456 0 0 1 0.22181,-0.56553 l 11.69688,-5.17495 a 2.9113691,2.9113691 0 0 1 2.35025,0 l 11.69689,5.17495 a 0.41863785,0.41863785 0 0 1 0.26293,0.41864 z"
|
||||||
|
id="path24-0-3"
|
||||||
|
style="fill:#ff9329;fill-opacity:1;stroke-width:0.146891" />
|
||||||
|
<path
|
||||||
|
class="cls-3"
|
||||||
|
d="m 178.30988,128.69564 5.05744,-2.0359 a 0.21446009,0.21446009 0 0 0 0,-0.39807 c -0.58756,-0.24531 -1.3132,-0.52734 -2.02415,-0.82259 -0.13073,-0.0543 -1.36902,0.83434 -1.48212,0.92541 l -2.17986,1.74212 c -0.52734,0.44214 -0.0705,0.86959 0.62869,0.58903 z"
|
||||||
|
id="path26-2-2"
|
||||||
|
style="fill:#4e4e4e;fill-opacity:1;stroke-width:0.146891" />
|
||||||
|
<circle
|
||||||
|
class="cls-3"
|
||||||
|
cx="174.64349"
|
||||||
|
cy="130.68452"
|
||||||
|
r="2.6366842"
|
||||||
|
id="circle28-3-0"
|
||||||
|
style="fill:#4e4e4e;fill-opacity:1;stroke-width:0.146891" />
|
||||||
|
</g>
|
||||||
|
</g>
|
||||||
|
</svg>
|
After Width: | Height: | Size: 5.8 KiB |
174
doc/logo/garage-dark.svg
Normal file
|
@ -0,0 +1,174 @@
|
||||||
|
<?xml version="1.0" encoding="UTF-8" standalone="no"?>
|
||||||
|
<svg
|
||||||
|
xmlns:dc="http://purl.org/dc/elements/1.1/"
|
||||||
|
xmlns:cc="http://creativecommons.org/ns#"
|
||||||
|
xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#"
|
||||||
|
xmlns:svg="http://www.w3.org/2000/svg"
|
||||||
|
xmlns="http://www.w3.org/2000/svg"
|
||||||
|
xmlns:sodipodi="http://sodipodi.sourceforge.net/DTD/sodipodi-0.dtd"
|
||||||
|
xmlns:inkscape="http://www.inkscape.org/namespaces/inkscape"
|
||||||
|
width="250"
|
||||||
|
height="250"
|
||||||
|
viewBox="0 0 66.145832 66.145831"
|
||||||
|
version="1.1"
|
||||||
|
id="svg916"
|
||||||
|
inkscape:version="1.0.2 (e86c870879, 2021-01-15)"
|
||||||
|
sodipodi:docname="garage-dark.svg">
|
||||||
|
<defs
|
||||||
|
id="defs910" />
|
||||||
|
<sodipodi:namedview
|
||||||
|
id="base"
|
||||||
|
pagecolor="#ffffff"
|
||||||
|
bordercolor="#666666"
|
||||||
|
borderopacity="1.0"
|
||||||
|
inkscape:pageopacity="0.0"
|
||||||
|
inkscape:pageshadow="2"
|
||||||
|
inkscape:zoom="2.3640695"
|
||||||
|
inkscape:cx="132.7426"
|
||||||
|
inkscape:cy="151.74366"
|
||||||
|
inkscape:document-units="mm"
|
||||||
|
inkscape:current-layer="layer1"
|
||||||
|
inkscape:document-rotation="0"
|
||||||
|
showgrid="false"
|
||||||
|
fit-margin-top="0"
|
||||||
|
fit-margin-left="0"
|
||||||
|
fit-margin-right="0"
|
||||||
|
fit-margin-bottom="0"
|
||||||
|
units="px"
|
||||||
|
inkscape:window-width="1920"
|
||||||
|
inkscape:window-height="1039"
|
||||||
|
inkscape:window-x="0"
|
||||||
|
inkscape:window-y="20"
|
||||||
|
inkscape:window-maximized="0" />
|
||||||
|
<metadata
|
||||||
|
id="metadata913">
|
||||||
|
<rdf:RDF>
|
||||||
|
<cc:Work
|
||||||
|
rdf:about="">
|
||||||
|
<dc:format>image/svg+xml</dc:format>
|
||||||
|
<dc:type
|
||||||
|
rdf:resource="http://purl.org/dc/dcmitype/StillImage" />
|
||||||
|
<dc:title></dc:title>
|
||||||
|
</cc:Work>
|
||||||
|
</rdf:RDF>
|
||||||
|
</metadata>
|
||||||
|
<g
|
||||||
|
inkscape:label="Layer 1"
|
||||||
|
inkscape:groupmode="layer"
|
||||||
|
id="layer1"
|
||||||
|
transform="translate(-141.5009,-98.254059)">
|
||||||
|
<rect
|
||||||
|
style="fill:#4e4e4e;fill-opacity:1;stroke-width:1.01574"
|
||||||
|
id="rect858"
|
||||||
|
width="66.592186"
|
||||||
|
height="66.832306"
|
||||||
|
x="141.5009"
|
||||||
|
y="98.056282" />
|
||||||
|
<g
|
||||||
|
id="g1637"
|
||||||
|
transform="translate(1.5164686,-0.22143797)">
|
||||||
|
<g
|
||||||
|
id="g1034-5"
|
||||||
|
transform="matrix(0.26458333,0,0,0.26458333,140.0054,98.562655)">
|
||||||
|
<path
|
||||||
|
class="cls-1"
|
||||||
|
d="m 85.377935,159.38378 5.163143,-0.0333 h 0.06662 q 2.864711,0 2.864711,2.69816 v 8.69407 a 24.849705,24.849705 0 0 1 -8.649651,1.43235 q -4.730105,0 -7.128468,-3.21447 -2.398363,-3.21447 -2.398363,-8.76068 0,-5.55177 2.981299,-8.62745 a 9.7600046,9.7600046 0 0 1 7.29502,-3.08123 13.368653,13.368653 0 0 1 7.811335,2.43167 3.9250986,3.9250986 0 0 1 -0.682867,1.76547 4.7634152,4.7634152 0 0 1 -1.282458,1.33242 9.798867,9.798867 0 0 0 -5.679457,-1.96533 5.3574542,5.3574542 0 0 0 -4.480275,2.04861 q -1.598909,2.03749 -1.598909,6.41229 0,8.22771 6.062529,8.22771 a 16.910679,16.910679 0 0 0 3.697476,-0.43303 v -3.16451 q 0,-1.49898 0.06662,-2.22071 h -2.442777 a 2.2873276,2.2873276 0 0 1 -1.515632,-0.41638 1.6655298,1.6655298 0 0 1 -0.483004,-1.33242 5.7072154,5.7072154 0 0 1 0.333106,-1.79322 z"
|
||||||
|
id="path8-2"
|
||||||
|
style="stroke-width:0.555177" />
|
||||||
|
<path
|
||||||
|
class="cls-1"
|
||||||
|
d="m 111.07151,169.8433 a 4.3137222,4.3137222 0 0 1 -0.55518,1.18253 4.0305821,4.0305821 0 0 1 -0.84942,0.94935 3.7640973,3.7640973 0 0 1 -3.05902,-1.95422 6.7453957,6.7453957 0 0 1 -4.76342,2.13188 q -2.564913,0 -3.886233,-1.49898 a 5.1298318,5.1298318 0 0 1 -1.299113,-3.4643 q 0,-2.77588 1.815427,-4.21379 a 7.3338829,7.3338829 0 0 1 4.669039,-1.3935 q 1.53228,0 2.89802,0.13325 v -0.99932 q 0,-2.63154 -2.53161,-2.63154 -1.79877,0 -5.096518,1.19918 a 4.674587,4.674587 0 0 1 -1.110353,-2.96464 18.581761,18.581761 0 0 1 7.217291,-1.49898 5.8682167,5.8682167 0 0 1 4.0639,1.39905 q 1.56559,1.39904 1.56559,4.23044 v 6.79537 q -0.0111,1.83208 0.9216,2.59822 z m -8.36096,-0.83276 a 4.7134493,4.7134493 0 0 0 3.33106,-1.59891 v -2.94244 a 22.368065,22.368065 0 0 0 -2.53161,-0.13324 2.775883,2.775883 0 0 0 -2.06525,0.68842 2.3928111,2.3928111 0 0 0 -0.69953,1.76546 2.3539488,2.3539488 0 0 0 0.55518,1.66553 1.8431863,1.8431863 0 0 0 1.41015,0.55518 z"
|
||||||
|
id="path10-28"
|
||||||
|
style="stroke-width:0.555177" />
|
||||||
|
<path
|
||||||
|
class="cls-1"
|
||||||
|
d="m 113.76966,157.11865 a 3.986168,3.986168 0 0 1 0.55518,-1.21583 3.3310596,3.3310596 0 0 1 0.84942,-0.94935 4.1638245,4.1638245 0 0 1 3.51427,2.96464 q 1.33242,-2.96464 4.29707,-2.96464 a 10.215249,10.215249 0 0 1 1.93201,0.23317 7.4782288,7.4782288 0 0 1 -0.99932,3.88624 8.4497879,8.4497879 0 0 0 -1.49897,-0.19987 q -2.03195,0 -3.26444,2.16519 v 10.64829 a 11.575432,11.575432 0 0 1 -2.03195,0.16655 12.769062,12.769062 0 0 1 -2.09857,-0.16655 v -11.15905 q -0.0222,-2.40947 -1.2547,-3.40879 z"
|
||||||
|
id="path12-9"
|
||||||
|
style="stroke-width:0.555177" />
|
||||||
|
<path
|
||||||
|
class="cls-1"
|
||||||
|
d="m 140.38483,169.8433 a 4.3137222,4.3137222 0 0 1 -0.58293,1.18253 4.0305821,4.0305821 0 0 1 -0.84942,0.94935 3.7640973,3.7640973 0 0 1 -3.05348,-1.95422 6.7453957,6.7453957 0 0 1 -4.76341,2.13188 q -2.56492,0 -3.88624,-1.49898 a 5.1298318,5.1298318 0 0 1 -1.29911,-3.4643 q 0,-2.77588 1.81543,-4.21379 a 7.3338829,7.3338829 0 0 1 4.64682,-1.4157 q 1.53229,0 2.89803,0.13324 v -0.99932 q 0,-2.63153 -2.53161,-2.63153 -1.79877,0 -5.09652,1.19918 a 4.674587,4.674587 0 0 1 -1.11035,-2.96465 18.581761,18.581761 0 0 1 7.21729,-1.49897 5.8682167,5.8682167 0 0 1 4.0639,1.39904 q 1.56559,1.39905 1.56559,4.23045 v 6.81757 q 0.0333,1.83208 0.96601,2.59822 z m -8.37206,-0.83276 a 4.7134493,4.7134493 0 0 0 3.33106,-1.59891 v -2.94244 a 22.368065,22.368065 0 0 0 -2.53161,-0.13324 2.775883,2.775883 0 0 0 -2.06526,0.69952 2.3928111,2.3928111 0 0 0 -0.69952,1.76546 2.3539488,2.3539488 0 0 0 0.55518,1.66553 1.8431863,1.8431863 0 0 0 1.41015,0.54408 z"
|
||||||
|
id="path14-7"
|
||||||
|
style="stroke-width:0.555177" />
|
||||||
|
<path
|
||||||
|
class="cls-1"
|
||||||
|
d="m 144.48203,169.71006 q -1.49897,-2.29843 -1.49897,-6.34567 0,-4.04724 1.8987,-6.34567 a 5.740526,5.740526 0 0 1 4.56355,-2.29843 6.4400486,6.4400486 0 0 1 4.49693,1.66553 3.7696491,3.7696491 0 0 1 2.63154,-1.43235 3.1200925,3.1200925 0 0 1 0.88273,0.93269 3.8862362,3.8862362 0 0 1 0.55518,1.16587 q -0.9327,0.79946 -0.9327,2.86472 v 9.438 q 0,5.29638 -1.73215,7.49488 -1.73215,2.1985 -5.69611,2.22071 a 16.100121,16.100121 0 0 1 -5.9626,-1.11036 4.4802752,4.4802752 0 0 1 1.03263,-3.03126 10.892565,10.892565 0 0 0 4.48028,1.03263 q 2.18184,0 3.0146,-1.11035 a 4.9965894,4.9965894 0 0 0 0.83277,-3.06458 V 170.454 a 6.4011862,6.4011862 0 0 1 -4.16383,1.56559 4.9188647,4.9188647 0 0 1 -4.40255,-2.30953 z m 8.56083,-2.69816 v -7.72806 a 4.2915151,4.2915151 0 0 0 -2.86471,-1.36573 2.4039147,2.4039147 0 0 0 -2.18185,1.43235 8.6885138,8.6885138 0 0 0 -0.7828,4.09721 q 0,2.66485 0.71618,3.93065 a 2.1318781,2.1318781 0 0 0 1.88205,1.2658 4.2304457,4.2304457 0 0 0 3.23113,-1.63222 z"
|
||||||
|
id="path16-3"
|
||||||
|
style="stroke-width:0.555177" />
|
||||||
|
<path
|
||||||
|
class="cls-1"
|
||||||
|
d="m 174.20619,164.78009 h -9.32697 a 5.6405943,5.6405943 0 0 0 0.88273,3.04792 q 0.7828,1.0826 2.74813,1.0826 a 10.120869,10.120869 0 0 0 4.36369,-1.16587 4.3803434,4.3803434 0 0 1 1.19918,2.5316 10.759323,10.759323 0 0 1 -6.41229,1.8987 q -3.74744,0 -5.37966,-2.43167 -1.63222,-2.43167 -1.63222,-6.2957 0,-3.88624 1.79877,-6.2957 a 6.0181143,6.0181143 0 0 1 5.14649,-2.43168 q 3.33106,0 5.14648,2.01529 a 7.3449864,7.3449864 0 0 1 1.79878,5.07987 13.04665,13.04665 0 0 1 -0.33311,2.96464 z m -6.42895,-7.06184 q -2.73146,0 -2.93133,4.13051 h 5.79605 v -0.39973 a 4.7245529,4.7245529 0 0 0 -0.69953,-2.69816 2.4316735,2.4316735 0 0 0 -2.14298,-1.03262 z"
|
||||||
|
id="path18-6"
|
||||||
|
style="stroke-width:0.555177" />
|
||||||
|
<path
|
||||||
|
class="cls-2"
|
||||||
|
d="m 174.55595,111.039 a 1.4878733,1.4878733 0 0 1 -0.0666,0.51631 0.10548355,0.10548355 0 0 1 0,0.0444 l -0.0666,0.17211 v 0 l -0.0666,0.12769 -10.69826,21.15223 c -1.48787,2.93688 -4.22489,2.84806 -3.76409,-0.12214 l 2.15408,-12.02512 c 0.0722,-0.39418 0.70508,-3.17006 1.31022,-5.1798 l -20.64702,6.4456 c -3.24223,21.05785 -30.95109,21.40761 -35.47023,0 l -20.691432,-6.46226 c 0.605143,2.00974 1.243596,4.80228 1.315769,5.19646 l 2.154085,12.02512 c 0.460796,2.9702 -2.276224,3.05902 -3.764098,0.12214 L 75.49024,111.77183 a 0.77169547,0.77169547 0 0 1 -0.06662,-0.17766 1.5989086,1.5989086 0 0 1 0.838317,-2.13743 L 120.47065,89.897871 a 11.0036,11.0036 0 0 1 8.88282,0 l 44.20871,19.558869 a 1.5822533,1.5822533 0 0 1 0.99377,1.58226 z"
|
||||||
|
id="path24-31"
|
||||||
|
style="stroke-width:0.555177" />
|
||||||
|
<path
|
||||||
|
class="cls-3"
|
||||||
|
d="m 139.0413,114.72537 19.11473,-7.69475 a 0.81055784,0.81055784 0 0 0 0,-1.50453 c -2.2207,-0.92714 -4.96328,-1.99308 -7.65033,-3.10899 -0.49411,-0.20541 -5.17425,3.15341 -5.60173,3.49762 l -8.23882,6.58439 c -1.99309,1.67108 -0.26649,3.28665 2.37615,2.22626 z"
|
||||||
|
id="path26-9"
|
||||||
|
style="stroke-width:0.555177" />
|
||||||
|
<circle
|
||||||
|
class="cls-3"
|
||||||
|
cx="125.18409"
|
||||||
|
cy="122.24245"
|
||||||
|
r="9.9654207"
|
||||||
|
id="circle28-4"
|
||||||
|
style="stroke-width:0.555177" />
|
||||||
|
</g>
|
||||||
|
<path
|
||||||
|
class="cls-1"
|
||||||
|
d="m 162.59498,140.73295 1.36608,-0.009 h 0.0176 q 0.75796,0 0.75796,0.71389 v 2.30031 a 6.5748177,6.5748177 0 0 1 -2.28855,0.37897 q -1.25151,0 -1.88608,-0.85049 -0.63456,-0.8505 -0.63456,-2.31793 0,-1.46891 0.7888,-2.28268 a 2.5823345,2.5823345 0 0 1 1.93014,-0.81524 3.5371227,3.5371227 0 0 1 2.06675,0.64338 1.0385157,1.0385157 0 0 1 -0.18068,0.46711 1.2603203,1.2603203 0 0 1 -0.33931,0.35254 2.5926169,2.5926169 0 0 0 -1.5027,-0.52 1.4174931,1.4174931 0 0 0 -1.1854,0.54203 q -0.42305,0.53909 -0.42305,1.69658 0,2.17692 1.60405,2.17692 a 4.4742838,4.4742838 0 0 0 0.97829,-0.11457 v -0.83728 q 0,-0.3966 0.0176,-0.58756 h -0.64632 a 0.60518875,0.60518875 0 0 1 -0.40101,-0.11017 0.44067142,0.44067142 0 0 1 -0.12779,-0.35254 1.5100341,1.5100341 0 0 1 0.0881,-0.47445 z"
|
||||||
|
id="path8-6-4"
|
||||||
|
style="fill:#c3c3c3;fill-opacity:1;stroke-width:0.146891" />
|
||||||
|
<path
|
||||||
|
class="cls-1"
|
||||||
|
d="m 169.39307,143.50037 a 1.141339,1.141339 0 0 1 -0.14689,0.31288 1.0664248,1.0664248 0 0 1 -0.22474,0.25118 0.9959174,0.9959174 0 0 1 -0.80937,-0.51706 1.7847193,1.7847193 0 0 1 -1.26032,0.56406 q -0.67863,0 -1.02823,-0.3966 a 1.357268,1.357268 0 0 1 -0.34373,-0.9166 q 0,-0.73445 0.48034,-1.1149 a 1.9404232,1.9404232 0 0 1 1.23535,-0.36869 q 0.40541,0 0.76676,0.0352 v -0.2644 q 0,-0.69626 -0.66982,-0.69626 -0.47592,0 -1.34845,0.31728 a 1.2368178,1.2368178 0 0 1 -0.29378,-0.78439 4.9164242,4.9164242 0 0 1 1.90957,-0.39661 1.5526323,1.5526323 0 0 1 1.07524,0.37017 q 0.41423,0.37016 0.41423,1.1193 v 1.79794 q -0.003,0.48474 0.24384,0.68745 z m -2.21217,-0.22034 a 1.2471001,1.2471001 0 0 0 0.88134,-0.42304 v -0.77852 a 5.9182171,5.9182171 0 0 0 -0.66982,-0.0353 0.73445237,0.73445237 0 0 0 -0.54643,0.18215 0.63309793,0.63309793 0 0 0 -0.18508,0.46711 0.62281561,0.62281561 0 0 0 0.14689,0.44067 0.48767637,0.48767637 0 0 0 0.3731,0.14689 z"
|
||||||
|
id="path10-2-5"
|
||||||
|
style="fill:#c3c3c3;fill-opacity:1;stroke-width:0.146891" />
|
||||||
|
<path
|
||||||
|
class="cls-1"
|
||||||
|
d="m 170.10696,140.13364 a 1.0546736,1.0546736 0 0 1 0.14689,-0.32169 0.88134284,0.88134284 0 0 1 0.22474,-0.25118 1.1016786,1.1016786 0 0 1 0.92982,0.78439 q 0.35254,-0.78439 1.13693,-0.78439 a 2.7027846,2.7027846 0 0 1 0.51118,0.0617 1.9786147,1.9786147 0 0 1 -0.2644,1.02823 2.235673,2.235673 0 0 0 -0.39661,-0.0529 q -0.53762,0 -0.86371,0.57287 v 2.81736 a 3.0626663,3.0626663 0 0 1 -0.53762,0.0441 3.3784809,3.3784809 0 0 1 -0.55525,-0.0441 v -2.95249 q -0.006,-0.63751 -0.33197,-0.90191 z"
|
||||||
|
id="path12-6-0"
|
||||||
|
style="fill:#c3c3c3;fill-opacity:1;stroke-width:0.146891" />
|
||||||
|
<path
|
||||||
|
class="cls-1"
|
||||||
|
d="m 177.14889,143.50037 a 1.141339,1.141339 0 0 1 -0.15424,0.31288 1.0664248,1.0664248 0 0 1 -0.22474,0.25118 0.9959174,0.9959174 0 0 1 -0.8079,-0.51706 1.7847193,1.7847193 0 0 1 -1.26032,0.56406 q -0.67863,0 -1.02823,-0.3966 a 1.357268,1.357268 0 0 1 -0.34372,-0.9166 q 0,-0.73445 0.48033,-1.1149 a 1.9404232,1.9404232 0 0 1 1.22947,-0.37457 q 0.40542,0 0.76677,0.0353 v -0.26441 q 0,-0.69626 -0.66982,-0.69626 -0.47593,0 -1.34846,0.31729 a 1.2368178,1.2368178 0 0 1 -0.29378,-0.7844 4.9164242,4.9164242 0 0 1 1.90958,-0.3966 1.5526323,1.5526323 0 0 1 1.07524,0.37016 q 0.41423,0.37017 0.41423,1.11931 v 1.80381 q 0.009,0.48474 0.25559,0.68745 z m -2.21511,-0.22034 a 1.2471001,1.2471001 0 0 0 0.88134,-0.42304 v -0.77852 a 5.9182171,5.9182171 0 0 0 -0.66982,-0.0353 0.73445237,0.73445237 0 0 0 -0.54643,0.18509 0.63309793,0.63309793 0 0 0 -0.18508,0.46711 0.62281561,0.62281561 0 0 0 0.14689,0.44067 0.48767637,0.48767637 0 0 0 0.3731,0.14395 z"
|
||||||
|
id="path14-1-3"
|
||||||
|
style="fill:#c3c3c3;fill-opacity:1;stroke-width:0.146891" />
|
||||||
|
<path
|
||||||
|
class="cls-1"
|
||||||
|
d="m 178.23294,143.46511 q -0.3966,-0.60812 -0.3966,-1.67895 0,-1.07084 0.50236,-1.67896 a 1.5188475,1.5188475 0 0 1 1.20744,-0.60813 1.7039295,1.7039295 0 0 1 1.18981,0.44067 0.99738631,0.99738631 0 0 1 0.69626,-0.37897 0.82552446,0.82552446 0 0 1 0.23356,0.24677 1.0282333,1.0282333 0 0 1 0.14689,0.30847 q -0.24678,0.21152 -0.24678,0.75796 v 2.49714 q 0,1.40133 -0.45829,1.98302 -0.4583,0.58168 -1.5071,0.58756 a 4.2598236,4.2598236 0 0 1 -1.5776,-0.29378 1.1854061,1.1854061 0 0 1 0.27321,-0.80203 2.8819911,2.8819911 0 0 0 1.18541,0.27322 q 0.57728,0 0.79761,-0.29378 a 1.3220143,1.3220143 0 0 0 0.22034,-0.81084 v -0.35253 a 1.6936472,1.6936472 0 0 1 -1.10168,0.41423 1.3014496,1.3014496 0 0 1 -1.16484,-0.61107 z m 2.26505,-0.71388 v -2.04472 a 1.1354634,1.1354634 0 0 0 -0.75795,-0.36135 0.63603576,0.63603576 0 0 0 -0.57728,0.37898 2.2988359,2.2988359 0 0 0 -0.20712,1.08405 q 0,0.70508 0.18949,1.03998 a 0.56405941,0.56405941 0 0 0 0.49796,0.33491 1.1193054,1.1193054 0 0 0 0.8549,-0.43185 z"
|
||||||
|
id="path16-8-6"
|
||||||
|
style="fill:#c3c3c3;fill-opacity:1;stroke-width:0.146891" />
|
||||||
|
<path
|
||||||
|
class="cls-1"
|
||||||
|
d="m 186.09746,142.16073 h -2.46776 a 1.4924072,1.4924072 0 0 0 0.23355,0.80643 q 0.20712,0.28643 0.72711,0.28643 a 2.6778132,2.6778132 0 0 0 1.15456,-0.30847 1.1589658,1.1589658 0 0 1 0.31728,0.66982 2.8467375,2.8467375 0 0 1 -1.69658,0.50237 q -0.99151,0 -1.42337,-0.64338 -0.43186,-0.64338 -0.43186,-1.66574 0,-1.02823 0.47593,-1.66574 a 1.5922927,1.5922927 0 0 1 1.36167,-0.64338 q 0.88134,0 1.36167,0.53321 a 1.943361,1.943361 0 0 1 0.47593,1.34405 3.4519261,3.4519261 0 0 1 -0.0881,0.7844 z m -1.701,-1.86845 q -0.7227,0 -0.77558,1.09287 h 1.53354 v -0.10577 a 1.2500379,1.2500379 0 0 0 -0.18508,-0.71388 0.64338027,0.64338027 0 0 0 -0.567,-0.27322 z"
|
||||||
|
id="path18-7-1"
|
||||||
|
style="fill:#c3c3c3;fill-opacity:1;stroke-width:0.146891" />
|
||||||
|
<path
|
||||||
|
id="path24-3-6-9-0"
|
||||||
|
style="fill:#ff9329;fill-opacity:1;stroke-width:0.146891"
|
||||||
|
d="m 173.02622,117.15529 a 2.9113691,2.9113691 0 0 0 -1.14618,0.24753 l -11.69696,5.17488 a 0.42304456,0.42304456 0 0 0 -0.22169,0.56586 0.20417776,0.20417776 0 0 0 0.0176,0.047 l 0.79634,1.57355 11.10475,-4.91288 a 2.9113691,2.9113691 0 0 1 1.14618,-0.24753 2.9113691,2.9113691 0 0 1 1.20406,0.24753 l 11.12387,4.92115 0.7829,-1.54823 0.0176,-0.0336 0.0181,-0.0455 a 0.02790919,0.02790919 0 0 0 0,-0.0119 0.39366647,0.39366647 0 0 0 0.0176,-0.13642 0.41863785,0.41863785 0 0 0 -0.26303,-0.4191 l -11.69697,-5.17488 a 2.9113691,2.9113691 0 0 0 -1.20406,-0.24753 z m -10.12134,9.52449 c 0.0218,0.0723 0.0408,0.14674 0.0615,0.22066 h 0.51831 l -0.008,-0.0419 z m 20.32227,0.005 -0.57103,0.17828 -0.007,0.0377 h 0.5178 c 0.0202,-0.0723 0.0386,-0.14514 0.0599,-0.216 z" />
|
||||||
|
<path
|
||||||
|
class="cls-2"
|
||||||
|
d="m 186.19,127.94173 a 0.39366647,0.39366647 0 0 1 -0.0176,0.13661 0.02790919,0.02790919 0 0 1 0,0.0117 l -0.0176,0.0455 v 0 l -0.0176,0.0338 -2.83058,5.59652 c -0.39366,0.77705 -1.11783,0.75355 -0.99591,-0.0323 l 0.56993,-3.18165 c 0.0191,-0.10429 0.18655,-0.83874 0.34666,-1.37049 l -5.46285,1.7054 c -0.85784,5.57156 -8.18915,5.6641 -9.38484,0 l -5.4746,-1.70981 c 0.16011,0.53175 0.32903,1.27061 0.34813,1.3749 l 0.56993,3.18165 c 0.12192,0.78586 -0.60225,0.80936 -0.99592,0.0323 l -2.84822,-5.63031 a 0.20417776,0.20417776 0 0 1 -0.0176,-0.047 0.42304456,0.42304456 0 0 1 0.22181,-0.56553 l 11.69688,-5.17495 a 2.9113691,2.9113691 0 0 1 2.35025,0 l 11.69689,5.17495 a 0.41863785,0.41863785 0 0 1 0.26293,0.41864 z"
|
||||||
|
id="path24-0-3"
|
||||||
|
style="fill:#ff9329;fill-opacity:1;stroke-width:0.146891" />
|
||||||
|
<path
|
||||||
|
class="cls-3"
|
||||||
|
d="m 176.79341,128.91708 5.05744,-2.0359 a 0.21446009,0.21446009 0 0 0 0,-0.39807 c -0.58756,-0.24531 -1.3132,-0.52734 -2.02415,-0.82259 -0.13073,-0.0543 -1.36902,0.83434 -1.48212,0.92541 l -2.17986,1.74212 c -0.52734,0.44214 -0.0705,0.86959 0.62869,0.58903 z"
|
||||||
|
id="path26-2-2"
|
||||||
|
style="fill:#4e4e4e;fill-opacity:1;stroke-width:0.146891" />
|
||||||
|
<circle
|
||||||
|
class="cls-3"
|
||||||
|
cx="173.12703"
|
||||||
|
cy="130.90596"
|
||||||
|
r="2.6366842"
|
||||||
|
id="circle28-3-0"
|
||||||
|
style="fill:#4e4e4e;fill-opacity:1;stroke-width:0.146891" />
|
||||||
|
</g>
|
||||||
|
</g>
|
||||||
|
</svg>
|
After Width: | Height: | Size: 17 KiB |
BIN
doc/logo/garage-notext.png
Normal file
After Width: | Height: | Size: 7.8 KiB |
146
doc/logo/garage-notext.svg
Normal file
|
@ -0,0 +1,146 @@
|
||||||
|
<?xml version="1.0" encoding="UTF-8" standalone="no"?>
|
||||||
|
<svg
|
||||||
|
xmlns:dc="http://purl.org/dc/elements/1.1/"
|
||||||
|
xmlns:cc="http://creativecommons.org/ns#"
|
||||||
|
xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#"
|
||||||
|
xmlns:svg="http://www.w3.org/2000/svg"
|
||||||
|
xmlns="http://www.w3.org/2000/svg"
|
||||||
|
xmlns:sodipodi="http://sodipodi.sourceforge.net/DTD/sodipodi-0.dtd"
|
||||||
|
xmlns:inkscape="http://www.inkscape.org/namespaces/inkscape"
|
||||||
|
id="Calque_1"
|
||||||
|
data-name="Calque 1"
|
||||||
|
width="250"
|
||||||
|
height="250"
|
||||||
|
viewBox="0 0 249.99999 250"
|
||||||
|
version="1.1"
|
||||||
|
sodipodi:docname="garage-notext.svg"
|
||||||
|
inkscape:version="1.0.2 (e86c870879, 2021-01-15)"
|
||||||
|
inkscape:export-filename="/home/lx/Deuxfleurs/garage/garage-notext.png"
|
||||||
|
inkscape:export-xdpi="96"
|
||||||
|
inkscape:export-ydpi="96">
|
||||||
|
<metadata
|
||||||
|
id="metadata33">
|
||||||
|
<rdf:RDF>
|
||||||
|
<cc:Work
|
||||||
|
rdf:about="">
|
||||||
|
<dc:format>image/svg+xml</dc:format>
|
||||||
|
<dc:type
|
||||||
|
rdf:resource="http://purl.org/dc/dcmitype/StillImage" />
|
||||||
|
<dc:title></dc:title>
|
||||||
|
</cc:Work>
|
||||||
|
</rdf:RDF>
|
||||||
|
</metadata>
|
||||||
|
<sodipodi:namedview
|
||||||
|
pagecolor="#ffffff"
|
||||||
|
bordercolor="#666666"
|
||||||
|
borderopacity="1"
|
||||||
|
objecttolerance="10"
|
||||||
|
gridtolerance="10"
|
||||||
|
guidetolerance="10"
|
||||||
|
inkscape:pageopacity="1"
|
||||||
|
inkscape:pageshadow="2"
|
||||||
|
inkscape:window-width="1920"
|
||||||
|
inkscape:window-height="1039"
|
||||||
|
id="namedview31"
|
||||||
|
showgrid="false"
|
||||||
|
inkscape:zoom="2.1842656"
|
||||||
|
inkscape:cx="143.86571"
|
||||||
|
inkscape:cy="118.5836"
|
||||||
|
inkscape:window-x="0"
|
||||||
|
inkscape:window-y="20"
|
||||||
|
inkscape:window-maximized="0"
|
||||||
|
inkscape:current-layer="Calque_1"
|
||||||
|
inkscape:document-rotation="0"
|
||||||
|
units="px"
|
||||||
|
showguides="false"
|
||||||
|
inkscape:guide-bbox="true"
|
||||||
|
inkscape:snap-global="false"
|
||||||
|
width="250mm">
|
||||||
|
<sodipodi:guide
|
||||||
|
position="102.90662,161.07694"
|
||||||
|
orientation="0,-1"
|
||||||
|
id="guide1016" />
|
||||||
|
<sodipodi:guide
|
||||||
|
position="122.45269,170.65683"
|
||||||
|
orientation="0,-1"
|
||||||
|
id="guide1018" />
|
||||||
|
<sodipodi:guide
|
||||||
|
position="128.86504,180.08221"
|
||||||
|
orientation="0,-1"
|
||||||
|
id="guide1020" />
|
||||||
|
</sodipodi:namedview>
|
||||||
|
<defs
|
||||||
|
id="defs4">
|
||||||
|
<style
|
||||||
|
id="style2">.cls-1{fill:#3b2100;}.cls-2{fill:#ffd952;}.cls-3{fill:#45c8ff;}</style>
|
||||||
|
</defs>
|
||||||
|
<rect
|
||||||
|
style="fill:#ffffff;stroke-width:3.60793"
|
||||||
|
id="rect3824"
|
||||||
|
width="251.68179"
|
||||||
|
height="250.98253"
|
||||||
|
x="-0.59092933"
|
||||||
|
y="-0.31321606" />
|
||||||
|
<g
|
||||||
|
id="g1719"
|
||||||
|
transform="matrix(1.9099251,0,0,1.9099251,-113.74064,-74.610597)">
|
||||||
|
<path
|
||||||
|
d="m 138.41049,100.63656 a 8.327649,8.327649 0 0 1 -2.77589,-0.28869 l -34.78736,-9.388039 a 8.4442361,8.4442361 0 0 1 -2.620438,-1.238044 z"
|
||||||
|
id="path6"
|
||||||
|
style="stroke-width:0.555177" />
|
||||||
|
<path
|
||||||
|
id="path24-3-6"
|
||||||
|
style="fill:#ffd952;fill-opacity:1;stroke-width:0.555177"
|
||||||
|
d="m 124.88254,70.600847 a 11.0036,11.0036 0 0 0 -4.33203,0.935547 L 76.341524,91.094987 a 1.5989086,1.5989086 0 0 0 -0.837891,2.138672 0.77169547,0.77169547 0 0 0 0.06641,0.177735 l 7.09375,14.021486 h 6.15625 l -0.875,-4.88867 c -0.07217,-0.39418 -0.711263,-3.187537 -1.316406,-5.197269 l 20.691403,6.462899 c 0.27198,1.28839 0.63292,2.49204 1.0625,3.62304 h 33.54883 c 0.36964,-1.13128 0.66138,-2.33705 0.85938,-3.62304 l 20.64648,-6.445321 c -0.60514,2.009734 -1.23639,4.785511 -1.30859,5.179691 l -0.875,4.88867 h 6.15429 l 7.02735,-13.894533 0.0664,-0.126953 0.0684,-0.171875 a 0.10548355,0.10548355 0 0 0 0,-0.04492 1.4878733,1.4878733 0 0 0 0.0664,-0.515625 1.5822533,1.5822533 0 0 0 -0.99414,-1.583985 L 129.43333,71.536394 a 11.0036,11.0036 0 0 0 -4.55079,-0.935547 z" />
|
||||||
|
<path
|
||||||
|
id="path24-3"
|
||||||
|
style="fill:#49c8fa;fill-opacity:1;stroke-width:0.555177"
|
||||||
|
d="m 124.88254,79.854518 a 11.0036,11.0036 0 0 0 -4.33203,0.935547 L 76.341524,100.34866 a 1.5989086,1.5989086 0 0 0 -0.837891,2.13672 0.77169547,0.77169547 0 0 0 0.06641,0.17773 l 3.847657,7.60352 h 8.175781 c -0.257897,-1.08856 -0.591943,-2.42953 -0.964844,-3.66797 l 11.744141,3.66797 h 53.371092 l 11.69336,-3.65039 c -0.37193,1.23522 -0.70076,2.56719 -0.95703,3.65039 h 8.17383 l 3.78125,-7.47656 0.0664,-0.12696 0.0684,-0.17187 a 0.10548355,0.10548355 0 0 0 0,-0.0449 1.4878733,1.4878733 0 0 0 0.0664,-0.51563 1.5822533,1.5822533 0 0 0 -0.99414,-1.58203 L 129.43333,80.790065 a 11.0036,11.0036 0 0 0 -4.55079,-0.935547 z" />
|
||||||
|
<path
|
||||||
|
class="cls-2"
|
||||||
|
d="m 174.63576,111.36813 a 1.4878733,1.4878733 0 0 1 -0.0666,0.51631 0.10548355,0.10548355 0 0 1 0,0.0444 l -0.0666,0.17211 v 0 l -0.0666,0.12769 -10.69826,21.15223 c -1.48787,2.93688 -4.22489,2.84806 -3.76409,-0.12214 l 2.15408,-12.02512 c 0.0722,-0.39418 0.70508,-3.17006 1.31022,-5.1798 l -20.64702,6.4456 c -3.24223,21.05785 -30.95109,21.40761 -35.47023,0 l -20.691437,-6.46226 c 0.605143,2.00974 1.243596,4.80228 1.315769,5.19646 l 2.154085,12.02512 c 0.460796,2.9702 -2.276224,3.05902 -3.764098,0.12214 L 75.570045,112.10096 a 0.77169547,0.77169547 0 0 1 -0.06662,-0.17766 1.5989086,1.5989086 0 0 1 0.838317,-2.13743 L 120.55046,90.226998 a 11.0036,11.0036 0 0 1 8.88282,0 l 44.20871,19.558872 a 1.5822533,1.5822533 0 0 1 0.99377,1.58226 z"
|
||||||
|
id="path24"
|
||||||
|
style="stroke-width:0.555177" />
|
||||||
|
<path
|
||||||
|
class="cls-3"
|
||||||
|
d="m 139.12111,115.0545 19.11473,-7.69475 a 0.81055784,0.81055784 0 0 0 0,-1.50453 c -2.2207,-0.92714 -4.96328,-1.99308 -7.65033,-3.10899 -0.49411,-0.20541 -5.17425,3.15341 -5.60173,3.49762 l -8.23882,6.58439 c -1.99309,1.67108 -0.26649,3.28665 2.37615,2.22626 z"
|
||||||
|
id="path26"
|
||||||
|
style="stroke-width:0.555177" />
|
||||||
|
<circle
|
||||||
|
class="cls-3"
|
||||||
|
cx="125.26389"
|
||||||
|
cy="122.57157"
|
||||||
|
r="9.9654207"
|
||||||
|
id="circle28"
|
||||||
|
style="stroke-width:0.555177" />
|
||||||
|
<path
|
||||||
|
d="m 138.41049,100.63656 a 8.327649,8.327649 0 0 1 -2.77589,-0.28869 l -34.78736,-9.388039 a 8.4442361,8.4442361 0 0 1 -2.620438,-1.238044 z"
|
||||||
|
id="path6-0"
|
||||||
|
style="stroke-width:0.555177" />
|
||||||
|
<path
|
||||||
|
id="path24-3-6-9"
|
||||||
|
style="fill:#ff9329;fill-opacity:1;stroke-width:0.555177"
|
||||||
|
d="m 124.88254,70.600847 a 11.0036,11.0036 0 0 0 -4.33203,0.935547 L 76.341524,91.094987 a 1.5989086,1.5989086 0 0 0 -0.837891,2.138672 0.77169547,0.77169547 0 0 0 0.06641,0.177735 l 7.09375,14.021486 h 6.15625 l -0.875,-4.88867 c -0.07217,-0.39418 -0.711263,-3.187537 -1.316406,-5.197269 l 20.691403,6.462899 c 0.27198,1.28839 0.63292,2.49204 1.0625,3.62304 h 33.54883 c 0.36964,-1.13128 0.66138,-2.33705 0.85938,-3.62304 l 20.64648,-6.445321 c -0.60514,2.009734 -1.23639,4.785511 -1.30859,5.179691 l -0.875,4.88867 h 6.15429 l 7.02735,-13.894533 0.0664,-0.126953 0.0684,-0.171875 a 0.10548355,0.10548355 0 0 0 0,-0.04492 1.4878733,1.4878733 0 0 0 0.0664,-0.515625 1.5822533,1.5822533 0 0 0 -0.99414,-1.583985 L 129.43333,71.536394 a 11.0036,11.0036 0 0 0 -4.55079,-0.935547 z" />
|
||||||
|
<path
|
||||||
|
id="path24-3-2"
|
||||||
|
style="fill:#4e4e4e;fill-opacity:1;stroke-width:0.555177"
|
||||||
|
d="m 124.88254,79.854518 a 11.0036,11.0036 0 0 0 -4.33203,0.935547 L 76.341524,100.34866 a 1.5989086,1.5989086 0 0 0 -0.837891,2.13672 0.77169547,0.77169547 0 0 0 0.06641,0.17773 l 3.847657,7.60352 h 8.175781 c -0.257897,-1.08856 -0.591943,-2.42953 -0.964844,-3.66797 l 11.744141,3.66797 h 53.371092 l 11.69336,-3.65039 c -0.37193,1.23522 -0.70076,2.56719 -0.95703,3.65039 h 8.17383 l 3.78125,-7.47656 0.0664,-0.12696 0.0684,-0.17187 a 0.10548355,0.10548355 0 0 0 0,-0.0449 1.4878733,1.4878733 0 0 0 0.0664,-0.51563 1.5822533,1.5822533 0 0 0 -0.99414,-1.58203 L 129.43333,80.790065 a 11.0036,11.0036 0 0 0 -4.55079,-0.935547 z" />
|
||||||
|
<path
|
||||||
|
class="cls-2"
|
||||||
|
d="m 174.63576,111.36813 a 1.4878733,1.4878733 0 0 1 -0.0666,0.51631 0.10548355,0.10548355 0 0 1 0,0.0444 l -0.0666,0.17211 v 0 l -0.0666,0.12769 -10.69826,21.15223 c -1.48787,2.93688 -4.22489,2.84806 -3.76409,-0.12214 l 2.15408,-12.02512 c 0.0722,-0.39418 0.70508,-3.17006 1.31022,-5.1798 l -20.64702,6.4456 c -3.24223,21.05785 -30.95109,21.40761 -35.47023,0 l -20.691437,-6.46226 c 0.605143,2.00974 1.243596,4.80228 1.315769,5.19646 l 2.154085,12.02512 c 0.460796,2.9702 -2.276224,3.05902 -3.764098,0.12214 L 75.570045,112.10096 a 0.77169547,0.77169547 0 0 1 -0.06662,-0.17766 1.5989086,1.5989086 0 0 1 0.838317,-2.13743 L 120.55046,90.226998 a 11.0036,11.0036 0 0 1 8.88282,0 l 44.20871,19.558872 a 1.5822533,1.5822533 0 0 1 0.99377,1.58226 z"
|
||||||
|
id="path24-0"
|
||||||
|
style="fill:#ff9329;fill-opacity:1;stroke-width:0.555177" />
|
||||||
|
<path
|
||||||
|
class="cls-3"
|
||||||
|
d="m 139.12111,115.0545 19.11473,-7.69475 a 0.81055784,0.81055784 0 0 0 0,-1.50453 c -2.2207,-0.92714 -4.96328,-1.99308 -7.65033,-3.10899 -0.49411,-0.20541 -5.17425,3.15341 -5.60173,3.49762 l -8.23882,6.58439 c -1.99309,1.67108 -0.26649,3.28665 2.37615,2.22626 z"
|
||||||
|
id="path26-2"
|
||||||
|
style="fill:#4e4e4e;fill-opacity:1;stroke-width:0.555177" />
|
||||||
|
<circle
|
||||||
|
class="cls-3"
|
||||||
|
cx="125.26389"
|
||||||
|
cy="122.57157"
|
||||||
|
r="9.9654207"
|
||||||
|
id="circle28-3"
|
||||||
|
style="fill:#4e4e4e;fill-opacity:1;stroke-width:0.555177" />
|
||||||
|
</g>
|
||||||
|
</svg>
|
After Width: | Height: | Size: 8.8 KiB |
BIN
doc/logo/garage.png
Normal file
After Width: | Height: | Size: 12 KiB |
206
doc/logo/garage.svg
Normal file
|
@ -0,0 +1,206 @@
|
||||||
|
<?xml version="1.0" encoding="UTF-8" standalone="no"?>
|
||||||
|
<svg
|
||||||
|
xmlns:dc="http://purl.org/dc/elements/1.1/"
|
||||||
|
xmlns:cc="http://creativecommons.org/ns#"
|
||||||
|
xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#"
|
||||||
|
xmlns:svg="http://www.w3.org/2000/svg"
|
||||||
|
xmlns="http://www.w3.org/2000/svg"
|
||||||
|
xmlns:sodipodi="http://sodipodi.sourceforge.net/DTD/sodipodi-0.dtd"
|
||||||
|
xmlns:inkscape="http://www.inkscape.org/namespaces/inkscape"
|
||||||
|
id="Calque_1"
|
||||||
|
data-name="Calque 1"
|
||||||
|
width="250"
|
||||||
|
height="250"
|
||||||
|
viewBox="0 0 249.99999 250"
|
||||||
|
version="1.1"
|
||||||
|
sodipodi:docname="garage.svg"
|
||||||
|
inkscape:version="1.0.2 (e86c870879, 2021-01-15)"
|
||||||
|
inkscape:export-filename="/home/lx/Deuxfleurs/garage/doc/logo/garage.png"
|
||||||
|
inkscape:export-xdpi="96"
|
||||||
|
inkscape:export-ydpi="96">
|
||||||
|
<metadata
|
||||||
|
id="metadata33">
|
||||||
|
<rdf:RDF>
|
||||||
|
<cc:Work
|
||||||
|
rdf:about="">
|
||||||
|
<dc:format>image/svg+xml</dc:format>
|
||||||
|
<dc:type
|
||||||
|
rdf:resource="http://purl.org/dc/dcmitype/StillImage" />
|
||||||
|
<dc:title />
|
||||||
|
</cc:Work>
|
||||||
|
</rdf:RDF>
|
||||||
|
</metadata>
|
||||||
|
<sodipodi:namedview
|
||||||
|
pagecolor="#ffffff"
|
||||||
|
bordercolor="#666666"
|
||||||
|
borderopacity="1"
|
||||||
|
objecttolerance="10"
|
||||||
|
gridtolerance="10"
|
||||||
|
guidetolerance="10"
|
||||||
|
inkscape:pageopacity="1"
|
||||||
|
inkscape:pageshadow="2"
|
||||||
|
inkscape:window-width="1920"
|
||||||
|
inkscape:window-height="1080"
|
||||||
|
id="namedview31"
|
||||||
|
showgrid="false"
|
||||||
|
inkscape:zoom="2.1842656"
|
||||||
|
inkscape:cx="90.853672"
|
||||||
|
inkscape:cy="123.63257"
|
||||||
|
inkscape:window-x="0"
|
||||||
|
inkscape:window-y="0"
|
||||||
|
inkscape:window-maximized="0"
|
||||||
|
inkscape:current-layer="Calque_1"
|
||||||
|
inkscape:document-rotation="0"
|
||||||
|
units="px"
|
||||||
|
showguides="false"
|
||||||
|
inkscape:guide-bbox="true"
|
||||||
|
inkscape:snap-global="false"
|
||||||
|
width="250mm">
|
||||||
|
<sodipodi:guide
|
||||||
|
position="102.90662,161.07694"
|
||||||
|
orientation="0,-1"
|
||||||
|
id="guide1016" />
|
||||||
|
<sodipodi:guide
|
||||||
|
position="122.45269,170.65683"
|
||||||
|
orientation="0,-1"
|
||||||
|
id="guide1018" />
|
||||||
|
<sodipodi:guide
|
||||||
|
position="128.86504,180.08221"
|
||||||
|
orientation="0,-1"
|
||||||
|
id="guide1020" />
|
||||||
|
</sodipodi:namedview>
|
||||||
|
<defs
|
||||||
|
id="defs4">
|
||||||
|
<style
|
||||||
|
id="style2">.cls-1{fill:#3b2100;}.cls-2{fill:#ffd952;}.cls-3{fill:#45c8ff;}</style>
|
||||||
|
</defs>
|
||||||
|
<rect
|
||||||
|
style="fill:#ffffff;stroke-width:3.60793"
|
||||||
|
id="rect3824"
|
||||||
|
width="251.68179"
|
||||||
|
height="250.98253"
|
||||||
|
x="-0.59092933"
|
||||||
|
y="-0.31321606" />
|
||||||
|
<g
|
||||||
|
id="g1663"
|
||||||
|
transform="matrix(1.7099534,0,0,1.7099534,-88.607712,-87.994557)">
|
||||||
|
<path
|
||||||
|
d="m 138.33068,100.19817 a 8.327649,8.327649 0 0 1 -2.77589,-0.288688 l -34.78736,-9.388036 a 8.4442361,8.4442361 0 0 1 -2.620433,-1.238044 z"
|
||||||
|
id="path6"
|
||||||
|
style="stroke-width:0.555177" />
|
||||||
|
<path
|
||||||
|
class="cls-1"
|
||||||
|
d="m 85.377935,159.27452 5.163143,-0.0333 h 0.06662 q 2.864711,0 2.864711,2.69816 v 8.69407 a 24.849705,24.849705 0 0 1 -8.649651,1.43235 q -4.730105,0 -7.128468,-3.21447 -2.398363,-3.21447 -2.398363,-8.76068 0,-5.55177 2.981299,-8.62745 a 9.7600046,9.7600046 0 0 1 7.29502,-3.08123 13.368653,13.368653 0 0 1 7.811335,2.43167 3.9250986,3.9250986 0 0 1 -0.682867,1.76547 4.7634152,4.7634152 0 0 1 -1.282458,1.33242 9.798867,9.798867 0 0 0 -5.679457,-1.96533 5.3574542,5.3574542 0 0 0 -4.480275,2.04861 q -1.598909,2.03749 -1.598909,6.41229 0,8.22771 6.062529,8.22771 a 16.910679,16.910679 0 0 0 3.697476,-0.43303 v -3.16451 q 0,-1.49898 0.06662,-2.22071 h -2.442777 a 2.2873276,2.2873276 0 0 1 -1.515632,-0.41638 1.6655298,1.6655298 0 0 1 -0.483004,-1.33242 5.7072154,5.7072154 0 0 1 0.333106,-1.79322 z"
|
||||||
|
id="path8"
|
||||||
|
style="fill:#4e4e4e;fill-opacity:1;stroke-width:0.555177" />
|
||||||
|
<path
|
||||||
|
class="cls-1"
|
||||||
|
d="m 111.07151,169.73404 a 4.3137222,4.3137222 0 0 1 -0.55518,1.18253 4.0305821,4.0305821 0 0 1 -0.84942,0.94935 3.7640973,3.7640973 0 0 1 -3.05902,-1.95422 6.7453957,6.7453957 0 0 1 -4.76342,2.13188 q -2.564913,0 -3.886233,-1.49898 a 5.1298318,5.1298318 0 0 1 -1.299113,-3.4643 q 0,-2.77588 1.815427,-4.21379 a 7.3338829,7.3338829 0 0 1 4.669039,-1.3935 q 1.53228,0 2.89802,0.13325 v -0.99932 q 0,-2.63154 -2.53161,-2.63154 -1.79877,0 -5.096518,1.19918 a 4.674587,4.674587 0 0 1 -1.110353,-2.96464 18.581761,18.581761 0 0 1 7.217291,-1.49898 5.8682167,5.8682167 0 0 1 4.0639,1.39905 q 1.56559,1.39904 1.56559,4.23044 v 6.79537 q -0.0111,1.83208 0.9216,2.59822 z m -8.36096,-0.83276 a 4.7134493,4.7134493 0 0 0 3.33106,-1.59891 v -2.94244 a 22.368065,22.368065 0 0 0 -2.53161,-0.13324 2.775883,2.775883 0 0 0 -2.06525,0.68842 2.3928111,2.3928111 0 0 0 -0.69953,1.76546 2.3539488,2.3539488 0 0 0 0.55518,1.66553 1.8431863,1.8431863 0 0 0 1.41015,0.55518 z"
|
||||||
|
id="path10"
|
||||||
|
style="fill:#4e4e4e;fill-opacity:1;stroke-width:0.555177" />
|
||||||
|
<path
|
||||||
|
class="cls-1"
|
||||||
|
d="m 113.76966,157.00939 a 3.986168,3.986168 0 0 1 0.55518,-1.21583 3.3310596,3.3310596 0 0 1 0.84942,-0.94935 4.1638245,4.1638245 0 0 1 3.51427,2.96464 q 1.33242,-2.96464 4.29707,-2.96464 a 10.215249,10.215249 0 0 1 1.93201,0.23317 7.4782288,7.4782288 0 0 1 -0.99932,3.88624 8.4497879,8.4497879 0 0 0 -1.49897,-0.19987 q -2.03195,0 -3.26444,2.16519 v 10.64829 a 11.575432,11.575432 0 0 1 -2.03195,0.16655 12.769062,12.769062 0 0 1 -2.09857,-0.16655 v -11.15905 q -0.0222,-2.40947 -1.2547,-3.40879 z"
|
||||||
|
id="path12"
|
||||||
|
style="fill:#4e4e4e;fill-opacity:1;stroke-width:0.555177" />
|
||||||
|
<path
|
||||||
|
class="cls-1"
|
||||||
|
d="m 140.38483,169.73404 a 4.3137222,4.3137222 0 0 1 -0.58293,1.18253 4.0305821,4.0305821 0 0 1 -0.84942,0.94935 3.7640973,3.7640973 0 0 1 -3.05348,-1.95422 6.7453957,6.7453957 0 0 1 -4.76341,2.13188 q -2.56492,0 -3.88624,-1.49898 a 5.1298318,5.1298318 0 0 1 -1.29911,-3.4643 q 0,-2.77588 1.81543,-4.21379 a 7.3338829,7.3338829 0 0 1 4.64682,-1.4157 q 1.53229,0 2.89803,0.13324 v -0.99932 q 0,-2.63153 -2.53161,-2.63153 -1.79877,0 -5.09652,1.19918 a 4.674587,4.674587 0 0 1 -1.11035,-2.96465 18.581761,18.581761 0 0 1 7.21729,-1.49897 5.8682167,5.8682167 0 0 1 4.0639,1.39904 q 1.56559,1.39905 1.56559,4.23045 v 6.81757 q 0.0333,1.83208 0.96601,2.59822 z m -8.37206,-0.83276 a 4.7134493,4.7134493 0 0 0 3.33106,-1.59891 v -2.94244 a 22.368065,22.368065 0 0 0 -2.53161,-0.13324 2.775883,2.775883 0 0 0 -2.06526,0.69952 2.3928111,2.3928111 0 0 0 -0.69952,1.76546 2.3539488,2.3539488 0 0 0 0.55518,1.66553 1.8431863,1.8431863 0 0 0 1.41015,0.54408 z"
|
||||||
|
id="path14"
|
||||||
|
style="fill:#4e4e4e;fill-opacity:1;stroke-width:0.555177" />
|
||||||
|
<path
|
||||||
|
class="cls-1"
|
||||||
|
d="m 144.48203,169.6008 q -1.49897,-2.29843 -1.49897,-6.34567 0,-4.04724 1.8987,-6.34567 a 5.740526,5.740526 0 0 1 4.56355,-2.29843 6.4400486,6.4400486 0 0 1 4.49693,1.66553 3.7696491,3.7696491 0 0 1 2.63154,-1.43235 3.1200925,3.1200925 0 0 1 0.88273,0.93269 3.8862362,3.8862362 0 0 1 0.55518,1.16587 q -0.9327,0.79946 -0.9327,2.86472 v 9.438 q 0,5.29638 -1.73215,7.49488 -1.73215,2.1985 -5.69611,2.22071 a 16.100121,16.100121 0 0 1 -5.9626,-1.11036 4.4802752,4.4802752 0 0 1 1.03263,-3.03126 10.892565,10.892565 0 0 0 4.48028,1.03263 q 2.18184,0 3.0146,-1.11035 a 4.9965894,4.9965894 0 0 0 0.83277,-3.06458 v -1.33242 a 6.4011862,6.4011862 0 0 1 -4.16383,1.56559 4.9188647,4.9188647 0 0 1 -4.40255,-2.30953 z m 8.56083,-2.69816 v -7.72806 a 4.2915151,4.2915151 0 0 0 -2.86471,-1.36573 2.4039147,2.4039147 0 0 0 -2.18185,1.43235 8.6885138,8.6885138 0 0 0 -0.7828,4.09721 q 0,2.66485 0.71618,3.93065 a 2.1318781,2.1318781 0 0 0 1.88205,1.2658 4.2304457,4.2304457 0 0 0 3.23113,-1.63222 z"
|
||||||
|
id="path16"
|
||||||
|
style="fill:#4e4e4e;fill-opacity:1;stroke-width:0.555177" />
|
||||||
|
<path
|
||||||
|
class="cls-1"
|
||||||
|
d="m 174.20619,164.67083 h -9.32697 a 5.6405943,5.6405943 0 0 0 0.88273,3.04792 q 0.7828,1.0826 2.74813,1.0826 a 10.120869,10.120869 0 0 0 4.36369,-1.16587 4.3803434,4.3803434 0 0 1 1.19918,2.5316 10.759323,10.759323 0 0 1 -6.41229,1.8987 q -3.74744,0 -5.37966,-2.43167 -1.63222,-2.43167 -1.63222,-6.2957 0,-3.88624 1.79877,-6.2957 a 6.0181143,6.0181143 0 0 1 5.14649,-2.43168 q 3.33106,0 5.14648,2.01529 a 7.3449864,7.3449864 0 0 1 1.79878,5.07987 13.04665,13.04665 0 0 1 -0.33311,2.96464 z m -6.42895,-7.06184 q -2.73146,0 -2.93133,4.13051 h 5.79605 v -0.39973 a 4.7245529,4.7245529 0 0 0 -0.69953,-2.69816 2.4316735,2.4316735 0 0 0 -2.14298,-1.03262 z"
|
||||||
|
id="path18"
|
||||||
|
style="fill:#4e4e4e;fill-opacity:1;stroke-width:0.555177" />
|
||||||
|
<path
|
||||||
|
id="path24-3-6"
|
||||||
|
style="fill:#ffd952;fill-opacity:1;stroke-width:0.555177"
|
||||||
|
d="m 124.80273,70.162462 a 11.0036,11.0036 0 0 0 -4.33203,0.935547 L 76.261719,90.656602 a 1.5989086,1.5989086 0 0 0 -0.837891,2.138672 0.77169547,0.77169547 0 0 0 0.06641,0.177735 l 7.09375,14.021481 h 6.15625 l -0.875,-4.88867 c -0.07217,-0.39418 -0.711263,-3.187532 -1.316406,-5.197264 l 20.691398,6.462894 c 0.27198,1.28839 0.63292,2.49204 1.0625,3.62304 h 33.54883 c 0.36964,-1.13128 0.66138,-2.33705 0.85938,-3.62304 l 20.64648,-6.445316 c -0.60514,2.009734 -1.23639,4.785506 -1.30859,5.179686 l -0.875,4.88867 h 6.15429 l 7.02735,-13.894528 0.0664,-0.126953 0.0684,-0.171875 a 0.10548355,0.10548355 0 0 0 0,-0.04492 1.4878733,1.4878733 0 0 0 0.0664,-0.515625 1.5822533,1.5822533 0 0 0 -0.99414,-1.583985 L 129.35352,71.098009 a 11.0036,11.0036 0 0 0 -4.55079,-0.935547 z" />
|
||||||
|
<path
|
||||||
|
id="path24-3"
|
||||||
|
style="fill:#49c8fa;fill-opacity:1;stroke-width:0.555177"
|
||||||
|
d="M 124.80273,79.416133 A 11.0036,11.0036 0 0 0 120.4707,80.35168 L 76.261719,99.910272 a 1.5989086,1.5989086 0 0 0 -0.837891,2.136718 0.77169547,0.77169547 0 0 0 0.06641,0.17773 l 3.847657,7.60352 h 8.175781 c -0.257897,-1.08856 -0.591943,-2.42953 -0.964844,-3.66797 l 11.744141,3.66797 h 53.371087 l 11.69336,-3.65039 c -0.37193,1.23522 -0.70076,2.56719 -0.95703,3.65039 h 8.17383 l 3.78125,-7.47656 0.0664,-0.12696 0.0684,-0.17187 a 0.10548355,0.10548355 0 0 0 0,-0.0449 1.4878733,1.4878733 0 0 0 0.0664,-0.51563 1.5822533,1.5822533 0 0 0 -0.99414,-1.582028 L 129.35352,80.35168 a 11.0036,11.0036 0 0 0 -4.55079,-0.935547 z" />
|
||||||
|
<path
|
||||||
|
class="cls-2"
|
||||||
|
d="m 174.55595,110.92974 a 1.4878733,1.4878733 0 0 1 -0.0666,0.51631 0.10548355,0.10548355 0 0 1 0,0.0444 l -0.0666,0.17211 v 0 l -0.0666,0.12769 -10.69826,21.15223 c -1.48787,2.93688 -4.22489,2.84806 -3.76409,-0.12214 l 2.15408,-12.02512 c 0.0722,-0.39418 0.70508,-3.17006 1.31022,-5.1798 l -20.64702,6.4456 c -3.24223,21.05785 -30.95109,21.40761 -35.47023,0 l -20.691432,-6.46226 c 0.605143,2.00974 1.243596,4.80228 1.315769,5.19646 l 2.154085,12.02512 c 0.460796,2.9702 -2.276224,3.05902 -3.764098,0.12214 L 75.49024,111.66257 a 0.77169547,0.77169547 0 0 1 -0.06662,-0.17766 1.5989086,1.5989086 0 0 1 0.838317,-2.13743 L 120.47065,89.788613 a 11.0036,11.0036 0 0 1 8.88282,0 l 44.20871,19.558867 a 1.5822533,1.5822533 0 0 1 0.99377,1.58226 z"
|
||||||
|
id="path24"
|
||||||
|
style="stroke-width:0.555177" />
|
||||||
|
<path
|
||||||
|
class="cls-3"
|
||||||
|
d="m 139.0413,114.61611 19.11473,-7.69475 a 0.81055784,0.81055784 0 0 0 0,-1.50453 c -2.2207,-0.92714 -4.96328,-1.99308 -7.65033,-3.10899 -0.49411,-0.20541 -5.17425,3.15341 -5.60173,3.49762 l -8.23882,6.58439 c -1.99309,1.67108 -0.26649,3.28665 2.37615,2.22626 z"
|
||||||
|
id="path26"
|
||||||
|
style="stroke-width:0.555177" />
|
||||||
|
<circle
|
||||||
|
class="cls-3"
|
||||||
|
cx="125.18409"
|
||||||
|
cy="122.13319"
|
||||||
|
r="9.9654207"
|
||||||
|
id="circle28"
|
||||||
|
style="stroke-width:0.555177" />
|
||||||
|
<path
|
||||||
|
d="m 138.33068,100.19817 a 8.327649,8.327649 0 0 1 -2.77589,-0.288688 l -34.78736,-9.388036 a 8.4442361,8.4442361 0 0 1 -2.620433,-1.238044 z"
|
||||||
|
id="path6-0"
|
||||||
|
style="stroke-width:0.555177" />
|
||||||
|
<path
|
||||||
|
class="cls-1"
|
||||||
|
d="m 85.377935,159.27452 5.163143,-0.0333 h 0.06662 q 2.864711,0 2.864711,2.69816 v 8.69407 a 24.849705,24.849705 0 0 1 -8.649651,1.43235 q -4.730105,0 -7.128468,-3.21447 -2.398363,-3.21447 -2.398363,-8.76068 0,-5.55177 2.981299,-8.62745 a 9.7600046,9.7600046 0 0 1 7.29502,-3.08123 13.368653,13.368653 0 0 1 7.811335,2.43167 3.9250986,3.9250986 0 0 1 -0.682867,1.76547 4.7634152,4.7634152 0 0 1 -1.282458,1.33242 9.798867,9.798867 0 0 0 -5.679457,-1.96533 5.3574542,5.3574542 0 0 0 -4.480275,2.04861 q -1.598909,2.03749 -1.598909,6.41229 0,8.22771 6.062529,8.22771 a 16.910679,16.910679 0 0 0 3.697476,-0.43303 v -3.16451 q 0,-1.49898 0.06662,-2.22071 h -2.442777 a 2.2873276,2.2873276 0 0 1 -1.515632,-0.41638 1.6655298,1.6655298 0 0 1 -0.483004,-1.33242 5.7072154,5.7072154 0 0 1 0.333106,-1.79322 z"
|
||||||
|
id="path8-6"
|
||||||
|
style="fill:#4e4e4e;fill-opacity:1;stroke-width:0.555177" />
|
||||||
|
<path
|
||||||
|
class="cls-1"
|
||||||
|
d="m 111.07151,169.73404 a 4.3137222,4.3137222 0 0 1 -0.55518,1.18253 4.0305821,4.0305821 0 0 1 -0.84942,0.94935 3.7640973,3.7640973 0 0 1 -3.05902,-1.95422 6.7453957,6.7453957 0 0 1 -4.76342,2.13188 q -2.564913,0 -3.886233,-1.49898 a 5.1298318,5.1298318 0 0 1 -1.299113,-3.4643 q 0,-2.77588 1.815427,-4.21379 a 7.3338829,7.3338829 0 0 1 4.669039,-1.3935 q 1.53228,0 2.89802,0.13325 v -0.99932 q 0,-2.63154 -2.53161,-2.63154 -1.79877,0 -5.096518,1.19918 a 4.674587,4.674587 0 0 1 -1.110353,-2.96464 18.581761,18.581761 0 0 1 7.217291,-1.49898 5.8682167,5.8682167 0 0 1 4.0639,1.39905 q 1.56559,1.39904 1.56559,4.23044 v 6.79537 q -0.0111,1.83208 0.9216,2.59822 z m -8.36096,-0.83276 a 4.7134493,4.7134493 0 0 0 3.33106,-1.59891 v -2.94244 a 22.368065,22.368065 0 0 0 -2.53161,-0.13324 2.775883,2.775883 0 0 0 -2.06525,0.68842 2.3928111,2.3928111 0 0 0 -0.69953,1.76546 2.3539488,2.3539488 0 0 0 0.55518,1.66553 1.8431863,1.8431863 0 0 0 1.41015,0.55518 z"
|
||||||
|
id="path10-2"
|
||||||
|
style="fill:#4e4e4e;fill-opacity:1;stroke-width:0.555177" />
|
||||||
|
<path
|
||||||
|
class="cls-1"
|
||||||
|
d="m 113.76966,157.00939 a 3.986168,3.986168 0 0 1 0.55518,-1.21583 3.3310596,3.3310596 0 0 1 0.84942,-0.94935 4.1638245,4.1638245 0 0 1 3.51427,2.96464 q 1.33242,-2.96464 4.29707,-2.96464 a 10.215249,10.215249 0 0 1 1.93201,0.23317 7.4782288,7.4782288 0 0 1 -0.99932,3.88624 8.4497879,8.4497879 0 0 0 -1.49897,-0.19987 q -2.03195,0 -3.26444,2.16519 v 10.64829 a 11.575432,11.575432 0 0 1 -2.03195,0.16655 12.769062,12.769062 0 0 1 -2.09857,-0.16655 v -11.15905 q -0.0222,-2.40947 -1.2547,-3.40879 z"
|
||||||
|
id="path12-6"
|
||||||
|
style="fill:#4e4e4e;fill-opacity:1;stroke-width:0.555177" />
|
||||||
|
<path
|
||||||
|
class="cls-1"
|
||||||
|
d="m 140.38483,169.73404 a 4.3137222,4.3137222 0 0 1 -0.58293,1.18253 4.0305821,4.0305821 0 0 1 -0.84942,0.94935 3.7640973,3.7640973 0 0 1 -3.05348,-1.95422 6.7453957,6.7453957 0 0 1 -4.76341,2.13188 q -2.56492,0 -3.88624,-1.49898 a 5.1298318,5.1298318 0 0 1 -1.29911,-3.4643 q 0,-2.77588 1.81543,-4.21379 a 7.3338829,7.3338829 0 0 1 4.64682,-1.4157 q 1.53229,0 2.89803,0.13324 v -0.99932 q 0,-2.63153 -2.53161,-2.63153 -1.79877,0 -5.09652,1.19918 a 4.674587,4.674587 0 0 1 -1.11035,-2.96465 18.581761,18.581761 0 0 1 7.21729,-1.49897 5.8682167,5.8682167 0 0 1 4.0639,1.39904 q 1.56559,1.39905 1.56559,4.23045 v 6.81757 q 0.0333,1.83208 0.96601,2.59822 z m -8.37206,-0.83276 a 4.7134493,4.7134493 0 0 0 3.33106,-1.59891 v -2.94244 a 22.368065,22.368065 0 0 0 -2.53161,-0.13324 2.775883,2.775883 0 0 0 -2.06526,0.69952 2.3928111,2.3928111 0 0 0 -0.69952,1.76546 2.3539488,2.3539488 0 0 0 0.55518,1.66553 1.8431863,1.8431863 0 0 0 1.41015,0.54408 z"
|
||||||
|
id="path14-1"
|
||||||
|
style="fill:#4e4e4e;fill-opacity:1;stroke-width:0.555177" />
|
||||||
|
<path
|
||||||
|
class="cls-1"
|
||||||
|
d="m 144.48203,169.6008 q -1.49897,-2.29843 -1.49897,-6.34567 0,-4.04724 1.8987,-6.34567 a 5.740526,5.740526 0 0 1 4.56355,-2.29843 6.4400486,6.4400486 0 0 1 4.49693,1.66553 3.7696491,3.7696491 0 0 1 2.63154,-1.43235 3.1200925,3.1200925 0 0 1 0.88273,0.93269 3.8862362,3.8862362 0 0 1 0.55518,1.16587 q -0.9327,0.79946 -0.9327,2.86472 v 9.438 q 0,5.29638 -1.73215,7.49488 -1.73215,2.1985 -5.69611,2.22071 a 16.100121,16.100121 0 0 1 -5.9626,-1.11036 4.4802752,4.4802752 0 0 1 1.03263,-3.03126 10.892565,10.892565 0 0 0 4.48028,1.03263 q 2.18184,0 3.0146,-1.11035 a 4.9965894,4.9965894 0 0 0 0.83277,-3.06458 v -1.33242 a 6.4011862,6.4011862 0 0 1 -4.16383,1.56559 4.9188647,4.9188647 0 0 1 -4.40255,-2.30953 z m 8.56083,-2.69816 v -7.72806 a 4.2915151,4.2915151 0 0 0 -2.86471,-1.36573 2.4039147,2.4039147 0 0 0 -2.18185,1.43235 8.6885138,8.6885138 0 0 0 -0.7828,4.09721 q 0,2.66485 0.71618,3.93065 a 2.1318781,2.1318781 0 0 0 1.88205,1.2658 4.2304457,4.2304457 0 0 0 3.23113,-1.63222 z"
|
||||||
|
id="path16-8"
|
||||||
|
style="fill:#4e4e4e;fill-opacity:1;stroke-width:0.555177" />
|
||||||
|
<path
|
||||||
|
class="cls-1"
|
||||||
|
d="m 174.20619,164.67083 h -9.32697 a 5.6405943,5.6405943 0 0 0 0.88273,3.04792 q 0.7828,1.0826 2.74813,1.0826 a 10.120869,10.120869 0 0 0 4.36369,-1.16587 4.3803434,4.3803434 0 0 1 1.19918,2.5316 10.759323,10.759323 0 0 1 -6.41229,1.8987 q -3.74744,0 -5.37966,-2.43167 -1.63222,-2.43167 -1.63222,-6.2957 0,-3.88624 1.79877,-6.2957 a 6.0181143,6.0181143 0 0 1 5.14649,-2.43168 q 3.33106,0 5.14648,2.01529 a 7.3449864,7.3449864 0 0 1 1.79878,5.07987 13.04665,13.04665 0 0 1 -0.33311,2.96464 z m -6.42895,-7.06184 q -2.73146,0 -2.93133,4.13051 h 5.79605 v -0.39973 a 4.7245529,4.7245529 0 0 0 -0.69953,-2.69816 2.4316735,2.4316735 0 0 0 -2.14298,-1.03262 z"
|
||||||
|
id="path18-7"
|
||||||
|
style="fill:#4e4e4e;fill-opacity:1;stroke-width:0.555177" />
|
||||||
|
<path
|
||||||
|
id="path24-3-6-9"
|
||||||
|
style="fill:#ff9329;fill-opacity:1;stroke-width:0.555177"
|
||||||
|
d="m 124.80273,70.162462 a 11.0036,11.0036 0 0 0 -4.33203,0.935547 L 76.261719,90.656602 a 1.5989086,1.5989086 0 0 0 -0.837891,2.138672 0.77169547,0.77169547 0 0 0 0.06641,0.177735 l 7.09375,14.021481 h 6.15625 l -0.875,-4.88867 c -0.07217,-0.39418 -0.711263,-3.187532 -1.316406,-5.197264 l 20.691398,6.462894 c 0.27198,1.28839 0.63292,2.49204 1.0625,3.62304 h 33.54883 c 0.36964,-1.13128 0.66138,-2.33705 0.85938,-3.62304 l 20.64648,-6.445316 c -0.60514,2.009734 -1.23639,4.785506 -1.30859,5.179686 l -0.875,4.88867 h 6.15429 l 7.02735,-13.894528 0.0664,-0.126953 0.0684,-0.171875 a 0.10548355,0.10548355 0 0 0 0,-0.04492 1.4878733,1.4878733 0 0 0 0.0664,-0.515625 1.5822533,1.5822533 0 0 0 -0.99414,-1.583985 L 129.35352,71.098009 a 11.0036,11.0036 0 0 0 -4.55079,-0.935547 z" />
|
||||||
|
<path
|
||||||
|
id="path24-3-2"
|
||||||
|
style="fill:#4e4e4e;fill-opacity:1;stroke-width:0.555177"
|
||||||
|
d="M 124.80273,79.416133 A 11.0036,11.0036 0 0 0 120.4707,80.35168 L 76.261719,99.910272 a 1.5989086,1.5989086 0 0 0 -0.837891,2.136718 0.77169547,0.77169547 0 0 0 0.06641,0.17773 l 3.847657,7.60352 h 8.175781 c -0.257897,-1.08856 -0.591943,-2.42953 -0.964844,-3.66797 l 11.744141,3.66797 h 53.371087 l 11.69336,-3.65039 c -0.37193,1.23522 -0.70076,2.56719 -0.95703,3.65039 h 8.17383 l 3.78125,-7.47656 0.0664,-0.12696 0.0684,-0.17187 a 0.10548355,0.10548355 0 0 0 0,-0.0449 1.4878733,1.4878733 0 0 0 0.0664,-0.51563 1.5822533,1.5822533 0 0 0 -0.99414,-1.582028 L 129.35352,80.35168 a 11.0036,11.0036 0 0 0 -4.55079,-0.935547 z" />
|
||||||
|
<path
|
||||||
|
class="cls-2"
|
||||||
|
d="m 174.55595,110.92974 a 1.4878733,1.4878733 0 0 1 -0.0666,0.51631 0.10548355,0.10548355 0 0 1 0,0.0444 l -0.0666,0.17211 v 0 l -0.0666,0.12769 -10.69826,21.15223 c -1.48787,2.93688 -4.22489,2.84806 -3.76409,-0.12214 l 2.15408,-12.02512 c 0.0722,-0.39418 0.70508,-3.17006 1.31022,-5.1798 l -20.64702,6.4456 c -3.24223,21.05785 -30.95109,21.40761 -35.47023,0 l -20.691432,-6.46226 c 0.605143,2.00974 1.243596,4.80228 1.315769,5.19646 l 2.154085,12.02512 c 0.460796,2.9702 -2.276224,3.05902 -3.764098,0.12214 L 75.49024,111.66257 a 0.77169547,0.77169547 0 0 1 -0.06662,-0.17766 1.5989086,1.5989086 0 0 1 0.838317,-2.13743 L 120.47065,89.788613 a 11.0036,11.0036 0 0 1 8.88282,0 l 44.20871,19.558867 a 1.5822533,1.5822533 0 0 1 0.99377,1.58226 z"
|
||||||
|
id="path24-0"
|
||||||
|
style="fill:#ff9329;fill-opacity:1;stroke-width:0.555177" />
|
||||||
|
<path
|
||||||
|
class="cls-3"
|
||||||
|
d="m 139.0413,114.61611 19.11473,-7.69475 a 0.81055784,0.81055784 0 0 0 0,-1.50453 c -2.2207,-0.92714 -4.96328,-1.99308 -7.65033,-3.10899 -0.49411,-0.20541 -5.17425,3.15341 -5.60173,3.49762 l -8.23882,6.58439 c -1.99309,1.67108 -0.26649,3.28665 2.37615,2.22626 z"
|
||||||
|
id="path26-2"
|
||||||
|
style="fill:#4e4e4e;fill-opacity:1;stroke-width:0.555177" />
|
||||||
|
<circle
|
||||||
|
class="cls-3"
|
||||||
|
cx="125.18409"
|
||||||
|
cy="122.13319"
|
||||||
|
r="9.9654207"
|
||||||
|
id="circle28-3"
|
||||||
|
style="fill:#4e4e4e;fill-opacity:1;stroke-width:0.555177" />
|
||||||
|
</g>
|
||||||
|
</svg>
|
After Width: | Height: | Size: 20 KiB |
BIN
garage.png
Before Width: | Height: | Size: 15 KiB |
119
garage.svg
|
@ -1,119 +0,0 @@
|
||||||
<?xml version="1.0" encoding="UTF-8" standalone="no"?>
|
|
||||||
<svg
|
|
||||||
xmlns:dc="http://purl.org/dc/elements/1.1/"
|
|
||||||
xmlns:cc="http://creativecommons.org/ns#"
|
|
||||||
xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#"
|
|
||||||
xmlns:svg="http://www.w3.org/2000/svg"
|
|
||||||
xmlns="http://www.w3.org/2000/svg"
|
|
||||||
xmlns:sodipodi="http://sodipodi.sourceforge.net/DTD/sodipodi-0.dtd"
|
|
||||||
xmlns:inkscape="http://www.inkscape.org/namespaces/inkscape"
|
|
||||||
width="108.2099mm"
|
|
||||||
height="108.00987mm"
|
|
||||||
viewBox="0 0 108.2099 108.00987"
|
|
||||||
version="1.1"
|
|
||||||
id="svg8"
|
|
||||||
inkscape:version="1.0.1 (3bc2e813f5, 2020-09-07)"
|
|
||||||
sodipodi:docname="garage.svg"
|
|
||||||
inkscape:export-filename="/home/lx/garage.png"
|
|
||||||
inkscape:export-xdpi="96"
|
|
||||||
inkscape:export-ydpi="96">
|
|
||||||
<defs
|
|
||||||
id="defs2" />
|
|
||||||
<sodipodi:namedview
|
|
||||||
id="base"
|
|
||||||
pagecolor="#ffffff"
|
|
||||||
bordercolor="#666666"
|
|
||||||
borderopacity="1.0"
|
|
||||||
inkscape:pageopacity="1"
|
|
||||||
inkscape:pageshadow="2"
|
|
||||||
inkscape:zoom="0.5"
|
|
||||||
inkscape:cx="-212.52783"
|
|
||||||
inkscape:cy="204.9547"
|
|
||||||
inkscape:document-units="mm"
|
|
||||||
inkscape:current-layer="layer1"
|
|
||||||
inkscape:document-rotation="0"
|
|
||||||
showgrid="false"
|
|
||||||
fit-margin-top="20"
|
|
||||||
fit-margin-left="20"
|
|
||||||
fit-margin-right="20"
|
|
||||||
fit-margin-bottom="20"
|
|
||||||
inkscape:window-width="1404"
|
|
||||||
inkscape:window-height="1016"
|
|
||||||
inkscape:window-x="103"
|
|
||||||
inkscape:window-y="27"
|
|
||||||
inkscape:window-maximized="0" />
|
|
||||||
<metadata
|
|
||||||
id="metadata5">
|
|
||||||
<rdf:RDF>
|
|
||||||
<cc:Work
|
|
||||||
rdf:about="">
|
|
||||||
<dc:format>image/svg+xml</dc:format>
|
|
||||||
<dc:type
|
|
||||||
rdf:resource="http://purl.org/dc/dcmitype/StillImage" />
|
|
||||||
<dc:title></dc:title>
|
|
||||||
</cc:Work>
|
|
||||||
</rdf:RDF>
|
|
||||||
</metadata>
|
|
||||||
<g
|
|
||||||
inkscape:label="Layer 1"
|
|
||||||
inkscape:groupmode="layer"
|
|
||||||
id="layer1"
|
|
||||||
transform="translate(-45.667412,-33.028536)">
|
|
||||||
<path
|
|
||||||
style="fill:none;stroke:#000000;stroke-width:2.065;stroke-linecap:butt;stroke-linejoin:miter;stroke-miterlimit:4;stroke-dasharray:none;stroke-opacity:1"
|
|
||||||
d="M 66.78016,73.340623 99.921832,54.219898 132.84481,73.130965 V 120.00591 H 66.701651 Z"
|
|
||||||
id="path124"
|
|
||||||
sodipodi:nodetypes="cccccc" />
|
|
||||||
<g
|
|
||||||
id="g1106-5"
|
|
||||||
transform="matrix(0,0.95201267,-0.95201267,0,194.01664,-65.058377)"
|
|
||||||
style="stroke-width:2.17959;stroke-miterlimit:4;stroke-dasharray:none">
|
|
||||||
<g
|
|
||||||
id="g1061-3"
|
|
||||||
style="stroke-width:2.17959;stroke-miterlimit:4;stroke-dasharray:none">
|
|
||||||
<circle
|
|
||||||
style="fill:none;stroke:#000000;stroke-width:2.17959;stroke-miterlimit:4;stroke-dasharray:none;stroke-opacity:1"
|
|
||||||
id="path956-5"
|
|
||||||
cx="168.8569"
|
|
||||||
cy="92.889587"
|
|
||||||
r="13.125794" />
|
|
||||||
<circle
|
|
||||||
style="fill:none;stroke:#000000;stroke-width:2.17959;stroke-miterlimit:4;stroke-dasharray:none;stroke-opacity:1"
|
|
||||||
id="path958-6"
|
|
||||||
cx="168.77444"
|
|
||||||
cy="92.702293"
|
|
||||||
r="3.0778286" />
|
|
||||||
<path
|
|
||||||
id="path960-2"
|
|
||||||
style="fill:none;stroke:#000000;stroke-width:2.17959;stroke-miterlimit:4;stroke-dasharray:none;stroke-opacity:1"
|
|
||||||
d="m 169.46072,82.84435 c 4.95795,0.336608 8.87296,4.341959 9.09638,9.306301"
|
|
||||||
sodipodi:nodetypes="cc" />
|
|
||||||
</g>
|
|
||||||
<path
|
|
||||||
style="fill:none;stroke:#000000;stroke-width:2.17959;stroke-linecap:butt;stroke-linejoin:miter;stroke-miterlimit:4;stroke-dasharray:none;stroke-opacity:1"
|
|
||||||
d="m 154.67824,112.84018 11.89881,-13.038071 c 1.46407,-1.552664 3.79541,0.878511 2.81832,2.089181 l -10.57965,14.481 c -1.8851,2.02632 -6.10786,-1.06119 -4.13748,-3.53211 z"
|
|
||||||
id="path964-9"
|
|
||||||
sodipodi:nodetypes="ccccc" />
|
|
||||||
<g
|
|
||||||
id="g1071-1"
|
|
||||||
style="stroke-width:2.17959;stroke-miterlimit:4;stroke-dasharray:none" />
|
|
||||||
<g
|
|
||||||
id="g1065-3"
|
|
||||||
style="stroke-width:2.17959;stroke-miterlimit:4;stroke-dasharray:none">
|
|
||||||
<rect
|
|
||||||
style="fill:none;stroke:#000000;stroke-width:2.17959;stroke-miterlimit:4;stroke-dasharray:none;stroke-opacity:1"
|
|
||||||
id="rect949-6"
|
|
||||||
width="35.576611"
|
|
||||||
height="48.507355"
|
|
||||||
x="150.9623"
|
|
||||||
y="74.698929"
|
|
||||||
ry="2.7302756" />
|
|
||||||
<path
|
|
||||||
style="fill:none;stroke:#000000;stroke-width:2.17959;stroke-linecap:butt;stroke-linejoin:miter;stroke-miterlimit:4;stroke-dasharray:none;stroke-opacity:1"
|
|
||||||
d="m 150.76919,106.16944 6.36181,-0.0223 c 2.53845,3.46232 6.29787,4.20243 10.1055,4.40362 l 0.0176,13.09251"
|
|
||||||
id="path1033-0"
|
|
||||||
sodipodi:nodetypes="cccc" />
|
|
||||||
</g>
|
|
||||||
</g>
|
|
||||||
</g>
|
|
||||||
</svg>
|
|
Before Width: | Height: | Size: 4.5 KiB |
|
@ -11,7 +11,7 @@ PATH="${GARAGE_DEBUG}:${GARAGE_RELEASE}:$PATH"
|
||||||
garage bucket create eprouvette
|
garage bucket create eprouvette
|
||||||
KEY_INFO=`garage key new --name opérateur`
|
KEY_INFO=`garage key new --name opérateur`
|
||||||
ACCESS_KEY=`echo $KEY_INFO|grep -Po 'GK[a-f0-9]+'`
|
ACCESS_KEY=`echo $KEY_INFO|grep -Po 'GK[a-f0-9]+'`
|
||||||
SECRET_KEY=`echo $KEY_INFO|grep -Po 'secret_key: "[a-f0-9]+'|grep -Po '[a-f0-9]+$'`
|
SECRET_KEY=`echo $KEY_INFO|grep -Po 'Secret key: [a-f0-9]+'|grep -Po '[a-f0-9]+$'`
|
||||||
garage bucket allow eprouvette --read --write --key $ACCESS_KEY
|
garage bucket allow eprouvette --read --write --key $ACCESS_KEY
|
||||||
echo "$ACCESS_KEY $SECRET_KEY" > /tmp/garage.s3
|
echo "$ACCESS_KEY $SECRET_KEY" > /tmp/garage.s3
|
||||||
|
|
||||||
|
|
|
@ -17,26 +17,25 @@ garage_util = { version = "0.1.1", path = "../util" }
|
||||||
garage_table = { version = "0.1.1", path = "../table" }
|
garage_table = { version = "0.1.1", path = "../table" }
|
||||||
garage_model = { version = "0.1.1", path = "../model" }
|
garage_model = { version = "0.1.1", path = "../model" }
|
||||||
|
|
||||||
err-derive = "0.2.3"
|
err-derive = "0.3"
|
||||||
bytes = "0.4"
|
bytes = "1.0"
|
||||||
hex = "0.3"
|
hex = "0.4"
|
||||||
base64 = "0.13"
|
base64 = "0.13"
|
||||||
log = "0.4"
|
log = "0.4"
|
||||||
chrono = "0.4"
|
chrono = "0.4"
|
||||||
md-5 = "0.9.1"
|
md-5 = "0.9"
|
||||||
sha2 = "0.8"
|
sha2 = "0.9"
|
||||||
hmac = "0.7"
|
hmac = "0.10"
|
||||||
crypto-mac = "0.7"
|
crypto-mac = "0.10"
|
||||||
rand = "0.7"
|
|
||||||
|
|
||||||
futures = "0.3"
|
futures = "0.3"
|
||||||
futures-util = "0.3"
|
futures-util = "0.3"
|
||||||
tokio = { version = "0.2", default-features = false, features = ["rt-core", "rt-threaded", "io-driver", "net", "tcp", "time", "macros", "sync", "signal", "fs"] }
|
tokio = { version = "1.0", default-features = false, features = ["rt", "rt-multi-thread", "io-util", "net", "time", "macros", "sync", "signal", "fs"] }
|
||||||
|
|
||||||
http = "0.2"
|
http = "0.2"
|
||||||
hyper = "^0.13.6"
|
hyper = "0.14"
|
||||||
url = "2.1"
|
url = "2.1"
|
||||||
httpdate = "0.3"
|
httpdate = "0.3"
|
||||||
percent-encoding = "2.1.0"
|
percent-encoding = "2.1.0"
|
||||||
roxmltree = "0.11"
|
roxmltree = "0.14"
|
||||||
http-range = "0.1"
|
http-range = "0.1"
|
||||||
|
|
|
@ -137,7 +137,10 @@ async fn handler_inner(garage: Arc<Garage>, req: Request<Body>) -> Result<Respon
|
||||||
)));
|
)));
|
||||||
}
|
}
|
||||||
let source_key = source_key.ok_or_bad_request("No source key specified")?;
|
let source_key = source_key.ok_or_bad_request("No source key specified")?;
|
||||||
Ok(handle_copy(garage, &bucket, &key, &source_bucket, &source_key).await?)
|
Ok(
|
||||||
|
handle_copy(garage, &req, &bucket, &key, &source_bucket, &source_key)
|
||||||
|
.await?,
|
||||||
|
)
|
||||||
} else {
|
} else {
|
||||||
// PutObject query
|
// PutObject query
|
||||||
Ok(handle_put(garage, req, &bucket, &key, content_sha256).await?)
|
Ok(handle_put(garage, req, &bucket, &key, content_sha256).await?)
|
||||||
|
|
|
@ -33,7 +33,7 @@ pub enum Error {
|
||||||
InvalidBase64(#[error(source)] base64::DecodeError),
|
InvalidBase64(#[error(source)] base64::DecodeError),
|
||||||
|
|
||||||
#[error(display = "Invalid XML: {}", _0)]
|
#[error(display = "Invalid XML: {}", _0)]
|
||||||
InvalidXML(#[error(source)] roxmltree::Error),
|
InvalidXML(String),
|
||||||
|
|
||||||
#[error(display = "Invalid header value: {}", _0)]
|
#[error(display = "Invalid header value: {}", _0)]
|
||||||
InvalidHeader(#[error(source)] hyper::header::ToStrError),
|
InvalidHeader(#[error(source)] hyper::header::ToStrError),
|
||||||
|
@ -45,6 +45,12 @@ pub enum Error {
|
||||||
BadRequest(String),
|
BadRequest(String),
|
||||||
}
|
}
|
||||||
|
|
||||||
|
impl From<roxmltree::Error> for Error {
|
||||||
|
fn from(err: roxmltree::Error) -> Self {
|
||||||
|
Self::InvalidXML(format!("{}", err))
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
impl Error {
|
impl Error {
|
||||||
pub fn http_status_code(&self) -> StatusCode {
|
pub fn http_status_code(&self) -> StatusCode {
|
||||||
match self {
|
match self {
|
||||||
|
|
|
@ -1,11 +1,11 @@
|
||||||
use std::fmt::Write;
|
use std::fmt::Write;
|
||||||
use std::sync::Arc;
|
use std::sync::Arc;
|
||||||
|
|
||||||
use chrono::{SecondsFormat, Utc};
|
use hyper::{Body, Request, Response};
|
||||||
use hyper::{Body, Response};
|
|
||||||
|
|
||||||
use garage_table::*;
|
use garage_table::*;
|
||||||
use garage_util::data::*;
|
use garage_util::data::*;
|
||||||
|
use garage_util::time::*;
|
||||||
|
|
||||||
use garage_model::block_ref_table::*;
|
use garage_model::block_ref_table::*;
|
||||||
use garage_model::garage::Garage;
|
use garage_model::garage::Garage;
|
||||||
|
@ -13,9 +13,11 @@ use garage_model::object_table::*;
|
||||||
use garage_model::version_table::*;
|
use garage_model::version_table::*;
|
||||||
|
|
||||||
use crate::error::*;
|
use crate::error::*;
|
||||||
|
use crate::s3_put::get_headers;
|
||||||
|
|
||||||
pub async fn handle_copy(
|
pub async fn handle_copy(
|
||||||
garage: Arc<Garage>,
|
garage: Arc<Garage>,
|
||||||
|
req: &Request<Body>,
|
||||||
dest_bucket: &str,
|
dest_bucket: &str,
|
||||||
dest_key: &str,
|
dest_key: &str,
|
||||||
source_bucket: &str,
|
source_bucket: &str,
|
||||||
|
@ -41,17 +43,37 @@ pub async fn handle_copy(
|
||||||
};
|
};
|
||||||
|
|
||||||
let new_uuid = gen_uuid();
|
let new_uuid = gen_uuid();
|
||||||
let dest_object_version = ObjectVersion {
|
let new_timestamp = now_msec();
|
||||||
uuid: new_uuid,
|
|
||||||
timestamp: now_msec(),
|
|
||||||
state: ObjectVersionState::Complete(source_last_state.clone()),
|
|
||||||
};
|
|
||||||
|
|
||||||
match &source_last_state {
|
// Implement x-amz-metadata-directive: REPLACE
|
||||||
|
let old_meta = match source_last_state {
|
||||||
ObjectVersionData::DeleteMarker => {
|
ObjectVersionData::DeleteMarker => {
|
||||||
return Err(Error::NotFound);
|
return Err(Error::NotFound);
|
||||||
}
|
}
|
||||||
ObjectVersionData::Inline(_meta, _bytes) => {
|
ObjectVersionData::Inline(meta, _bytes) => meta,
|
||||||
|
ObjectVersionData::FirstBlock(meta, _fbh) => meta,
|
||||||
|
};
|
||||||
|
let new_meta = match req.headers().get("x-amz-metadata-directive") {
|
||||||
|
Some(v) if v == hyper::header::HeaderValue::from_static("REPLACE") => ObjectVersionMeta {
|
||||||
|
headers: get_headers(req)?,
|
||||||
|
size: old_meta.size,
|
||||||
|
etag: old_meta.etag.clone(),
|
||||||
|
},
|
||||||
|
_ => old_meta.clone(),
|
||||||
|
};
|
||||||
|
|
||||||
|
// Save object copy
|
||||||
|
match source_last_state {
|
||||||
|
ObjectVersionData::DeleteMarker => unreachable!(),
|
||||||
|
ObjectVersionData::Inline(_meta, bytes) => {
|
||||||
|
let dest_object_version = ObjectVersion {
|
||||||
|
uuid: new_uuid,
|
||||||
|
timestamp: new_timestamp,
|
||||||
|
state: ObjectVersionState::Complete(ObjectVersionData::Inline(
|
||||||
|
new_meta,
|
||||||
|
bytes.clone(),
|
||||||
|
)),
|
||||||
|
};
|
||||||
let dest_object = Object::new(
|
let dest_object = Object::new(
|
||||||
dest_bucket.to_string(),
|
dest_bucket.to_string(),
|
||||||
dest_key.to_string(),
|
dest_key.to_string(),
|
||||||
|
@ -59,44 +81,84 @@ pub async fn handle_copy(
|
||||||
);
|
);
|
||||||
garage.object_table.insert(&dest_object).await?;
|
garage.object_table.insert(&dest_object).await?;
|
||||||
}
|
}
|
||||||
ObjectVersionData::FirstBlock(_meta, _first_block_hash) => {
|
ObjectVersionData::FirstBlock(_meta, first_block_hash) => {
|
||||||
|
// Get block list from source version
|
||||||
let source_version = garage
|
let source_version = garage
|
||||||
.version_table
|
.version_table
|
||||||
.get(&source_last_v.uuid, &EmptyKey)
|
.get(&source_last_v.uuid, &EmptyKey)
|
||||||
.await?;
|
.await?;
|
||||||
let source_version = source_version.ok_or(Error::NotFound)?;
|
let source_version = source_version.ok_or(Error::NotFound)?;
|
||||||
|
|
||||||
let dest_version = Version::new(
|
// Write an "uploading" marker in Object table
|
||||||
|
// This holds a reference to the object in the Version table
|
||||||
|
// so that it won't be deleted, e.g. by repair_versions.
|
||||||
|
let tmp_dest_object_version = ObjectVersion {
|
||||||
|
uuid: new_uuid,
|
||||||
|
timestamp: new_timestamp,
|
||||||
|
state: ObjectVersionState::Uploading(new_meta.headers.clone()),
|
||||||
|
};
|
||||||
|
let tmp_dest_object = Object::new(
|
||||||
|
dest_bucket.to_string(),
|
||||||
|
dest_key.to_string(),
|
||||||
|
vec![tmp_dest_object_version],
|
||||||
|
);
|
||||||
|
garage.object_table.insert(&tmp_dest_object).await?;
|
||||||
|
|
||||||
|
// Write version in the version table. Even with empty block list,
|
||||||
|
// this means that the BlockRef entries linked to this version cannot be
|
||||||
|
// marked as deleted (they are marked as deleted only if the Version
|
||||||
|
// doesn't exist or is marked as deleted).
|
||||||
|
let mut dest_version = Version::new(
|
||||||
new_uuid,
|
new_uuid,
|
||||||
dest_bucket.to_string(),
|
dest_bucket.to_string(),
|
||||||
dest_key.to_string(),
|
dest_key.to_string(),
|
||||||
false,
|
false,
|
||||||
source_version.blocks().to_vec(),
|
|
||||||
);
|
);
|
||||||
|
garage.version_table.insert(&dest_version).await?;
|
||||||
|
|
||||||
|
// Fill in block list for version and insert block refs
|
||||||
|
for (bk, bv) in source_version.blocks.items().iter() {
|
||||||
|
dest_version.blocks.put(*bk, *bv);
|
||||||
|
}
|
||||||
|
let dest_block_refs = dest_version
|
||||||
|
.blocks
|
||||||
|
.items()
|
||||||
|
.iter()
|
||||||
|
.map(|b| BlockRef {
|
||||||
|
block: b.1.hash,
|
||||||
|
version: new_uuid,
|
||||||
|
deleted: false.into(),
|
||||||
|
})
|
||||||
|
.collect::<Vec<_>>();
|
||||||
|
futures::try_join!(
|
||||||
|
garage.version_table.insert(&dest_version),
|
||||||
|
garage.block_ref_table.insert_many(&dest_block_refs[..]),
|
||||||
|
)?;
|
||||||
|
|
||||||
|
// Insert final object
|
||||||
|
// We do this last because otherwise there is a race condition in the case where
|
||||||
|
// the copy call has the same source and destination (this happens, rclone does
|
||||||
|
// it to update the modification timestamp for instance). If we did this concurrently
|
||||||
|
// with the stuff before, the block's reference counts could be decremented before
|
||||||
|
// they are incremented again for the new version, leading to data being deleted.
|
||||||
|
let dest_object_version = ObjectVersion {
|
||||||
|
uuid: new_uuid,
|
||||||
|
timestamp: new_timestamp,
|
||||||
|
state: ObjectVersionState::Complete(ObjectVersionData::FirstBlock(
|
||||||
|
new_meta,
|
||||||
|
*first_block_hash,
|
||||||
|
)),
|
||||||
|
};
|
||||||
let dest_object = Object::new(
|
let dest_object = Object::new(
|
||||||
dest_bucket.to_string(),
|
dest_bucket.to_string(),
|
||||||
dest_key.to_string(),
|
dest_key.to_string(),
|
||||||
vec![dest_object_version],
|
vec![dest_object_version],
|
||||||
);
|
);
|
||||||
let dest_block_refs = dest_version
|
garage.object_table.insert(&dest_object).await?;
|
||||||
.blocks()
|
|
||||||
.iter()
|
|
||||||
.map(|b| BlockRef {
|
|
||||||
block: b.hash,
|
|
||||||
version: new_uuid,
|
|
||||||
deleted: false,
|
|
||||||
})
|
|
||||||
.collect::<Vec<_>>();
|
|
||||||
futures::try_join!(
|
|
||||||
garage.object_table.insert(&dest_object),
|
|
||||||
garage.version_table.insert(&dest_version),
|
|
||||||
garage.block_ref_table.insert_many(&dest_block_refs[..]),
|
|
||||||
)?;
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
let now = Utc::now();
|
let last_modified = msec_to_rfc3339(new_timestamp);
|
||||||
let last_modified = now.to_rfc3339_opts(SecondsFormat::Secs, true);
|
|
||||||
let mut xml = String::new();
|
let mut xml = String::new();
|
||||||
writeln!(&mut xml, r#"<?xml version="1.0" encoding="UTF-8"?>"#).unwrap();
|
writeln!(&mut xml, r#"<?xml version="1.0" encoding="UTF-8"?>"#).unwrap();
|
||||||
writeln!(&mut xml, r#"<CopyObjectResult>"#).unwrap();
|
writeln!(&mut xml, r#"<CopyObjectResult>"#).unwrap();
|
||||||
|
|
|
@ -4,6 +4,7 @@ use std::sync::Arc;
|
||||||
use hyper::{Body, Request, Response};
|
use hyper::{Body, Request, Response};
|
||||||
|
|
||||||
use garage_util::data::*;
|
use garage_util::data::*;
|
||||||
|
use garage_util::time::*;
|
||||||
|
|
||||||
use garage_model::garage::Garage;
|
use garage_model::garage::Garage;
|
||||||
use garage_model::object_table::*;
|
use garage_model::object_table::*;
|
||||||
|
@ -29,16 +30,16 @@ async fn handle_delete_internal(
|
||||||
_ => true,
|
_ => true,
|
||||||
});
|
});
|
||||||
|
|
||||||
let mut must_delete = None;
|
let mut version_to_delete = None;
|
||||||
let mut timestamp = now_msec();
|
let mut timestamp = now_msec();
|
||||||
for v in interesting_versions {
|
for v in interesting_versions {
|
||||||
if v.timestamp + 1 > timestamp || must_delete.is_none() {
|
if v.timestamp + 1 > timestamp || version_to_delete.is_none() {
|
||||||
must_delete = Some(v.uuid);
|
version_to_delete = Some(v.uuid);
|
||||||
}
|
}
|
||||||
timestamp = std::cmp::max(timestamp, v.timestamp + 1);
|
timestamp = std::cmp::max(timestamp, v.timestamp + 1);
|
||||||
}
|
}
|
||||||
|
|
||||||
let deleted_version = must_delete.ok_or(Error::NotFound)?;
|
let deleted_version = version_to_delete.ok_or(Error::NotFound)?;
|
||||||
|
|
||||||
let version_uuid = gen_uuid();
|
let version_uuid = gen_uuid();
|
||||||
|
|
||||||
|
@ -47,7 +48,7 @@ async fn handle_delete_internal(
|
||||||
key.into(),
|
key.into(),
|
||||||
vec![ObjectVersion {
|
vec![ObjectVersion {
|
||||||
uuid: version_uuid,
|
uuid: version_uuid,
|
||||||
timestamp: now_msec(),
|
timestamp,
|
||||||
state: ObjectVersionState::Complete(ObjectVersionData::DeleteMarker),
|
state: ObjectVersionState::Complete(ObjectVersionData::DeleteMarker),
|
||||||
}],
|
}],
|
||||||
);
|
);
|
||||||
|
|
|
@ -146,9 +146,10 @@ pub async fn handle_get(
|
||||||
let version = version.ok_or(Error::NotFound)?;
|
let version = version.ok_or(Error::NotFound)?;
|
||||||
|
|
||||||
let mut blocks = version
|
let mut blocks = version
|
||||||
.blocks()
|
.blocks
|
||||||
|
.items()
|
||||||
.iter()
|
.iter()
|
||||||
.map(|vb| (vb.hash, None))
|
.map(|(_, vb)| (vb.hash, None))
|
||||||
.collect::<Vec<_>>();
|
.collect::<Vec<_>>();
|
||||||
blocks[0].1 = Some(first_block);
|
blocks[0].1 = Some(first_block);
|
||||||
|
|
||||||
|
@ -219,11 +220,12 @@ pub async fn handle_get_range(
|
||||||
// file (whereas block.offset designates the offset of the block WITHIN THE PART
|
// file (whereas block.offset designates the offset of the block WITHIN THE PART
|
||||||
// block.part_number, which is not the same in the case of a multipart upload)
|
// block.part_number, which is not the same in the case of a multipart upload)
|
||||||
let mut blocks = Vec::with_capacity(std::cmp::min(
|
let mut blocks = Vec::with_capacity(std::cmp::min(
|
||||||
version.blocks().len(),
|
version.blocks.len(),
|
||||||
4 + ((end - begin) / std::cmp::max(version.blocks()[0].size as u64, 1024)) as usize,
|
4 + ((end - begin) / std::cmp::max(version.blocks.items()[0].1.size as u64, 1024))
|
||||||
|
as usize,
|
||||||
));
|
));
|
||||||
let mut true_offset = 0;
|
let mut true_offset = 0;
|
||||||
for b in version.blocks().iter() {
|
for (_, b) in version.blocks.items().iter() {
|
||||||
if true_offset >= end {
|
if true_offset >= end {
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
|
|
@ -2,10 +2,10 @@ use std::collections::{BTreeMap, BTreeSet, HashMap};
|
||||||
use std::fmt::Write;
|
use std::fmt::Write;
|
||||||
use std::sync::Arc;
|
use std::sync::Arc;
|
||||||
|
|
||||||
use chrono::{DateTime, NaiveDateTime, SecondsFormat, Utc};
|
|
||||||
use hyper::{Body, Response};
|
use hyper::{Body, Response};
|
||||||
|
|
||||||
use garage_util::error::Error as GarageError;
|
use garage_util::error::Error as GarageError;
|
||||||
|
use garage_util::time::*;
|
||||||
|
|
||||||
use garage_model::garage::Garage;
|
use garage_model::garage::Garage;
|
||||||
use garage_model::object_table::*;
|
use garage_model::object_table::*;
|
||||||
|
@ -42,7 +42,7 @@ pub fn parse_list_objects_query(
|
||||||
Ok(ListObjectsQuery {
|
Ok(ListObjectsQuery {
|
||||||
is_v2: params.get("list-type").map(|x| x == "2").unwrap_or(false),
|
is_v2: params.get("list-type").map(|x| x == "2").unwrap_or(false),
|
||||||
bucket: bucket.to_string(),
|
bucket: bucket.to_string(),
|
||||||
delimiter: params.get("delimiter").cloned(),
|
delimiter: params.get("delimiter").filter(|x| !x.is_empty()).cloned(),
|
||||||
max_keys: params
|
max_keys: params
|
||||||
.get("max-keys")
|
.get("max-keys")
|
||||||
.map(|x| {
|
.map(|x| {
|
||||||
|
@ -247,9 +247,7 @@ pub async fn handle_list(
|
||||||
}
|
}
|
||||||
|
|
||||||
for (key, info) in result_keys.iter() {
|
for (key, info) in result_keys.iter() {
|
||||||
let last_modif = NaiveDateTime::from_timestamp(info.last_modified as i64 / 1000, 0);
|
let last_modif = msec_to_rfc3339(info.last_modified);
|
||||||
let last_modif = DateTime::<Utc>::from_utc(last_modif, Utc);
|
|
||||||
let last_modif = last_modif.to_rfc3339_opts(SecondsFormat::Millis, true);
|
|
||||||
writeln!(&mut xml, "\t<Contents>").unwrap();
|
writeln!(&mut xml, "\t<Contents>").unwrap();
|
||||||
writeln!(
|
writeln!(
|
||||||
&mut xml,
|
&mut xml,
|
||||||
|
|
|
@ -5,11 +5,12 @@ use std::sync::Arc;
|
||||||
use futures::stream::*;
|
use futures::stream::*;
|
||||||
use hyper::{Body, Request, Response};
|
use hyper::{Body, Request, Response};
|
||||||
use md5::{digest::generic_array::*, Digest as Md5Digest, Md5};
|
use md5::{digest::generic_array::*, Digest as Md5Digest, Md5};
|
||||||
use sha2::{Digest as Sha256Digest, Sha256};
|
use sha2::Sha256;
|
||||||
|
|
||||||
use garage_table::*;
|
use garage_table::*;
|
||||||
use garage_util::data::*;
|
use garage_util::data::*;
|
||||||
use garage_util::error::Error as GarageError;
|
use garage_util::error::Error as GarageError;
|
||||||
|
use garage_util::time::*;
|
||||||
|
|
||||||
use garage_model::block::INLINE_THRESHOLD;
|
use garage_model::block::INLINE_THRESHOLD;
|
||||||
use garage_model::block_ref_table::*;
|
use garage_model::block_ref_table::*;
|
||||||
|
@ -52,14 +53,14 @@ pub async fn handle_put(
|
||||||
if first_block.len() < INLINE_THRESHOLD {
|
if first_block.len() < INLINE_THRESHOLD {
|
||||||
let mut md5sum = Md5::new();
|
let mut md5sum = Md5::new();
|
||||||
md5sum.update(&first_block[..]);
|
md5sum.update(&first_block[..]);
|
||||||
let md5sum_arr = md5sum.finalize();
|
let data_md5sum = md5sum.finalize();
|
||||||
let md5sum_hex = hex::encode(md5sum_arr);
|
let data_md5sum_hex = hex::encode(data_md5sum);
|
||||||
|
|
||||||
let sha256sum_hash = sha256sum(&first_block[..]);
|
let data_sha256sum = sha256sum(&first_block[..]);
|
||||||
|
|
||||||
ensure_checksum_matches(
|
ensure_checksum_matches(
|
||||||
md5sum_arr.as_slice(),
|
data_md5sum.as_slice(),
|
||||||
sha256sum_hash,
|
data_sha256sum,
|
||||||
content_md5.as_deref(),
|
content_md5.as_deref(),
|
||||||
content_sha256,
|
content_sha256,
|
||||||
)?;
|
)?;
|
||||||
|
@ -71,7 +72,7 @@ pub async fn handle_put(
|
||||||
ObjectVersionMeta {
|
ObjectVersionMeta {
|
||||||
headers,
|
headers,
|
||||||
size: first_block.len() as u64,
|
size: first_block.len() as u64,
|
||||||
etag: md5sum_hex.clone(),
|
etag: data_md5sum_hex.clone(),
|
||||||
},
|
},
|
||||||
first_block,
|
first_block,
|
||||||
)),
|
)),
|
||||||
|
@ -80,41 +81,45 @@ pub async fn handle_put(
|
||||||
let object = Object::new(bucket.into(), key.into(), vec![object_version]);
|
let object = Object::new(bucket.into(), key.into(), vec![object_version]);
|
||||||
garage.object_table.insert(&object).await?;
|
garage.object_table.insert(&object).await?;
|
||||||
|
|
||||||
return Ok(put_response(version_uuid, md5sum_hex));
|
return Ok(put_response(version_uuid, data_md5sum_hex));
|
||||||
}
|
}
|
||||||
|
|
||||||
// Write version identifier in object table so that we have a trace
|
// Write version identifier in object table so that we have a trace
|
||||||
// that we are uploading something
|
// that we are uploading something
|
||||||
let mut object_version = ObjectVersion {
|
let mut object_version = ObjectVersion {
|
||||||
uuid: version_uuid,
|
uuid: version_uuid,
|
||||||
timestamp: now_msec(),
|
timestamp: version_timestamp,
|
||||||
state: ObjectVersionState::Uploading(headers.clone()),
|
state: ObjectVersionState::Uploading(headers.clone()),
|
||||||
};
|
};
|
||||||
let object = Object::new(bucket.into(), key.into(), vec![object_version.clone()]);
|
let object = Object::new(bucket.into(), key.into(), vec![object_version.clone()]);
|
||||||
garage.object_table.insert(&object).await?;
|
garage.object_table.insert(&object).await?;
|
||||||
|
|
||||||
// Initialize corresponding entry in version table
|
// Initialize corresponding entry in version table
|
||||||
let version = Version::new(version_uuid, bucket.into(), key.into(), false, vec![]);
|
// Write this entry now, even with empty block list,
|
||||||
let first_block_hash = sha256sum(&first_block[..]);
|
// to prevent block_ref entries from being deleted (they can be deleted
|
||||||
|
// if they reference a version that isn't found in the version table)
|
||||||
|
let version = Version::new(version_uuid, bucket.into(), key.into(), false);
|
||||||
|
garage.version_table.insert(&version).await?;
|
||||||
|
|
||||||
// Transfer data and verify checksum
|
// Transfer data and verify checksum
|
||||||
|
let first_block_hash = blake2sum(&first_block[..]);
|
||||||
let tx_result = read_and_put_blocks(
|
let tx_result = read_and_put_blocks(
|
||||||
&garage,
|
&garage,
|
||||||
version,
|
&version,
|
||||||
1,
|
1,
|
||||||
first_block,
|
first_block,
|
||||||
first_block_hash,
|
first_block_hash,
|
||||||
&mut chunker,
|
&mut chunker,
|
||||||
)
|
)
|
||||||
.await
|
.await
|
||||||
.and_then(|(total_size, md5sum_arr, sha256sum)| {
|
.and_then(|(total_size, data_md5sum, data_sha256sum)| {
|
||||||
ensure_checksum_matches(
|
ensure_checksum_matches(
|
||||||
md5sum_arr.as_slice(),
|
data_md5sum.as_slice(),
|
||||||
sha256sum,
|
data_sha256sum,
|
||||||
content_md5.as_deref(),
|
content_md5.as_deref(),
|
||||||
content_sha256,
|
content_sha256,
|
||||||
)
|
)
|
||||||
.map(|()| (total_size, md5sum_arr))
|
.map(|()| (total_size, data_md5sum))
|
||||||
});
|
});
|
||||||
|
|
||||||
// If something went wrong, clean up
|
// If something went wrong, clean up
|
||||||
|
@ -148,13 +153,13 @@ pub async fn handle_put(
|
||||||
/// Validate MD5 sum against content-md5 header
|
/// Validate MD5 sum against content-md5 header
|
||||||
/// and sha256sum against signed content-sha256
|
/// and sha256sum against signed content-sha256
|
||||||
fn ensure_checksum_matches(
|
fn ensure_checksum_matches(
|
||||||
md5sum: &[u8],
|
data_md5sum: &[u8],
|
||||||
sha256sum: garage_util::data::FixedBytes32,
|
data_sha256sum: garage_util::data::FixedBytes32,
|
||||||
content_md5: Option<&str>,
|
content_md5: Option<&str>,
|
||||||
content_sha256: Option<garage_util::data::FixedBytes32>,
|
content_sha256: Option<garage_util::data::FixedBytes32>,
|
||||||
) -> Result<(), Error> {
|
) -> Result<(), Error> {
|
||||||
if let Some(expected_sha256) = content_sha256 {
|
if let Some(expected_sha256) = content_sha256 {
|
||||||
if expected_sha256 != sha256sum {
|
if expected_sha256 != data_sha256sum {
|
||||||
return Err(Error::BadRequest(format!(
|
return Err(Error::BadRequest(format!(
|
||||||
"Unable to validate x-amz-content-sha256"
|
"Unable to validate x-amz-content-sha256"
|
||||||
)));
|
)));
|
||||||
|
@ -163,7 +168,7 @@ fn ensure_checksum_matches(
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
if let Some(expected_md5) = content_md5 {
|
if let Some(expected_md5) = content_md5 {
|
||||||
if expected_md5.trim_matches('"') != base64::encode(md5sum) {
|
if expected_md5.trim_matches('"') != base64::encode(data_md5sum) {
|
||||||
return Err(Error::BadRequest(format!("Unable to validate content-md5")));
|
return Err(Error::BadRequest(format!("Unable to validate content-md5")));
|
||||||
} else {
|
} else {
|
||||||
trace!("Successfully validated content-md5");
|
trace!("Successfully validated content-md5");
|
||||||
|
@ -173,8 +178,8 @@ fn ensure_checksum_matches(
|
||||||
}
|
}
|
||||||
|
|
||||||
async fn read_and_put_blocks(
|
async fn read_and_put_blocks(
|
||||||
garage: &Arc<Garage>,
|
garage: &Garage,
|
||||||
version: Version,
|
version: &Version,
|
||||||
part_number: u64,
|
part_number: u64,
|
||||||
first_block: Vec<u8>,
|
first_block: Vec<u8>,
|
||||||
first_block_hash: Hash,
|
first_block_hash: Hash,
|
||||||
|
@ -183,11 +188,11 @@ async fn read_and_put_blocks(
|
||||||
let mut md5hasher = Md5::new();
|
let mut md5hasher = Md5::new();
|
||||||
let mut sha256hasher = Sha256::new();
|
let mut sha256hasher = Sha256::new();
|
||||||
md5hasher.update(&first_block[..]);
|
md5hasher.update(&first_block[..]);
|
||||||
sha256hasher.input(&first_block[..]);
|
sha256hasher.update(&first_block[..]);
|
||||||
|
|
||||||
let mut next_offset = first_block.len();
|
let mut next_offset = first_block.len();
|
||||||
let mut put_curr_version_block = put_block_meta(
|
let mut put_curr_version_block = put_block_meta(
|
||||||
garage.clone(),
|
&garage,
|
||||||
&version,
|
&version,
|
||||||
part_number,
|
part_number,
|
||||||
0,
|
0,
|
||||||
|
@ -203,11 +208,11 @@ async fn read_and_put_blocks(
|
||||||
futures::try_join!(put_curr_block, put_curr_version_block, chunker.next())?;
|
futures::try_join!(put_curr_block, put_curr_version_block, chunker.next())?;
|
||||||
if let Some(block) = next_block {
|
if let Some(block) = next_block {
|
||||||
md5hasher.update(&block[..]);
|
md5hasher.update(&block[..]);
|
||||||
sha256hasher.input(&block[..]);
|
sha256hasher.update(&block[..]);
|
||||||
let block_hash = sha256sum(&block[..]);
|
let block_hash = blake2sum(&block[..]);
|
||||||
let block_len = block.len();
|
let block_len = block.len();
|
||||||
put_curr_version_block = put_block_meta(
|
put_curr_version_block = put_block_meta(
|
||||||
garage.clone(),
|
&garage,
|
||||||
&version,
|
&version,
|
||||||
part_number,
|
part_number,
|
||||||
next_offset as u64,
|
next_offset as u64,
|
||||||
|
@ -222,39 +227,35 @@ async fn read_and_put_blocks(
|
||||||
}
|
}
|
||||||
|
|
||||||
let total_size = next_offset as u64;
|
let total_size = next_offset as u64;
|
||||||
let md5sum_arr = md5hasher.finalize();
|
let data_md5sum = md5hasher.finalize();
|
||||||
|
|
||||||
let sha256sum_arr = sha256hasher.result();
|
let data_sha256sum = sha256hasher.finalize();
|
||||||
let mut hash = [0u8; 32];
|
let data_sha256sum = Hash::try_from(&data_sha256sum[..]).unwrap();
|
||||||
hash.copy_from_slice(&sha256sum_arr[..]);
|
|
||||||
let sha256sum_arr = Hash::from(hash);
|
|
||||||
|
|
||||||
Ok((total_size, md5sum_arr, sha256sum_arr))
|
Ok((total_size, data_md5sum, data_sha256sum))
|
||||||
}
|
}
|
||||||
|
|
||||||
async fn put_block_meta(
|
async fn put_block_meta(
|
||||||
garage: Arc<Garage>,
|
garage: &Garage,
|
||||||
version: &Version,
|
version: &Version,
|
||||||
part_number: u64,
|
part_number: u64,
|
||||||
offset: u64,
|
offset: u64,
|
||||||
hash: Hash,
|
hash: Hash,
|
||||||
size: u64,
|
size: u64,
|
||||||
) -> Result<(), GarageError> {
|
) -> Result<(), GarageError> {
|
||||||
// TODO: don't clone, restart from empty block list ??
|
|
||||||
let mut version = version.clone();
|
let mut version = version.clone();
|
||||||
version
|
version.blocks.put(
|
||||||
.add_block(VersionBlock {
|
VersionBlockKey {
|
||||||
part_number,
|
part_number,
|
||||||
offset,
|
offset,
|
||||||
hash,
|
},
|
||||||
size,
|
VersionBlock { hash, size },
|
||||||
})
|
);
|
||||||
.unwrap();
|
|
||||||
|
|
||||||
let block_ref = BlockRef {
|
let block_ref = BlockRef {
|
||||||
block: hash,
|
block: hash,
|
||||||
version: version.uuid,
|
version: version.uuid,
|
||||||
deleted: false,
|
deleted: false.into(),
|
||||||
};
|
};
|
||||||
|
|
||||||
futures::try_join!(
|
futures::try_join!(
|
||||||
|
@ -319,6 +320,7 @@ pub async fn handle_create_multipart_upload(
|
||||||
let version_uuid = gen_uuid();
|
let version_uuid = gen_uuid();
|
||||||
let headers = get_headers(req)?;
|
let headers = get_headers(req)?;
|
||||||
|
|
||||||
|
// Create object in object table
|
||||||
let object_version = ObjectVersion {
|
let object_version = ObjectVersion {
|
||||||
uuid: version_uuid,
|
uuid: version_uuid,
|
||||||
timestamp: now_msec(),
|
timestamp: now_msec(),
|
||||||
|
@ -327,6 +329,14 @@ pub async fn handle_create_multipart_upload(
|
||||||
let object = Object::new(bucket.to_string(), key.to_string(), vec![object_version]);
|
let object = Object::new(bucket.to_string(), key.to_string(), vec![object_version]);
|
||||||
garage.object_table.insert(&object).await?;
|
garage.object_table.insert(&object).await?;
|
||||||
|
|
||||||
|
// Insert empty version so that block_ref entries refer to something
|
||||||
|
// (they are inserted concurrently with blocks in the version table, so
|
||||||
|
// there is the possibility that they are inserted before the version table entry
|
||||||
|
// is created, in which case it is allowed to delete them, e.g. in repair_*)
|
||||||
|
let version = Version::new(version_uuid, bucket.into(), key.into(), false);
|
||||||
|
garage.version_table.insert(&version).await?;
|
||||||
|
|
||||||
|
// Send success response
|
||||||
let mut xml = String::new();
|
let mut xml = String::new();
|
||||||
writeln!(&mut xml, r#"<?xml version="1.0" encoding="UTF-8"?>"#).unwrap();
|
writeln!(&mut xml, r#"<?xml version="1.0" encoding="UTF-8"?>"#).unwrap();
|
||||||
writeln!(
|
writeln!(
|
||||||
|
@ -389,11 +399,11 @@ pub async fn handle_put_part(
|
||||||
}
|
}
|
||||||
|
|
||||||
// Copy block to store
|
// Copy block to store
|
||||||
let version = Version::new(version_uuid, bucket, key, false, vec![]);
|
let version = Version::new(version_uuid, bucket, key, false);
|
||||||
let first_block_hash = sha256sum(&first_block[..]);
|
let first_block_hash = blake2sum(&first_block[..]);
|
||||||
let (_, md5sum_arr, sha256sum) = read_and_put_blocks(
|
let (_, data_md5sum, data_sha256sum) = read_and_put_blocks(
|
||||||
&garage,
|
&garage,
|
||||||
version,
|
&version,
|
||||||
part_number,
|
part_number,
|
||||||
first_block,
|
first_block,
|
||||||
first_block_hash,
|
first_block_hash,
|
||||||
|
@ -401,15 +411,24 @@ pub async fn handle_put_part(
|
||||||
)
|
)
|
||||||
.await?;
|
.await?;
|
||||||
|
|
||||||
|
// Verify that checksums match
|
||||||
ensure_checksum_matches(
|
ensure_checksum_matches(
|
||||||
md5sum_arr.as_slice(),
|
data_md5sum.as_slice(),
|
||||||
sha256sum,
|
data_sha256sum,
|
||||||
content_md5.as_deref(),
|
content_md5.as_deref(),
|
||||||
content_sha256,
|
content_sha256,
|
||||||
)?;
|
)?;
|
||||||
|
|
||||||
|
// Store part etag in version
|
||||||
|
let data_md5sum_hex = hex::encode(data_md5sum);
|
||||||
|
let mut version = version;
|
||||||
|
version
|
||||||
|
.parts_etags
|
||||||
|
.put(part_number, data_md5sum_hex.clone());
|
||||||
|
garage.version_table.insert(&version).await?;
|
||||||
|
|
||||||
let response = Response::builder()
|
let response = Response::builder()
|
||||||
.header("ETag", format!("\"{}\"", hex::encode(md5sum_arr)))
|
.header("ETag", format!("\"{}\"", data_md5sum_hex))
|
||||||
.body(Body::from(vec![]))
|
.body(Body::from(vec![]))
|
||||||
.unwrap();
|
.unwrap();
|
||||||
Ok(response)
|
Ok(response)
|
||||||
|
@ -444,17 +463,15 @@ pub async fn handle_complete_multipart_upload(
|
||||||
)?;
|
)?;
|
||||||
|
|
||||||
let object = object.ok_or(Error::BadRequest(format!("Object not found")))?;
|
let object = object.ok_or(Error::BadRequest(format!("Object not found")))?;
|
||||||
let object_version = object
|
let mut object_version = object
|
||||||
.versions()
|
.versions()
|
||||||
.iter()
|
.iter()
|
||||||
.find(|v| v.uuid == version_uuid && v.is_uploading());
|
.find(|v| v.uuid == version_uuid && v.is_uploading())
|
||||||
let mut object_version = match object_version {
|
.cloned()
|
||||||
None => return Err(Error::NotFound),
|
.ok_or(Error::BadRequest(format!("Version not found")))?;
|
||||||
Some(x) => x.clone(),
|
|
||||||
};
|
|
||||||
|
|
||||||
let version = version.ok_or(Error::BadRequest(format!("Version not found")))?;
|
let version = version.ok_or(Error::BadRequest(format!("Version not found")))?;
|
||||||
if version.blocks().len() == 0 {
|
if version.blocks.len() == 0 {
|
||||||
return Err(Error::BadRequest(format!("No data was uploaded")));
|
return Err(Error::BadRequest(format!("No data was uploaded")));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -464,53 +481,50 @@ pub async fn handle_complete_multipart_upload(
|
||||||
};
|
};
|
||||||
|
|
||||||
// Check that the list of parts they gave us corresponds to the parts we have here
|
// Check that the list of parts they gave us corresponds to the parts we have here
|
||||||
// TODO: check MD5 sum of all uploaded parts? but that would mean we have to store them somewhere...
|
debug!("Expected parts from request: {:?}", body_list_of_parts);
|
||||||
let mut parts = version
|
debug!("Parts stored in version: {:?}", version.parts_etags.items());
|
||||||
.blocks()
|
let parts = version
|
||||||
|
.parts_etags
|
||||||
|
.items()
|
||||||
.iter()
|
.iter()
|
||||||
.map(|x| x.part_number)
|
.map(|pair| (&pair.0, &pair.1));
|
||||||
.collect::<Vec<_>>();
|
|
||||||
parts.dedup();
|
|
||||||
let same_parts = body_list_of_parts
|
let same_parts = body_list_of_parts
|
||||||
.iter()
|
.iter()
|
||||||
.map(|x| &x.part_number)
|
.map(|x| (&x.part_number, &x.etag))
|
||||||
.eq(parts.iter());
|
.eq(parts);
|
||||||
if !same_parts {
|
if !same_parts {
|
||||||
return Err(Error::BadRequest(format!("We don't have the same parts")));
|
return Err(Error::BadRequest(format!("We don't have the same parts")));
|
||||||
}
|
}
|
||||||
|
|
||||||
// ETag calculation: we produce ETags that have the same form as
|
// Calculate etag of final object
|
||||||
// those of S3 multipart uploads, but we don't use their actual
|
// To understand how etags are calculated, read more here:
|
||||||
// calculation for the first part (we use random bytes). This
|
// https://teppen.io/2018/06/23/aws_s3_etags/
|
||||||
// shouldn't impact compatibility as the S3 docs specify that
|
let num_parts = version.blocks.items().last().unwrap().0.part_number
|
||||||
// the ETag is an opaque value in case of a multipart upload.
|
- version.blocks.items().first().unwrap().0.part_number
|
||||||
// See also: https://teppen.io/2018/06/23/aws_s3_etags/
|
|
||||||
let num_parts = version.blocks().last().unwrap().part_number
|
|
||||||
- version.blocks().first().unwrap().part_number
|
|
||||||
+ 1;
|
+ 1;
|
||||||
let etag = format!(
|
let mut etag_md5_hasher = Md5::new();
|
||||||
"{}-{}",
|
for (_, etag) in version.parts_etags.items().iter() {
|
||||||
hex::encode(&rand::random::<[u8; 16]>()[..]),
|
etag_md5_hasher.update(etag.as_bytes());
|
||||||
num_parts
|
}
|
||||||
);
|
let etag = format!("{}-{}", hex::encode(etag_md5_hasher.finalize()), num_parts);
|
||||||
|
|
||||||
let total_size = version
|
// Calculate total size of final object
|
||||||
.blocks()
|
let total_size = version.blocks.items().iter().map(|x| x.1.size).sum();
|
||||||
.iter()
|
|
||||||
.map(|x| x.size)
|
// Write final object version
|
||||||
.fold(0, |x, y| x + y);
|
|
||||||
object_version.state = ObjectVersionState::Complete(ObjectVersionData::FirstBlock(
|
object_version.state = ObjectVersionState::Complete(ObjectVersionData::FirstBlock(
|
||||||
ObjectVersionMeta {
|
ObjectVersionMeta {
|
||||||
headers,
|
headers,
|
||||||
size: total_size,
|
size: total_size,
|
||||||
etag: etag,
|
etag,
|
||||||
},
|
},
|
||||||
version.blocks()[0].hash,
|
version.blocks.items()[0].1.hash,
|
||||||
));
|
));
|
||||||
|
|
||||||
let final_object = Object::new(bucket.clone(), key.clone(), vec![object_version]);
|
let final_object = Object::new(bucket.clone(), key.clone(), vec![object_version]);
|
||||||
garage.object_table.insert(&final_object).await?;
|
garage.object_table.insert(&final_object).await?;
|
||||||
|
|
||||||
|
// Send response saying ok we're done
|
||||||
let mut xml = String::new();
|
let mut xml = String::new();
|
||||||
writeln!(&mut xml, r#"<?xml version="1.0" encoding="UTF-8"?>"#).unwrap();
|
writeln!(&mut xml, r#"<?xml version="1.0" encoding="UTF-8"?>"#).unwrap();
|
||||||
writeln!(
|
writeln!(
|
||||||
|
@ -570,17 +584,19 @@ fn get_mime_type(req: &Request<Body>) -> Result<String, Error> {
|
||||||
.to_string())
|
.to_string())
|
||||||
}
|
}
|
||||||
|
|
||||||
fn get_headers(req: &Request<Body>) -> Result<ObjectVersionHeaders, Error> {
|
pub(crate) fn get_headers(req: &Request<Body>) -> Result<ObjectVersionHeaders, Error> {
|
||||||
let content_type = get_mime_type(req)?;
|
let content_type = get_mime_type(req)?;
|
||||||
let other_headers = vec![
|
let mut other = BTreeMap::new();
|
||||||
|
|
||||||
|
// Preserve standard headers
|
||||||
|
let standard_header = vec![
|
||||||
hyper::header::CACHE_CONTROL,
|
hyper::header::CACHE_CONTROL,
|
||||||
hyper::header::CONTENT_DISPOSITION,
|
hyper::header::CONTENT_DISPOSITION,
|
||||||
hyper::header::CONTENT_ENCODING,
|
hyper::header::CONTENT_ENCODING,
|
||||||
hyper::header::CONTENT_LANGUAGE,
|
hyper::header::CONTENT_LANGUAGE,
|
||||||
hyper::header::EXPIRES,
|
hyper::header::EXPIRES,
|
||||||
];
|
];
|
||||||
let mut other = BTreeMap::new();
|
for h in standard_header.iter() {
|
||||||
for h in other_headers.iter() {
|
|
||||||
if let Some(v) = req.headers().get(h) {
|
if let Some(v) = req.headers().get(h) {
|
||||||
match v.to_str() {
|
match v.to_str() {
|
||||||
Ok(v_str) => {
|
Ok(v_str) => {
|
||||||
|
@ -592,6 +608,21 @@ fn get_headers(req: &Request<Body>) -> Result<ObjectVersionHeaders, Error> {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Preserve x-amz-meta- headers
|
||||||
|
for (k, v) in req.headers().iter() {
|
||||||
|
if k.as_str().starts_with("x-amz-meta-") {
|
||||||
|
match v.to_str() {
|
||||||
|
Ok(v_str) => {
|
||||||
|
other.insert(k.to_string(), v_str.to_string());
|
||||||
|
}
|
||||||
|
Err(e) => {
|
||||||
|
warn!("Discarding header {}, error in .to_str(): {}", k, e);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
Ok(ObjectVersionHeaders {
|
Ok(ObjectVersionHeaders {
|
||||||
content_type,
|
content_type,
|
||||||
other,
|
other,
|
||||||
|
|
|
@ -1,7 +1,7 @@
|
||||||
use std::collections::HashMap;
|
use std::collections::HashMap;
|
||||||
|
|
||||||
use chrono::{DateTime, Duration, NaiveDateTime, Utc};
|
use chrono::{DateTime, Duration, NaiveDateTime, Utc};
|
||||||
use hmac::{Hmac, Mac};
|
use hmac::{Hmac, Mac, NewMac};
|
||||||
use hyper::{Body, Method, Request};
|
use hyper::{Body, Method, Request};
|
||||||
use sha2::{Digest, Sha256};
|
use sha2::{Digest, Sha256};
|
||||||
|
|
||||||
|
@ -91,8 +91,8 @@ pub async fn check_signature(
|
||||||
"s3",
|
"s3",
|
||||||
)
|
)
|
||||||
.ok_or_internal_error("Unable to build signing HMAC")?;
|
.ok_or_internal_error("Unable to build signing HMAC")?;
|
||||||
hmac.input(string_to_sign.as_bytes());
|
hmac.update(string_to_sign.as_bytes());
|
||||||
let signature = hex::encode(hmac.result().code());
|
let signature = hex::encode(hmac.finalize().into_bytes());
|
||||||
|
|
||||||
if authorization.signature != signature {
|
if authorization.signature != signature {
|
||||||
trace!("Canonical request: ``{}``", canonical_request);
|
trace!("Canonical request: ``{}``", canonical_request);
|
||||||
|
@ -106,12 +106,10 @@ pub async fn check_signature(
|
||||||
} else {
|
} else {
|
||||||
let bytes = hex::decode(authorization.content_sha256)
|
let bytes = hex::decode(authorization.content_sha256)
|
||||||
.ok_or_bad_request("Invalid content sha256 hash")?;
|
.ok_or_bad_request("Invalid content sha256 hash")?;
|
||||||
let mut hash = [0u8; 32];
|
Some(
|
||||||
if bytes.len() != 32 {
|
Hash::try_from(&bytes[..])
|
||||||
return Err(Error::BadRequest(format!("Invalid content sha256 hash")));
|
.ok_or(Error::BadRequest(format!("Invalid content sha256 hash")))?,
|
||||||
}
|
)
|
||||||
hash.copy_from_slice(&bytes[..]);
|
|
||||||
Some(Hash::from(hash))
|
|
||||||
};
|
};
|
||||||
|
|
||||||
Ok((key, content_sha256))
|
Ok((key, content_sha256))
|
||||||
|
@ -220,12 +218,12 @@ fn parse_credential(cred: &str) -> Result<(String, String), Error> {
|
||||||
|
|
||||||
fn string_to_sign(datetime: &DateTime<Utc>, scope_string: &str, canonical_req: &str) -> String {
|
fn string_to_sign(datetime: &DateTime<Utc>, scope_string: &str, canonical_req: &str) -> String {
|
||||||
let mut hasher = Sha256::default();
|
let mut hasher = Sha256::default();
|
||||||
hasher.input(canonical_req.as_bytes());
|
hasher.update(canonical_req.as_bytes());
|
||||||
[
|
[
|
||||||
"AWS4-HMAC-SHA256",
|
"AWS4-HMAC-SHA256",
|
||||||
&datetime.format(LONG_DATETIME).to_string(),
|
&datetime.format(LONG_DATETIME).to_string(),
|
||||||
scope_string,
|
scope_string,
|
||||||
&hex::encode(hasher.result().as_slice()),
|
&hex::encode(hasher.finalize().as_slice()),
|
||||||
]
|
]
|
||||||
.join("\n")
|
.join("\n")
|
||||||
}
|
}
|
||||||
|
@ -238,14 +236,14 @@ fn signing_hmac(
|
||||||
) -> Result<HmacSha256, crypto_mac::InvalidKeyLength> {
|
) -> Result<HmacSha256, crypto_mac::InvalidKeyLength> {
|
||||||
let secret = String::from("AWS4") + secret_key;
|
let secret = String::from("AWS4") + secret_key;
|
||||||
let mut date_hmac = HmacSha256::new_varkey(secret.as_bytes())?;
|
let mut date_hmac = HmacSha256::new_varkey(secret.as_bytes())?;
|
||||||
date_hmac.input(datetime.format(SHORT_DATE).to_string().as_bytes());
|
date_hmac.update(datetime.format(SHORT_DATE).to_string().as_bytes());
|
||||||
let mut region_hmac = HmacSha256::new_varkey(&date_hmac.result().code())?;
|
let mut region_hmac = HmacSha256::new_varkey(&date_hmac.finalize().into_bytes())?;
|
||||||
region_hmac.input(region.as_bytes());
|
region_hmac.update(region.as_bytes());
|
||||||
let mut service_hmac = HmacSha256::new_varkey(&region_hmac.result().code())?;
|
let mut service_hmac = HmacSha256::new_varkey(&region_hmac.finalize().into_bytes())?;
|
||||||
service_hmac.input(service.as_bytes());
|
service_hmac.update(service.as_bytes());
|
||||||
let mut signing_hmac = HmacSha256::new_varkey(&service_hmac.result().code())?;
|
let mut signing_hmac = HmacSha256::new_varkey(&service_hmac.finalize().into_bytes())?;
|
||||||
signing_hmac.input(b"aws4_request");
|
signing_hmac.update(b"aws4_request");
|
||||||
let hmac = HmacSha256::new_varkey(&signing_hmac.result().code())?;
|
let hmac = HmacSha256::new_varkey(&signing_hmac.finalize().into_bytes())?;
|
||||||
Ok(hmac)
|
Ok(hmac)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -21,21 +21,20 @@ garage_model = { version = "0.1.1", path = "../model" }
|
||||||
garage_api = { version = "0.1.1", path = "../api" }
|
garage_api = { version = "0.1.1", path = "../api" }
|
||||||
garage_web = { version = "0.1.1", path = "../web" }
|
garage_web = { version = "0.1.1", path = "../web" }
|
||||||
|
|
||||||
bytes = "0.4"
|
bytes = "1.0"
|
||||||
rand = "0.7"
|
rand = "0.8"
|
||||||
hex = "0.3"
|
hex = "0.4"
|
||||||
sha2 = "0.8"
|
|
||||||
log = "0.4"
|
log = "0.4"
|
||||||
pretty_env_logger = "0.4"
|
pretty_env_logger = "0.4"
|
||||||
|
git-version = "0.3.4"
|
||||||
|
|
||||||
sled = "0.34"
|
sled = "0.34"
|
||||||
old_sled = { package = "sled", version = "0.31" }
|
|
||||||
|
|
||||||
structopt = { version = "0.3", default-features = false }
|
structopt = { version = "0.3", default-features = false }
|
||||||
toml = "0.5"
|
toml = "0.5"
|
||||||
rmp-serde = "0.14.3"
|
rmp-serde = "0.15"
|
||||||
serde = { version = "1.0", default-features = false, features = ["derive", "rc"] }
|
serde = { version = "1.0", default-features = false, features = ["derive", "rc"] }
|
||||||
|
|
||||||
futures = "0.3"
|
futures = "0.3"
|
||||||
futures-util = "0.3"
|
futures-util = "0.3"
|
||||||
tokio = { version = "0.2", default-features = false, features = ["rt-core", "rt-threaded", "io-driver", "net", "tcp", "time", "macros", "sync", "signal", "fs"] }
|
tokio = { version = "1.0", default-features = false, features = ["rt", "rt-multi-thread", "io-util", "net", "time", "macros", "sync", "signal", "fs"] }
|
||||||
|
|
|
@ -1,3 +1,5 @@
|
||||||
|
use std::collections::HashMap;
|
||||||
|
use std::fmt::Write;
|
||||||
use std::sync::Arc;
|
use std::sync::Arc;
|
||||||
|
|
||||||
use serde::{Deserialize, Serialize};
|
use serde::{Deserialize, Serialize};
|
||||||
|
@ -5,6 +7,7 @@ use serde::{Deserialize, Serialize};
|
||||||
use garage_util::error::Error;
|
use garage_util::error::Error;
|
||||||
|
|
||||||
use garage_table::crdt::CRDT;
|
use garage_table::crdt::CRDT;
|
||||||
|
use garage_table::replication::*;
|
||||||
use garage_table::*;
|
use garage_table::*;
|
||||||
|
|
||||||
use garage_rpc::rpc_client::*;
|
use garage_rpc::rpc_client::*;
|
||||||
|
@ -14,6 +17,7 @@ use garage_model::bucket_table::*;
|
||||||
use garage_model::garage::Garage;
|
use garage_model::garage::Garage;
|
||||||
use garage_model::key_table::*;
|
use garage_model::key_table::*;
|
||||||
|
|
||||||
|
use crate::cli::*;
|
||||||
use crate::repair::Repair;
|
use crate::repair::Repair;
|
||||||
use crate::*;
|
use crate::*;
|
||||||
|
|
||||||
|
@ -25,6 +29,7 @@ pub enum AdminRPC {
|
||||||
BucketOperation(BucketOperation),
|
BucketOperation(BucketOperation),
|
||||||
KeyOperation(KeyOperation),
|
KeyOperation(KeyOperation),
|
||||||
LaunchRepair(RepairOpt),
|
LaunchRepair(RepairOpt),
|
||||||
|
Stats(StatsOpt),
|
||||||
|
|
||||||
// Replies
|
// Replies
|
||||||
Ok(String),
|
Ok(String),
|
||||||
|
@ -55,6 +60,7 @@ impl AdminRpcHandler {
|
||||||
AdminRPC::BucketOperation(bo) => self2.handle_bucket_cmd(bo).await,
|
AdminRPC::BucketOperation(bo) => self2.handle_bucket_cmd(bo).await,
|
||||||
AdminRPC::KeyOperation(ko) => self2.handle_key_cmd(ko).await,
|
AdminRPC::KeyOperation(ko) => self2.handle_key_cmd(ko).await,
|
||||||
AdminRPC::LaunchRepair(opt) => self2.handle_launch_repair(opt).await,
|
AdminRPC::LaunchRepair(opt) => self2.handle_launch_repair(opt).await,
|
||||||
|
AdminRPC::Stats(opt) => self2.handle_stats(opt).await,
|
||||||
_ => Err(Error::BadRPC(format!("Invalid RPC"))),
|
_ => Err(Error::BadRPC(format!("Invalid RPC"))),
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -116,7 +122,7 @@ impl AdminRpcHandler {
|
||||||
for (key_id, _, _) in bucket.authorized_keys() {
|
for (key_id, _, _) in bucket.authorized_keys() {
|
||||||
if let Some(key) = self.garage.key_table.get(&EmptyKey, key_id).await? {
|
if let Some(key) = self.garage.key_table.get(&EmptyKey, key_id).await? {
|
||||||
if !key.deleted.get() {
|
if !key.deleted.get() {
|
||||||
self.update_key_bucket(key, &bucket.name, false, false)
|
self.update_key_bucket(&key, &bucket.name, false, false)
|
||||||
.await?;
|
.await?;
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
|
@ -128,31 +134,31 @@ impl AdminRpcHandler {
|
||||||
Ok(AdminRPC::Ok(format!("Bucket {} was deleted.", query.name)))
|
Ok(AdminRPC::Ok(format!("Bucket {} was deleted.", query.name)))
|
||||||
}
|
}
|
||||||
BucketOperation::Allow(query) => {
|
BucketOperation::Allow(query) => {
|
||||||
let key = self.get_existing_key(&query.key_id).await?;
|
let key = self.get_existing_key(&query.key_pattern).await?;
|
||||||
let bucket = self.get_existing_bucket(&query.bucket).await?;
|
let bucket = self.get_existing_bucket(&query.bucket).await?;
|
||||||
let allow_read = query.read || key.allow_read(&query.bucket);
|
let allow_read = query.read || key.allow_read(&query.bucket);
|
||||||
let allow_write = query.write || key.allow_write(&query.bucket);
|
let allow_write = query.write || key.allow_write(&query.bucket);
|
||||||
self.update_key_bucket(key, &query.bucket, allow_read, allow_write)
|
self.update_key_bucket(&key, &query.bucket, allow_read, allow_write)
|
||||||
.await?;
|
.await?;
|
||||||
self.update_bucket_key(bucket, &query.key_id, allow_read, allow_write)
|
self.update_bucket_key(bucket, &key.key_id, allow_read, allow_write)
|
||||||
.await?;
|
.await?;
|
||||||
Ok(AdminRPC::Ok(format!(
|
Ok(AdminRPC::Ok(format!(
|
||||||
"New permissions for {} on {}: read {}, write {}.",
|
"New permissions for {} on {}: read {}, write {}.",
|
||||||
&query.key_id, &query.bucket, allow_read, allow_write
|
&key.key_id, &query.bucket, allow_read, allow_write
|
||||||
)))
|
)))
|
||||||
}
|
}
|
||||||
BucketOperation::Deny(query) => {
|
BucketOperation::Deny(query) => {
|
||||||
let key = self.get_existing_key(&query.key_id).await?;
|
let key = self.get_existing_key(&query.key_pattern).await?;
|
||||||
let bucket = self.get_existing_bucket(&query.bucket).await?;
|
let bucket = self.get_existing_bucket(&query.bucket).await?;
|
||||||
let allow_read = !query.read && key.allow_read(&query.bucket);
|
let allow_read = !query.read && key.allow_read(&query.bucket);
|
||||||
let allow_write = !query.write && key.allow_write(&query.bucket);
|
let allow_write = !query.write && key.allow_write(&query.bucket);
|
||||||
self.update_key_bucket(key, &query.bucket, allow_read, allow_write)
|
self.update_key_bucket(&key, &query.bucket, allow_read, allow_write)
|
||||||
.await?;
|
.await?;
|
||||||
self.update_bucket_key(bucket, &query.key_id, allow_read, allow_write)
|
self.update_bucket_key(bucket, &key.key_id, allow_read, allow_write)
|
||||||
.await?;
|
.await?;
|
||||||
Ok(AdminRPC::Ok(format!(
|
Ok(AdminRPC::Ok(format!(
|
||||||
"New permissions for {} on {}: read {}, write {}.",
|
"New permissions for {} on {}: read {}, write {}.",
|
||||||
&query.key_id, &query.bucket, allow_read, allow_write
|
&key.key_id, &query.bucket, allow_read, allow_write
|
||||||
)))
|
)))
|
||||||
}
|
}
|
||||||
BucketOperation::Website(query) => {
|
BucketOperation::Website(query) => {
|
||||||
|
@ -187,7 +193,12 @@ impl AdminRpcHandler {
|
||||||
let key_ids = self
|
let key_ids = self
|
||||||
.garage
|
.garage
|
||||||
.key_table
|
.key_table
|
||||||
.get_range(&EmptyKey, None, Some(DeletedFilter::NotDeleted), 10000)
|
.get_range(
|
||||||
|
&EmptyKey,
|
||||||
|
None,
|
||||||
|
Some(KeyFilter::Deleted(DeletedFilter::NotDeleted)),
|
||||||
|
10000,
|
||||||
|
)
|
||||||
.await?
|
.await?
|
||||||
.iter()
|
.iter()
|
||||||
.map(|k| (k.key_id.to_string(), k.name.get().clone()))
|
.map(|k| (k.key_id.to_string(), k.name.get().clone()))
|
||||||
|
@ -195,7 +206,7 @@ impl AdminRpcHandler {
|
||||||
Ok(AdminRPC::KeyList(key_ids))
|
Ok(AdminRPC::KeyList(key_ids))
|
||||||
}
|
}
|
||||||
KeyOperation::Info(query) => {
|
KeyOperation::Info(query) => {
|
||||||
let key = self.get_existing_key(&query.key_id).await?;
|
let key = self.get_existing_key(&query.key_pattern).await?;
|
||||||
Ok(AdminRPC::KeyInfo(key))
|
Ok(AdminRPC::KeyInfo(key))
|
||||||
}
|
}
|
||||||
KeyOperation::New(query) => {
|
KeyOperation::New(query) => {
|
||||||
|
@ -204,13 +215,13 @@ impl AdminRpcHandler {
|
||||||
Ok(AdminRPC::KeyInfo(key))
|
Ok(AdminRPC::KeyInfo(key))
|
||||||
}
|
}
|
||||||
KeyOperation::Rename(query) => {
|
KeyOperation::Rename(query) => {
|
||||||
let mut key = self.get_existing_key(&query.key_id).await?;
|
let mut key = self.get_existing_key(&query.key_pattern).await?;
|
||||||
key.name.update(query.new_name);
|
key.name.update(query.new_name);
|
||||||
self.garage.key_table.insert(&key).await?;
|
self.garage.key_table.insert(&key).await?;
|
||||||
Ok(AdminRPC::KeyInfo(key))
|
Ok(AdminRPC::KeyInfo(key))
|
||||||
}
|
}
|
||||||
KeyOperation::Delete(query) => {
|
KeyOperation::Delete(query) => {
|
||||||
let key = self.get_existing_key(&query.key_id).await?;
|
let key = self.get_existing_key(&query.key_pattern).await?;
|
||||||
if !query.yes {
|
if !query.yes {
|
||||||
return Err(Error::BadRPC(format!(
|
return Err(Error::BadRPC(format!(
|
||||||
"Add --yes flag to really perform this operation"
|
"Add --yes flag to really perform this operation"
|
||||||
|
@ -227,13 +238,24 @@ impl AdminRpcHandler {
|
||||||
return Err(Error::Message(format!("Bucket not found: {}", ab_name)));
|
return Err(Error::Message(format!("Bucket not found: {}", ab_name)));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
let del_key = Key::delete(key.key_id);
|
let del_key = Key::delete(key.key_id.to_string());
|
||||||
self.garage.key_table.insert(&del_key).await?;
|
self.garage.key_table.insert(&del_key).await?;
|
||||||
Ok(AdminRPC::Ok(format!(
|
Ok(AdminRPC::Ok(format!(
|
||||||
"Key {} was deleted successfully.",
|
"Key {} was deleted successfully.",
|
||||||
query.key_id
|
key.key_id
|
||||||
)))
|
)))
|
||||||
}
|
}
|
||||||
|
KeyOperation::Import(query) => {
|
||||||
|
let prev_key = self.garage.key_table.get(&EmptyKey, &query.key_id)
|
||||||
|
.await?;
|
||||||
|
if prev_key.is_some() {
|
||||||
|
return Err(Error::Message(format!("Key {} already exists in data store. Even if it is deleted, we can't let you create a new key with the same ID. Sorry.", query.key_id)));
|
||||||
|
}
|
||||||
|
let imported_key = Key::import(&query.key_id, &query.secret_key, &query.name);
|
||||||
|
self.garage.key_table.insert(&imported_key).await?;
|
||||||
|
Ok(AdminRPC::KeyInfo(imported_key))
|
||||||
|
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -250,14 +272,28 @@ impl AdminRpcHandler {
|
||||||
))))
|
))))
|
||||||
}
|
}
|
||||||
|
|
||||||
async fn get_existing_key(&self, id: &String) -> Result<Key, Error> {
|
async fn get_existing_key(&self, pattern: &str) -> Result<Key, Error> {
|
||||||
self.garage
|
let candidates = self
|
||||||
|
.garage
|
||||||
.key_table
|
.key_table
|
||||||
.get(&EmptyKey, id)
|
.get_range(
|
||||||
|
&EmptyKey,
|
||||||
|
None,
|
||||||
|
Some(KeyFilter::Matches(pattern.to_string())),
|
||||||
|
10,
|
||||||
|
)
|
||||||
.await?
|
.await?
|
||||||
|
.into_iter()
|
||||||
.filter(|k| !k.deleted.get())
|
.filter(|k| !k.deleted.get())
|
||||||
.map(Ok)
|
.collect::<Vec<_>>();
|
||||||
.unwrap_or(Err(Error::BadRPC(format!("Key {} does not exist", id))))
|
if candidates.len() != 1 {
|
||||||
|
Err(Error::Message(format!(
|
||||||
|
"{} matching keys",
|
||||||
|
candidates.len()
|
||||||
|
)))
|
||||||
|
} else {
|
||||||
|
Ok(candidates.into_iter().next().unwrap())
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Update **bucket table** to inform of the new linked key
|
/// Update **bucket table** to inform of the new linked key
|
||||||
|
@ -290,11 +326,12 @@ impl AdminRpcHandler {
|
||||||
/// Update **key table** to inform of the new linked bucket
|
/// Update **key table** to inform of the new linked bucket
|
||||||
async fn update_key_bucket(
|
async fn update_key_bucket(
|
||||||
&self,
|
&self,
|
||||||
mut key: Key,
|
key: &Key,
|
||||||
bucket: &String,
|
bucket: &String,
|
||||||
allow_read: bool,
|
allow_read: bool,
|
||||||
allow_write: bool,
|
allow_write: bool,
|
||||||
) -> Result<(), Error> {
|
) -> Result<(), Error> {
|
||||||
|
let mut key = key.clone();
|
||||||
let old_map = key.authorized_buckets.take_and_clear();
|
let old_map = key.authorized_buckets.take_and_clear();
|
||||||
key.authorized_buckets.merge(&old_map.update_mutator(
|
key.authorized_buckets.merge(&old_map.update_mutator(
|
||||||
bucket.clone(),
|
bucket.clone(),
|
||||||
|
@ -350,12 +387,118 @@ impl AdminRpcHandler {
|
||||||
.background
|
.background
|
||||||
.spawn_worker("Repair worker".into(), move |must_exit| async move {
|
.spawn_worker("Repair worker".into(), move |must_exit| async move {
|
||||||
repair.repair_worker(opt, must_exit).await
|
repair.repair_worker(opt, must_exit).await
|
||||||
})
|
});
|
||||||
.await;
|
|
||||||
Ok(AdminRPC::Ok(format!(
|
Ok(AdminRPC::Ok(format!(
|
||||||
"Repair launched on {:?}",
|
"Repair launched on {:?}",
|
||||||
self.garage.system.id
|
self.garage.system.id
|
||||||
)))
|
)))
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
async fn handle_stats(&self, opt: StatsOpt) -> Result<AdminRPC, Error> {
|
||||||
|
if opt.all_nodes {
|
||||||
|
let mut ret = String::new();
|
||||||
|
let ring = self.garage.system.ring.borrow().clone();
|
||||||
|
|
||||||
|
for node in ring.config.members.keys() {
|
||||||
|
let mut opt = opt.clone();
|
||||||
|
opt.all_nodes = false;
|
||||||
|
|
||||||
|
writeln!(&mut ret, "\n======================").unwrap();
|
||||||
|
writeln!(&mut ret, "Stats for node {:?}:", node).unwrap();
|
||||||
|
match self
|
||||||
|
.rpc_client
|
||||||
|
.call(*node, AdminRPC::Stats(opt), ADMIN_RPC_TIMEOUT)
|
||||||
|
.await
|
||||||
|
{
|
||||||
|
Ok(AdminRPC::Ok(s)) => writeln!(&mut ret, "{}", s).unwrap(),
|
||||||
|
Ok(x) => writeln!(&mut ret, "Bad answer: {:?}", x).unwrap(),
|
||||||
|
Err(e) => writeln!(&mut ret, "Error: {}", e).unwrap(),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
Ok(AdminRPC::Ok(ret))
|
||||||
|
} else {
|
||||||
|
Ok(AdminRPC::Ok(self.gather_stats_local(opt)?))
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
fn gather_stats_local(&self, opt: StatsOpt) -> Result<String, Error> {
|
||||||
|
let mut ret = String::new();
|
||||||
|
writeln!(
|
||||||
|
&mut ret,
|
||||||
|
"\nGarage version: {}",
|
||||||
|
git_version::git_version!()
|
||||||
|
)
|
||||||
|
.unwrap();
|
||||||
|
|
||||||
|
// Gather ring statistics
|
||||||
|
let ring = self.garage.system.ring.borrow().clone();
|
||||||
|
let mut ring_nodes = HashMap::new();
|
||||||
|
for r in ring.ring.iter() {
|
||||||
|
for n in r.nodes.iter() {
|
||||||
|
if !ring_nodes.contains_key(n) {
|
||||||
|
ring_nodes.insert(*n, 0usize);
|
||||||
|
}
|
||||||
|
*ring_nodes.get_mut(n).unwrap() += 1;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
writeln!(&mut ret, "\nRing nodes & partition count:").unwrap();
|
||||||
|
for (n, c) in ring_nodes.iter() {
|
||||||
|
writeln!(&mut ret, " {:?} {}", n, c).unwrap();
|
||||||
|
}
|
||||||
|
|
||||||
|
self.gather_table_stats(&mut ret, &self.garage.bucket_table, &opt)?;
|
||||||
|
self.gather_table_stats(&mut ret, &self.garage.key_table, &opt)?;
|
||||||
|
self.gather_table_stats(&mut ret, &self.garage.object_table, &opt)?;
|
||||||
|
self.gather_table_stats(&mut ret, &self.garage.version_table, &opt)?;
|
||||||
|
self.gather_table_stats(&mut ret, &self.garage.block_ref_table, &opt)?;
|
||||||
|
|
||||||
|
writeln!(&mut ret, "\nBlock manager stats:").unwrap();
|
||||||
|
if opt.detailed {
|
||||||
|
writeln!(
|
||||||
|
&mut ret,
|
||||||
|
" number of blocks: {}",
|
||||||
|
self.garage.block_manager.rc_len()
|
||||||
|
)
|
||||||
|
.unwrap();
|
||||||
|
}
|
||||||
|
writeln!(
|
||||||
|
&mut ret,
|
||||||
|
" resync queue length: {}",
|
||||||
|
self.garage.block_manager.resync_queue_len()
|
||||||
|
)
|
||||||
|
.unwrap();
|
||||||
|
|
||||||
|
Ok(ret)
|
||||||
|
}
|
||||||
|
|
||||||
|
fn gather_table_stats<F, R>(
|
||||||
|
&self,
|
||||||
|
to: &mut String,
|
||||||
|
t: &Arc<Table<F, R>>,
|
||||||
|
opt: &StatsOpt,
|
||||||
|
) -> Result<(), Error>
|
||||||
|
where
|
||||||
|
F: TableSchema + 'static,
|
||||||
|
R: TableReplication + 'static,
|
||||||
|
{
|
||||||
|
writeln!(to, "\nTable stats for {}", t.data.name).unwrap();
|
||||||
|
if opt.detailed {
|
||||||
|
writeln!(to, " number of items: {}", t.data.store.len()).unwrap();
|
||||||
|
writeln!(
|
||||||
|
to,
|
||||||
|
" Merkle tree size: {}",
|
||||||
|
t.merkle_updater.merkle_tree_len()
|
||||||
|
)
|
||||||
|
.unwrap();
|
||||||
|
}
|
||||||
|
writeln!(
|
||||||
|
to,
|
||||||
|
" Merkle updater todo queue length: {}",
|
||||||
|
t.merkle_updater.todo_len()
|
||||||
|
)
|
||||||
|
.unwrap();
|
||||||
|
writeln!(to, " GC todo queue length: {}", t.data.gc_todo_len()).unwrap();
|
||||||
|
Ok(())
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
552
src/garage/cli.rs
Normal file
|
@ -0,0 +1,552 @@
|
||||||
|
use std::collections::HashSet;
|
||||||
|
use std::net::SocketAddr;
|
||||||
|
use std::path::PathBuf;
|
||||||
|
|
||||||
|
use serde::{Deserialize, Serialize};
|
||||||
|
use structopt::StructOpt;
|
||||||
|
|
||||||
|
use garage_util::error::Error;
|
||||||
|
use garage_util::time::*;
|
||||||
|
|
||||||
|
use garage_rpc::membership::*;
|
||||||
|
use garage_rpc::ring::*;
|
||||||
|
use garage_rpc::rpc_client::*;
|
||||||
|
|
||||||
|
use garage_model::bucket_table::*;
|
||||||
|
use garage_model::key_table::*;
|
||||||
|
|
||||||
|
use crate::admin_rpc::*;
|
||||||
|
|
||||||
|
#[derive(StructOpt, Debug)]
|
||||||
|
pub enum Command {
|
||||||
|
/// Run Garage server
|
||||||
|
#[structopt(name = "server")]
|
||||||
|
Server(ServerOpt),
|
||||||
|
|
||||||
|
/// Get network status
|
||||||
|
#[structopt(name = "status")]
|
||||||
|
Status,
|
||||||
|
|
||||||
|
/// Garage node operations
|
||||||
|
#[structopt(name = "node")]
|
||||||
|
Node(NodeOperation),
|
||||||
|
|
||||||
|
/// Bucket operations
|
||||||
|
#[structopt(name = "bucket")]
|
||||||
|
Bucket(BucketOperation),
|
||||||
|
|
||||||
|
/// Key operations
|
||||||
|
#[structopt(name = "key")]
|
||||||
|
Key(KeyOperation),
|
||||||
|
|
||||||
|
/// Start repair of node data
|
||||||
|
#[structopt(name = "repair")]
|
||||||
|
Repair(RepairOpt),
|
||||||
|
|
||||||
|
/// Gather node statistics
|
||||||
|
#[structopt(name = "stats")]
|
||||||
|
Stats(StatsOpt),
|
||||||
|
}
|
||||||
|
|
||||||
|
#[derive(StructOpt, Debug)]
|
||||||
|
pub struct ServerOpt {
|
||||||
|
/// Configuration file
|
||||||
|
#[structopt(short = "c", long = "config", default_value = "./config.toml")]
|
||||||
|
pub config_file: PathBuf,
|
||||||
|
}
|
||||||
|
|
||||||
|
#[derive(StructOpt, Debug)]
|
||||||
|
pub enum NodeOperation {
|
||||||
|
/// Configure Garage node
|
||||||
|
#[structopt(name = "configure")]
|
||||||
|
Configure(ConfigureNodeOpt),
|
||||||
|
|
||||||
|
/// Remove Garage node from cluster
|
||||||
|
#[structopt(name = "remove")]
|
||||||
|
Remove(RemoveNodeOpt),
|
||||||
|
}
|
||||||
|
|
||||||
|
#[derive(StructOpt, Debug)]
|
||||||
|
pub struct ConfigureNodeOpt {
|
||||||
|
/// Node to configure (prefix of hexadecimal node id)
|
||||||
|
node_id: String,
|
||||||
|
|
||||||
|
/// Location (datacenter) of the node
|
||||||
|
#[structopt(short = "d", long = "datacenter")]
|
||||||
|
datacenter: Option<String>,
|
||||||
|
|
||||||
|
/// Capacity (in relative terms, use 1 to represent your smallest server)
|
||||||
|
#[structopt(short = "c", long = "capacity")]
|
||||||
|
capacity: Option<u32>,
|
||||||
|
|
||||||
|
/// Optional node tag
|
||||||
|
#[structopt(short = "t", long = "tag")]
|
||||||
|
tag: Option<String>,
|
||||||
|
}
|
||||||
|
|
||||||
|
#[derive(StructOpt, Debug)]
|
||||||
|
pub struct RemoveNodeOpt {
|
||||||
|
/// Node to remove (prefix of hexadecimal node id)
|
||||||
|
node_id: String,
|
||||||
|
|
||||||
|
/// If this flag is not given, the node won't be removed
|
||||||
|
#[structopt(long = "yes")]
|
||||||
|
yes: bool,
|
||||||
|
}
|
||||||
|
|
||||||
|
#[derive(Serialize, Deserialize, StructOpt, Debug)]
|
||||||
|
pub enum BucketOperation {
|
||||||
|
/// List buckets
|
||||||
|
#[structopt(name = "list")]
|
||||||
|
List,
|
||||||
|
|
||||||
|
/// Get bucket info
|
||||||
|
#[structopt(name = "info")]
|
||||||
|
Info(BucketOpt),
|
||||||
|
|
||||||
|
/// Create bucket
|
||||||
|
#[structopt(name = "create")]
|
||||||
|
Create(BucketOpt),
|
||||||
|
|
||||||
|
/// Delete bucket
|
||||||
|
#[structopt(name = "delete")]
|
||||||
|
Delete(DeleteBucketOpt),
|
||||||
|
|
||||||
|
/// Allow key to read or write to bucket
|
||||||
|
#[structopt(name = "allow")]
|
||||||
|
Allow(PermBucketOpt),
|
||||||
|
|
||||||
|
/// Deny key from reading or writing to bucket
|
||||||
|
#[structopt(name = "deny")]
|
||||||
|
Deny(PermBucketOpt),
|
||||||
|
|
||||||
|
/// Expose as website or not
|
||||||
|
#[structopt(name = "website")]
|
||||||
|
Website(WebsiteOpt),
|
||||||
|
}
|
||||||
|
|
||||||
|
#[derive(Serialize, Deserialize, StructOpt, Debug)]
|
||||||
|
pub struct WebsiteOpt {
|
||||||
|
/// Create
|
||||||
|
#[structopt(long = "allow")]
|
||||||
|
pub allow: bool,
|
||||||
|
|
||||||
|
/// Delete
|
||||||
|
#[structopt(long = "deny")]
|
||||||
|
pub deny: bool,
|
||||||
|
|
||||||
|
/// Bucket name
|
||||||
|
pub bucket: String,
|
||||||
|
}
|
||||||
|
|
||||||
|
#[derive(Serialize, Deserialize, StructOpt, Debug)]
|
||||||
|
pub struct BucketOpt {
|
||||||
|
/// Bucket name
|
||||||
|
pub name: String,
|
||||||
|
}
|
||||||
|
|
||||||
|
#[derive(Serialize, Deserialize, StructOpt, Debug)]
|
||||||
|
pub struct DeleteBucketOpt {
|
||||||
|
/// Bucket name
|
||||||
|
pub name: String,
|
||||||
|
|
||||||
|
/// If this flag is not given, the bucket won't be deleted
|
||||||
|
#[structopt(long = "yes")]
|
||||||
|
pub yes: bool,
|
||||||
|
}
|
||||||
|
|
||||||
|
#[derive(Serialize, Deserialize, StructOpt, Debug)]
|
||||||
|
pub struct PermBucketOpt {
|
||||||
|
/// Access key name or ID
|
||||||
|
#[structopt(long = "key")]
|
||||||
|
pub key_pattern: String,
|
||||||
|
|
||||||
|
/// Allow/deny read operations
|
||||||
|
#[structopt(long = "read")]
|
||||||
|
pub read: bool,
|
||||||
|
|
||||||
|
/// Allow/deny write operations
|
||||||
|
#[structopt(long = "write")]
|
||||||
|
pub write: bool,
|
||||||
|
|
||||||
|
/// Bucket name
|
||||||
|
pub bucket: String,
|
||||||
|
}
|
||||||
|
|
||||||
|
#[derive(Serialize, Deserialize, StructOpt, Debug)]
|
||||||
|
pub enum KeyOperation {
|
||||||
|
/// List keys
|
||||||
|
#[structopt(name = "list")]
|
||||||
|
List,
|
||||||
|
|
||||||
|
/// Get key info
|
||||||
|
#[structopt(name = "info")]
|
||||||
|
Info(KeyOpt),
|
||||||
|
|
||||||
|
/// Create new key
|
||||||
|
#[structopt(name = "new")]
|
||||||
|
New(KeyNewOpt),
|
||||||
|
|
||||||
|
/// Rename key
|
||||||
|
#[structopt(name = "rename")]
|
||||||
|
Rename(KeyRenameOpt),
|
||||||
|
|
||||||
|
/// Delete key
|
||||||
|
#[structopt(name = "delete")]
|
||||||
|
Delete(KeyDeleteOpt),
|
||||||
|
|
||||||
|
/// Import key
|
||||||
|
#[structopt(name = "import")]
|
||||||
|
Import(KeyImportOpt),
|
||||||
|
}
|
||||||
|
|
||||||
|
#[derive(Serialize, Deserialize, StructOpt, Debug)]
|
||||||
|
pub struct KeyOpt {
|
||||||
|
/// ID or name of the key
|
||||||
|
pub key_pattern: String,
|
||||||
|
}
|
||||||
|
|
||||||
|
#[derive(Serialize, Deserialize, StructOpt, Debug)]
|
||||||
|
pub struct KeyNewOpt {
|
||||||
|
/// Name of the key
|
||||||
|
#[structopt(long = "name", default_value = "Unnamed key")]
|
||||||
|
pub name: String,
|
||||||
|
}
|
||||||
|
|
||||||
|
#[derive(Serialize, Deserialize, StructOpt, Debug)]
|
||||||
|
pub struct KeyRenameOpt {
|
||||||
|
/// ID or name of the key
|
||||||
|
pub key_pattern: String,
|
||||||
|
|
||||||
|
/// New name of the key
|
||||||
|
pub new_name: String,
|
||||||
|
}
|
||||||
|
|
||||||
|
#[derive(Serialize, Deserialize, StructOpt, Debug)]
|
||||||
|
pub struct KeyDeleteOpt {
|
||||||
|
/// ID or name of the key
|
||||||
|
pub key_pattern: String,
|
||||||
|
|
||||||
|
/// Confirm deletion
|
||||||
|
#[structopt(long = "yes")]
|
||||||
|
pub yes: bool,
|
||||||
|
}
|
||||||
|
|
||||||
|
#[derive(Serialize, Deserialize, StructOpt, Debug)]
|
||||||
|
pub struct KeyImportOpt {
|
||||||
|
/// Access key ID
|
||||||
|
pub key_id: String,
|
||||||
|
|
||||||
|
/// Secret access key
|
||||||
|
pub secret_key: String,
|
||||||
|
|
||||||
|
/// Key name
|
||||||
|
#[structopt(short = "n", default_value = "Imported key")]
|
||||||
|
pub name: String,
|
||||||
|
}
|
||||||
|
|
||||||
|
#[derive(Serialize, Deserialize, StructOpt, Debug, Clone)]
|
||||||
|
pub struct RepairOpt {
|
||||||
|
/// Launch repair operation on all nodes
|
||||||
|
#[structopt(short = "a", long = "all-nodes")]
|
||||||
|
pub all_nodes: bool,
|
||||||
|
|
||||||
|
/// Confirm the launch of the repair operation
|
||||||
|
#[structopt(long = "yes")]
|
||||||
|
pub yes: bool,
|
||||||
|
|
||||||
|
#[structopt(subcommand)]
|
||||||
|
pub what: Option<RepairWhat>,
|
||||||
|
}
|
||||||
|
|
||||||
|
#[derive(Serialize, Deserialize, StructOpt, Debug, Eq, PartialEq, Clone)]
|
||||||
|
pub enum RepairWhat {
|
||||||
|
/// Only do a full sync of metadata tables
|
||||||
|
#[structopt(name = "tables")]
|
||||||
|
Tables,
|
||||||
|
/// Only repair (resync/rebalance) the set of stored blocks
|
||||||
|
#[structopt(name = "blocks")]
|
||||||
|
Blocks,
|
||||||
|
/// Only redo the propagation of object deletions to the version table (slow)
|
||||||
|
#[structopt(name = "versions")]
|
||||||
|
Versions,
|
||||||
|
/// Only redo the propagation of version deletions to the block ref table (extremely slow)
|
||||||
|
#[structopt(name = "block_refs")]
|
||||||
|
BlockRefs,
|
||||||
|
}
|
||||||
|
|
||||||
|
#[derive(Serialize, Deserialize, StructOpt, Debug, Clone)]
|
||||||
|
pub struct StatsOpt {
|
||||||
|
/// Gather statistics from all nodes
|
||||||
|
#[structopt(short = "a", long = "all-nodes")]
|
||||||
|
pub all_nodes: bool,
|
||||||
|
|
||||||
|
/// Gather detailed statistics (this can be long)
|
||||||
|
#[structopt(short = "d", long = "detailed")]
|
||||||
|
pub detailed: bool,
|
||||||
|
}
|
||||||
|
|
||||||
|
pub async fn cli_cmd(
|
||||||
|
cmd: Command,
|
||||||
|
membership_rpc_cli: RpcAddrClient<Message>,
|
||||||
|
admin_rpc_cli: RpcAddrClient<AdminRPC>,
|
||||||
|
rpc_host: SocketAddr,
|
||||||
|
) -> Result<(), Error> {
|
||||||
|
match cmd {
|
||||||
|
Command::Status => cmd_status(membership_rpc_cli, rpc_host).await,
|
||||||
|
Command::Node(NodeOperation::Configure(configure_opt)) => {
|
||||||
|
cmd_configure(membership_rpc_cli, rpc_host, configure_opt).await
|
||||||
|
}
|
||||||
|
Command::Node(NodeOperation::Remove(remove_opt)) => {
|
||||||
|
cmd_remove(membership_rpc_cli, rpc_host, remove_opt).await
|
||||||
|
}
|
||||||
|
Command::Bucket(bo) => {
|
||||||
|
cmd_admin(admin_rpc_cli, rpc_host, AdminRPC::BucketOperation(bo)).await
|
||||||
|
}
|
||||||
|
Command::Key(ko) => cmd_admin(admin_rpc_cli, rpc_host, AdminRPC::KeyOperation(ko)).await,
|
||||||
|
Command::Repair(ro) => cmd_admin(admin_rpc_cli, rpc_host, AdminRPC::LaunchRepair(ro)).await,
|
||||||
|
Command::Stats(so) => cmd_admin(admin_rpc_cli, rpc_host, AdminRPC::Stats(so)).await,
|
||||||
|
_ => unreachable!(),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
pub async fn cmd_status(
|
||||||
|
rpc_cli: RpcAddrClient<Message>,
|
||||||
|
rpc_host: SocketAddr,
|
||||||
|
) -> Result<(), Error> {
|
||||||
|
let status = match rpc_cli
|
||||||
|
.call(&rpc_host, &Message::PullStatus, ADMIN_RPC_TIMEOUT)
|
||||||
|
.await??
|
||||||
|
{
|
||||||
|
Message::AdvertiseNodesUp(nodes) => nodes,
|
||||||
|
resp => return Err(Error::Message(format!("Invalid RPC response: {:?}", resp))),
|
||||||
|
};
|
||||||
|
let config = match rpc_cli
|
||||||
|
.call(&rpc_host, &Message::PullConfig, ADMIN_RPC_TIMEOUT)
|
||||||
|
.await??
|
||||||
|
{
|
||||||
|
Message::AdvertiseConfig(cfg) => cfg,
|
||||||
|
resp => return Err(Error::Message(format!("Invalid RPC response: {:?}", resp))),
|
||||||
|
};
|
||||||
|
|
||||||
|
println!("Healthy nodes:");
|
||||||
|
for adv in status.iter().filter(|x| x.is_up) {
|
||||||
|
if let Some(cfg) = config.members.get(&adv.id) {
|
||||||
|
println!(
|
||||||
|
"{:?}\t{}\t{}\t[{}]\t{}\t{}",
|
||||||
|
adv.id, adv.state_info.hostname, adv.addr, cfg.tag, cfg.datacenter, cfg.capacity
|
||||||
|
);
|
||||||
|
} else {
|
||||||
|
println!(
|
||||||
|
"{:?}\t{}\t{}\tUNCONFIGURED/REMOVED",
|
||||||
|
adv.id, adv.state_info.hostname, adv.addr
|
||||||
|
);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
let status_keys = status.iter().map(|x| x.id).collect::<HashSet<_>>();
|
||||||
|
let failure_case_1 = status.iter().any(|x| !x.is_up);
|
||||||
|
let failure_case_2 = config
|
||||||
|
.members
|
||||||
|
.iter()
|
||||||
|
.any(|(id, _)| !status_keys.contains(id));
|
||||||
|
if failure_case_1 || failure_case_2 {
|
||||||
|
println!("\nFailed nodes:");
|
||||||
|
for adv in status.iter().filter(|x| !x.is_up) {
|
||||||
|
if let Some(cfg) = config.members.get(&adv.id) {
|
||||||
|
println!(
|
||||||
|
"{:?}\t{}\t{}\t[{}]\t{}\t{}\tlast seen: {}s ago",
|
||||||
|
adv.id,
|
||||||
|
adv.state_info.hostname,
|
||||||
|
adv.addr,
|
||||||
|
cfg.tag,
|
||||||
|
cfg.datacenter,
|
||||||
|
cfg.capacity,
|
||||||
|
(now_msec() - adv.last_seen) / 1000,
|
||||||
|
);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
for (id, cfg) in config.members.iter() {
|
||||||
|
if !status.iter().any(|x| x.id == *id) {
|
||||||
|
println!(
|
||||||
|
"{:?}\t{}\t{}\t{}\tnever seen",
|
||||||
|
id, cfg.tag, cfg.datacenter, cfg.capacity
|
||||||
|
);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
Ok(())
|
||||||
|
}
|
||||||
|
|
||||||
|
pub async fn cmd_configure(
|
||||||
|
rpc_cli: RpcAddrClient<Message>,
|
||||||
|
rpc_host: SocketAddr,
|
||||||
|
args: ConfigureNodeOpt,
|
||||||
|
) -> Result<(), Error> {
|
||||||
|
let status = match rpc_cli
|
||||||
|
.call(&rpc_host, &Message::PullStatus, ADMIN_RPC_TIMEOUT)
|
||||||
|
.await??
|
||||||
|
{
|
||||||
|
Message::AdvertiseNodesUp(nodes) => nodes,
|
||||||
|
resp => return Err(Error::Message(format!("Invalid RPC response: {:?}", resp))),
|
||||||
|
};
|
||||||
|
|
||||||
|
let mut candidates = vec![];
|
||||||
|
for adv in status.iter() {
|
||||||
|
if hex::encode(&adv.id).starts_with(&args.node_id) {
|
||||||
|
candidates.push(adv.id);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if candidates.len() != 1 {
|
||||||
|
return Err(Error::Message(format!(
|
||||||
|
"{} matching nodes",
|
||||||
|
candidates.len()
|
||||||
|
)));
|
||||||
|
}
|
||||||
|
|
||||||
|
let mut config = match rpc_cli
|
||||||
|
.call(&rpc_host, &Message::PullConfig, ADMIN_RPC_TIMEOUT)
|
||||||
|
.await??
|
||||||
|
{
|
||||||
|
Message::AdvertiseConfig(cfg) => cfg,
|
||||||
|
resp => return Err(Error::Message(format!("Invalid RPC response: {:?}", resp))),
|
||||||
|
};
|
||||||
|
|
||||||
|
let new_entry = match config.members.get(&candidates[0]) {
|
||||||
|
None => NetworkConfigEntry {
|
||||||
|
datacenter: args
|
||||||
|
.datacenter
|
||||||
|
.expect("Please specifiy a datacenter with the -d flag"),
|
||||||
|
capacity: args
|
||||||
|
.capacity
|
||||||
|
.expect("Please specifiy a capacity with the -c flag"),
|
||||||
|
tag: args.tag.unwrap_or("".to_string()),
|
||||||
|
},
|
||||||
|
Some(old) => NetworkConfigEntry {
|
||||||
|
datacenter: args.datacenter.unwrap_or(old.datacenter.to_string()),
|
||||||
|
capacity: args.capacity.unwrap_or(old.capacity),
|
||||||
|
tag: args.tag.unwrap_or(old.tag.to_string()),
|
||||||
|
},
|
||||||
|
};
|
||||||
|
|
||||||
|
config.members.insert(candidates[0].clone(), new_entry);
|
||||||
|
config.version += 1;
|
||||||
|
|
||||||
|
rpc_cli
|
||||||
|
.call(
|
||||||
|
&rpc_host,
|
||||||
|
&Message::AdvertiseConfig(config),
|
||||||
|
ADMIN_RPC_TIMEOUT,
|
||||||
|
)
|
||||||
|
.await??;
|
||||||
|
Ok(())
|
||||||
|
}
|
||||||
|
|
||||||
|
pub async fn cmd_remove(
|
||||||
|
rpc_cli: RpcAddrClient<Message>,
|
||||||
|
rpc_host: SocketAddr,
|
||||||
|
args: RemoveNodeOpt,
|
||||||
|
) -> Result<(), Error> {
|
||||||
|
let mut config = match rpc_cli
|
||||||
|
.call(&rpc_host, &Message::PullConfig, ADMIN_RPC_TIMEOUT)
|
||||||
|
.await??
|
||||||
|
{
|
||||||
|
Message::AdvertiseConfig(cfg) => cfg,
|
||||||
|
resp => return Err(Error::Message(format!("Invalid RPC response: {:?}", resp))),
|
||||||
|
};
|
||||||
|
|
||||||
|
let mut candidates = vec![];
|
||||||
|
for (key, _) in config.members.iter() {
|
||||||
|
if hex::encode(key).starts_with(&args.node_id) {
|
||||||
|
candidates.push(*key);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if candidates.len() != 1 {
|
||||||
|
return Err(Error::Message(format!(
|
||||||
|
"{} matching nodes",
|
||||||
|
candidates.len()
|
||||||
|
)));
|
||||||
|
}
|
||||||
|
|
||||||
|
if !args.yes {
|
||||||
|
return Err(Error::Message(format!(
|
||||||
|
"Add the flag --yes to really remove {:?} from the cluster",
|
||||||
|
candidates[0]
|
||||||
|
)));
|
||||||
|
}
|
||||||
|
|
||||||
|
config.members.remove(&candidates[0]);
|
||||||
|
config.version += 1;
|
||||||
|
|
||||||
|
rpc_cli
|
||||||
|
.call(
|
||||||
|
&rpc_host,
|
||||||
|
&Message::AdvertiseConfig(config),
|
||||||
|
ADMIN_RPC_TIMEOUT,
|
||||||
|
)
|
||||||
|
.await??;
|
||||||
|
Ok(())
|
||||||
|
}
|
||||||
|
|
||||||
|
pub async fn cmd_admin(
|
||||||
|
rpc_cli: RpcAddrClient<AdminRPC>,
|
||||||
|
rpc_host: SocketAddr,
|
||||||
|
args: AdminRPC,
|
||||||
|
) -> Result<(), Error> {
|
||||||
|
match rpc_cli.call(&rpc_host, args, ADMIN_RPC_TIMEOUT).await?? {
|
||||||
|
AdminRPC::Ok(msg) => {
|
||||||
|
println!("{}", msg);
|
||||||
|
}
|
||||||
|
AdminRPC::BucketList(bl) => {
|
||||||
|
println!("List of buckets:");
|
||||||
|
for bucket in bl {
|
||||||
|
println!("{}", bucket);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
AdminRPC::BucketInfo(bucket) => {
|
||||||
|
print_bucket_info(&bucket);
|
||||||
|
}
|
||||||
|
AdminRPC::KeyList(kl) => {
|
||||||
|
println!("List of keys:");
|
||||||
|
for key in kl {
|
||||||
|
println!("{}\t{}", key.0, key.1);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
AdminRPC::KeyInfo(key) => {
|
||||||
|
print_key_info(&key);
|
||||||
|
}
|
||||||
|
r => {
|
||||||
|
error!("Unexpected response: {:?}", r);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
Ok(())
|
||||||
|
}
|
||||||
|
|
||||||
|
fn print_key_info(key: &Key) {
|
||||||
|
println!("Key name: {}", key.name.get());
|
||||||
|
println!("Key ID: {}", key.key_id);
|
||||||
|
println!("Secret key: {}", key.secret_key);
|
||||||
|
if key.deleted.get() {
|
||||||
|
println!("Key is deleted.");
|
||||||
|
} else {
|
||||||
|
println!("Authorized buckets:");
|
||||||
|
for (b, _, perm) in key.authorized_buckets.items().iter() {
|
||||||
|
println!("- {} R:{} W:{}", b, perm.allow_read, perm.allow_write);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
fn print_bucket_info(bucket: &Bucket) {
|
||||||
|
println!("Bucket name: {}", bucket.name);
|
||||||
|
match bucket.state.get() {
|
||||||
|
BucketState::Deleted => println!("Bucket is deleted."),
|
||||||
|
BucketState::Present(p) => {
|
||||||
|
println!("Authorized keys:");
|
||||||
|
for (k, _, perm) in p.authorized_keys.items().iter() {
|
||||||
|
println!("- {} R:{} W:{}", k, perm.allow_read, perm.allow_write);
|
||||||
|
}
|
||||||
|
println!("Website access: {}", p.website.get());
|
||||||
|
}
|
||||||
|
};
|
||||||
|
}
|
|
@ -4,289 +4,67 @@
|
||||||
extern crate log;
|
extern crate log;
|
||||||
|
|
||||||
mod admin_rpc;
|
mod admin_rpc;
|
||||||
|
mod cli;
|
||||||
mod repair;
|
mod repair;
|
||||||
mod server;
|
mod server;
|
||||||
|
|
||||||
use std::collections::HashSet;
|
|
||||||
use std::net::SocketAddr;
|
use std::net::SocketAddr;
|
||||||
use std::path::PathBuf;
|
|
||||||
use std::sync::Arc;
|
use std::sync::Arc;
|
||||||
use std::time::Duration;
|
use std::time::Duration;
|
||||||
|
|
||||||
use serde::{Deserialize, Serialize};
|
|
||||||
use structopt::StructOpt;
|
use structopt::StructOpt;
|
||||||
|
|
||||||
use garage_util::config::TlsConfig;
|
use garage_util::config::TlsConfig;
|
||||||
use garage_util::data::*;
|
|
||||||
use garage_util::error::Error;
|
use garage_util::error::Error;
|
||||||
|
|
||||||
use garage_rpc::membership::*;
|
use garage_rpc::membership::*;
|
||||||
use garage_rpc::ring::*;
|
|
||||||
use garage_rpc::rpc_client::*;
|
use garage_rpc::rpc_client::*;
|
||||||
|
|
||||||
use admin_rpc::*;
|
use admin_rpc::*;
|
||||||
|
use cli::*;
|
||||||
|
|
||||||
#[derive(StructOpt, Debug)]
|
#[derive(StructOpt, Debug)]
|
||||||
#[structopt(name = "garage")]
|
#[structopt(name = "garage")]
|
||||||
pub struct Opt {
|
pub struct Opt {
|
||||||
/// RPC connect to this host to execute client operations
|
/// RPC connect to this host to execute client operations
|
||||||
#[structopt(short = "h", long = "rpc-host", default_value = "127.0.0.1:3901")]
|
#[structopt(short = "h", long = "rpc-host", default_value = "127.0.0.1:3901")]
|
||||||
rpc_host: SocketAddr,
|
pub rpc_host: SocketAddr,
|
||||||
|
|
||||||
#[structopt(long = "ca-cert")]
|
#[structopt(long = "ca-cert")]
|
||||||
ca_cert: Option<String>,
|
pub ca_cert: Option<String>,
|
||||||
#[structopt(long = "client-cert")]
|
#[structopt(long = "client-cert")]
|
||||||
client_cert: Option<String>,
|
pub client_cert: Option<String>,
|
||||||
#[structopt(long = "client-key")]
|
#[structopt(long = "client-key")]
|
||||||
client_key: Option<String>,
|
pub client_key: Option<String>,
|
||||||
|
|
||||||
#[structopt(subcommand)]
|
#[structopt(subcommand)]
|
||||||
cmd: Command,
|
cmd: Command,
|
||||||
}
|
}
|
||||||
|
|
||||||
#[derive(StructOpt, Debug)]
|
|
||||||
pub enum Command {
|
|
||||||
/// Run Garage server
|
|
||||||
#[structopt(name = "server")]
|
|
||||||
Server(ServerOpt),
|
|
||||||
|
|
||||||
/// Get network status
|
|
||||||
#[structopt(name = "status")]
|
|
||||||
Status,
|
|
||||||
|
|
||||||
/// Garage node operations
|
|
||||||
#[structopt(name = "node")]
|
|
||||||
Node(NodeOperation),
|
|
||||||
|
|
||||||
/// Bucket operations
|
|
||||||
#[structopt(name = "bucket")]
|
|
||||||
Bucket(BucketOperation),
|
|
||||||
|
|
||||||
/// Key operations
|
|
||||||
#[structopt(name = "key")]
|
|
||||||
Key(KeyOperation),
|
|
||||||
|
|
||||||
/// Start repair of node data
|
|
||||||
#[structopt(name = "repair")]
|
|
||||||
Repair(RepairOpt),
|
|
||||||
}
|
|
||||||
|
|
||||||
#[derive(StructOpt, Debug)]
|
|
||||||
pub struct ServerOpt {
|
|
||||||
/// Configuration file
|
|
||||||
#[structopt(short = "c", long = "config", default_value = "./config.toml")]
|
|
||||||
config_file: PathBuf,
|
|
||||||
}
|
|
||||||
|
|
||||||
#[derive(StructOpt, Debug)]
|
|
||||||
pub enum NodeOperation {
|
|
||||||
/// Configure Garage node
|
|
||||||
#[structopt(name = "configure")]
|
|
||||||
Configure(ConfigureNodeOpt),
|
|
||||||
|
|
||||||
/// Remove Garage node from cluster
|
|
||||||
#[structopt(name = "remove")]
|
|
||||||
Remove(RemoveNodeOpt),
|
|
||||||
}
|
|
||||||
|
|
||||||
#[derive(StructOpt, Debug)]
|
|
||||||
pub struct ConfigureNodeOpt {
|
|
||||||
/// Node to configure (prefix of hexadecimal node id)
|
|
||||||
node_id: String,
|
|
||||||
|
|
||||||
/// Location (datacenter) of the node
|
|
||||||
#[structopt(short = "d", long = "datacenter")]
|
|
||||||
datacenter: Option<String>,
|
|
||||||
|
|
||||||
/// Capacity (in relative terms, use 1 to represent your smallest server)
|
|
||||||
#[structopt(short = "c", long = "capacity")]
|
|
||||||
capacity: Option<u32>,
|
|
||||||
|
|
||||||
/// Optionnal node tag
|
|
||||||
#[structopt(short = "t", long = "tag")]
|
|
||||||
tag: Option<String>,
|
|
||||||
}
|
|
||||||
|
|
||||||
#[derive(StructOpt, Debug)]
|
|
||||||
pub struct RemoveNodeOpt {
|
|
||||||
/// Node to configure (prefix of hexadecimal node id)
|
|
||||||
node_id: String,
|
|
||||||
|
|
||||||
/// If this flag is not given, the node won't be removed
|
|
||||||
#[structopt(long = "yes")]
|
|
||||||
yes: bool,
|
|
||||||
}
|
|
||||||
|
|
||||||
#[derive(Serialize, Deserialize, StructOpt, Debug)]
|
|
||||||
pub enum BucketOperation {
|
|
||||||
/// List buckets
|
|
||||||
#[structopt(name = "list")]
|
|
||||||
List,
|
|
||||||
|
|
||||||
/// Get bucket info
|
|
||||||
#[structopt(name = "info")]
|
|
||||||
Info(BucketOpt),
|
|
||||||
|
|
||||||
/// Create bucket
|
|
||||||
#[structopt(name = "create")]
|
|
||||||
Create(BucketOpt),
|
|
||||||
|
|
||||||
/// Delete bucket
|
|
||||||
#[structopt(name = "delete")]
|
|
||||||
Delete(DeleteBucketOpt),
|
|
||||||
|
|
||||||
/// Allow key to read or write to bucket
|
|
||||||
#[structopt(name = "allow")]
|
|
||||||
Allow(PermBucketOpt),
|
|
||||||
|
|
||||||
/// Allow key to read or write to bucket
|
|
||||||
#[structopt(name = "deny")]
|
|
||||||
Deny(PermBucketOpt),
|
|
||||||
|
|
||||||
/// Expose as website or not
|
|
||||||
#[structopt(name = "website")]
|
|
||||||
Website(WebsiteOpt),
|
|
||||||
}
|
|
||||||
|
|
||||||
#[derive(Serialize, Deserialize, StructOpt, Debug)]
|
|
||||||
pub struct WebsiteOpt {
|
|
||||||
/// Create
|
|
||||||
#[structopt(long = "allow")]
|
|
||||||
pub allow: bool,
|
|
||||||
|
|
||||||
/// Delete
|
|
||||||
#[structopt(long = "deny")]
|
|
||||||
pub deny: bool,
|
|
||||||
|
|
||||||
/// Bucket name
|
|
||||||
pub bucket: String,
|
|
||||||
}
|
|
||||||
|
|
||||||
#[derive(Serialize, Deserialize, StructOpt, Debug)]
|
|
||||||
pub struct BucketOpt {
|
|
||||||
/// Bucket name
|
|
||||||
pub name: String,
|
|
||||||
}
|
|
||||||
|
|
||||||
#[derive(Serialize, Deserialize, StructOpt, Debug)]
|
|
||||||
pub struct DeleteBucketOpt {
|
|
||||||
/// Bucket name
|
|
||||||
pub name: String,
|
|
||||||
|
|
||||||
/// If this flag is not given, the bucket won't be deleted
|
|
||||||
#[structopt(long = "yes")]
|
|
||||||
pub yes: bool,
|
|
||||||
}
|
|
||||||
|
|
||||||
#[derive(Serialize, Deserialize, StructOpt, Debug)]
|
|
||||||
pub struct PermBucketOpt {
|
|
||||||
/// Access key ID
|
|
||||||
#[structopt(long = "key")]
|
|
||||||
pub key_id: String,
|
|
||||||
|
|
||||||
/// Allow/deny read operations
|
|
||||||
#[structopt(long = "read")]
|
|
||||||
pub read: bool,
|
|
||||||
|
|
||||||
/// Allow/deny write operations
|
|
||||||
#[structopt(long = "write")]
|
|
||||||
pub write: bool,
|
|
||||||
|
|
||||||
/// Bucket name
|
|
||||||
pub bucket: String,
|
|
||||||
}
|
|
||||||
|
|
||||||
#[derive(Serialize, Deserialize, StructOpt, Debug)]
|
|
||||||
pub enum KeyOperation {
|
|
||||||
/// List keys
|
|
||||||
#[structopt(name = "list")]
|
|
||||||
List,
|
|
||||||
|
|
||||||
/// Get key info
|
|
||||||
#[structopt(name = "info")]
|
|
||||||
Info(KeyOpt),
|
|
||||||
|
|
||||||
/// Create new key
|
|
||||||
#[structopt(name = "new")]
|
|
||||||
New(KeyNewOpt),
|
|
||||||
|
|
||||||
/// Rename key
|
|
||||||
#[structopt(name = "rename")]
|
|
||||||
Rename(KeyRenameOpt),
|
|
||||||
|
|
||||||
/// Delete key
|
|
||||||
#[structopt(name = "delete")]
|
|
||||||
Delete(KeyDeleteOpt),
|
|
||||||
}
|
|
||||||
|
|
||||||
#[derive(Serialize, Deserialize, StructOpt, Debug)]
|
|
||||||
pub struct KeyOpt {
|
|
||||||
/// ID of the key
|
|
||||||
key_id: String,
|
|
||||||
}
|
|
||||||
|
|
||||||
#[derive(Serialize, Deserialize, StructOpt, Debug)]
|
|
||||||
pub struct KeyNewOpt {
|
|
||||||
/// Name of the key
|
|
||||||
#[structopt(long = "name", default_value = "Unnamed key")]
|
|
||||||
name: String,
|
|
||||||
}
|
|
||||||
|
|
||||||
#[derive(Serialize, Deserialize, StructOpt, Debug)]
|
|
||||||
pub struct KeyRenameOpt {
|
|
||||||
/// ID of the key
|
|
||||||
key_id: String,
|
|
||||||
|
|
||||||
/// New name of the key
|
|
||||||
new_name: String,
|
|
||||||
}
|
|
||||||
|
|
||||||
#[derive(Serialize, Deserialize, StructOpt, Debug)]
|
|
||||||
pub struct KeyDeleteOpt {
|
|
||||||
/// ID of the key
|
|
||||||
key_id: String,
|
|
||||||
|
|
||||||
/// Confirm deletion
|
|
||||||
#[structopt(long = "yes")]
|
|
||||||
yes: bool,
|
|
||||||
}
|
|
||||||
|
|
||||||
#[derive(Serialize, Deserialize, StructOpt, Debug, Clone)]
|
|
||||||
pub struct RepairOpt {
|
|
||||||
/// Launch repair operation on all nodes
|
|
||||||
#[structopt(short = "a", long = "all-nodes")]
|
|
||||||
pub all_nodes: bool,
|
|
||||||
|
|
||||||
/// Confirm the launch of the repair operation
|
|
||||||
#[structopt(long = "yes")]
|
|
||||||
pub yes: bool,
|
|
||||||
|
|
||||||
#[structopt(subcommand)]
|
|
||||||
pub what: Option<RepairWhat>,
|
|
||||||
}
|
|
||||||
|
|
||||||
#[derive(Serialize, Deserialize, StructOpt, Debug, Eq, PartialEq, Clone)]
|
|
||||||
pub enum RepairWhat {
|
|
||||||
/// Only do a full sync of metadata tables
|
|
||||||
#[structopt(name = "tables")]
|
|
||||||
Tables,
|
|
||||||
/// Only repair (resync/rebalance) the set of stored blocks
|
|
||||||
#[structopt(name = "blocks")]
|
|
||||||
Blocks,
|
|
||||||
/// Only redo the propagation of object deletions to the version table (slow)
|
|
||||||
#[structopt(name = "versions")]
|
|
||||||
Versions,
|
|
||||||
/// Only redo the propagation of version deletions to the block ref table (extremely slow)
|
|
||||||
#[structopt(name = "block_refs")]
|
|
||||||
BlockRefs,
|
|
||||||
}
|
|
||||||
|
|
||||||
#[tokio::main]
|
#[tokio::main]
|
||||||
async fn main() {
|
async fn main() {
|
||||||
pretty_env_logger::init();
|
pretty_env_logger::init();
|
||||||
|
|
||||||
let opt = Opt::from_args();
|
let opt = Opt::from_args();
|
||||||
|
|
||||||
|
let res = if let Command::Server(server_opt) = opt.cmd {
|
||||||
|
// Abort on panic (same behavior as in Go)
|
||||||
|
std::panic::set_hook(Box::new(|panic_info| {
|
||||||
|
error!("{}", panic_info.to_string());
|
||||||
|
std::process::abort();
|
||||||
|
}));
|
||||||
|
|
||||||
|
server::run_server(server_opt.config_file).await
|
||||||
|
} else {
|
||||||
|
cli_command(opt).await
|
||||||
|
};
|
||||||
|
|
||||||
|
if let Err(e) = res {
|
||||||
|
error!("{}", e);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
async fn cli_command(opt: Opt) -> Result<(), Error> {
|
||||||
let tls_config = match (opt.ca_cert, opt.client_cert, opt.client_key) {
|
let tls_config = match (opt.ca_cert, opt.client_cert, opt.client_key) {
|
||||||
(Some(ca_cert), Some(client_cert), Some(client_key)) => Some(TlsConfig {
|
(Some(ca_cert), Some(client_cert), Some(client_key)) => Some(TlsConfig {
|
||||||
ca_cert,
|
ca_cert,
|
||||||
|
@ -306,245 +84,5 @@ async fn main() {
|
||||||
RpcAddrClient::new(rpc_http_cli.clone(), MEMBERSHIP_RPC_PATH.to_string());
|
RpcAddrClient::new(rpc_http_cli.clone(), MEMBERSHIP_RPC_PATH.to_string());
|
||||||
let admin_rpc_cli = RpcAddrClient::new(rpc_http_cli.clone(), ADMIN_RPC_PATH.to_string());
|
let admin_rpc_cli = RpcAddrClient::new(rpc_http_cli.clone(), ADMIN_RPC_PATH.to_string());
|
||||||
|
|
||||||
let resp = match opt.cmd {
|
cli_cmd(opt.cmd, membership_rpc_cli, admin_rpc_cli, opt.rpc_host).await
|
||||||
Command::Server(server_opt) => {
|
|
||||||
// Abort on panic (same behavior as in Go)
|
|
||||||
std::panic::set_hook(Box::new(|panic_info| {
|
|
||||||
error!("{}", panic_info.to_string());
|
|
||||||
std::process::abort();
|
|
||||||
}));
|
|
||||||
|
|
||||||
server::run_server(server_opt.config_file).await
|
|
||||||
}
|
|
||||||
Command::Status => cmd_status(membership_rpc_cli, opt.rpc_host).await,
|
|
||||||
Command::Node(NodeOperation::Configure(configure_opt)) => {
|
|
||||||
cmd_configure(membership_rpc_cli, opt.rpc_host, configure_opt).await
|
|
||||||
}
|
|
||||||
Command::Node(NodeOperation::Remove(remove_opt)) => {
|
|
||||||
cmd_remove(membership_rpc_cli, opt.rpc_host, remove_opt).await
|
|
||||||
}
|
|
||||||
Command::Bucket(bo) => {
|
|
||||||
cmd_admin(admin_rpc_cli, opt.rpc_host, AdminRPC::BucketOperation(bo)).await
|
|
||||||
}
|
|
||||||
Command::Key(bo) => {
|
|
||||||
cmd_admin(admin_rpc_cli, opt.rpc_host, AdminRPC::KeyOperation(bo)).await
|
|
||||||
}
|
|
||||||
Command::Repair(ro) => {
|
|
||||||
cmd_admin(admin_rpc_cli, opt.rpc_host, AdminRPC::LaunchRepair(ro)).await
|
|
||||||
}
|
|
||||||
};
|
|
||||||
|
|
||||||
if let Err(e) = resp {
|
|
||||||
error!("Error: {}", e);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
async fn cmd_status(rpc_cli: RpcAddrClient<Message>, rpc_host: SocketAddr) -> Result<(), Error> {
|
|
||||||
let status = match rpc_cli
|
|
||||||
.call(&rpc_host, &Message::PullStatus, ADMIN_RPC_TIMEOUT)
|
|
||||||
.await??
|
|
||||||
{
|
|
||||||
Message::AdvertiseNodesUp(nodes) => nodes,
|
|
||||||
resp => return Err(Error::Message(format!("Invalid RPC response: {:?}", resp))),
|
|
||||||
};
|
|
||||||
let config = match rpc_cli
|
|
||||||
.call(&rpc_host, &Message::PullConfig, ADMIN_RPC_TIMEOUT)
|
|
||||||
.await??
|
|
||||||
{
|
|
||||||
Message::AdvertiseConfig(cfg) => cfg,
|
|
||||||
resp => return Err(Error::Message(format!("Invalid RPC response: {:?}", resp))),
|
|
||||||
};
|
|
||||||
|
|
||||||
println!("Healthy nodes:");
|
|
||||||
for adv in status.iter().filter(|x| x.is_up) {
|
|
||||||
if let Some(cfg) = config.members.get(&adv.id) {
|
|
||||||
println!(
|
|
||||||
"{:?}\t{}\t{}\t[{}]\t{}\t{}",
|
|
||||||
adv.id, adv.state_info.hostname, adv.addr, cfg.tag, cfg.datacenter, cfg.capacity
|
|
||||||
);
|
|
||||||
} else {
|
|
||||||
println!(
|
|
||||||
"{:?}\t{}\t{}\tUNCONFIGURED/REMOVED",
|
|
||||||
adv.id, adv.state_info.hostname, adv.addr
|
|
||||||
);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
let status_keys = status.iter().map(|x| x.id).collect::<HashSet<_>>();
|
|
||||||
let failure_case_1 = status.iter().any(|x| !x.is_up);
|
|
||||||
let failure_case_2 = config
|
|
||||||
.members
|
|
||||||
.iter()
|
|
||||||
.any(|(id, _)| !status_keys.contains(id));
|
|
||||||
if failure_case_1 || failure_case_2 {
|
|
||||||
println!("\nFailed nodes:");
|
|
||||||
for adv in status.iter().filter(|x| !x.is_up) {
|
|
||||||
if let Some(cfg) = config.members.get(&adv.id) {
|
|
||||||
println!(
|
|
||||||
"{:?}\t{}\t{}\t[{}]\t{}\t{}\tlast seen: {}s ago",
|
|
||||||
adv.id,
|
|
||||||
adv.state_info.hostname,
|
|
||||||
adv.addr,
|
|
||||||
cfg.tag,
|
|
||||||
cfg.datacenter,
|
|
||||||
cfg.capacity,
|
|
||||||
(now_msec() - adv.last_seen) / 1000,
|
|
||||||
);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
for (id, cfg) in config.members.iter() {
|
|
||||||
if !status.iter().any(|x| x.id == *id) {
|
|
||||||
println!(
|
|
||||||
"{:?}\t{}\t{}\t{}\tnever seen",
|
|
||||||
id, cfg.tag, cfg.datacenter, cfg.capacity
|
|
||||||
);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
Ok(())
|
|
||||||
}
|
|
||||||
|
|
||||||
async fn cmd_configure(
|
|
||||||
rpc_cli: RpcAddrClient<Message>,
|
|
||||||
rpc_host: SocketAddr,
|
|
||||||
args: ConfigureNodeOpt,
|
|
||||||
) -> Result<(), Error> {
|
|
||||||
let status = match rpc_cli
|
|
||||||
.call(&rpc_host, &Message::PullStatus, ADMIN_RPC_TIMEOUT)
|
|
||||||
.await??
|
|
||||||
{
|
|
||||||
Message::AdvertiseNodesUp(nodes) => nodes,
|
|
||||||
resp => return Err(Error::Message(format!("Invalid RPC response: {:?}", resp))),
|
|
||||||
};
|
|
||||||
|
|
||||||
let mut candidates = vec![];
|
|
||||||
for adv in status.iter() {
|
|
||||||
if hex::encode(&adv.id).starts_with(&args.node_id) {
|
|
||||||
candidates.push(adv.id);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
if candidates.len() != 1 {
|
|
||||||
return Err(Error::Message(format!(
|
|
||||||
"{} matching nodes",
|
|
||||||
candidates.len()
|
|
||||||
)));
|
|
||||||
}
|
|
||||||
|
|
||||||
let mut config = match rpc_cli
|
|
||||||
.call(&rpc_host, &Message::PullConfig, ADMIN_RPC_TIMEOUT)
|
|
||||||
.await??
|
|
||||||
{
|
|
||||||
Message::AdvertiseConfig(cfg) => cfg,
|
|
||||||
resp => return Err(Error::Message(format!("Invalid RPC response: {:?}", resp))),
|
|
||||||
};
|
|
||||||
|
|
||||||
let new_entry = match config.members.get(&candidates[0]) {
|
|
||||||
None => NetworkConfigEntry {
|
|
||||||
datacenter: args
|
|
||||||
.datacenter
|
|
||||||
.expect("Please specifiy a datacenter with the -d flag"),
|
|
||||||
capacity: args
|
|
||||||
.capacity
|
|
||||||
.expect("Please specifiy a capacity with the -c flag"),
|
|
||||||
tag: args.tag.unwrap_or("".to_string()),
|
|
||||||
},
|
|
||||||
Some(old) => NetworkConfigEntry {
|
|
||||||
datacenter: args.datacenter.unwrap_or(old.datacenter.to_string()),
|
|
||||||
capacity: args.capacity.unwrap_or(old.capacity),
|
|
||||||
tag: args.tag.unwrap_or(old.tag.to_string()),
|
|
||||||
},
|
|
||||||
};
|
|
||||||
|
|
||||||
config.members.insert(candidates[0].clone(), new_entry);
|
|
||||||
config.version += 1;
|
|
||||||
|
|
||||||
rpc_cli
|
|
||||||
.call(
|
|
||||||
&rpc_host,
|
|
||||||
&Message::AdvertiseConfig(config),
|
|
||||||
ADMIN_RPC_TIMEOUT,
|
|
||||||
)
|
|
||||||
.await??;
|
|
||||||
Ok(())
|
|
||||||
}
|
|
||||||
|
|
||||||
async fn cmd_remove(
|
|
||||||
rpc_cli: RpcAddrClient<Message>,
|
|
||||||
rpc_host: SocketAddr,
|
|
||||||
args: RemoveNodeOpt,
|
|
||||||
) -> Result<(), Error> {
|
|
||||||
let mut config = match rpc_cli
|
|
||||||
.call(&rpc_host, &Message::PullConfig, ADMIN_RPC_TIMEOUT)
|
|
||||||
.await??
|
|
||||||
{
|
|
||||||
Message::AdvertiseConfig(cfg) => cfg,
|
|
||||||
resp => return Err(Error::Message(format!("Invalid RPC response: {:?}", resp))),
|
|
||||||
};
|
|
||||||
|
|
||||||
let mut candidates = vec![];
|
|
||||||
for (key, _) in config.members.iter() {
|
|
||||||
if hex::encode(key).starts_with(&args.node_id) {
|
|
||||||
candidates.push(*key);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
if candidates.len() != 1 {
|
|
||||||
return Err(Error::Message(format!(
|
|
||||||
"{} matching nodes",
|
|
||||||
candidates.len()
|
|
||||||
)));
|
|
||||||
}
|
|
||||||
|
|
||||||
if !args.yes {
|
|
||||||
return Err(Error::Message(format!(
|
|
||||||
"Add the flag --yes to really remove {:?} from the cluster",
|
|
||||||
candidates[0]
|
|
||||||
)));
|
|
||||||
}
|
|
||||||
|
|
||||||
config.members.remove(&candidates[0]);
|
|
||||||
config.version += 1;
|
|
||||||
|
|
||||||
rpc_cli
|
|
||||||
.call(
|
|
||||||
&rpc_host,
|
|
||||||
&Message::AdvertiseConfig(config),
|
|
||||||
ADMIN_RPC_TIMEOUT,
|
|
||||||
)
|
|
||||||
.await??;
|
|
||||||
Ok(())
|
|
||||||
}
|
|
||||||
|
|
||||||
async fn cmd_admin(
|
|
||||||
rpc_cli: RpcAddrClient<AdminRPC>,
|
|
||||||
rpc_host: SocketAddr,
|
|
||||||
args: AdminRPC,
|
|
||||||
) -> Result<(), Error> {
|
|
||||||
match rpc_cli.call(&rpc_host, args, ADMIN_RPC_TIMEOUT).await?? {
|
|
||||||
AdminRPC::Ok(msg) => {
|
|
||||||
println!("{}", msg);
|
|
||||||
}
|
|
||||||
AdminRPC::BucketList(bl) => {
|
|
||||||
println!("List of buckets:");
|
|
||||||
for bucket in bl {
|
|
||||||
println!("{}", bucket);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
AdminRPC::BucketInfo(bucket) => {
|
|
||||||
println!("{:?}", bucket);
|
|
||||||
}
|
|
||||||
AdminRPC::KeyList(kl) => {
|
|
||||||
println!("List of keys:");
|
|
||||||
for key in kl {
|
|
||||||
println!("{}\t{}", key.0, key.1);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
AdminRPC::KeyInfo(key) => {
|
|
||||||
println!("{:?}", key);
|
|
||||||
}
|
|
||||||
r => {
|
|
||||||
error!("Unexpected response: {:?}", r);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
Ok(())
|
|
||||||
}
|
}
|
||||||
|
|
|
@ -16,7 +16,13 @@ pub struct Repair {
|
||||||
}
|
}
|
||||||
|
|
||||||
impl Repair {
|
impl Repair {
|
||||||
pub async fn repair_worker(
|
pub async fn repair_worker(&self, opt: RepairOpt, must_exit: watch::Receiver<bool>) {
|
||||||
|
if let Err(e) = self.repair_worker_aux(opt, must_exit).await {
|
||||||
|
warn!("Repair worker failed with error: {}", e);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
async fn repair_worker_aux(
|
||||||
&self,
|
&self,
|
||||||
opt: RepairOpt,
|
opt: RepairOpt,
|
||||||
must_exit: watch::Receiver<bool>,
|
must_exit: watch::Receiver<bool>,
|
||||||
|
@ -25,41 +31,11 @@ impl Repair {
|
||||||
|
|
||||||
if todo(RepairWhat::Tables) {
|
if todo(RepairWhat::Tables) {
|
||||||
info!("Launching a full sync of tables");
|
info!("Launching a full sync of tables");
|
||||||
self.garage
|
self.garage.bucket_table.syncer.add_full_sync();
|
||||||
.bucket_table
|
self.garage.object_table.syncer.add_full_sync();
|
||||||
.syncer
|
self.garage.version_table.syncer.add_full_sync();
|
||||||
.load_full()
|
self.garage.block_ref_table.syncer.add_full_sync();
|
||||||
.unwrap()
|
self.garage.key_table.syncer.add_full_sync();
|
||||||
.add_full_scan()
|
|
||||||
.await;
|
|
||||||
self.garage
|
|
||||||
.object_table
|
|
||||||
.syncer
|
|
||||||
.load_full()
|
|
||||||
.unwrap()
|
|
||||||
.add_full_scan()
|
|
||||||
.await;
|
|
||||||
self.garage
|
|
||||||
.version_table
|
|
||||||
.syncer
|
|
||||||
.load_full()
|
|
||||||
.unwrap()
|
|
||||||
.add_full_scan()
|
|
||||||
.await;
|
|
||||||
self.garage
|
|
||||||
.block_ref_table
|
|
||||||
.syncer
|
|
||||||
.load_full()
|
|
||||||
.unwrap()
|
|
||||||
.add_full_scan()
|
|
||||||
.await;
|
|
||||||
self.garage
|
|
||||||
.key_table
|
|
||||||
.syncer
|
|
||||||
.load_full()
|
|
||||||
.unwrap()
|
|
||||||
.add_full_scan()
|
|
||||||
.await;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
// TODO: wait for full sync to finish before proceeding to the rest?
|
// TODO: wait for full sync to finish before proceeding to the rest?
|
||||||
|
@ -93,11 +69,13 @@ impl Repair {
|
||||||
async fn repair_versions(&self, must_exit: &watch::Receiver<bool>) -> Result<(), Error> {
|
async fn repair_versions(&self, must_exit: &watch::Receiver<bool>) -> Result<(), Error> {
|
||||||
let mut pos = vec![];
|
let mut pos = vec![];
|
||||||
|
|
||||||
while let Some((item_key, item_bytes)) = self.garage.version_table.store.get_gt(&pos)? {
|
while let Some((item_key, item_bytes)) =
|
||||||
|
self.garage.version_table.data.store.get_gt(&pos)?
|
||||||
|
{
|
||||||
pos = item_key.to_vec();
|
pos = item_key.to_vec();
|
||||||
|
|
||||||
let version = rmp_serde::decode::from_read_ref::<_, Version>(item_bytes.as_ref())?;
|
let version = rmp_serde::decode::from_read_ref::<_, Version>(item_bytes.as_ref())?;
|
||||||
if version.deleted {
|
if version.deleted.get() {
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
let object = self
|
let object = self
|
||||||
|
@ -110,13 +88,7 @@ impl Repair {
|
||||||
.versions()
|
.versions()
|
||||||
.iter()
|
.iter()
|
||||||
.any(|x| x.uuid == version.uuid && x.state != ObjectVersionState::Aborted),
|
.any(|x| x.uuid == version.uuid && x.state != ObjectVersionState::Aborted),
|
||||||
None => {
|
None => false,
|
||||||
warn!(
|
|
||||||
"Repair versions: object for version {:?} not found, skipping.",
|
|
||||||
version
|
|
||||||
);
|
|
||||||
continue;
|
|
||||||
}
|
|
||||||
};
|
};
|
||||||
if !version_exists {
|
if !version_exists {
|
||||||
info!("Repair versions: marking version as deleted: {:?}", version);
|
info!("Repair versions: marking version as deleted: {:?}", version);
|
||||||
|
@ -127,7 +99,6 @@ impl Repair {
|
||||||
version.bucket,
|
version.bucket,
|
||||||
version.key,
|
version.key,
|
||||||
true,
|
true,
|
||||||
vec![],
|
|
||||||
))
|
))
|
||||||
.await?;
|
.await?;
|
||||||
}
|
}
|
||||||
|
@ -142,11 +113,13 @@ impl Repair {
|
||||||
async fn repair_block_ref(&self, must_exit: &watch::Receiver<bool>) -> Result<(), Error> {
|
async fn repair_block_ref(&self, must_exit: &watch::Receiver<bool>) -> Result<(), Error> {
|
||||||
let mut pos = vec![];
|
let mut pos = vec![];
|
||||||
|
|
||||||
while let Some((item_key, item_bytes)) = self.garage.block_ref_table.store.get_gt(&pos)? {
|
while let Some((item_key, item_bytes)) =
|
||||||
|
self.garage.block_ref_table.data.store.get_gt(&pos)?
|
||||||
|
{
|
||||||
pos = item_key.to_vec();
|
pos = item_key.to_vec();
|
||||||
|
|
||||||
let block_ref = rmp_serde::decode::from_read_ref::<_, BlockRef>(item_bytes.as_ref())?;
|
let block_ref = rmp_serde::decode::from_read_ref::<_, BlockRef>(item_bytes.as_ref())?;
|
||||||
if block_ref.deleted {
|
if block_ref.deleted.get() {
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
let version = self
|
let version = self
|
||||||
|
@ -154,16 +127,8 @@ impl Repair {
|
||||||
.version_table
|
.version_table
|
||||||
.get(&block_ref.version, &EmptyKey)
|
.get(&block_ref.version, &EmptyKey)
|
||||||
.await?;
|
.await?;
|
||||||
let ref_exists = match version {
|
// The version might not exist if it has been GC'ed
|
||||||
Some(v) => !v.deleted,
|
let ref_exists = version.map(|v| !v.deleted.get()).unwrap_or(false);
|
||||||
None => {
|
|
||||||
warn!(
|
|
||||||
"Block ref repair: version for block ref {:?} not found, skipping.",
|
|
||||||
block_ref
|
|
||||||
);
|
|
||||||
continue;
|
|
||||||
}
|
|
||||||
};
|
|
||||||
if !ref_exists {
|
if !ref_exists {
|
||||||
info!(
|
info!(
|
||||||
"Repair block ref: marking block_ref as deleted: {:?}",
|
"Repair block ref: marking block_ref as deleted: {:?}",
|
||||||
|
@ -174,7 +139,7 @@ impl Repair {
|
||||||
.insert(&BlockRef {
|
.insert(&BlockRef {
|
||||||
block: block_ref.block,
|
block: block_ref.block,
|
||||||
version: block_ref.version,
|
version: block_ref.version,
|
||||||
deleted: true,
|
deleted: true.into(),
|
||||||
})
|
})
|
||||||
.await?;
|
.await?;
|
||||||
}
|
}
|
||||||
|
|
|
@ -21,13 +21,13 @@ async fn shutdown_signal(send_cancel: watch::Sender<bool>) -> Result<(), Error>
|
||||||
.await
|
.await
|
||||||
.expect("failed to install CTRL+C signal handler");
|
.expect("failed to install CTRL+C signal handler");
|
||||||
info!("Received CTRL+C, shutting down.");
|
info!("Received CTRL+C, shutting down.");
|
||||||
send_cancel.broadcast(true)?;
|
send_cancel.send(true)?;
|
||||||
Ok(())
|
Ok(())
|
||||||
}
|
}
|
||||||
|
|
||||||
async fn wait_from(mut chan: watch::Receiver<bool>) -> () {
|
async fn wait_from(mut chan: watch::Receiver<bool>) -> () {
|
||||||
while let Some(exit_now) = chan.recv().await {
|
while !*chan.borrow() {
|
||||||
if exit_now {
|
if chan.changed().await.is_err() {
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -40,37 +40,22 @@ pub async fn run_server(config_file: PathBuf) -> Result<(), Error> {
|
||||||
info!("Opening database...");
|
info!("Opening database...");
|
||||||
let mut db_path = config.metadata_dir.clone();
|
let mut db_path = config.metadata_dir.clone();
|
||||||
db_path.push("db");
|
db_path.push("db");
|
||||||
let db = match sled::open(&db_path) {
|
let db = sled::open(&db_path).expect("Unable to open sled DB");
|
||||||
Ok(db) => db,
|
|
||||||
Err(e) => {
|
|
||||||
warn!("Old DB could not be openned ({}), attempting migration.", e);
|
|
||||||
let old = old_sled::open(&db_path).expect("Unable to open old DB for migration");
|
|
||||||
let mut new_path = config.metadata_dir.clone();
|
|
||||||
new_path.push("db2");
|
|
||||||
let new = sled::open(&new_path).expect("Unable to open new DB for migration");
|
|
||||||
new.import(old.export());
|
|
||||||
if old.checksum().expect("unable to compute old db checksum")
|
|
||||||
!= new.checksum().expect("unable to compute new db checksum")
|
|
||||||
{
|
|
||||||
panic!("db checksums don't match after migration");
|
|
||||||
}
|
|
||||||
drop(new);
|
|
||||||
drop(old);
|
|
||||||
std::fs::remove_dir_all(&db_path).expect("Cannot remove old DB folder");
|
|
||||||
std::fs::rename(new_path, &db_path)
|
|
||||||
.expect("Cannot move new DB folder to correct place");
|
|
||||||
sled::open(db_path).expect("Unable to open new DB after migration")
|
|
||||||
}
|
|
||||||
};
|
|
||||||
|
|
||||||
info!("Initialize RPC server...");
|
info!("Initialize RPC server...");
|
||||||
let mut rpc_server = RpcServer::new(config.rpc_bind_addr.clone(), config.rpc_tls.clone());
|
let mut rpc_server = RpcServer::new(config.rpc_bind_addr.clone(), config.rpc_tls.clone());
|
||||||
|
|
||||||
info!("Initializing background runner...");
|
info!("Initializing background runner...");
|
||||||
let (send_cancel, watch_cancel) = watch::channel(false);
|
let (send_cancel, watch_cancel) = watch::channel(false);
|
||||||
let background = BackgroundRunner::new(16, watch_cancel.clone());
|
let (background, await_background_done) = BackgroundRunner::new(16, watch_cancel.clone());
|
||||||
|
|
||||||
let garage = Garage::new(config, db, background.clone(), &mut rpc_server).await;
|
info!("Initializing Garage main data store...");
|
||||||
|
let garage = Garage::new(config.clone(), db, background, &mut rpc_server);
|
||||||
|
let bootstrap = garage.system.clone().bootstrap(
|
||||||
|
&config.bootstrap_peers[..],
|
||||||
|
config.consul_host,
|
||||||
|
config.consul_service_name,
|
||||||
|
);
|
||||||
|
|
||||||
info!("Crate admin RPC handler...");
|
info!("Crate admin RPC handler...");
|
||||||
AdminRpcHandler::new(garage.clone()).register_handler(&mut rpc_server);
|
AdminRpcHandler::new(garage.clone()).register_handler(&mut rpc_server);
|
||||||
|
@ -78,18 +63,10 @@ pub async fn run_server(config_file: PathBuf) -> Result<(), Error> {
|
||||||
info!("Initializing RPC and API servers...");
|
info!("Initializing RPC and API servers...");
|
||||||
let run_rpc_server = Arc::new(rpc_server).run(wait_from(watch_cancel.clone()));
|
let run_rpc_server = Arc::new(rpc_server).run(wait_from(watch_cancel.clone()));
|
||||||
let api_server = api_server::run_api_server(garage.clone(), wait_from(watch_cancel.clone()));
|
let api_server = api_server::run_api_server(garage.clone(), wait_from(watch_cancel.clone()));
|
||||||
let web_server = web_server::run_web_server(garage.clone(), wait_from(watch_cancel.clone()));
|
let web_server = web_server::run_web_server(garage, wait_from(watch_cancel.clone()));
|
||||||
|
|
||||||
futures::try_join!(
|
futures::try_join!(
|
||||||
garage
|
bootstrap.map(|rv| {
|
||||||
.system
|
|
||||||
.clone()
|
|
||||||
.bootstrap(
|
|
||||||
&garage.config.bootstrap_peers[..],
|
|
||||||
garage.config.consul_host.clone(),
|
|
||||||
garage.config.consul_service_name.clone()
|
|
||||||
)
|
|
||||||
.map(|rv| {
|
|
||||||
info!("Bootstrap done");
|
info!("Bootstrap done");
|
||||||
Ok(rv)
|
Ok(rv)
|
||||||
}),
|
}),
|
||||||
|
@ -105,9 +82,9 @@ pub async fn run_server(config_file: PathBuf) -> Result<(), Error> {
|
||||||
info!("Web server exited");
|
info!("Web server exited");
|
||||||
rv
|
rv
|
||||||
}),
|
}),
|
||||||
background.run().map(|rv| {
|
await_background_done.map(|rv| {
|
||||||
info!("Background runner exited");
|
info!("Background runner exited: {:?}", rv);
|
||||||
Ok(rv)
|
Ok(())
|
||||||
}),
|
}),
|
||||||
shutdown_signal(send_cancel),
|
shutdown_signal(send_cancel),
|
||||||
)?;
|
)?;
|
||||||
|
|
|
@ -16,23 +16,18 @@ path = "lib.rs"
|
||||||
garage_util = { version = "0.1.1", path = "../util" }
|
garage_util = { version = "0.1.1", path = "../util" }
|
||||||
garage_rpc = { version = "0.1.1", path = "../rpc" }
|
garage_rpc = { version = "0.1.1", path = "../rpc" }
|
||||||
garage_table = { version = "0.1.1", path = "../table" }
|
garage_table = { version = "0.1.1", path = "../table" }
|
||||||
model010 = { package = "garage_model_010b", version = "0.0.1" }
|
|
||||||
|
|
||||||
bytes = "0.4"
|
rand = "0.8"
|
||||||
rand = "0.7"
|
hex = "0.4"
|
||||||
hex = "0.3"
|
arc-swap = "1.0"
|
||||||
sha2 = "0.8"
|
|
||||||
arc-swap = "0.4"
|
|
||||||
log = "0.4"
|
log = "0.4"
|
||||||
|
|
||||||
sled = "0.34"
|
sled = "0.34"
|
||||||
|
|
||||||
rmp-serde = "0.14.3"
|
rmp-serde = "0.15"
|
||||||
serde = { version = "1.0", default-features = false, features = ["derive", "rc"] }
|
serde = { version = "1.0", default-features = false, features = ["derive", "rc"] }
|
||||||
serde_bytes = "0.11"
|
serde_bytes = "0.11"
|
||||||
|
|
||||||
async-trait = "0.1.30"
|
|
||||||
futures = "0.3"
|
futures = "0.3"
|
||||||
futures-util = "0.3"
|
futures-util = "0.3"
|
||||||
tokio = { version = "0.2", default-features = false, features = ["rt-core", "rt-threaded", "io-driver", "net", "tcp", "time", "macros", "sync", "signal", "fs"] }
|
tokio = { version = "1.0", default-features = false, features = ["rt", "rt-multi-thread", "io-util", "net", "time", "macros", "sync", "signal", "fs"] }
|
||||||
|
|
||||||
|
|
|
@ -5,22 +5,20 @@ use std::time::Duration;
|
||||||
use arc_swap::ArcSwapOption;
|
use arc_swap::ArcSwapOption;
|
||||||
use futures::future::*;
|
use futures::future::*;
|
||||||
use futures::select;
|
use futures::select;
|
||||||
use futures::stream::*;
|
|
||||||
use serde::{Deserialize, Serialize};
|
use serde::{Deserialize, Serialize};
|
||||||
use tokio::fs;
|
use tokio::fs;
|
||||||
use tokio::prelude::*;
|
use tokio::io::{AsyncReadExt, AsyncWriteExt};
|
||||||
use tokio::sync::{watch, Mutex, Notify};
|
use tokio::sync::{watch, Mutex, Notify};
|
||||||
|
|
||||||
use garage_util::data;
|
|
||||||
use garage_util::data::*;
|
use garage_util::data::*;
|
||||||
use garage_util::error::Error;
|
use garage_util::error::Error;
|
||||||
|
use garage_util::time::*;
|
||||||
|
|
||||||
use garage_rpc::membership::System;
|
use garage_rpc::membership::System;
|
||||||
use garage_rpc::rpc_client::*;
|
use garage_rpc::rpc_client::*;
|
||||||
use garage_rpc::rpc_server::*;
|
use garage_rpc::rpc_server::*;
|
||||||
|
|
||||||
use garage_table::table_sharded::TableShardedReplication;
|
use garage_table::replication::{sharded::TableShardedReplication, TableReplication};
|
||||||
use garage_table::TableReplication;
|
|
||||||
|
|
||||||
use crate::block_ref_table::*;
|
use crate::block_ref_table::*;
|
||||||
|
|
||||||
|
@ -28,7 +26,10 @@ use crate::garage::Garage;
|
||||||
|
|
||||||
pub const INLINE_THRESHOLD: usize = 3072;
|
pub const INLINE_THRESHOLD: usize = 3072;
|
||||||
|
|
||||||
|
pub const BACKGROUND_WORKERS: u64 = 1;
|
||||||
|
|
||||||
const BLOCK_RW_TIMEOUT: Duration = Duration::from_secs(42);
|
const BLOCK_RW_TIMEOUT: Duration = Duration::from_secs(42);
|
||||||
|
const BLOCK_GC_TIMEOUT: Duration = Duration::from_secs(60);
|
||||||
const NEED_BLOCK_QUERY_TIMEOUT: Duration = Duration::from_secs(5);
|
const NEED_BLOCK_QUERY_TIMEOUT: Duration = Duration::from_secs(5);
|
||||||
const RESYNC_RETRY_TIMEOUT: Duration = Duration::from_secs(10);
|
const RESYNC_RETRY_TIMEOUT: Duration = Duration::from_secs(10);
|
||||||
|
|
||||||
|
@ -56,14 +57,14 @@ pub struct BlockManager {
|
||||||
pub data_dir: PathBuf,
|
pub data_dir: PathBuf,
|
||||||
pub data_dir_lock: Mutex<()>,
|
pub data_dir_lock: Mutex<()>,
|
||||||
|
|
||||||
pub rc: sled::Tree,
|
rc: sled::Tree,
|
||||||
|
|
||||||
pub resync_queue: sled::Tree,
|
resync_queue: sled::Tree,
|
||||||
pub resync_notify: Notify,
|
resync_notify: Notify,
|
||||||
|
|
||||||
pub system: Arc<System>,
|
system: Arc<System>,
|
||||||
rpc_client: Arc<RpcClient<Message>>,
|
rpc_client: Arc<RpcClient<Message>>,
|
||||||
pub garage: ArcSwapOption<Garage>,
|
pub(crate) garage: ArcSwapOption<Garage>,
|
||||||
}
|
}
|
||||||
|
|
||||||
impl BlockManager {
|
impl BlockManager {
|
||||||
|
@ -77,7 +78,6 @@ impl BlockManager {
|
||||||
let rc = db
|
let rc = db
|
||||||
.open_tree("block_local_rc")
|
.open_tree("block_local_rc")
|
||||||
.expect("Unable to open block_local_rc tree");
|
.expect("Unable to open block_local_rc tree");
|
||||||
rc.set_merge_operator(rc_merge);
|
|
||||||
|
|
||||||
let resync_queue = db
|
let resync_queue = db
|
||||||
.open_tree("block_local_resync_queue")
|
.open_tree("block_local_resync_queue")
|
||||||
|
@ -127,18 +127,16 @@ impl BlockManager {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
pub async fn spawn_background_worker(self: Arc<Self>) {
|
pub fn spawn_background_worker(self: Arc<Self>) {
|
||||||
// Launch 2 simultaneous workers for background resync loop preprocessing
|
// Launch 2 simultaneous workers for background resync loop preprocessing
|
||||||
for i in 0..2usize {
|
for i in 0..BACKGROUND_WORKERS {
|
||||||
let bm2 = self.clone();
|
let bm2 = self.clone();
|
||||||
let background = self.system.background.clone();
|
let background = self.system.background.clone();
|
||||||
tokio::spawn(async move {
|
tokio::spawn(async move {
|
||||||
tokio::time::delay_for(Duration::from_secs(10)).await;
|
tokio::time::sleep(Duration::from_secs(10 * (i + 1))).await;
|
||||||
background
|
background.spawn_worker(format!("block resync worker {}", i), move |must_exit| {
|
||||||
.spawn_worker(format!("block resync worker {}", i), move |must_exit| {
|
|
||||||
bm2.resync_loop(must_exit)
|
bm2.resync_loop(must_exit)
|
||||||
})
|
});
|
||||||
.await;
|
|
||||||
});
|
});
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -168,7 +166,7 @@ impl BlockManager {
|
||||||
Ok(f) => f,
|
Ok(f) => f,
|
||||||
Err(e) => {
|
Err(e) => {
|
||||||
// Not found but maybe we should have had it ??
|
// Not found but maybe we should have had it ??
|
||||||
self.put_to_resync(hash, 0)?;
|
self.put_to_resync(hash, Duration::from_millis(0))?;
|
||||||
return Err(Into::into(e));
|
return Err(Into::into(e));
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
@ -176,11 +174,16 @@ impl BlockManager {
|
||||||
f.read_to_end(&mut data).await?;
|
f.read_to_end(&mut data).await?;
|
||||||
drop(f);
|
drop(f);
|
||||||
|
|
||||||
if data::sha256sum(&data[..]) != *hash {
|
if blake2sum(&data[..]) != *hash {
|
||||||
let _lock = self.data_dir_lock.lock().await;
|
let _lock = self.data_dir_lock.lock().await;
|
||||||
warn!("Block {:?} is corrupted. Deleting and resyncing.", hash);
|
warn!(
|
||||||
fs::remove_file(path).await?;
|
"Block {:?} is corrupted. Renaming to .corrupted and resyncing.",
|
||||||
self.put_to_resync(&hash, 0)?;
|
hash
|
||||||
|
);
|
||||||
|
let mut path2 = path.clone();
|
||||||
|
path2.set_extension(".corrupted");
|
||||||
|
fs::rename(path, path2).await?;
|
||||||
|
self.put_to_resync(&hash, Duration::from_millis(0))?;
|
||||||
return Err(Error::CorruptData(*hash));
|
return Err(Error::CorruptData(*hash));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -191,7 +194,7 @@ impl BlockManager {
|
||||||
let needed = self
|
let needed = self
|
||||||
.rc
|
.rc
|
||||||
.get(hash.as_ref())?
|
.get(hash.as_ref())?
|
||||||
.map(|x| u64_from_bytes(x.as_ref()) > 0)
|
.map(|x| u64_from_be_bytes(x) > 0)
|
||||||
.unwrap_or(false);
|
.unwrap_or(false);
|
||||||
if needed {
|
if needed {
|
||||||
let path = self.block_path(hash);
|
let path = self.block_path(hash);
|
||||||
|
@ -215,84 +218,95 @@ impl BlockManager {
|
||||||
}
|
}
|
||||||
|
|
||||||
pub fn block_incref(&self, hash: &Hash) -> Result<(), Error> {
|
pub fn block_incref(&self, hash: &Hash) -> Result<(), Error> {
|
||||||
let old_rc = self.rc.get(&hash)?;
|
let old_rc = self.rc.fetch_and_update(&hash, |old| {
|
||||||
self.rc.merge(&hash, vec![1])?;
|
let old_v = old.map(u64_from_be_bytes).unwrap_or(0);
|
||||||
if old_rc.map(|x| u64_from_bytes(&x[..]) == 0).unwrap_or(true) {
|
Some(u64::to_be_bytes(old_v + 1).to_vec())
|
||||||
self.put_to_resync(&hash, BLOCK_RW_TIMEOUT.as_millis() as u64)?;
|
})?;
|
||||||
|
let old_rc = old_rc.map(u64_from_be_bytes).unwrap_or(0);
|
||||||
|
if old_rc == 0 {
|
||||||
|
self.put_to_resync(&hash, BLOCK_RW_TIMEOUT)?;
|
||||||
}
|
}
|
||||||
Ok(())
|
Ok(())
|
||||||
}
|
}
|
||||||
|
|
||||||
pub fn block_decref(&self, hash: &Hash) -> Result<(), Error> {
|
pub fn block_decref(&self, hash: &Hash) -> Result<(), Error> {
|
||||||
let new_rc = self.rc.merge(&hash, vec![0])?;
|
let new_rc = self.rc.update_and_fetch(&hash, |old| {
|
||||||
if new_rc.map(|x| u64_from_bytes(&x[..]) == 0).unwrap_or(true) {
|
let old_v = old.map(u64_from_be_bytes).unwrap_or(0);
|
||||||
self.put_to_resync(&hash, 0)?;
|
if old_v > 1 {
|
||||||
|
Some(u64::to_be_bytes(old_v - 1).to_vec())
|
||||||
|
} else {
|
||||||
|
None
|
||||||
|
}
|
||||||
|
})?;
|
||||||
|
if new_rc.is_none() {
|
||||||
|
self.put_to_resync(&hash, BLOCK_GC_TIMEOUT)?;
|
||||||
}
|
}
|
||||||
Ok(())
|
Ok(())
|
||||||
}
|
}
|
||||||
|
|
||||||
fn put_to_resync(&self, hash: &Hash, delay_millis: u64) -> Result<(), Error> {
|
fn put_to_resync(&self, hash: &Hash, delay: Duration) -> Result<(), Error> {
|
||||||
let when = now_msec() + delay_millis;
|
let when = now_msec() + delay.as_millis() as u64;
|
||||||
trace!("Put resync_queue: {} {:?}", when, hash);
|
trace!("Put resync_queue: {} {:?}", when, hash);
|
||||||
let mut key = u64::to_be_bytes(when).to_vec();
|
let mut key = u64::to_be_bytes(when).to_vec();
|
||||||
key.extend(hash.as_ref());
|
key.extend(hash.as_ref());
|
||||||
self.resync_queue.insert(key, hash.as_ref())?;
|
self.resync_queue.insert(key, hash.as_ref())?;
|
||||||
self.resync_notify.notify();
|
self.resync_notify.notify_waiters();
|
||||||
Ok(())
|
Ok(())
|
||||||
}
|
}
|
||||||
|
|
||||||
async fn resync_loop(
|
async fn resync_loop(self: Arc<Self>, mut must_exit: watch::Receiver<bool>) {
|
||||||
self: Arc<Self>,
|
|
||||||
mut must_exit: watch::Receiver<bool>,
|
|
||||||
) -> Result<(), Error> {
|
|
||||||
let mut n_failures = 0usize;
|
|
||||||
while !*must_exit.borrow() {
|
while !*must_exit.borrow() {
|
||||||
if let Some((time_bytes, hash_bytes)) = self.resync_queue.pop_min()? {
|
if let Err(e) = self.resync_iter(&mut must_exit).await {
|
||||||
let time_msec = u64_from_bytes(&time_bytes[0..8]);
|
warn!("Error in block resync loop: {}", e);
|
||||||
|
select! {
|
||||||
|
_ = tokio::time::sleep(Duration::from_secs(1)).fuse() => (),
|
||||||
|
_ = must_exit.changed().fuse() => (),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
async fn resync_iter(&self, must_exit: &mut watch::Receiver<bool>) -> Result<(), Error> {
|
||||||
|
if let Some(first_item) = self.resync_queue.iter().next() {
|
||||||
|
let (time_bytes, hash_bytes) = first_item?;
|
||||||
|
let time_msec = u64_from_be_bytes(&time_bytes[0..8]);
|
||||||
let now = now_msec();
|
let now = now_msec();
|
||||||
if now >= time_msec {
|
if now >= time_msec {
|
||||||
let mut hash = [0u8; 32];
|
let hash = Hash::try_from(&hash_bytes[..]).unwrap();
|
||||||
hash.copy_from_slice(hash_bytes.as_ref());
|
let res = self.resync_block(&hash).await;
|
||||||
let hash = Hash::from(hash);
|
if let Err(e) = &res {
|
||||||
|
warn!("Error when resyncing {:?}: {}", hash, e);
|
||||||
if let Err(e) = self.resync_iter(&hash).await {
|
self.put_to_resync(&hash, RESYNC_RETRY_TIMEOUT)?;
|
||||||
warn!("Failed to resync block {:?}, retrying later: {}", hash, e);
|
|
||||||
self.put_to_resync(&hash, RESYNC_RETRY_TIMEOUT.as_millis() as u64)?;
|
|
||||||
n_failures += 1;
|
|
||||||
if n_failures >= 10 {
|
|
||||||
warn!("Too many resync failures, throttling.");
|
|
||||||
tokio::time::delay_for(Duration::from_secs(1)).await;
|
|
||||||
}
|
}
|
||||||
|
self.resync_queue.remove(&time_bytes)?;
|
||||||
|
res?; // propagate error to delay main loop
|
||||||
} else {
|
} else {
|
||||||
n_failures = 0;
|
let delay = tokio::time::sleep(Duration::from_millis(time_msec - now));
|
||||||
}
|
|
||||||
} else {
|
|
||||||
self.resync_queue.insert(time_bytes, hash_bytes)?;
|
|
||||||
let delay = tokio::time::delay_for(Duration::from_millis(time_msec - now));
|
|
||||||
select! {
|
select! {
|
||||||
_ = delay.fuse() => (),
|
_ = delay.fuse() => (),
|
||||||
_ = self.resync_notify.notified().fuse() => (),
|
_ = self.resync_notify.notified().fuse() => (),
|
||||||
_ = must_exit.recv().fuse() => (),
|
_ = must_exit.changed().fuse() => (),
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
select! {
|
select! {
|
||||||
_ = self.resync_notify.notified().fuse() => (),
|
_ = self.resync_notify.notified().fuse() => (),
|
||||||
_ = must_exit.recv().fuse() => (),
|
_ = must_exit.changed().fuse() => (),
|
||||||
}
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
Ok(())
|
Ok(())
|
||||||
}
|
}
|
||||||
|
|
||||||
async fn resync_iter(&self, hash: &Hash) -> Result<(), Error> {
|
async fn resync_block(&self, hash: &Hash) -> Result<(), Error> {
|
||||||
|
let lock = self.data_dir_lock.lock().await;
|
||||||
|
|
||||||
let path = self.block_path(hash);
|
let path = self.block_path(hash);
|
||||||
|
|
||||||
let exists = fs::metadata(&path).await.is_ok();
|
let exists = fs::metadata(&path).await.is_ok();
|
||||||
let needed = self
|
let needed = self
|
||||||
.rc
|
.rc
|
||||||
.get(hash.as_ref())?
|
.get(hash.as_ref())?
|
||||||
.map(|x| u64_from_bytes(x.as_ref()) > 0)
|
.map(|x| u64_from_be_bytes(x) > 0)
|
||||||
.unwrap_or(false);
|
.unwrap_or(false);
|
||||||
|
|
||||||
if exists != needed {
|
if exists != needed {
|
||||||
|
@ -305,9 +319,10 @@ impl BlockManager {
|
||||||
if exists && !needed {
|
if exists && !needed {
|
||||||
trace!("Offloading block {:?}", hash);
|
trace!("Offloading block {:?}", hash);
|
||||||
|
|
||||||
let ring = self.system.ring.borrow().clone();
|
let mut who = self.replication.write_nodes(&hash);
|
||||||
|
if who.len() < self.replication.write_quorum() {
|
||||||
let mut who = self.replication.replication_nodes(&hash, &ring);
|
return Err(Error::Message(format!("Not trying to offload block because we don't have a quorum of nodes to write to")));
|
||||||
|
}
|
||||||
who.retain(|id| *id != self.system.id);
|
who.retain(|id| *id != self.system.id);
|
||||||
|
|
||||||
let msg = Arc::new(Message::NeedBlockQuery(*hash));
|
let msg = Arc::new(Message::NeedBlockQuery(*hash));
|
||||||
|
@ -340,17 +355,17 @@ impl BlockManager {
|
||||||
need_nodes.len()
|
need_nodes.len()
|
||||||
);
|
);
|
||||||
|
|
||||||
let put_block_message = Arc::new(self.read_block(hash).await?);
|
let put_block_message = self.read_block(hash).await?;
|
||||||
let put_resps = join_all(need_nodes.iter().map(|to| {
|
|
||||||
self.rpc_client
|
self.rpc_client
|
||||||
.call_arc(*to, put_block_message.clone(), BLOCK_RW_TIMEOUT)
|
.try_call_many(
|
||||||
}))
|
&need_nodes[..],
|
||||||
.await;
|
put_block_message,
|
||||||
for resp in put_resps {
|
RequestStrategy::with_quorum(need_nodes.len())
|
||||||
resp?;
|
.with_timeout(BLOCK_RW_TIMEOUT),
|
||||||
|
)
|
||||||
|
.await?;
|
||||||
}
|
}
|
||||||
}
|
info!(
|
||||||
trace!(
|
|
||||||
"Deleting block {:?}, offload finished ({} / {})",
|
"Deleting block {:?}, offload finished ({} / {})",
|
||||||
hash,
|
hash,
|
||||||
need_nodes.len(),
|
need_nodes.len(),
|
||||||
|
@ -358,10 +373,11 @@ impl BlockManager {
|
||||||
);
|
);
|
||||||
|
|
||||||
fs::remove_file(path).await?;
|
fs::remove_file(path).await?;
|
||||||
self.resync_queue.remove(&hash)?;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
if needed && !exists {
|
if needed && !exists {
|
||||||
|
drop(lock);
|
||||||
|
|
||||||
// TODO find a way to not do this if they are sending it to us
|
// TODO find a way to not do this if they are sending it to us
|
||||||
// Let's suppose this isn't an issue for now with the BLOCK_RW_TIMEOUT delay
|
// Let's suppose this isn't an issue for now with the BLOCK_RW_TIMEOUT delay
|
||||||
// between the RC being incremented and this part being called.
|
// between the RC being incremented and this part being called.
|
||||||
|
@ -373,7 +389,7 @@ impl BlockManager {
|
||||||
}
|
}
|
||||||
|
|
||||||
pub async fn rpc_get_block(&self, hash: &Hash) -> Result<Vec<u8>, Error> {
|
pub async fn rpc_get_block(&self, hash: &Hash) -> Result<Vec<u8>, Error> {
|
||||||
let who = self.replication.read_nodes(&hash, &self.system);
|
let who = self.replication.read_nodes(&hash);
|
||||||
let resps = self
|
let resps = self
|
||||||
.rpc_client
|
.rpc_client
|
||||||
.try_call_many(
|
.try_call_many(
|
||||||
|
@ -397,12 +413,12 @@ impl BlockManager {
|
||||||
}
|
}
|
||||||
|
|
||||||
pub async fn rpc_put_block(&self, hash: Hash, data: Vec<u8>) -> Result<(), Error> {
|
pub async fn rpc_put_block(&self, hash: Hash, data: Vec<u8>) -> Result<(), Error> {
|
||||||
let who = self.replication.write_nodes(&hash, &self.system);
|
let who = self.replication.write_nodes(&hash);
|
||||||
self.rpc_client
|
self.rpc_client
|
||||||
.try_call_many(
|
.try_call_many(
|
||||||
&who[..],
|
&who[..],
|
||||||
Message::PutBlock(PutBlockMessage { hash, data }),
|
Message::PutBlock(PutBlockMessage { hash, data }),
|
||||||
RequestStrategy::with_quorum(self.replication.write_quorum(&self.system))
|
RequestStrategy::with_quorum(self.replication.write_quorum())
|
||||||
.with_timeout(BLOCK_RW_TIMEOUT),
|
.with_timeout(BLOCK_RW_TIMEOUT),
|
||||||
)
|
)
|
||||||
.await?;
|
.await?;
|
||||||
|
@ -414,15 +430,15 @@ impl BlockManager {
|
||||||
let garage = self.garage.load_full().unwrap();
|
let garage = self.garage.load_full().unwrap();
|
||||||
let mut last_hash = None;
|
let mut last_hash = None;
|
||||||
let mut i = 0usize;
|
let mut i = 0usize;
|
||||||
for entry in garage.block_ref_table.store.iter() {
|
for entry in garage.block_ref_table.data.store.iter() {
|
||||||
let (_k, v_bytes) = entry?;
|
let (_k, v_bytes) = entry?;
|
||||||
let block_ref = rmp_serde::decode::from_read_ref::<_, BlockRef>(v_bytes.as_ref())?;
|
let block_ref = rmp_serde::decode::from_read_ref::<_, BlockRef>(v_bytes.as_ref())?;
|
||||||
if Some(&block_ref.block) == last_hash.as_ref() {
|
if Some(&block_ref.block) == last_hash.as_ref() {
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
if !block_ref.deleted {
|
if !block_ref.deleted.get() {
|
||||||
last_hash = Some(block_ref.block);
|
last_hash = Some(block_ref.block);
|
||||||
self.put_to_resync(&block_ref.block, 0)?;
|
self.put_to_resync(&block_ref.block, Duration::from_secs(0))?;
|
||||||
}
|
}
|
||||||
i += 1;
|
i += 1;
|
||||||
if i & 0xFF == 0 && *must_exit.borrow() {
|
if i & 0xFF == 0 && *must_exit.borrow() {
|
||||||
|
@ -447,8 +463,12 @@ impl BlockManager {
|
||||||
// so that we can offload them if necessary and then delete them locally.
|
// so that we can offload them if necessary and then delete them locally.
|
||||||
async move {
|
async move {
|
||||||
let mut ls_data_dir = fs::read_dir(path).await?;
|
let mut ls_data_dir = fs::read_dir(path).await?;
|
||||||
while let Some(data_dir_ent) = ls_data_dir.next().await {
|
loop {
|
||||||
let data_dir_ent = data_dir_ent?;
|
let data_dir_ent = ls_data_dir.next_entry().await?;
|
||||||
|
let data_dir_ent = match data_dir_ent {
|
||||||
|
Some(x) => x,
|
||||||
|
None => break,
|
||||||
|
};
|
||||||
let name = data_dir_ent.file_name();
|
let name = data_dir_ent.file_name();
|
||||||
let name = match name.into_string() {
|
let name = match name.into_string() {
|
||||||
Ok(x) => x,
|
Ok(x) => x,
|
||||||
|
@ -466,7 +486,7 @@ impl BlockManager {
|
||||||
};
|
};
|
||||||
let mut hash = [0u8; 32];
|
let mut hash = [0u8; 32];
|
||||||
hash.copy_from_slice(&hash_bytes[..]);
|
hash.copy_from_slice(&hash_bytes[..]);
|
||||||
self.put_to_resync(&hash.into(), 0)?;
|
self.put_to_resync(&hash.into(), Duration::from_secs(0))?;
|
||||||
}
|
}
|
||||||
|
|
||||||
if *must_exit.borrow() {
|
if *must_exit.borrow() {
|
||||||
|
@ -477,32 +497,19 @@ impl BlockManager {
|
||||||
}
|
}
|
||||||
.boxed()
|
.boxed()
|
||||||
}
|
}
|
||||||
|
|
||||||
|
pub fn resync_queue_len(&self) -> usize {
|
||||||
|
self.resync_queue.len()
|
||||||
}
|
}
|
||||||
|
|
||||||
fn u64_from_bytes(bytes: &[u8]) -> u64 {
|
pub fn rc_len(&self) -> usize {
|
||||||
assert!(bytes.len() == 8);
|
self.rc.len()
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
fn u64_from_be_bytes<T: AsRef<[u8]>>(bytes: T) -> u64 {
|
||||||
|
assert!(bytes.as_ref().len() == 8);
|
||||||
let mut x8 = [0u8; 8];
|
let mut x8 = [0u8; 8];
|
||||||
x8.copy_from_slice(bytes);
|
x8.copy_from_slice(bytes.as_ref());
|
||||||
u64::from_be_bytes(x8)
|
u64::from_be_bytes(x8)
|
||||||
}
|
}
|
||||||
|
|
||||||
fn rc_merge(_key: &[u8], old: Option<&[u8]>, new: &[u8]) -> Option<Vec<u8>> {
|
|
||||||
let old = old.map(u64_from_bytes).unwrap_or(0);
|
|
||||||
assert!(new.len() == 1);
|
|
||||||
let new = match new[0] {
|
|
||||||
0 => {
|
|
||||||
if old > 0 {
|
|
||||||
old - 1
|
|
||||||
} else {
|
|
||||||
0
|
|
||||||
}
|
|
||||||
}
|
|
||||||
1 => old + 1,
|
|
||||||
_ => unreachable!(),
|
|
||||||
};
|
|
||||||
if new == 0 {
|
|
||||||
None
|
|
||||||
} else {
|
|
||||||
Some(u64::to_be_bytes(new).to_vec())
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
|
@ -1,9 +1,9 @@
|
||||||
use serde::{Deserialize, Serialize};
|
use serde::{Deserialize, Serialize};
|
||||||
use std::sync::Arc;
|
use std::sync::Arc;
|
||||||
|
|
||||||
use garage_util::background::*;
|
|
||||||
use garage_util::data::*;
|
use garage_util::data::*;
|
||||||
|
|
||||||
|
use garage_table::crdt::CRDT;
|
||||||
use garage_table::*;
|
use garage_table::*;
|
||||||
|
|
||||||
use crate::block::*;
|
use crate::block::*;
|
||||||
|
@ -17,7 +17,7 @@ pub struct BlockRef {
|
||||||
pub version: UUID,
|
pub version: UUID,
|
||||||
|
|
||||||
// Keep track of deleted status
|
// Keep track of deleted status
|
||||||
pub deleted: bool,
|
pub deleted: crdt::Bool,
|
||||||
}
|
}
|
||||||
|
|
||||||
impl Entry<Hash, UUID> for BlockRef {
|
impl Entry<Hash, UUID> for BlockRef {
|
||||||
|
@ -27,16 +27,18 @@ impl Entry<Hash, UUID> for BlockRef {
|
||||||
fn sort_key(&self) -> &UUID {
|
fn sort_key(&self) -> &UUID {
|
||||||
&self.version
|
&self.version
|
||||||
}
|
}
|
||||||
|
fn is_tombstone(&self) -> bool {
|
||||||
fn merge(&mut self, other: &Self) {
|
self.deleted.get()
|
||||||
if other.deleted {
|
|
||||||
self.deleted = true;
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
impl CRDT for BlockRef {
|
||||||
|
fn merge(&mut self, other: &Self) {
|
||||||
|
self.deleted.merge(&other.deleted);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
pub struct BlockRefTable {
|
pub struct BlockRefTable {
|
||||||
pub background: Arc<BackgroundRunner>,
|
|
||||||
pub block_manager: Arc<BlockManager>,
|
pub block_manager: Arc<BlockManager>,
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -48,8 +50,8 @@ impl TableSchema for BlockRefTable {
|
||||||
|
|
||||||
fn updated(&self, old: Option<Self::E>, new: Option<Self::E>) {
|
fn updated(&self, old: Option<Self::E>, new: Option<Self::E>) {
|
||||||
let block = &old.as_ref().or(new.as_ref()).unwrap().block;
|
let block = &old.as_ref().or(new.as_ref()).unwrap().block;
|
||||||
let was_before = old.as_ref().map(|x| !x.deleted).unwrap_or(false);
|
let was_before = old.as_ref().map(|x| !x.deleted.get()).unwrap_or(false);
|
||||||
let is_after = new.as_ref().map(|x| !x.deleted).unwrap_or(false);
|
let is_after = new.as_ref().map(|x| !x.deleted.get()).unwrap_or(false);
|
||||||
if is_after && !was_before {
|
if is_after && !was_before {
|
||||||
if let Err(e) = self.block_manager.block_incref(block) {
|
if let Err(e) = self.block_manager.block_incref(block) {
|
||||||
warn!("block_incref failed for block {:?}: {}", block, e);
|
warn!("block_incref failed for block {:?}: {}", block, e);
|
||||||
|
@ -63,6 +65,6 @@ impl TableSchema for BlockRefTable {
|
||||||
}
|
}
|
||||||
|
|
||||||
fn matches_filter(entry: &Self::E, filter: &Self::Filter) -> bool {
|
fn matches_filter(entry: &Self::E, filter: &Self::Filter) -> bool {
|
||||||
filter.apply(entry.deleted)
|
filter.apply(entry.deleted.get())
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -5,11 +5,6 @@ use garage_table::*;
|
||||||
|
|
||||||
use crate::key_table::PermissionSet;
|
use crate::key_table::PermissionSet;
|
||||||
|
|
||||||
// We import the same file but in its version 0.1.0.
|
|
||||||
// We can then access v0.1.0 data structures.
|
|
||||||
// We use them to perform migrations.
|
|
||||||
use model010::bucket_table as prev;
|
|
||||||
|
|
||||||
/// A bucket is a collection of objects
|
/// A bucket is a collection of objects
|
||||||
///
|
///
|
||||||
/// Its parameters are not directly accessible as:
|
/// Its parameters are not directly accessible as:
|
||||||
|
@ -89,7 +84,9 @@ impl Entry<EmptyKey, String> for Bucket {
|
||||||
fn sort_key(&self) -> &String {
|
fn sort_key(&self) -> &String {
|
||||||
&self.name
|
&self.name
|
||||||
}
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
impl CRDT for Bucket {
|
||||||
fn merge(&mut self, other: &Self) {
|
fn merge(&mut self, other: &Self) {
|
||||||
self.state.merge(&other.state);
|
self.state.merge(&other.state);
|
||||||
}
|
}
|
||||||
|
@ -106,39 +103,4 @@ impl TableSchema for BucketTable {
|
||||||
fn matches_filter(entry: &Self::E, filter: &Self::Filter) -> bool {
|
fn matches_filter(entry: &Self::E, filter: &Self::Filter) -> bool {
|
||||||
filter.apply(entry.is_deleted())
|
filter.apply(entry.is_deleted())
|
||||||
}
|
}
|
||||||
|
|
||||||
fn try_migrate(bytes: &[u8]) -> Option<Self::E> {
|
|
||||||
let old = match rmp_serde::decode::from_read_ref::<_, prev::Bucket>(bytes) {
|
|
||||||
Ok(x) => x,
|
|
||||||
Err(_) => return None,
|
|
||||||
};
|
|
||||||
if old.deleted {
|
|
||||||
Some(Bucket {
|
|
||||||
name: old.name,
|
|
||||||
state: crdt::LWW::migrate_from_raw(old.timestamp, BucketState::Deleted),
|
|
||||||
})
|
|
||||||
} else {
|
|
||||||
let mut keys = crdt::LWWMap::new();
|
|
||||||
for ak in old.authorized_keys() {
|
|
||||||
keys.merge(&crdt::LWWMap::migrate_from_raw_item(
|
|
||||||
ak.key_id.clone(),
|
|
||||||
ak.timestamp,
|
|
||||||
PermissionSet {
|
|
||||||
allow_read: ak.allow_read,
|
|
||||||
allow_write: ak.allow_write,
|
|
||||||
},
|
|
||||||
));
|
|
||||||
}
|
|
||||||
|
|
||||||
let params = BucketParams {
|
|
||||||
authorized_keys: keys,
|
|
||||||
website: crdt::LWW::new(false),
|
|
||||||
};
|
|
||||||
|
|
||||||
Some(Bucket {
|
|
||||||
name: old.name,
|
|
||||||
state: crdt::LWW::migrate_from_raw(old.timestamp, BucketState::Present(params)),
|
|
||||||
})
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
|
@ -7,8 +7,8 @@ use garage_rpc::membership::System;
|
||||||
use garage_rpc::rpc_client::RpcHttpClient;
|
use garage_rpc::rpc_client::RpcHttpClient;
|
||||||
use garage_rpc::rpc_server::RpcServer;
|
use garage_rpc::rpc_server::RpcServer;
|
||||||
|
|
||||||
use garage_table::table_fullcopy::*;
|
use garage_table::replication::fullcopy::*;
|
||||||
use garage_table::table_sharded::*;
|
use garage_table::replication::sharded::*;
|
||||||
use garage_table::*;
|
use garage_table::*;
|
||||||
|
|
||||||
use crate::block::*;
|
use crate::block::*;
|
||||||
|
@ -35,7 +35,7 @@ pub struct Garage {
|
||||||
}
|
}
|
||||||
|
|
||||||
impl Garage {
|
impl Garage {
|
||||||
pub async fn new(
|
pub fn new(
|
||||||
config: Config,
|
config: Config,
|
||||||
db: sled::Db,
|
db: sled::Db,
|
||||||
background: Arc<BackgroundRunner>,
|
background: Arc<BackgroundRunner>,
|
||||||
|
@ -54,18 +54,23 @@ impl Garage {
|
||||||
);
|
);
|
||||||
|
|
||||||
let data_rep_param = TableShardedReplication {
|
let data_rep_param = TableShardedReplication {
|
||||||
|
system: system.clone(),
|
||||||
replication_factor: config.data_replication_factor,
|
replication_factor: config.data_replication_factor,
|
||||||
write_quorum: (config.data_replication_factor + 1) / 2,
|
write_quorum: (config.data_replication_factor + 1) / 2,
|
||||||
read_quorum: 1,
|
read_quorum: 1,
|
||||||
};
|
};
|
||||||
|
|
||||||
let meta_rep_param = TableShardedReplication {
|
let meta_rep_param = TableShardedReplication {
|
||||||
|
system: system.clone(),
|
||||||
replication_factor: config.meta_replication_factor,
|
replication_factor: config.meta_replication_factor,
|
||||||
write_quorum: (config.meta_replication_factor + 1) / 2,
|
write_quorum: (config.meta_replication_factor + 1) / 2,
|
||||||
read_quorum: (config.meta_replication_factor + 1) / 2,
|
read_quorum: (config.meta_replication_factor + 1) / 2,
|
||||||
};
|
};
|
||||||
|
|
||||||
let control_rep_param = TableFullReplication::new(config.control_write_max_faults);
|
let control_rep_param = TableFullReplication {
|
||||||
|
system: system.clone(),
|
||||||
|
max_faults: config.control_write_max_faults,
|
||||||
|
};
|
||||||
|
|
||||||
info!("Initialize block manager...");
|
info!("Initialize block manager...");
|
||||||
let block_manager = BlockManager::new(
|
let block_manager = BlockManager::new(
|
||||||
|
@ -79,7 +84,6 @@ impl Garage {
|
||||||
info!("Initialize block_ref_table...");
|
info!("Initialize block_ref_table...");
|
||||||
let block_ref_table = Table::new(
|
let block_ref_table = Table::new(
|
||||||
BlockRefTable {
|
BlockRefTable {
|
||||||
background: background.clone(),
|
|
||||||
block_manager: block_manager.clone(),
|
block_manager: block_manager.clone(),
|
||||||
},
|
},
|
||||||
data_rep_param.clone(),
|
data_rep_param.clone(),
|
||||||
|
@ -87,8 +91,7 @@ impl Garage {
|
||||||
&db,
|
&db,
|
||||||
"block_ref".to_string(),
|
"block_ref".to_string(),
|
||||||
rpc_server,
|
rpc_server,
|
||||||
)
|
);
|
||||||
.await;
|
|
||||||
|
|
||||||
info!("Initialize version_table...");
|
info!("Initialize version_table...");
|
||||||
let version_table = Table::new(
|
let version_table = Table::new(
|
||||||
|
@ -101,8 +104,7 @@ impl Garage {
|
||||||
&db,
|
&db,
|
||||||
"version".to_string(),
|
"version".to_string(),
|
||||||
rpc_server,
|
rpc_server,
|
||||||
)
|
);
|
||||||
.await;
|
|
||||||
|
|
||||||
info!("Initialize object_table...");
|
info!("Initialize object_table...");
|
||||||
let object_table = Table::new(
|
let object_table = Table::new(
|
||||||
|
@ -115,8 +117,7 @@ impl Garage {
|
||||||
&db,
|
&db,
|
||||||
"object".to_string(),
|
"object".to_string(),
|
||||||
rpc_server,
|
rpc_server,
|
||||||
)
|
);
|
||||||
.await;
|
|
||||||
|
|
||||||
info!("Initialize bucket_table...");
|
info!("Initialize bucket_table...");
|
||||||
let bucket_table = Table::new(
|
let bucket_table = Table::new(
|
||||||
|
@ -126,8 +127,7 @@ impl Garage {
|
||||||
&db,
|
&db,
|
||||||
"bucket".to_string(),
|
"bucket".to_string(),
|
||||||
rpc_server,
|
rpc_server,
|
||||||
)
|
);
|
||||||
.await;
|
|
||||||
|
|
||||||
info!("Initialize key_table_table...");
|
info!("Initialize key_table_table...");
|
||||||
let key_table = Table::new(
|
let key_table = Table::new(
|
||||||
|
@ -137,8 +137,7 @@ impl Garage {
|
||||||
&db,
|
&db,
|
||||||
"key".to_string(),
|
"key".to_string(),
|
||||||
rpc_server,
|
rpc_server,
|
||||||
)
|
);
|
||||||
.await;
|
|
||||||
|
|
||||||
info!("Initialize Garage...");
|
info!("Initialize Garage...");
|
||||||
let garage = Arc::new(Self {
|
let garage = Arc::new(Self {
|
||||||
|
@ -156,7 +155,7 @@ impl Garage {
|
||||||
|
|
||||||
info!("Start block manager background thread...");
|
info!("Start block manager background thread...");
|
||||||
garage.block_manager.garage.swap(Some(garage.clone()));
|
garage.block_manager.garage.swap(Some(garage.clone()));
|
||||||
garage.block_manager.clone().spawn_background_worker().await;
|
garage.block_manager.clone().spawn_background_worker();
|
||||||
|
|
||||||
garage
|
garage
|
||||||
}
|
}
|
||||||
|
|
|
@ -1,10 +1,8 @@
|
||||||
use serde::{Deserialize, Serialize};
|
use serde::{Deserialize, Serialize};
|
||||||
|
|
||||||
use garage_table::crdt::CRDT;
|
use garage_table::crdt::*;
|
||||||
use garage_table::*;
|
use garage_table::*;
|
||||||
|
|
||||||
use model010::key_table as prev;
|
|
||||||
|
|
||||||
#[derive(PartialEq, Clone, Debug, Serialize, Deserialize)]
|
#[derive(PartialEq, Clone, Debug, Serialize, Deserialize)]
|
||||||
pub struct Key {
|
pub struct Key {
|
||||||
// Primary key
|
// Primary key
|
||||||
|
@ -36,6 +34,15 @@ impl Key {
|
||||||
authorized_buckets: crdt::LWWMap::new(),
|
authorized_buckets: crdt::LWWMap::new(),
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
pub fn import(key_id: &str, secret_key: &str, name: &str) -> Self {
|
||||||
|
Self {
|
||||||
|
key_id: key_id.to_string(),
|
||||||
|
secret_key: secret_key.to_string(),
|
||||||
|
name: crdt::LWW::new(name.to_string()),
|
||||||
|
deleted: crdt::Bool::new(false),
|
||||||
|
authorized_buckets: crdt::LWWMap::new(),
|
||||||
|
}
|
||||||
|
}
|
||||||
pub fn delete(key_id: String) -> Self {
|
pub fn delete(key_id: String) -> Self {
|
||||||
Self {
|
Self {
|
||||||
key_id,
|
key_id,
|
||||||
|
@ -66,6 +73,10 @@ pub struct PermissionSet {
|
||||||
pub allow_write: bool,
|
pub allow_write: bool,
|
||||||
}
|
}
|
||||||
|
|
||||||
|
impl AutoCRDT for PermissionSet {
|
||||||
|
const WARN_IF_DIFFERENT: bool = true;
|
||||||
|
}
|
||||||
|
|
||||||
impl Entry<EmptyKey, String> for Key {
|
impl Entry<EmptyKey, String> for Key {
|
||||||
fn partition_key(&self) -> &EmptyKey {
|
fn partition_key(&self) -> &EmptyKey {
|
||||||
&EmptyKey
|
&EmptyKey
|
||||||
|
@ -73,55 +84,43 @@ impl Entry<EmptyKey, String> for Key {
|
||||||
fn sort_key(&self) -> &String {
|
fn sort_key(&self) -> &String {
|
||||||
&self.key_id
|
&self.key_id
|
||||||
}
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
impl CRDT for Key {
|
||||||
fn merge(&mut self, other: &Self) {
|
fn merge(&mut self, other: &Self) {
|
||||||
self.name.merge(&other.name);
|
self.name.merge(&other.name);
|
||||||
self.deleted.merge(&other.deleted);
|
self.deleted.merge(&other.deleted);
|
||||||
|
|
||||||
if self.deleted.get() {
|
if self.deleted.get() {
|
||||||
self.authorized_buckets.clear();
|
self.authorized_buckets.clear();
|
||||||
return;
|
} else {
|
||||||
}
|
|
||||||
|
|
||||||
self.authorized_buckets.merge(&other.authorized_buckets);
|
self.authorized_buckets.merge(&other.authorized_buckets);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
}
|
||||||
|
|
||||||
pub struct KeyTable;
|
pub struct KeyTable;
|
||||||
|
|
||||||
|
#[derive(Clone, Debug, Serialize, Deserialize)]
|
||||||
|
pub enum KeyFilter {
|
||||||
|
Deleted(DeletedFilter),
|
||||||
|
Matches(String),
|
||||||
|
}
|
||||||
|
|
||||||
impl TableSchema for KeyTable {
|
impl TableSchema for KeyTable {
|
||||||
type P = EmptyKey;
|
type P = EmptyKey;
|
||||||
type S = String;
|
type S = String;
|
||||||
type E = Key;
|
type E = Key;
|
||||||
type Filter = DeletedFilter;
|
type Filter = KeyFilter;
|
||||||
|
|
||||||
fn matches_filter(entry: &Self::E, filter: &Self::Filter) -> bool {
|
fn matches_filter(entry: &Self::E, filter: &Self::Filter) -> bool {
|
||||||
filter.apply(entry.deleted.get())
|
match filter {
|
||||||
}
|
KeyFilter::Deleted(df) => df.apply(entry.deleted.get()),
|
||||||
|
KeyFilter::Matches(pat) => {
|
||||||
fn try_migrate(bytes: &[u8]) -> Option<Self::E> {
|
let pat = pat.to_lowercase();
|
||||||
let old = match rmp_serde::decode::from_read_ref::<_, prev::Key>(bytes) {
|
entry.key_id.to_lowercase().starts_with(&pat)
|
||||||
Ok(x) => x,
|
|| entry.name.get().to_lowercase() == pat
|
||||||
Err(_) => return None,
|
}
|
||||||
};
|
}
|
||||||
let mut new = Self::E {
|
|
||||||
key_id: old.key_id.clone(),
|
|
||||||
secret_key: old.secret_key.clone(),
|
|
||||||
name: crdt::LWW::migrate_from_raw(old.name_timestamp, old.name.clone()),
|
|
||||||
deleted: crdt::Bool::new(old.deleted),
|
|
||||||
authorized_buckets: crdt::LWWMap::new(),
|
|
||||||
};
|
|
||||||
for ab in old.authorized_buckets() {
|
|
||||||
let it = crdt::LWWMap::migrate_from_raw_item(
|
|
||||||
ab.bucket.clone(),
|
|
||||||
ab.timestamp,
|
|
||||||
PermissionSet {
|
|
||||||
allow_read: ab.allow_read,
|
|
||||||
allow_write: ab.allow_write,
|
|
||||||
},
|
|
||||||
);
|
|
||||||
new.authorized_buckets.merge(&it);
|
|
||||||
}
|
|
||||||
Some(new)
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -5,13 +5,12 @@ use std::sync::Arc;
|
||||||
use garage_util::background::BackgroundRunner;
|
use garage_util::background::BackgroundRunner;
|
||||||
use garage_util::data::*;
|
use garage_util::data::*;
|
||||||
|
|
||||||
use garage_table::table_sharded::*;
|
use garage_table::crdt::*;
|
||||||
|
use garage_table::replication::sharded::*;
|
||||||
use garage_table::*;
|
use garage_table::*;
|
||||||
|
|
||||||
use crate::version_table::*;
|
use crate::version_table::*;
|
||||||
|
|
||||||
use model010::object_table as prev;
|
|
||||||
|
|
||||||
#[derive(PartialEq, Clone, Debug, Serialize, Deserialize)]
|
#[derive(PartialEq, Clone, Debug, Serialize, Deserialize)]
|
||||||
pub struct Object {
|
pub struct Object {
|
||||||
// Primary key
|
// Primary key
|
||||||
|
@ -70,7 +69,7 @@ pub enum ObjectVersionState {
|
||||||
Aborted,
|
Aborted,
|
||||||
}
|
}
|
||||||
|
|
||||||
impl ObjectVersionState {
|
impl CRDT for ObjectVersionState {
|
||||||
fn merge(&mut self, other: &Self) {
|
fn merge(&mut self, other: &Self) {
|
||||||
use ObjectVersionState::*;
|
use ObjectVersionState::*;
|
||||||
match other {
|
match other {
|
||||||
|
@ -91,37 +90,30 @@ impl ObjectVersionState {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
#[derive(PartialEq, Clone, Debug, Serialize, Deserialize)]
|
#[derive(PartialEq, Eq, PartialOrd, Ord, Clone, Debug, Serialize, Deserialize)]
|
||||||
pub enum ObjectVersionData {
|
pub enum ObjectVersionData {
|
||||||
DeleteMarker,
|
DeleteMarker,
|
||||||
Inline(ObjectVersionMeta, #[serde(with = "serde_bytes")] Vec<u8>),
|
Inline(ObjectVersionMeta, #[serde(with = "serde_bytes")] Vec<u8>),
|
||||||
FirstBlock(ObjectVersionMeta, Hash),
|
FirstBlock(ObjectVersionMeta, Hash),
|
||||||
}
|
}
|
||||||
|
|
||||||
#[derive(PartialEq, Clone, Debug, Serialize, Deserialize)]
|
impl AutoCRDT for ObjectVersionData {
|
||||||
|
const WARN_IF_DIFFERENT: bool = true;
|
||||||
|
}
|
||||||
|
|
||||||
|
#[derive(PartialEq, Eq, PartialOrd, Ord, Clone, Debug, Serialize, Deserialize)]
|
||||||
pub struct ObjectVersionMeta {
|
pub struct ObjectVersionMeta {
|
||||||
pub headers: ObjectVersionHeaders,
|
pub headers: ObjectVersionHeaders,
|
||||||
pub size: u64,
|
pub size: u64,
|
||||||
pub etag: String,
|
pub etag: String,
|
||||||
}
|
}
|
||||||
|
|
||||||
#[derive(PartialEq, Clone, Debug, Serialize, Deserialize)]
|
#[derive(PartialEq, Eq, PartialOrd, Ord, Clone, Debug, Serialize, Deserialize)]
|
||||||
pub struct ObjectVersionHeaders {
|
pub struct ObjectVersionHeaders {
|
||||||
pub content_type: String,
|
pub content_type: String,
|
||||||
pub other: BTreeMap<String, String>,
|
pub other: BTreeMap<String, String>,
|
||||||
}
|
}
|
||||||
|
|
||||||
impl ObjectVersionData {
|
|
||||||
fn merge(&mut self, b: &Self) {
|
|
||||||
if *self != *b {
|
|
||||||
warn!(
|
|
||||||
"Inconsistent object version data: {:?} (local) vs {:?} (remote)",
|
|
||||||
self, b
|
|
||||||
);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
impl ObjectVersion {
|
impl ObjectVersion {
|
||||||
fn cmp_key(&self) -> (u64, UUID) {
|
fn cmp_key(&self) -> (u64, UUID) {
|
||||||
(self.timestamp, self.uuid)
|
(self.timestamp, self.uuid)
|
||||||
|
@ -154,8 +146,14 @@ impl Entry<String, String> for Object {
|
||||||
fn sort_key(&self) -> &String {
|
fn sort_key(&self) -> &String {
|
||||||
&self.key
|
&self.key
|
||||||
}
|
}
|
||||||
|
fn is_tombstone(&self) -> bool {
|
||||||
|
self.versions.len() == 1 && self.versions[0].state == ObjectVersionState::Complete(ObjectVersionData::DeleteMarker)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
impl CRDT for Object {
|
||||||
fn merge(&mut self, other: &Self) {
|
fn merge(&mut self, other: &Self) {
|
||||||
|
// Merge versions from other into here
|
||||||
for other_v in other.versions.iter() {
|
for other_v in other.versions.iter() {
|
||||||
match self
|
match self
|
||||||
.versions
|
.versions
|
||||||
|
@ -169,6 +167,9 @@ impl Entry<String, String> for Object {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Remove versions which are obsolete, i.e. those that come
|
||||||
|
// before the last version which .is_complete().
|
||||||
let last_complete = self
|
let last_complete = self
|
||||||
.versions
|
.versions
|
||||||
.iter()
|
.iter()
|
||||||
|
@ -212,13 +213,8 @@ impl TableSchema for ObjectTable {
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
if newly_deleted {
|
if newly_deleted {
|
||||||
let deleted_version = Version::new(
|
let deleted_version =
|
||||||
v.uuid,
|
Version::new(v.uuid, old_v.bucket.clone(), old_v.key.clone(), true);
|
||||||
old_v.bucket.clone(),
|
|
||||||
old_v.key.clone(),
|
|
||||||
true,
|
|
||||||
vec![],
|
|
||||||
);
|
|
||||||
version_table.insert(&deleted_version).await?;
|
version_table.insert(&deleted_version).await?;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -231,55 +227,4 @@ impl TableSchema for ObjectTable {
|
||||||
let deleted = !entry.versions.iter().any(|v| v.is_data());
|
let deleted = !entry.versions.iter().any(|v| v.is_data());
|
||||||
filter.apply(deleted)
|
filter.apply(deleted)
|
||||||
}
|
}
|
||||||
|
|
||||||
fn try_migrate(bytes: &[u8]) -> Option<Self::E> {
|
|
||||||
let old = match rmp_serde::decode::from_read_ref::<_, prev::Object>(bytes) {
|
|
||||||
Ok(x) => x,
|
|
||||||
Err(_) => return None,
|
|
||||||
};
|
|
||||||
let new_v = old
|
|
||||||
.versions()
|
|
||||||
.iter()
|
|
||||||
.map(migrate_version)
|
|
||||||
.collect::<Vec<_>>();
|
|
||||||
let new = Object::new(old.bucket.clone(), old.key.clone(), new_v);
|
|
||||||
Some(new)
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
fn migrate_version(old: &prev::ObjectVersion) -> ObjectVersion {
|
|
||||||
let headers = ObjectVersionHeaders {
|
|
||||||
content_type: old.mime_type.clone(),
|
|
||||||
other: BTreeMap::new(),
|
|
||||||
};
|
|
||||||
let meta = ObjectVersionMeta {
|
|
||||||
headers: headers.clone(),
|
|
||||||
size: old.size,
|
|
||||||
etag: "".to_string(),
|
|
||||||
};
|
|
||||||
let state = match old.state {
|
|
||||||
prev::ObjectVersionState::Uploading => ObjectVersionState::Uploading(headers),
|
|
||||||
prev::ObjectVersionState::Aborted => ObjectVersionState::Aborted,
|
|
||||||
prev::ObjectVersionState::Complete => match &old.data {
|
|
||||||
prev::ObjectVersionData::Uploading => ObjectVersionState::Uploading(headers),
|
|
||||||
prev::ObjectVersionData::DeleteMarker => {
|
|
||||||
ObjectVersionState::Complete(ObjectVersionData::DeleteMarker)
|
|
||||||
}
|
|
||||||
prev::ObjectVersionData::Inline(x) => {
|
|
||||||
ObjectVersionState::Complete(ObjectVersionData::Inline(meta, x.clone()))
|
|
||||||
}
|
|
||||||
prev::ObjectVersionData::FirstBlock(h) => {
|
|
||||||
let mut hash = [0u8; 32];
|
|
||||||
hash.copy_from_slice(h.as_ref());
|
|
||||||
ObjectVersionState::Complete(ObjectVersionData::FirstBlock(meta, Hash::from(hash)))
|
|
||||||
}
|
|
||||||
},
|
|
||||||
};
|
|
||||||
let mut uuid = [0u8; 32];
|
|
||||||
uuid.copy_from_slice(old.uuid.as_ref());
|
|
||||||
ObjectVersion {
|
|
||||||
uuid: UUID::from(uuid),
|
|
||||||
timestamp: old.timestamp,
|
|
||||||
state,
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
|
@ -4,7 +4,8 @@ use std::sync::Arc;
|
||||||
use garage_util::background::BackgroundRunner;
|
use garage_util::background::BackgroundRunner;
|
||||||
use garage_util::data::*;
|
use garage_util::data::*;
|
||||||
|
|
||||||
use garage_table::table_sharded::*;
|
use garage_table::crdt::*;
|
||||||
|
use garage_table::replication::sharded::*;
|
||||||
use garage_table::*;
|
use garage_table::*;
|
||||||
|
|
||||||
use crate::block_ref_table::*;
|
use crate::block_ref_table::*;
|
||||||
|
@ -15,8 +16,11 @@ pub struct Version {
|
||||||
pub uuid: UUID,
|
pub uuid: UUID,
|
||||||
|
|
||||||
// Actual data: the blocks for this version
|
// Actual data: the blocks for this version
|
||||||
pub deleted: bool,
|
// In the case of a multipart upload, also store the etags
|
||||||
blocks: Vec<VersionBlock>,
|
// of individual parts and check them when doing CompleteMultipartUpload
|
||||||
|
pub deleted: crdt::Bool,
|
||||||
|
pub blocks: crdt::Map<VersionBlockKey, VersionBlock>,
|
||||||
|
pub parts_etags: crdt::Map<u64, String>,
|
||||||
|
|
||||||
// Back link to bucket+key so that we can figure if
|
// Back link to bucket+key so that we can figure if
|
||||||
// this was deleted later on
|
// this was deleted later on
|
||||||
|
@ -25,56 +29,46 @@ pub struct Version {
|
||||||
}
|
}
|
||||||
|
|
||||||
impl Version {
|
impl Version {
|
||||||
pub fn new(
|
pub fn new(uuid: UUID, bucket: String, key: String, deleted: bool) -> Self {
|
||||||
uuid: UUID,
|
Self {
|
||||||
bucket: String,
|
|
||||||
key: String,
|
|
||||||
deleted: bool,
|
|
||||||
blocks: Vec<VersionBlock>,
|
|
||||||
) -> Self {
|
|
||||||
let mut ret = Self {
|
|
||||||
uuid,
|
uuid,
|
||||||
deleted,
|
deleted: deleted.into(),
|
||||||
blocks: vec![],
|
blocks: crdt::Map::new(),
|
||||||
|
parts_etags: crdt::Map::new(),
|
||||||
bucket,
|
bucket,
|
||||||
key,
|
key,
|
||||||
};
|
|
||||||
for b in blocks {
|
|
||||||
ret.add_block(b)
|
|
||||||
.expect("Twice the same VersionBlock in Version constructor");
|
|
||||||
}
|
}
|
||||||
ret
|
|
||||||
}
|
|
||||||
/// Adds a block if it wasn't already present
|
|
||||||
pub fn add_block(&mut self, new: VersionBlock) -> Result<(), ()> {
|
|
||||||
match self
|
|
||||||
.blocks
|
|
||||||
.binary_search_by(|b| b.cmp_key().cmp(&new.cmp_key()))
|
|
||||||
{
|
|
||||||
Err(i) => {
|
|
||||||
self.blocks.insert(i, new);
|
|
||||||
Ok(())
|
|
||||||
}
|
|
||||||
Ok(_) => Err(()),
|
|
||||||
}
|
|
||||||
}
|
|
||||||
pub fn blocks(&self) -> &[VersionBlock] {
|
|
||||||
&self.blocks[..]
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
#[derive(PartialEq, Clone, Debug, Serialize, Deserialize)]
|
#[derive(PartialEq, Eq, Clone, Copy, Debug, Serialize, Deserialize)]
|
||||||
pub struct VersionBlock {
|
pub struct VersionBlockKey {
|
||||||
pub part_number: u64,
|
pub part_number: u64,
|
||||||
pub offset: u64,
|
pub offset: u64,
|
||||||
|
}
|
||||||
|
|
||||||
|
impl Ord for VersionBlockKey {
|
||||||
|
fn cmp(&self, other: &Self) -> std::cmp::Ordering {
|
||||||
|
self.part_number
|
||||||
|
.cmp(&other.part_number)
|
||||||
|
.then(self.offset.cmp(&other.offset))
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
impl PartialOrd for VersionBlockKey {
|
||||||
|
fn partial_cmp(&self, other: &Self) -> Option<std::cmp::Ordering> {
|
||||||
|
Some(self.cmp(other))
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
#[derive(PartialEq, Eq, Ord, PartialOrd, Clone, Copy, Debug, Serialize, Deserialize)]
|
||||||
|
pub struct VersionBlock {
|
||||||
pub hash: Hash,
|
pub hash: Hash,
|
||||||
pub size: u64,
|
pub size: u64,
|
||||||
}
|
}
|
||||||
|
|
||||||
impl VersionBlock {
|
impl AutoCRDT for VersionBlock {
|
||||||
fn cmp_key(&self) -> (u64, u64) {
|
const WARN_IF_DIFFERENT: bool = true;
|
||||||
(self.part_number, self.offset)
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
impl Entry<Hash, EmptyKey> for Version {
|
impl Entry<Hash, EmptyKey> for Version {
|
||||||
|
@ -84,23 +78,21 @@ impl Entry<Hash, EmptyKey> for Version {
|
||||||
fn sort_key(&self) -> &EmptyKey {
|
fn sort_key(&self) -> &EmptyKey {
|
||||||
&EmptyKey
|
&EmptyKey
|
||||||
}
|
}
|
||||||
|
fn is_tombstone(&self) -> bool {
|
||||||
|
self.deleted.get()
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
impl CRDT for Version {
|
||||||
fn merge(&mut self, other: &Self) {
|
fn merge(&mut self, other: &Self) {
|
||||||
if other.deleted {
|
self.deleted.merge(&other.deleted);
|
||||||
self.deleted = true;
|
|
||||||
|
if self.deleted.get() {
|
||||||
self.blocks.clear();
|
self.blocks.clear();
|
||||||
} else if !self.deleted {
|
self.parts_etags.clear();
|
||||||
for bi in other.blocks.iter() {
|
} else {
|
||||||
match self
|
self.blocks.merge(&other.blocks);
|
||||||
.blocks
|
self.parts_etags.merge(&other.parts_etags);
|
||||||
.binary_search_by(|x| x.cmp_key().cmp(&bi.cmp_key()))
|
|
||||||
{
|
|
||||||
Ok(_) => (),
|
|
||||||
Err(pos) => {
|
|
||||||
self.blocks.insert(pos, bi.clone());
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -121,14 +113,15 @@ impl TableSchema for VersionTable {
|
||||||
self.background.spawn(async move {
|
self.background.spawn(async move {
|
||||||
if let (Some(old_v), Some(new_v)) = (old, new) {
|
if let (Some(old_v), Some(new_v)) = (old, new) {
|
||||||
// Propagate deletion of version blocks
|
// Propagate deletion of version blocks
|
||||||
if new_v.deleted && !old_v.deleted {
|
if new_v.deleted.get() && !old_v.deleted.get() {
|
||||||
let deleted_block_refs = old_v
|
let deleted_block_refs = old_v
|
||||||
.blocks
|
.blocks
|
||||||
|
.items()
|
||||||
.iter()
|
.iter()
|
||||||
.map(|vb| BlockRef {
|
.map(|(_k, vb)| BlockRef {
|
||||||
block: vb.hash,
|
block: vb.hash,
|
||||||
version: old_v.uuid,
|
version: old_v.uuid,
|
||||||
deleted: true,
|
deleted: true.into(),
|
||||||
})
|
})
|
||||||
.collect::<Vec<_>>();
|
.collect::<Vec<_>>();
|
||||||
block_ref_table.insert_many(&deleted_block_refs[..]).await?;
|
block_ref_table.insert_many(&deleted_block_refs[..]).await?;
|
||||||
|
@ -139,6 +132,6 @@ impl TableSchema for VersionTable {
|
||||||
}
|
}
|
||||||
|
|
||||||
fn matches_filter(entry: &Self::E, filter: &Self::Filter) -> bool {
|
fn matches_filter(entry: &Self::E, filter: &Self::Filter) -> bool {
|
||||||
filter.apply(entry.deleted)
|
filter.apply(entry.deleted.get())
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -15,27 +15,26 @@ path = "lib.rs"
|
||||||
[dependencies]
|
[dependencies]
|
||||||
garage_util = { version = "0.1.1", path = "../util" }
|
garage_util = { version = "0.1.1", path = "../util" }
|
||||||
|
|
||||||
bytes = "0.4"
|
bytes = "1.0"
|
||||||
rand = "0.7"
|
hex = "0.4"
|
||||||
hex = "0.3"
|
arc-swap = "1.0"
|
||||||
sha2 = "0.8"
|
|
||||||
arc-swap = "0.4"
|
|
||||||
gethostname = "0.2"
|
gethostname = "0.2"
|
||||||
log = "0.4"
|
log = "0.4"
|
||||||
|
|
||||||
rmp-serde = "0.14.3"
|
rmp-serde = "0.15"
|
||||||
serde = { version = "1.0", default-features = false, features = ["derive", "rc"] }
|
serde = { version = "1.0", default-features = false, features = ["derive", "rc"] }
|
||||||
serde_json = "1.0"
|
serde_json = "1.0"
|
||||||
|
|
||||||
futures = "0.3"
|
futures = "0.3"
|
||||||
futures-util = "0.3"
|
futures-util = "0.3"
|
||||||
tokio = { version = "0.2", default-features = false, features = ["rt-core", "rt-threaded", "io-driver", "net", "tcp", "time", "macros", "sync", "signal", "fs"] }
|
tokio = { version = "1.0", default-features = false, features = ["rt", "rt-multi-thread", "io-util", "net", "time", "macros", "sync", "signal", "fs"] }
|
||||||
|
tokio-stream = { version = "0.1", features = ["net"] }
|
||||||
|
|
||||||
http = "0.2"
|
http = "0.2"
|
||||||
hyper = "0.13"
|
hyper = { version = "0.14", features = ["full"] }
|
||||||
rustls = "0.17"
|
rustls = "0.19"
|
||||||
tokio-rustls = "0.13"
|
tokio-rustls = "0.22"
|
||||||
hyper-rustls = { version = "0.20", default-features = false }
|
hyper-rustls = { version = "0.22", default-features = false }
|
||||||
webpki = "0.21"
|
webpki = "0.21"
|
||||||
|
|
||||||
|
|
||||||
|
|
|
@ -11,13 +11,14 @@ use futures::future::join_all;
|
||||||
use futures::select;
|
use futures::select;
|
||||||
use futures_util::future::*;
|
use futures_util::future::*;
|
||||||
use serde::{Deserialize, Serialize};
|
use serde::{Deserialize, Serialize};
|
||||||
use tokio::prelude::*;
|
use tokio::io::AsyncWriteExt;
|
||||||
use tokio::sync::watch;
|
use tokio::sync::watch;
|
||||||
use tokio::sync::Mutex;
|
use tokio::sync::Mutex;
|
||||||
|
|
||||||
use garage_util::background::BackgroundRunner;
|
use garage_util::background::BackgroundRunner;
|
||||||
use garage_util::data::*;
|
use garage_util::data::*;
|
||||||
use garage_util::error::Error;
|
use garage_util::error::Error;
|
||||||
|
use garage_util::time::*;
|
||||||
|
|
||||||
use crate::consul::get_consul_nodes;
|
use crate::consul::get_consul_nodes;
|
||||||
use crate::ring::*;
|
use crate::ring::*;
|
||||||
|
@ -315,23 +316,17 @@ impl System {
|
||||||
self.clone().ping_nodes(bootstrap_peers).await;
|
self.clone().ping_nodes(bootstrap_peers).await;
|
||||||
|
|
||||||
let self2 = self.clone();
|
let self2 = self.clone();
|
||||||
self.clone()
|
self.background
|
||||||
.background
|
|
||||||
.spawn_worker(format!("ping loop"), |stop_signal| {
|
.spawn_worker(format!("ping loop"), |stop_signal| {
|
||||||
self2.ping_loop(stop_signal).map(Ok)
|
self2.ping_loop(stop_signal)
|
||||||
})
|
});
|
||||||
.await;
|
|
||||||
|
|
||||||
if let (Some(consul_host), Some(consul_service_name)) = (consul_host, consul_service_name) {
|
if let (Some(consul_host), Some(consul_service_name)) = (consul_host, consul_service_name) {
|
||||||
let self2 = self.clone();
|
let self2 = self.clone();
|
||||||
self.clone()
|
self.background
|
||||||
.background
|
|
||||||
.spawn_worker(format!("Consul loop"), |stop_signal| {
|
.spawn_worker(format!("Consul loop"), |stop_signal| {
|
||||||
self2
|
self2.consul_loop(stop_signal, consul_host, consul_service_name)
|
||||||
.consul_loop(stop_signal, consul_host, consul_service_name)
|
});
|
||||||
.map(Ok)
|
|
||||||
})
|
|
||||||
.await;
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -399,7 +394,7 @@ impl System {
|
||||||
if has_changes {
|
if has_changes {
|
||||||
status.recalculate_hash();
|
status.recalculate_hash();
|
||||||
}
|
}
|
||||||
if let Err(e) = update_locked.0.broadcast(Arc::new(status)) {
|
if let Err(e) = update_locked.0.send(Arc::new(status)) {
|
||||||
error!("In ping_nodes: could not save status update ({})", e);
|
error!("In ping_nodes: could not save status update ({})", e);
|
||||||
}
|
}
|
||||||
drop(update_locked);
|
drop(update_locked);
|
||||||
|
@ -425,7 +420,7 @@ impl System {
|
||||||
let status_hash = status.hash;
|
let status_hash = status.hash;
|
||||||
let config_version = self.ring.borrow().config.version;
|
let config_version = self.ring.borrow().config.version;
|
||||||
|
|
||||||
update_locked.0.broadcast(Arc::new(status))?;
|
update_locked.0.send(Arc::new(status))?;
|
||||||
drop(update_locked);
|
drop(update_locked);
|
||||||
|
|
||||||
if is_new || status_hash != ping.status_hash {
|
if is_new || status_hash != ping.status_hash {
|
||||||
|
@ -507,7 +502,7 @@ impl System {
|
||||||
if has_changed {
|
if has_changed {
|
||||||
status.recalculate_hash();
|
status.recalculate_hash();
|
||||||
}
|
}
|
||||||
update_lock.0.broadcast(Arc::new(status))?;
|
update_lock.0.send(Arc::new(status))?;
|
||||||
drop(update_lock);
|
drop(update_lock);
|
||||||
|
|
||||||
if to_ping.len() > 0 {
|
if to_ping.len() > 0 {
|
||||||
|
@ -527,7 +522,7 @@ impl System {
|
||||||
|
|
||||||
if adv.version > ring.config.version {
|
if adv.version > ring.config.version {
|
||||||
let ring = Ring::new(adv.clone());
|
let ring = Ring::new(adv.clone());
|
||||||
update_lock.1.broadcast(Arc::new(ring))?;
|
update_lock.1.send(Arc::new(ring))?;
|
||||||
drop(update_lock);
|
drop(update_lock);
|
||||||
|
|
||||||
self.background.spawn_cancellable(
|
self.background.spawn_cancellable(
|
||||||
|
@ -543,7 +538,7 @@ impl System {
|
||||||
|
|
||||||
async fn ping_loop(self: Arc<Self>, mut stop_signal: watch::Receiver<bool>) {
|
async fn ping_loop(self: Arc<Self>, mut stop_signal: watch::Receiver<bool>) {
|
||||||
loop {
|
loop {
|
||||||
let restart_at = tokio::time::delay_for(PING_INTERVAL);
|
let restart_at = tokio::time::sleep(PING_INTERVAL);
|
||||||
|
|
||||||
let status = self.status.borrow().clone();
|
let status = self.status.borrow().clone();
|
||||||
let ping_addrs = status
|
let ping_addrs = status
|
||||||
|
@ -557,10 +552,9 @@ impl System {
|
||||||
|
|
||||||
select! {
|
select! {
|
||||||
_ = restart_at.fuse() => (),
|
_ = restart_at.fuse() => (),
|
||||||
must_exit = stop_signal.recv().fuse() => {
|
_ = stop_signal.changed().fuse() => {
|
||||||
match must_exit {
|
if *stop_signal.borrow() {
|
||||||
None | Some(true) => return,
|
return;
|
||||||
_ => (),
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -573,8 +567,8 @@ impl System {
|
||||||
consul_host: String,
|
consul_host: String,
|
||||||
consul_service_name: String,
|
consul_service_name: String,
|
||||||
) {
|
) {
|
||||||
loop {
|
while !*stop_signal.borrow() {
|
||||||
let restart_at = tokio::time::delay_for(CONSUL_INTERVAL);
|
let restart_at = tokio::time::sleep(CONSUL_INTERVAL);
|
||||||
|
|
||||||
match get_consul_nodes(&consul_host, &consul_service_name).await {
|
match get_consul_nodes(&consul_host, &consul_service_name).await {
|
||||||
Ok(mut node_list) => {
|
Ok(mut node_list) => {
|
||||||
|
@ -588,12 +582,7 @@ impl System {
|
||||||
|
|
||||||
select! {
|
select! {
|
||||||
_ = restart_at.fuse() => (),
|
_ = restart_at.fuse() => (),
|
||||||
must_exit = stop_signal.recv().fuse() => {
|
_ = stop_signal.changed().fuse() => (),
|
||||||
match must_exit {
|
|
||||||
None | Some(true) => return,
|
|
||||||
_ => (),
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -5,6 +5,11 @@ use serde::{Deserialize, Serialize};
|
||||||
|
|
||||||
use garage_util::data::*;
|
use garage_util::data::*;
|
||||||
|
|
||||||
|
// A partition number is encoded on 16 bits,
|
||||||
|
// i.e. we have up to 2**16 partitions.
|
||||||
|
// (in practice we have exactly 2**PARTITION_BITS partitions)
|
||||||
|
pub type Partition = u16;
|
||||||
|
|
||||||
// TODO: make this constant parametrizable in the config file
|
// TODO: make this constant parametrizable in the config file
|
||||||
// For deployments with many nodes it might make sense to bump
|
// For deployments with many nodes it might make sense to bump
|
||||||
// it up to 10.
|
// it up to 10.
|
||||||
|
@ -161,29 +166,48 @@ impl Ring {
|
||||||
})
|
})
|
||||||
.collect::<Vec<_>>();
|
.collect::<Vec<_>>();
|
||||||
|
|
||||||
eprintln!("RING: --");
|
// eprintln!("RING: --");
|
||||||
for e in ring.iter() {
|
// for e in ring.iter() {
|
||||||
eprintln!("{:?}", e);
|
// eprintln!("{:?}", e);
|
||||||
}
|
// }
|
||||||
eprintln!("END --");
|
// eprintln!("END --");
|
||||||
|
|
||||||
Self { config, ring }
|
Self { config, ring }
|
||||||
}
|
}
|
||||||
|
|
||||||
|
pub fn partition_of(&self, from: &Hash) -> Partition {
|
||||||
|
let top = u16::from_be_bytes(from.as_slice()[0..2].try_into().unwrap());
|
||||||
|
top >> (16 - PARTITION_BITS)
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn partitions(&self) -> Vec<(Partition, Hash)> {
|
||||||
|
let mut ret = vec![];
|
||||||
|
|
||||||
|
for (i, entry) in self.ring.iter().enumerate() {
|
||||||
|
ret.push((i as u16, entry.location));
|
||||||
|
}
|
||||||
|
if ret.len() > 0 {
|
||||||
|
assert_eq!(ret[0].1, [0u8; 32].into());
|
||||||
|
}
|
||||||
|
|
||||||
|
ret
|
||||||
|
}
|
||||||
|
|
||||||
pub fn walk_ring(&self, from: &Hash, n: usize) -> Vec<UUID> {
|
pub fn walk_ring(&self, from: &Hash, n: usize) -> Vec<UUID> {
|
||||||
if self.ring.len() != 1 << PARTITION_BITS {
|
if self.ring.len() != 1 << PARTITION_BITS {
|
||||||
warn!("Ring not yet ready, read/writes will be lost");
|
warn!("Ring not yet ready, read/writes will be lost!");
|
||||||
return vec![];
|
return vec![];
|
||||||
}
|
}
|
||||||
|
|
||||||
let top = u16::from_be_bytes(from.as_slice()[0..2].try_into().unwrap());
|
let top = u16::from_be_bytes(from.as_slice()[0..2].try_into().unwrap());
|
||||||
|
|
||||||
let partition_idx = (top >> (16 - PARTITION_BITS)) as usize;
|
let partition_idx = (top >> (16 - PARTITION_BITS)) as usize;
|
||||||
|
assert_eq!(partition_idx, self.partition_of(from) as usize);
|
||||||
|
|
||||||
let partition = &self.ring[partition_idx];
|
let partition = &self.ring[partition_idx];
|
||||||
|
|
||||||
let partition_top =
|
let partition_top =
|
||||||
u16::from_be_bytes(partition.location.as_slice()[0..2].try_into().unwrap());
|
u16::from_be_bytes(partition.location.as_slice()[0..2].try_into().unwrap());
|
||||||
assert!(partition_top & PARTITION_MASK_U16 == top & PARTITION_MASK_U16);
|
assert_eq!(partition_top & PARTITION_MASK_U16, top & PARTITION_MASK_U16);
|
||||||
|
|
||||||
assert!(n <= partition.nodes.len());
|
assert!(n <= partition.nodes.len());
|
||||||
partition.nodes[..n].iter().cloned().collect::<Vec<_>>()
|
partition.nodes[..n].iter().cloned().collect::<Vec<_>>()
|
||||||
|
|
|
@ -7,7 +7,6 @@ use std::sync::Arc;
|
||||||
use std::time::Duration;
|
use std::time::Duration;
|
||||||
|
|
||||||
use arc_swap::ArcSwapOption;
|
use arc_swap::ArcSwapOption;
|
||||||
use bytes::IntoBuf;
|
|
||||||
use futures::future::Future;
|
use futures::future::Future;
|
||||||
use futures::stream::futures_unordered::FuturesUnordered;
|
use futures::stream::futures_unordered::FuturesUnordered;
|
||||||
use futures::stream::StreamExt;
|
use futures::stream::StreamExt;
|
||||||
|
@ -197,11 +196,8 @@ impl<M: RpcMessage + 'static> RpcClient<M> {
|
||||||
if !strategy.rs_interrupt_after_quorum {
|
if !strategy.rs_interrupt_after_quorum {
|
||||||
let wait_finished_fut = tokio::spawn(async move {
|
let wait_finished_fut = tokio::spawn(async move {
|
||||||
resp_stream.collect::<Vec<_>>().await;
|
resp_stream.collect::<Vec<_>>().await;
|
||||||
Ok(())
|
|
||||||
});
|
});
|
||||||
self.background.spawn(wait_finished_fut.map(|x| {
|
self.background.spawn(wait_finished_fut.map(|_| Ok(())));
|
||||||
x.unwrap_or_else(|e| Err(Error::Message(format!("Await failed: {}", e))))
|
|
||||||
}));
|
|
||||||
}
|
}
|
||||||
|
|
||||||
Ok(results)
|
Ok(results)
|
||||||
|
@ -336,7 +332,7 @@ impl RpcHttpClient {
|
||||||
let body = hyper::body::to_bytes(resp.into_body()).await?;
|
let body = hyper::body::to_bytes(resp.into_body()).await?;
|
||||||
drop(slot);
|
drop(slot);
|
||||||
|
|
||||||
match rmp_serde::decode::from_read::<_, Result<M, String>>(body.into_buf())? {
|
match rmp_serde::decode::from_read::<_, Result<M, String>>(&body[..])? {
|
||||||
Err(e) => Ok(Err(Error::RemoteError(e, status))),
|
Err(e) => Ok(Err(Error::RemoteError(e, status))),
|
||||||
Ok(x) => Ok(Ok(x)),
|
Ok(x) => Ok(Ok(x)),
|
||||||
}
|
}
|
||||||
|
|
|
@ -4,7 +4,6 @@ use std::pin::Pin;
|
||||||
use std::sync::Arc;
|
use std::sync::Arc;
|
||||||
use std::time::Instant;
|
use std::time::Instant;
|
||||||
|
|
||||||
use bytes::IntoBuf;
|
|
||||||
use futures::future::Future;
|
use futures::future::Future;
|
||||||
use futures_util::future::*;
|
use futures_util::future::*;
|
||||||
use futures_util::stream::*;
|
use futures_util::stream::*;
|
||||||
|
@ -15,6 +14,7 @@ use serde::{Deserialize, Serialize};
|
||||||
use tokio::net::{TcpListener, TcpStream};
|
use tokio::net::{TcpListener, TcpStream};
|
||||||
use tokio_rustls::server::TlsStream;
|
use tokio_rustls::server::TlsStream;
|
||||||
use tokio_rustls::TlsAcceptor;
|
use tokio_rustls::TlsAcceptor;
|
||||||
|
use tokio_stream::wrappers::TcpListenerStream;
|
||||||
|
|
||||||
use garage_util::config::TlsConfig;
|
use garage_util::config::TlsConfig;
|
||||||
use garage_util::data::*;
|
use garage_util::data::*;
|
||||||
|
@ -47,11 +47,15 @@ where
|
||||||
{
|
{
|
||||||
let begin_time = Instant::now();
|
let begin_time = Instant::now();
|
||||||
let whole_body = hyper::body::to_bytes(req.into_body()).await?;
|
let whole_body = hyper::body::to_bytes(req.into_body()).await?;
|
||||||
let msg = rmp_serde::decode::from_read::<_, M>(whole_body.into_buf())?;
|
let msg = rmp_serde::decode::from_read::<_, M>(&whole_body[..])?;
|
||||||
|
|
||||||
trace!(
|
trace!(
|
||||||
"Request message: {}",
|
"Request message: {}",
|
||||||
serde_json::to_string(&msg).unwrap_or("<json error>".into())
|
serde_json::to_string(&msg)
|
||||||
|
.unwrap_or("<json error>".into())
|
||||||
|
.chars()
|
||||||
|
.take(100)
|
||||||
|
.collect::<String>()
|
||||||
);
|
);
|
||||||
|
|
||||||
match handler(msg, sockaddr).await {
|
match handler(msg, sockaddr).await {
|
||||||
|
@ -171,8 +175,8 @@ impl RpcServer {
|
||||||
config.set_single_cert([&node_certs[..], &ca_certs[..]].concat(), node_key)?;
|
config.set_single_cert([&node_certs[..], &ca_certs[..]].concat(), node_key)?;
|
||||||
let tls_acceptor = Arc::new(TlsAcceptor::from(Arc::new(config)));
|
let tls_acceptor = Arc::new(TlsAcceptor::from(Arc::new(config)));
|
||||||
|
|
||||||
let mut listener = TcpListener::bind(&self.bind_addr).await?;
|
let listener = TcpListener::bind(&self.bind_addr).await?;
|
||||||
let incoming = listener.incoming().filter_map(|socket| async {
|
let incoming = TcpListenerStream::new(listener).filter_map(|socket| async {
|
||||||
match socket {
|
match socket {
|
||||||
Ok(stream) => match tls_acceptor.clone().accept(stream).await {
|
Ok(stream) => match tls_acceptor.clone().accept(stream).await {
|
||||||
Ok(x) => Some(Ok::<_, hyper::Error>(x)),
|
Ok(x) => Some(Ok::<_, hyper::Error>(x)),
|
||||||
|
|
|
@ -16,21 +16,18 @@ path = "lib.rs"
|
||||||
garage_util = { version = "0.1.1", path = "../util" }
|
garage_util = { version = "0.1.1", path = "../util" }
|
||||||
garage_rpc = { version = "0.1.1", path = "../rpc" }
|
garage_rpc = { version = "0.1.1", path = "../rpc" }
|
||||||
|
|
||||||
bytes = "0.4"
|
bytes = "1.0"
|
||||||
rand = "0.7"
|
rand = "0.8"
|
||||||
hex = "0.3"
|
|
||||||
arc-swap = "0.4"
|
|
||||||
log = "0.4"
|
log = "0.4"
|
||||||
hexdump = "0.1"
|
hexdump = "0.1"
|
||||||
|
|
||||||
sled = "0.34"
|
sled = "0.34"
|
||||||
|
|
||||||
rmp-serde = "0.14.3"
|
rmp-serde = "0.15"
|
||||||
serde = { version = "1.0", default-features = false, features = ["derive", "rc"] }
|
serde = { version = "1.0", default-features = false, features = ["derive", "rc"] }
|
||||||
serde_bytes = "0.11"
|
serde_bytes = "0.11"
|
||||||
|
|
||||||
async-trait = "0.1.30"
|
|
||||||
futures = "0.3"
|
futures = "0.3"
|
||||||
futures-util = "0.3"
|
futures-util = "0.3"
|
||||||
tokio = { version = "0.2", default-features = false, features = ["rt-core", "rt-threaded", "io-driver", "net", "tcp", "time", "macros", "sync", "signal", "fs"] }
|
tokio = { version = "1.0", default-features = false, features = ["rt", "rt-multi-thread", "io-util", "net", "time", "macros", "sync", "signal", "fs"] }
|
||||||
|
|
||||||
|
|
|
@ -1,327 +0,0 @@
|
||||||
//! This package provides a simple implementation of conflict-free replicated data types (CRDTs)
|
|
||||||
//!
|
|
||||||
//! CRDTs are a type of data structures that do not require coordination. In other words, we can
|
|
||||||
//! edit them in parallel, we will always find a way to merge it.
|
|
||||||
//!
|
|
||||||
//! A general example is a counter. Its initial value is 0. Alice and Bob get a copy of the
|
|
||||||
//! counter. Alice does +1 on her copy, she reads 1. Bob does +3 on his copy, he reads 3. Now,
|
|
||||||
//! it is easy to merge their counters, order does not count: we always get 4.
|
|
||||||
//!
|
|
||||||
//! Learn more about CRDT [on Wikipedia](https://en.wikipedia.org/wiki/Conflict-free_replicated_data_type)
|
|
||||||
|
|
||||||
use serde::{Deserialize, Serialize};
|
|
||||||
|
|
||||||
use garage_util::data::*;
|
|
||||||
|
|
||||||
/// Definition of a CRDT - all CRDT Rust types implement this.
|
|
||||||
///
|
|
||||||
/// A CRDT is defined as a merge operator that respects a certain set of axioms.
|
|
||||||
///
|
|
||||||
/// In particular, the merge operator must be commutative, associative,
|
|
||||||
/// idempotent, and monotonic.
|
|
||||||
/// In other words, if `a`, `b` and `c` are CRDTs, and `⊔` denotes the merge operator,
|
|
||||||
/// the following axioms must apply:
|
|
||||||
///
|
|
||||||
/// ```text
|
|
||||||
/// a ⊔ b = b ⊔ a (commutativity)
|
|
||||||
/// (a ⊔ b) ⊔ c = a ⊔ (b ⊔ c) (associativity)
|
|
||||||
/// (a ⊔ b) ⊔ b = a ⊔ b (idempotence)
|
|
||||||
/// ```
|
|
||||||
///
|
|
||||||
/// Moreover, the relationship `≥` defined by `a ≥ b ⇔ ∃c. a = b ⊔ c` must be a partial order.
|
|
||||||
/// This implies a few properties such as: if `a ⊔ b ≠ a`, then there is no `c` such that `(a ⊔ b) ⊔ c = a`,
|
|
||||||
/// as this would imply a cycle in the partial order.
|
|
||||||
pub trait CRDT {
|
|
||||||
/// Merge the two datastructures according to the CRDT rules.
|
|
||||||
/// `self` is modified to contain the merged CRDT value. `other` is not modified.
|
|
||||||
///
|
|
||||||
/// # Arguments
|
|
||||||
///
|
|
||||||
/// * `other` - the other CRDT we wish to merge with
|
|
||||||
fn merge(&mut self, other: &Self);
|
|
||||||
}
|
|
||||||
|
|
||||||
/// All types that implement `Ord` (a total order) also implement a trivial CRDT
|
|
||||||
/// defined by the merge rule: `a ⊔ b = max(a, b)`.
|
|
||||||
impl<T> CRDT for T
|
|
||||||
where
|
|
||||||
T: Ord + Clone,
|
|
||||||
{
|
|
||||||
fn merge(&mut self, other: &Self) {
|
|
||||||
if other > self {
|
|
||||||
*self = other.clone();
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// ---- LWW Register ----
|
|
||||||
|
|
||||||
/// Last Write Win (LWW)
|
|
||||||
///
|
|
||||||
/// An LWW CRDT associates a timestamp with a value, in order to implement a
|
|
||||||
/// time-based reconciliation rule: the most recent write wins.
|
|
||||||
/// For completeness, the LWW reconciliation rule must also be defined for two LWW CRDTs
|
|
||||||
/// with the same timestamp but different values.
|
|
||||||
///
|
|
||||||
/// In our case, we add the constraint that the value that is wrapped inside the LWW CRDT must
|
|
||||||
/// itself be a CRDT: in the case when the timestamp does not allow us to decide on which value to
|
|
||||||
/// keep, the merge rule of the inner CRDT is applied on the wrapped values. (Note that all types
|
|
||||||
/// that implement the `Ord` trait get a default CRDT implementation that keeps the maximum value.
|
|
||||||
/// This enables us to use LWW directly with primitive data types such as numbers or strings. It is
|
|
||||||
/// generally desirable in this case to never explicitly produce LWW values with the same timestamp
|
|
||||||
/// but different inner values, as the rule to keep the maximum value isn't generally the desired
|
|
||||||
/// semantics.)
|
|
||||||
///
|
|
||||||
/// As the clocks of multiple computers are always desynchronized,
|
|
||||||
/// when operations are close enough, it is equivalent to
|
|
||||||
/// take one copy and drop the other one.
|
|
||||||
///
|
|
||||||
/// Given that clocks are not too desynchronized, this assumption
|
|
||||||
/// is enough for most cases, as there is little chance that two humans
|
|
||||||
/// coordinate themselves faster than the time difference between two NTP servers.
|
|
||||||
///
|
|
||||||
/// As a more concrete example, let's suppose you want to upload a file
|
|
||||||
/// with the same key (path) in the same bucket at the very same time.
|
|
||||||
/// For each request, the file will be timestamped by the receiving server
|
|
||||||
/// and may differ from what you observed with your atomic clock!
|
|
||||||
///
|
|
||||||
/// This scheme is used by AWS S3 or Soundcloud and often without knowing
|
|
||||||
/// in enterprises when reconciling databases with ad-hoc scripts.
|
|
||||||
#[derive(Clone, Debug, Serialize, Deserialize, PartialEq)]
|
|
||||||
pub struct LWW<T> {
|
|
||||||
ts: u64,
|
|
||||||
v: T,
|
|
||||||
}
|
|
||||||
|
|
||||||
impl<T> LWW<T>
|
|
||||||
where
|
|
||||||
T: CRDT,
|
|
||||||
{
|
|
||||||
/// Creates a new CRDT
|
|
||||||
///
|
|
||||||
/// CRDT's internal timestamp is set with current node's clock.
|
|
||||||
pub fn new(value: T) -> Self {
|
|
||||||
Self {
|
|
||||||
ts: now_msec(),
|
|
||||||
v: value,
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
/// Build a new CRDT from a previous non-compatible one
|
|
||||||
///
|
|
||||||
/// Compared to new, the CRDT's timestamp is not set to now
|
|
||||||
/// but must be set to the previous, non-compatible, CRDT's timestamp.
|
|
||||||
pub fn migrate_from_raw(ts: u64, value: T) -> Self {
|
|
||||||
Self { ts, v: value }
|
|
||||||
}
|
|
||||||
|
|
||||||
/// Update the LWW CRDT while keeping some causal ordering.
|
|
||||||
///
|
|
||||||
/// The timestamp of the LWW CRDT is updated to be the current node's clock
|
|
||||||
/// at time of update, or the previous timestamp + 1 if that's bigger,
|
|
||||||
/// so that the new timestamp is always strictly larger than the previous one.
|
|
||||||
/// This ensures that merging the update with the old value will result in keeping
|
|
||||||
/// the updated value.
|
|
||||||
pub fn update(&mut self, new_value: T) {
|
|
||||||
self.ts = std::cmp::max(self.ts + 1, now_msec());
|
|
||||||
self.v = new_value;
|
|
||||||
}
|
|
||||||
|
|
||||||
/// Get the CRDT value
|
|
||||||
pub fn get(&self) -> &T {
|
|
||||||
&self.v
|
|
||||||
}
|
|
||||||
|
|
||||||
/// Get a mutable reference to the CRDT's value
|
|
||||||
///
|
|
||||||
/// This is useful to mutate the inner value without changing the LWW timestamp.
|
|
||||||
/// When such mutation is done, the merge between two LWW values is done using the inner
|
|
||||||
/// CRDT's merge operation. This is useful in the case where the inner CRDT is a large
|
|
||||||
/// data type, such as a map, and we only want to change a single item in the map.
|
|
||||||
/// To do this, we can produce a "CRDT delta", i.e. a LWW that contains only the modification.
|
|
||||||
/// This delta consists in a LWW with the same timestamp, and the map
|
|
||||||
/// inside only contains the updated value.
|
|
||||||
/// The advantage of such a delta is that it is much smaller than the whole map.
|
|
||||||
///
|
|
||||||
/// Avoid using this if the inner data type is a primitive type such as a number or a string,
|
|
||||||
/// as you will then rely on the merge function defined on `Ord` types by keeping the maximum
|
|
||||||
/// of both values.
|
|
||||||
pub fn get_mut(&mut self) -> &mut T {
|
|
||||||
&mut self.v
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
impl<T> CRDT for LWW<T>
|
|
||||||
where
|
|
||||||
T: Clone + CRDT,
|
|
||||||
{
|
|
||||||
fn merge(&mut self, other: &Self) {
|
|
||||||
if other.ts > self.ts {
|
|
||||||
self.ts = other.ts;
|
|
||||||
self.v = other.v.clone();
|
|
||||||
} else if other.ts == self.ts {
|
|
||||||
self.v.merge(&other.v);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
/// Boolean, where `true` is an absorbing state
|
|
||||||
#[derive(Clone, Copy, Debug, Serialize, Deserialize, PartialEq)]
|
|
||||||
pub struct Bool(bool);
|
|
||||||
|
|
||||||
impl Bool {
|
|
||||||
/// Create a new boolean with the specified value
|
|
||||||
pub fn new(b: bool) -> Self {
|
|
||||||
Self(b)
|
|
||||||
}
|
|
||||||
/// Set the boolean to true
|
|
||||||
pub fn set(&mut self) {
|
|
||||||
self.0 = true;
|
|
||||||
}
|
|
||||||
/// Get the boolean value
|
|
||||||
pub fn get(&self) -> bool {
|
|
||||||
self.0
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
impl CRDT for Bool {
|
|
||||||
fn merge(&mut self, other: &Self) {
|
|
||||||
self.0 = self.0 || other.0;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
/// Last Write Win Map
|
|
||||||
///
|
|
||||||
/// This type defines a CRDT for a map from keys to values.
|
|
||||||
/// The values have an associated timestamp, such that the last written value
|
|
||||||
/// takes precedence over previous ones. As for the simpler `LWW` type, the value
|
|
||||||
/// type `V` is also required to implement the CRDT trait.
|
|
||||||
/// We do not encourage mutating the values associated with a given key
|
|
||||||
/// without updating the timestamp, in fact at the moment we do not provide a `.get_mut()`
|
|
||||||
/// method that would allow that.
|
|
||||||
///
|
|
||||||
/// Internally, the map is stored as a vector of keys and values, sorted by ascending key order.
|
|
||||||
/// This is why the key type `K` must implement `Ord` (and also to ensure a unique serialization,
|
|
||||||
/// such that two values can be compared for equality based on their hashes). As a consequence,
|
|
||||||
/// insertions take `O(n)` time. This means that LWWMap should be used for reasonably small maps.
|
|
||||||
/// However, note that even if we were using a more efficient data structure such as a `BTreeMap`,
|
|
||||||
/// the serialization cost `O(n)` would still have to be paid at each modification, so we are
|
|
||||||
/// actually not losing anything here.
|
|
||||||
#[derive(Clone, Debug, Serialize, Deserialize, PartialEq)]
|
|
||||||
pub struct LWWMap<K, V> {
|
|
||||||
vals: Vec<(K, u64, V)>,
|
|
||||||
}
|
|
||||||
|
|
||||||
impl<K, V> LWWMap<K, V>
|
|
||||||
where
|
|
||||||
K: Ord,
|
|
||||||
V: CRDT,
|
|
||||||
{
|
|
||||||
/// Create a new empty map CRDT
|
|
||||||
pub fn new() -> Self {
|
|
||||||
Self { vals: vec![] }
|
|
||||||
}
|
|
||||||
/// Used to migrate from a map defined in an incompatible format. This produces
|
|
||||||
/// a map that contains a single item with the specified timestamp (copied from
|
|
||||||
/// the incompatible format). Do this as many times as you have items to migrate,
|
|
||||||
/// and put them all together using the CRDT merge operator.
|
|
||||||
pub fn migrate_from_raw_item(k: K, ts: u64, v: V) -> Self {
|
|
||||||
Self {
|
|
||||||
vals: vec![(k, ts, v)],
|
|
||||||
}
|
|
||||||
}
|
|
||||||
/// Returns a map that contains a single mapping from the specified key to the specified value.
|
|
||||||
/// This map is a mutator, or a delta-CRDT, such that when it is merged with the original map,
|
|
||||||
/// the previous value will be replaced with the one specified here.
|
|
||||||
/// The timestamp in the provided mutator is set to the maximum of the current system's clock
|
|
||||||
/// and 1 + the previous value's timestamp (if there is one), so that the new value will always
|
|
||||||
/// take precedence (LWW rule).
|
|
||||||
///
|
|
||||||
/// Typically, to update the value associated to a key in the map, you would do the following:
|
|
||||||
///
|
|
||||||
/// ```ignore
|
|
||||||
/// let my_update = my_crdt.update_mutator(key_to_modify, new_value);
|
|
||||||
/// my_crdt.merge(&my_update);
|
|
||||||
/// ```
|
|
||||||
///
|
|
||||||
/// However extracting the mutator on its own and only sending that on the network is very
|
|
||||||
/// interesting as it is much smaller than the whole map.
|
|
||||||
pub fn update_mutator(&self, k: K, new_v: V) -> Self {
|
|
||||||
let new_vals = match self.vals.binary_search_by(|(k2, _, _)| k2.cmp(&k)) {
|
|
||||||
Ok(i) => {
|
|
||||||
let (_, old_ts, _) = self.vals[i];
|
|
||||||
let new_ts = std::cmp::max(old_ts + 1, now_msec());
|
|
||||||
vec![(k, new_ts, new_v)]
|
|
||||||
}
|
|
||||||
Err(_) => vec![(k, now_msec(), new_v)],
|
|
||||||
};
|
|
||||||
Self { vals: new_vals }
|
|
||||||
}
|
|
||||||
/// Takes all of the values of the map and returns them. The current map is reset to the
|
|
||||||
/// empty map. This is very useful to produce in-place a new map that contains only a delta
|
|
||||||
/// that modifies a certain value:
|
|
||||||
///
|
|
||||||
/// ```ignore
|
|
||||||
/// let mut a = get_my_crdt_value();
|
|
||||||
/// let old_a = a.take_and_clear();
|
|
||||||
/// a.merge(&old_a.update_mutator(key_to_modify, new_value));
|
|
||||||
/// put_my_crdt_value(a);
|
|
||||||
/// ```
|
|
||||||
///
|
|
||||||
/// Of course in this simple example we could have written simply
|
|
||||||
/// `put_my_crdt_value(a.update_mutator(key_to_modify, new_value))`,
|
|
||||||
/// but in the case where the map is a field in a struct for instance (as is always the case),
|
|
||||||
/// this becomes very handy:
|
|
||||||
///
|
|
||||||
/// ```ignore
|
|
||||||
/// let mut a = get_my_crdt_value();
|
|
||||||
/// let old_a_map = a.map_field.take_and_clear();
|
|
||||||
/// a.map_field.merge(&old_a_map.update_mutator(key_to_modify, new_value));
|
|
||||||
/// put_my_crdt_value(a);
|
|
||||||
/// ```
|
|
||||||
pub fn take_and_clear(&mut self) -> Self {
|
|
||||||
let vals = std::mem::replace(&mut self.vals, vec![]);
|
|
||||||
Self { vals }
|
|
||||||
}
|
|
||||||
/// Removes all values from the map
|
|
||||||
pub fn clear(&mut self) {
|
|
||||||
self.vals.clear();
|
|
||||||
}
|
|
||||||
/// Get a reference to the value assigned to a key
|
|
||||||
pub fn get(&self, k: &K) -> Option<&V> {
|
|
||||||
match self.vals.binary_search_by(|(k2, _, _)| k2.cmp(&k)) {
|
|
||||||
Ok(i) => Some(&self.vals[i].2),
|
|
||||||
Err(_) => None,
|
|
||||||
}
|
|
||||||
}
|
|
||||||
/// Gets a reference to all of the items, as a slice. Useful to iterate over all map values.
|
|
||||||
/// In most cases you will want to ignore the timestamp (second item of the tuple).
|
|
||||||
pub fn items(&self) -> &[(K, u64, V)] {
|
|
||||||
&self.vals[..]
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
impl<K, V> CRDT for LWWMap<K, V>
|
|
||||||
where
|
|
||||||
K: Clone + Ord,
|
|
||||||
V: Clone + CRDT,
|
|
||||||
{
|
|
||||||
fn merge(&mut self, other: &Self) {
|
|
||||||
for (k, ts2, v2) in other.vals.iter() {
|
|
||||||
match self.vals.binary_search_by(|(k2, _, _)| k2.cmp(&k)) {
|
|
||||||
Ok(i) => {
|
|
||||||
let (_, ts1, _v1) = &self.vals[i];
|
|
||||||
if ts2 > ts1 {
|
|
||||||
self.vals[i].1 = *ts2;
|
|
||||||
self.vals[i].2 = v2.clone();
|
|
||||||
} else if ts1 == ts2 {
|
|
||||||
self.vals[i].2.merge(&v2);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
Err(i) => {
|
|
||||||
self.vals.insert(i, (k.clone(), *ts2, v2.clone()));
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
34
src/table/crdt/bool.rs
Normal file
|
@ -0,0 +1,34 @@
|
||||||
|
use serde::{Deserialize, Serialize};
|
||||||
|
|
||||||
|
use crate::crdt::crdt::*;
|
||||||
|
|
||||||
|
/// Boolean, where `true` is an absorbing state
|
||||||
|
#[derive(Clone, Copy, Debug, Serialize, Deserialize, PartialEq)]
|
||||||
|
pub struct Bool(bool);
|
||||||
|
|
||||||
|
impl Bool {
|
||||||
|
/// Create a new boolean with the specified value
|
||||||
|
pub fn new(b: bool) -> Self {
|
||||||
|
Self(b)
|
||||||
|
}
|
||||||
|
/// Set the boolean to true
|
||||||
|
pub fn set(&mut self) {
|
||||||
|
self.0 = true;
|
||||||
|
}
|
||||||
|
/// Get the boolean value
|
||||||
|
pub fn get(&self) -> bool {
|
||||||
|
self.0
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
impl From<bool> for Bool {
|
||||||
|
fn from(b: bool) -> Bool {
|
||||||
|
Bool::new(b)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
impl CRDT for Bool {
|
||||||
|
fn merge(&mut self, other: &Self) {
|
||||||
|
self.0 = self.0 || other.0;
|
||||||
|
}
|
||||||
|
}
|
73
src/table/crdt/crdt.rs
Normal file
|
@ -0,0 +1,73 @@
|
||||||
|
use garage_util::data::*;
|
||||||
|
|
||||||
|
/// Definition of a CRDT - all CRDT Rust types implement this.
|
||||||
|
///
|
||||||
|
/// A CRDT is defined as a merge operator that respects a certain set of axioms.
|
||||||
|
///
|
||||||
|
/// In particular, the merge operator must be commutative, associative,
|
||||||
|
/// idempotent, and monotonic.
|
||||||
|
/// In other words, if `a`, `b` and `c` are CRDTs, and `⊔` denotes the merge operator,
|
||||||
|
/// the following axioms must apply:
|
||||||
|
///
|
||||||
|
/// ```text
|
||||||
|
/// a ⊔ b = b ⊔ a (commutativity)
|
||||||
|
/// (a ⊔ b) ⊔ c = a ⊔ (b ⊔ c) (associativity)
|
||||||
|
/// (a ⊔ b) ⊔ b = a ⊔ b (idempotence)
|
||||||
|
/// ```
|
||||||
|
///
|
||||||
|
/// Moreover, the relationship `≥` defined by `a ≥ b ⇔ ∃c. a = b ⊔ c` must be a partial order.
|
||||||
|
/// This implies a few properties such as: if `a ⊔ b ≠ a`, then there is no `c` such that `(a ⊔ b) ⊔ c = a`,
|
||||||
|
/// as this would imply a cycle in the partial order.
|
||||||
|
pub trait CRDT {
|
||||||
|
/// Merge the two datastructures according to the CRDT rules.
|
||||||
|
/// `self` is modified to contain the merged CRDT value. `other` is not modified.
|
||||||
|
///
|
||||||
|
/// # Arguments
|
||||||
|
///
|
||||||
|
/// * `other` - the other CRDT we wish to merge with
|
||||||
|
fn merge(&mut self, other: &Self);
|
||||||
|
}
|
||||||
|
|
||||||
|
/// All types that implement `Ord` (a total order) can also implement a trivial CRDT
|
||||||
|
/// defined by the merge rule: `a ⊔ b = max(a, b)`. Implement this trait for your type
|
||||||
|
/// to enable this behavior.
|
||||||
|
pub trait AutoCRDT: Ord + Clone + std::fmt::Debug {
|
||||||
|
/// WARN_IF_DIFFERENT: emit a warning when values differ. Set this to true if
|
||||||
|
/// different values in your application should never happen. Set this to false
|
||||||
|
/// if you are actually relying on the semantics of `a ⊔ b = max(a, b)`.
|
||||||
|
const WARN_IF_DIFFERENT: bool;
|
||||||
|
}
|
||||||
|
|
||||||
|
impl<T> CRDT for T
|
||||||
|
where
|
||||||
|
T: AutoCRDT,
|
||||||
|
{
|
||||||
|
fn merge(&mut self, other: &Self) {
|
||||||
|
if Self::WARN_IF_DIFFERENT && self != other {
|
||||||
|
warn!(
|
||||||
|
"Different CRDT values should be the same (logic error!): {:?} vs {:?}",
|
||||||
|
self, other
|
||||||
|
);
|
||||||
|
if other > self {
|
||||||
|
*self = other.clone();
|
||||||
|
}
|
||||||
|
warn!("Making an arbitrary choice: {:?}", self);
|
||||||
|
} else {
|
||||||
|
if other > self {
|
||||||
|
*self = other.clone();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
impl AutoCRDT for String {
|
||||||
|
const WARN_IF_DIFFERENT: bool = true;
|
||||||
|
}
|
||||||
|
|
||||||
|
impl AutoCRDT for bool {
|
||||||
|
const WARN_IF_DIFFERENT: bool = true;
|
||||||
|
}
|
||||||
|
|
||||||
|
impl AutoCRDT for FixedBytes32 {
|
||||||
|
const WARN_IF_DIFFERENT: bool = true;
|
||||||
|
}
|
114
src/table/crdt/lww.rs
Normal file
|
@ -0,0 +1,114 @@
|
||||||
|
use serde::{Deserialize, Serialize};
|
||||||
|
|
||||||
|
use garage_util::time::now_msec;
|
||||||
|
|
||||||
|
use crate::crdt::crdt::*;
|
||||||
|
|
||||||
|
/// Last Write Win (LWW)
|
||||||
|
///
|
||||||
|
/// An LWW CRDT associates a timestamp with a value, in order to implement a
|
||||||
|
/// time-based reconciliation rule: the most recent write wins.
|
||||||
|
/// For completeness, the LWW reconciliation rule must also be defined for two LWW CRDTs
|
||||||
|
/// with the same timestamp but different values.
|
||||||
|
///
|
||||||
|
/// In our case, we add the constraint that the value that is wrapped inside the LWW CRDT must
|
||||||
|
/// itself be a CRDT: in the case when the timestamp does not allow us to decide on which value to
|
||||||
|
/// keep, the merge rule of the inner CRDT is applied on the wrapped values. (Note that all types
|
||||||
|
/// that implement the `Ord` trait get a default CRDT implementation that keeps the maximum value.
|
||||||
|
/// This enables us to use LWW directly with primitive data types such as numbers or strings. It is
|
||||||
|
/// generally desirable in this case to never explicitly produce LWW values with the same timestamp
|
||||||
|
/// but different inner values, as the rule to keep the maximum value isn't generally the desired
|
||||||
|
/// semantics.)
|
||||||
|
///
|
||||||
|
/// As the clocks of multiple computers are always desynchronized,
|
||||||
|
/// when operations are close enough, it is equivalent to
|
||||||
|
/// take one copy and drop the other one.
|
||||||
|
///
|
||||||
|
/// Given that clocks are not too desynchronized, this assumption
|
||||||
|
/// is enough for most cases, as there is little chance that two humans
|
||||||
|
/// coordinate themselves faster than the time difference between two NTP servers.
|
||||||
|
///
|
||||||
|
/// As a more concrete example, let's suppose you want to upload a file
|
||||||
|
/// with the same key (path) in the same bucket at the very same time.
|
||||||
|
/// For each request, the file will be timestamped by the receiving server
|
||||||
|
/// and may differ from what you observed with your atomic clock!
|
||||||
|
///
|
||||||
|
/// This scheme is used by AWS S3 or Soundcloud and often without knowing
|
||||||
|
/// in entreprise when reconciliating databases with ad-hoc scripts.
|
||||||
|
#[derive(Clone, Debug, Serialize, Deserialize, PartialEq)]
|
||||||
|
pub struct LWW<T> {
|
||||||
|
ts: u64,
|
||||||
|
v: T,
|
||||||
|
}
|
||||||
|
|
||||||
|
impl<T> LWW<T>
|
||||||
|
where
|
||||||
|
T: CRDT,
|
||||||
|
{
|
||||||
|
/// Creates a new CRDT
|
||||||
|
///
|
||||||
|
/// CRDT's internal timestamp is set with current node's clock.
|
||||||
|
pub fn new(value: T) -> Self {
|
||||||
|
Self {
|
||||||
|
ts: now_msec(),
|
||||||
|
v: value,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Build a new CRDT from a previous non-compatible one
|
||||||
|
///
|
||||||
|
/// Compared to new, the CRDT's timestamp is not set to now
|
||||||
|
/// but must be set to the previous, non-compatible, CRDT's timestamp.
|
||||||
|
pub fn migrate_from_raw(ts: u64, value: T) -> Self {
|
||||||
|
Self { ts, v: value }
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Update the LWW CRDT while keeping some causal ordering.
|
||||||
|
///
|
||||||
|
/// The timestamp of the LWW CRDT is updated to be the current node's clock
|
||||||
|
/// at time of update, or the previous timestamp + 1 if that's bigger,
|
||||||
|
/// so that the new timestamp is always strictly larger than the previous one.
|
||||||
|
/// This ensures that merging the update with the old value will result in keeping
|
||||||
|
/// the updated value.
|
||||||
|
pub fn update(&mut self, new_value: T) {
|
||||||
|
self.ts = std::cmp::max(self.ts + 1, now_msec());
|
||||||
|
self.v = new_value;
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Get the CRDT value
|
||||||
|
pub fn get(&self) -> &T {
|
||||||
|
&self.v
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Get a mutable reference to the CRDT's value
|
||||||
|
///
|
||||||
|
/// This is usefull to mutate the inside value without changing the LWW timestamp.
|
||||||
|
/// When such mutation is done, the merge between two LWW values is done using the inner
|
||||||
|
/// CRDT's merge operation. This is usefull in the case where the inner CRDT is a large
|
||||||
|
/// data type, such as a map, and we only want to change a single item in the map.
|
||||||
|
/// To do this, we can produce a "CRDT delta", i.e. a LWW that contains only the modification.
|
||||||
|
/// This delta consists in a LWW with the same timestamp, and the map
|
||||||
|
/// inside only contains the updated value.
|
||||||
|
/// The advantage of such a delta is that it is much smaller than the whole map.
|
||||||
|
///
|
||||||
|
/// Avoid using this if the inner data type is a primitive type such as a number or a string,
|
||||||
|
/// as you will then rely on the merge function defined on `Ord` types by keeping the maximum
|
||||||
|
/// of both values.
|
||||||
|
pub fn get_mut(&mut self) -> &mut T {
|
||||||
|
&mut self.v
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
impl<T> CRDT for LWW<T>
|
||||||
|
where
|
||||||
|
T: Clone + CRDT,
|
||||||
|
{
|
||||||
|
fn merge(&mut self, other: &Self) {
|
||||||
|
if other.ts > self.ts {
|
||||||
|
self.ts = other.ts;
|
||||||
|
self.v = other.v.clone();
|
||||||
|
} else if other.ts == self.ts {
|
||||||
|
self.v.merge(&other.v);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
145
src/table/crdt/lww_map.rs
Normal file
|
@ -0,0 +1,145 @@
|
||||||
|
use serde::{Deserialize, Serialize};
|
||||||
|
|
||||||
|
use garage_util::time::now_msec;
|
||||||
|
|
||||||
|
use crate::crdt::crdt::*;
|
||||||
|
|
||||||
|
/// Last Write Win Map
///
/// This type defines a CRDT for a map from keys to values.
/// The values have an associated timestamp, such that the last written value
/// takes precedence over previous ones. As for the simpler `LWW` type, the value
/// type `V` is also required to implement the CRDT trait.
/// We do not encourage mutating the values associated with a given key
/// without updating the timestamp; in fact, at the moment we do not provide a `.get_mut()`
/// method that would allow that.
///
/// Internally, the map is stored as a vector of keys and values, sorted by ascending key order.
/// This is why the key type `K` must implement `Ord` (and also to ensure a unique serialization,
/// such that two values can be compared for equality based on their hashes). As a consequence,
/// insertions take `O(n)` time. This means that LWWMap should be used for reasonably small maps.
/// However, note that even if we were using a more efficient data structure such as a `BTreeMap`,
/// the serialization cost `O(n)` would still have to be paid at each modification, so we are
/// actually not losing anything here.
#[derive(Clone, Debug, Serialize, Deserialize, PartialEq)]
pub struct LWWMap<K, V> {
    /// Entries as `(key, timestamp, value)` triples, kept sorted by ascending key.
    vals: Vec<(K, u64, V)>,
}
|
||||||
|
|
||||||
|
impl<K, V> LWWMap<K, V>
|
||||||
|
where
|
||||||
|
K: Ord,
|
||||||
|
V: CRDT,
|
||||||
|
{
|
||||||
|
/// Create a new empty map CRDT
|
||||||
|
pub fn new() -> Self {
|
||||||
|
Self { vals: vec![] }
|
||||||
|
}
|
||||||
|
/// Used to migrate from a map defined in an incompatible format. This produces
|
||||||
|
/// a map that contains a single item with the specified timestamp (copied from
|
||||||
|
/// the incompatible format). Do this as many times as you have items to migrate,
|
||||||
|
/// and put them all together using the CRDT merge operator.
|
||||||
|
pub fn migrate_from_raw_item(k: K, ts: u64, v: V) -> Self {
|
||||||
|
Self {
|
||||||
|
vals: vec![(k, ts, v)],
|
||||||
|
}
|
||||||
|
}
|
||||||
|
/// Returns a map that contains a single mapping from the specified key to the specified value.
|
||||||
|
/// This map is a mutator, or a delta-CRDT, such that when it is merged with the original map,
|
||||||
|
/// the previous value will be replaced with the one specified here.
|
||||||
|
/// The timestamp in the provided mutator is set to the maximum of the current system's clock
|
||||||
|
/// and 1 + the previous value's timestamp (if there is one), so that the new value will always
|
||||||
|
/// take precedence (LWW rule).
|
||||||
|
///
|
||||||
|
/// Typically, to update the value associated to a key in the map, you would do the following:
|
||||||
|
///
|
||||||
|
/// ```ignore
|
||||||
|
/// let my_update = my_crdt.update_mutator(key_to_modify, new_value);
|
||||||
|
/// my_crdt.merge(&my_update);
|
||||||
|
/// ```
|
||||||
|
///
|
||||||
|
/// However extracting the mutator on its own and only sending that on the network is very
|
||||||
|
/// interesting as it is much smaller than the whole map.
|
||||||
|
pub fn update_mutator(&self, k: K, new_v: V) -> Self {
|
||||||
|
let new_vals = match self.vals.binary_search_by(|(k2, _, _)| k2.cmp(&k)) {
|
||||||
|
Ok(i) => {
|
||||||
|
let (_, old_ts, _) = self.vals[i];
|
||||||
|
let new_ts = std::cmp::max(old_ts + 1, now_msec());
|
||||||
|
vec![(k, new_ts, new_v)]
|
||||||
|
}
|
||||||
|
Err(_) => vec![(k, now_msec(), new_v)],
|
||||||
|
};
|
||||||
|
Self { vals: new_vals }
|
||||||
|
}
|
||||||
|
/// Takes all of the values of the map and returns them. The current map is reset to the
|
||||||
|
/// empty map. This is very usefull to produce in-place a new map that contains only a delta
|
||||||
|
/// that modifies a certain value:
|
||||||
|
///
|
||||||
|
/// ```ignore
|
||||||
|
/// let mut a = get_my_crdt_value();
|
||||||
|
/// let old_a = a.take_and_clear();
|
||||||
|
/// a.merge(&old_a.update_mutator(key_to_modify, new_value));
|
||||||
|
/// put_my_crdt_value(a);
|
||||||
|
/// ```
|
||||||
|
///
|
||||||
|
/// Of course in this simple example we could have written simply
|
||||||
|
/// `pyt_my_crdt_value(a.update_mutator(key_to_modify, new_value))`,
|
||||||
|
/// but in the case where the map is a field in a struct for instance (as is always the case),
|
||||||
|
/// this becomes very handy:
|
||||||
|
///
|
||||||
|
/// ```ignore
|
||||||
|
/// let mut a = get_my_crdt_value();
|
||||||
|
/// let old_a_map = a.map_field.take_and_clear();
|
||||||
|
/// a.map_field.merge(&old_a_map.update_mutator(key_to_modify, new_value));
|
||||||
|
/// put_my_crdt_value(a);
|
||||||
|
/// ```
|
||||||
|
pub fn take_and_clear(&mut self) -> Self {
|
||||||
|
let vals = std::mem::replace(&mut self.vals, vec![]);
|
||||||
|
Self { vals }
|
||||||
|
}
|
||||||
|
/// Removes all values from the map
|
||||||
|
pub fn clear(&mut self) {
|
||||||
|
self.vals.clear();
|
||||||
|
}
|
||||||
|
/// Get a reference to the value assigned to a key
|
||||||
|
pub fn get(&self, k: &K) -> Option<&V> {
|
||||||
|
match self.vals.binary_search_by(|(k2, _, _)| k2.cmp(&k)) {
|
||||||
|
Ok(i) => Some(&self.vals[i].2),
|
||||||
|
Err(_) => None,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
/// Gets a reference to all of the items, as a slice. Usefull to iterate on all map values.
|
||||||
|
/// In most case you will want to ignore the timestamp (second item of the tuple).
|
||||||
|
pub fn items(&self) -> &[(K, u64, V)] {
|
||||||
|
&self.vals[..]
|
||||||
|
}
|
||||||
|
/// Returns the number of items in the map
|
||||||
|
pub fn len(&self) -> usize {
|
||||||
|
self.vals.len()
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
impl<K, V> CRDT for LWWMap<K, V>
|
||||||
|
where
|
||||||
|
K: Clone + Ord,
|
||||||
|
V: Clone + CRDT,
|
||||||
|
{
|
||||||
|
fn merge(&mut self, other: &Self) {
|
||||||
|
for (k, ts2, v2) in other.vals.iter() {
|
||||||
|
match self.vals.binary_search_by(|(k2, _, _)| k2.cmp(&k)) {
|
||||||
|
Ok(i) => {
|
||||||
|
let (_, ts1, _v1) = &self.vals[i];
|
||||||
|
if ts2 > ts1 {
|
||||||
|
self.vals[i].1 = *ts2;
|
||||||
|
self.vals[i].2 = v2.clone();
|
||||||
|
} else if ts1 == ts2 {
|
||||||
|
self.vals[i].2.merge(&v2);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
Err(i) => {
|
||||||
|
self.vals.insert(i, (k.clone(), *ts2, v2.clone()));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
83
src/table/crdt/map.rs
Normal file
|
@ -0,0 +1,83 @@
|
||||||
|
use serde::{Deserialize, Serialize};
|
||||||
|
|
||||||
|
use crate::crdt::crdt::*;
|
||||||
|
|
||||||
|
/// Simple CRDT Map
///
/// This type defines a CRDT for a map from keys to values. Values are CRDT types which
/// can have their own updating logic.
///
/// Internally, the map is stored as a vector of keys and values, sorted by ascending key order.
/// This is why the key type `K` must implement `Ord` (and also to ensure a unique serialization,
/// such that two values can be compared for equality based on their hashes). As a consequence,
/// insertions take `O(n)` time. This means that Map should be used for reasonably small maps.
/// However, note that even if we were using a more efficient data structure such as a `BTreeMap`,
/// the serialization cost `O(n)` would still have to be paid at each modification, so we are
/// actually not losing anything here.
#[derive(Clone, Debug, Serialize, Deserialize, PartialEq)]
pub struct Map<K, V> {
    /// Entries as `(key, value)` pairs, kept sorted by ascending key.
    vals: Vec<(K, V)>,
}
|
||||||
|
|
||||||
|
impl<K, V> Map<K, V>
|
||||||
|
where
|
||||||
|
K: Clone + Ord,
|
||||||
|
V: Clone + CRDT,
|
||||||
|
{
|
||||||
|
/// Create a new empty map CRDT
|
||||||
|
pub fn new() -> Self {
|
||||||
|
Self { vals: vec![] }
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Returns a map that contains a single mapping from the specified key to the specified value.
|
||||||
|
/// This can be used to build a delta-mutator:
|
||||||
|
/// when merged with another map, the value will be added or CRDT-merged if a previous
|
||||||
|
/// value already exists.
|
||||||
|
pub fn put_mutator(k: K, v: V) -> Self {
|
||||||
|
Self { vals: vec![(k, v)] }
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn put(&mut self, k: K, v: V) {
|
||||||
|
self.merge(&Self::put_mutator(k, v));
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Removes all values from the map
|
||||||
|
pub fn clear(&mut self) {
|
||||||
|
self.vals.clear();
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Get a reference to the value assigned to a key
|
||||||
|
pub fn get(&self, k: &K) -> Option<&V> {
|
||||||
|
match self.vals.binary_search_by(|(k2, _)| k2.cmp(&k)) {
|
||||||
|
Ok(i) => Some(&self.vals[i].1),
|
||||||
|
Err(_) => None,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
/// Gets a reference to all of the items, as a slice. Usefull to iterate on all map values.
|
||||||
|
pub fn items(&self) -> &[(K, V)] {
|
||||||
|
&self.vals[..]
|
||||||
|
}
|
||||||
|
/// Returns the number of items in the map
|
||||||
|
pub fn len(&self) -> usize {
|
||||||
|
self.vals.len()
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
impl<K, V> CRDT for Map<K, V>
|
||||||
|
where
|
||||||
|
K: Clone + Ord,
|
||||||
|
V: Clone + CRDT,
|
||||||
|
{
|
||||||
|
fn merge(&mut self, other: &Self) {
|
||||||
|
for (k, v2) in other.vals.iter() {
|
||||||
|
match self.vals.binary_search_by(|(k2, _)| k2.cmp(&k)) {
|
||||||
|
Ok(i) => {
|
||||||
|
self.vals[i].1.merge(&v2);
|
||||||
|
}
|
||||||
|
Err(i) => {
|
||||||
|
self.vals.insert(i, (k.clone(), v2.clone()));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
22
src/table/crdt/mod.rs
Normal file
|
@ -0,0 +1,22 @@
|
||||||
|
//! This package provides a simple implementation of conflict-free replicated data types (CRDTs)
|
||||||
|
//!
|
||||||
|
//! CRDTs are a type of data structure that does not require coordination. In other words, we can
|
||||||
|
//! edit them in parallel and we will always find a way to merge them.
|
||||||
|
//!
|
||||||
|
//! A general example is a counter. Its initial value is 0. Alice and Bob get a copy of the
|
||||||
|
//! counter. Alice does +1 on her copy, she reads 1. Bob does +3 on his copy, he reads 3. Now,
|
||||||
|
//! it is easy to merge their counters, order does not count: we always get 4.
|
||||||
|
//!
|
||||||
|
//! Learn more about CRDT [on Wikipedia](https://en.wikipedia.org/wiki/Conflict-free_replicated_data_type)
|
||||||
|
|
||||||
|
mod bool;
|
||||||
|
mod crdt;
|
||||||
|
mod lww;
|
||||||
|
mod lww_map;
|
||||||
|
mod map;
|
||||||
|
|
||||||
|
pub use self::bool::*;
|
||||||
|
pub use crdt::*;
|
||||||
|
pub use lww::*;
|
||||||
|
pub use lww_map::*;
|
||||||
|
pub use map::*;
|
254
src/table/data.rs
Normal file
|
@ -0,0 +1,254 @@
|
||||||
|
use core::borrow::Borrow;
|
||||||
|
use std::sync::Arc;
|
||||||
|
|
||||||
|
use log::warn;
|
||||||
|
use serde_bytes::ByteBuf;
|
||||||
|
use sled::Transactional;
|
||||||
|
use tokio::sync::Notify;
|
||||||
|
|
||||||
|
use garage_util::data::*;
|
||||||
|
use garage_util::error::*;
|
||||||
|
|
||||||
|
use garage_rpc::membership::System;
|
||||||
|
|
||||||
|
use crate::crdt::CRDT;
|
||||||
|
use crate::replication::*;
|
||||||
|
use crate::schema::*;
|
||||||
|
|
||||||
|
/// Local storage of one table: the sled trees holding its entries, together with the
/// todo queues that are filled whenever an entry changes (Merkle updater, GC).
pub struct TableData<F: TableSchema, R: TableReplication> {
    // Handle on the cluster membership system; its `id` is compared with the first
    // write node of a partition to decide whether this node queues tombstones for GC.
    system: Arc<System>,

    /// Table name; prefix of every sled tree opened in `new` and used in log messages.
    pub name: String,
    /// Schema instance; its `updated()` hook is called after every stored change.
    pub(crate) instance: F,
    /// Replication strategy, queried for the write nodes of a partition.
    pub(crate) replication: R,

    /// Main tree (`"{name}:table"`): serialized entries keyed by `tree_key(p, s)`.
    pub store: sled::Tree,

    /// Tree `"{name}:merkle_tree"`.
    pub(crate) merkle_tree: sled::Tree,
    /// Tree `"{name}:merkle_todo"`: tree key -> hash of the new serialized entry,
    /// or an empty value when the entry was deleted.
    pub(crate) merkle_todo: sled::Tree,
    /// Notified each time something is added to `merkle_todo`.
    pub(crate) merkle_todo_notify: Notify,
    /// Tree `"{name}:gc_todo"`: tree key -> hash of the tombstone to garbage-collect.
    pub(crate) gc_todo: sled::Tree,
}
|
||||||
|
|
||||||
|
impl<F, R> TableData<F, R>
|
||||||
|
where
|
||||||
|
F: TableSchema,
|
||||||
|
R: TableReplication,
|
||||||
|
{
|
||||||
|
pub fn new(system: Arc<System>, name: String, instance: F, replication: R, db: &sled::Db) -> Arc<Self> {
|
||||||
|
let store = db
|
||||||
|
.open_tree(&format!("{}:table", name))
|
||||||
|
.expect("Unable to open DB tree");
|
||||||
|
|
||||||
|
let merkle_tree = db
|
||||||
|
.open_tree(&format!("{}:merkle_tree", name))
|
||||||
|
.expect("Unable to open DB Merkle tree tree");
|
||||||
|
let merkle_todo = db
|
||||||
|
.open_tree(&format!("{}:merkle_todo", name))
|
||||||
|
.expect("Unable to open DB Merkle TODO tree");
|
||||||
|
|
||||||
|
let gc_todo = db
|
||||||
|
.open_tree(&format!("{}:gc_todo", name))
|
||||||
|
.expect("Unable to open DB tree");
|
||||||
|
|
||||||
|
Arc::new(Self {
|
||||||
|
system,
|
||||||
|
name,
|
||||||
|
instance,
|
||||||
|
replication,
|
||||||
|
store,
|
||||||
|
merkle_tree,
|
||||||
|
merkle_todo,
|
||||||
|
merkle_todo_notify: Notify::new(),
|
||||||
|
gc_todo,
|
||||||
|
})
|
||||||
|
}
|
||||||
|
|
||||||
|
// Read functions
|
||||||
|
|
||||||
|
pub fn read_entry(&self, p: &F::P, s: &F::S) -> Result<Option<ByteBuf>, Error> {
|
||||||
|
let tree_key = self.tree_key(p, s);
|
||||||
|
if let Some(bytes) = self.store.get(&tree_key)? {
|
||||||
|
Ok(Some(ByteBuf::from(bytes.to_vec())))
|
||||||
|
} else {
|
||||||
|
Ok(None)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn read_range(
|
||||||
|
&self,
|
||||||
|
p: &F::P,
|
||||||
|
s: &Option<F::S>,
|
||||||
|
filter: &Option<F::Filter>,
|
||||||
|
limit: usize,
|
||||||
|
) -> Result<Vec<Arc<ByteBuf>>, Error> {
|
||||||
|
let partition_hash = p.hash();
|
||||||
|
let first_key = match s {
|
||||||
|
None => partition_hash.to_vec(),
|
||||||
|
Some(sk) => self.tree_key(p, sk),
|
||||||
|
};
|
||||||
|
let mut ret = vec![];
|
||||||
|
for item in self.store.range(first_key..) {
|
||||||
|
let (key, value) = item?;
|
||||||
|
if &key[..32] != partition_hash.as_slice() {
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
let keep = match filter {
|
||||||
|
None => true,
|
||||||
|
Some(f) => {
|
||||||
|
let entry = self.decode_entry(value.as_ref())?;
|
||||||
|
F::matches_filter(&entry, f)
|
||||||
|
}
|
||||||
|
};
|
||||||
|
if keep {
|
||||||
|
ret.push(Arc::new(ByteBuf::from(value.as_ref())));
|
||||||
|
}
|
||||||
|
if ret.len() >= limit {
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
Ok(ret)
|
||||||
|
}
|
||||||
|
|
||||||
|
// Mutation functions
|
||||||
|
// When changing this code, take care of propagating modifications correctly:
|
||||||
|
// - When an entry is modified or deleted, call the updated() function
|
||||||
|
// on the table instance
|
||||||
|
// - When an entry is modified or deleted, add it to the merkle updater's todo list.
|
||||||
|
// This has to be done atomically with the modification for the merkle updater
|
||||||
|
// to maintain consistency. The merkle updater must then be notified with todo_notify.
|
||||||
|
// - When an entry is updated to be a tombstone, add it to the gc_todo tree
|
||||||
|
|
||||||
|
/// Applies a batch of serialized updates, one `update_entry` call per item, in order.
///
/// Stops at the first failing update and returns its error; updates applied before it
/// are kept.
pub(crate) fn update_many<T: Borrow<ByteBuf>>(&self, entries: &[T]) -> Result<(), Error> {
    entries.iter().try_for_each(|item| {
        let update_bytes: &ByteBuf = item.borrow();
        self.update_entry(update_bytes.as_slice())
    })
}
|
||||||
|
|
||||||
|
/// Merges one serialized update into the stored entry with the same key.
///
/// The update is decoded, CRDT-merged with the entry currently stored under its tree key
/// (or taken as-is if there is none) and, if the result differs from what was stored,
/// written back. The write and the matching `merkle_todo` record happen in one sled
/// transaction. After a change, the schema's `updated()` hook is called, the Merkle
/// updater is notified, and tombstones are queued in `gc_todo` on the partition leader.
///
/// # Errors
/// Returns decoding, encoding and store errors.
///
/// # Panics
/// Panics if the tree key is shorter than 32 bytes or cannot be converted to a `Hash`
/// when the merged entry is a tombstone.
pub(crate) fn update_entry(&self, update_bytes: &[u8]) -> Result<(), Error> {
    let update = self.decode_entry(update_bytes)?;
    let tree_key = self.tree_key(update.partition_key(), update.sort_key());

    // Atomically update the store and the Merkle todo list.
    let changed = (&self.store, &self.merkle_todo).transaction(|(store, mkl_todo)| {
        let (old_entry, new_entry) = match store.get(&tree_key)? {
            Some(prev_bytes) => {
                let old_entry = self
                    .decode_entry(&prev_bytes)
                    .map_err(sled::transaction::ConflictableTransactionError::Abort)?;
                let mut new_entry = old_entry.clone();
                new_entry.merge(&update);
                (Some(old_entry), new_entry)
            }
            None => (None, update.clone()),
        };

        // Only write when the merge actually changed something (or created the entry).
        if Some(&new_entry) != old_entry.as_ref() {
            let new_bytes = rmp_to_vec_all_named(&new_entry)
                .map_err(Error::RMPEncode)
                .map_err(sled::transaction::ConflictableTransactionError::Abort)?;
            let new_bytes_hash = blake2sum(&new_bytes[..]);
            mkl_todo.insert(tree_key.clone(), new_bytes_hash.as_slice())?;
            store.insert(tree_key.clone(), new_bytes)?;
            Ok(Some((old_entry, new_entry, new_bytes_hash)))
        } else {
            Ok(None)
        }
    })?;

    if let Some((old_entry, new_entry, new_bytes_hash)) = changed {
        // Read the flag before `new_entry` is moved into `updated()`.
        let is_tombstone = new_entry.is_tombstone();
        self.instance.updated(old_entry, Some(new_entry));
        self.merkle_todo_notify.notify_one();
        if is_tombstone {
            // We are only responsible for GC'ing this item if we are the
            // "leader" of the partition, i.e. the first node in the
            // set of nodes that replicates this partition.
            // This avoids GC loops and does not change the termination properties
            // of the GC algorithm, as in all cases GC is suspended if
            // any node of the partition is unavailable.
            let pk_hash = Hash::try_from(&tree_key[..32]).unwrap();
            let nodes = self.replication.write_nodes(&pk_hash);
            if nodes.first() == Some(&self.system.id) {
                self.gc_todo.insert(&tree_key, new_bytes_hash.as_slice())?;
            }
        }
    }

    Ok(())
}
|
||||||
|
|
||||||
|
/// Deletes the entry stored under `k`, but only if its current bytes are equal to `v`.
///
/// The removal and the matching `merkle_todo` record (an empty value, meaning "deleted")
/// are done in a single transaction. When something was removed, `v` is decoded and
/// passed to the schema's `updated()` hook as the old entry, and the Merkle updater is
/// notified. Returns whether an entry was removed.
pub(crate) fn delete_if_equal(self: &Arc<Self>, k: &[u8], v: &[u8]) -> Result<bool, Error> {
    let removed = (&self.store, &self.merkle_todo).transaction(|(store, mkl_todo)| {
        match store.get(k)? {
            Some(cur_v) if cur_v == v => {
                store.remove(k)?;
                mkl_todo.insert(k, vec![])?;
                Ok(true)
            }
            _ => Ok(false),
        }
    })?;

    if !removed {
        return Ok(false);
    }

    let old_entry = self.decode_entry(v)?;
    self.instance.updated(Some(old_entry), None);
    self.merkle_todo_notify.notify_one();
    Ok(true)
}
|
||||||
|
|
||||||
|
pub(crate) fn delete_if_equal_hash(
|
||||||
|
self: &Arc<Self>,
|
||||||
|
k: &[u8],
|
||||||
|
vhash: Hash,
|
||||||
|
) -> Result<bool, Error> {
|
||||||
|
let removed = (&self.store, &self.merkle_todo).transaction(|(store, mkl_todo)| {
|
||||||
|
if let Some(cur_v) = store.get(k)? {
|
||||||
|
if blake2sum(&cur_v[..]) == vhash {
|
||||||
|
store.remove(k)?;
|
||||||
|
mkl_todo.insert(k, vec![])?;
|
||||||
|
return Ok(Some(cur_v));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
Ok(None)
|
||||||
|
})?;
|
||||||
|
|
||||||
|
if let Some(old_v) = removed {
|
||||||
|
let old_entry = self.decode_entry(&old_v[..])?;
|
||||||
|
self.instance.updated(Some(old_entry), None);
|
||||||
|
self.merkle_todo_notify.notify_one();
|
||||||
|
Ok(true)
|
||||||
|
} else {
|
||||||
|
Ok(false)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// ---- Utility functions ----
|
||||||
|
|
||||||
|
/// Builds the store key of an entry: the hash of its partition key `p`, followed by
/// its sort key `s`.
pub(crate) fn tree_key(&self, p: &F::P, s: &F::S) -> Vec<u8> {
    let partition_hash = p.hash();
    let mut key = partition_hash.to_vec();
    key.extend(s.sort_key());
    key
}
|
||||||
|
|
||||||
|
pub(crate) fn decode_entry(&self, bytes: &[u8]) -> Result<F::E, Error> {
|
||||||
|
match rmp_serde::decode::from_read_ref::<_, F::E>(bytes) {
|
||||||
|
Ok(x) => Ok(x),
|
||||||
|
Err(e) => match F::try_migrate(bytes) {
|
||||||
|
Some(x) => Ok(x),
|
||||||
|
None => {
|
||||||
|
warn!("Unable to decode entry of {}: {}", self.name, e);
|
||||||
|
for line in hexdump::hexdump_iter(bytes) {
|
||||||
|
debug!("{}", line);
|
||||||
|
}
|
||||||
|
Err(e.into())
|
||||||
|
}
|
||||||
|
},
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Returns the number of entries currently in the GC todo tree.
pub fn gc_todo_len(&self) -> usize {
    self.gc_todo.len()
}
|
||||||
|
}
|
248
src/table/gc.rs
Normal file
|
@ -0,0 +1,248 @@
|
||||||
|
use std::collections::HashMap;
|
||||||
|
use std::sync::Arc;
|
||||||
|
use std::time::Duration;
|
||||||
|
|
||||||
|
use serde::{Deserialize, Serialize};
|
||||||
|
use serde_bytes::ByteBuf;
|
||||||
|
|
||||||
|
use futures::future::join_all;
|
||||||
|
use futures::select;
|
||||||
|
use futures_util::future::*;
|
||||||
|
use tokio::sync::watch;
|
||||||
|
|
||||||
|
use garage_util::data::*;
|
||||||
|
use garage_util::error::Error;
|
||||||
|
|
||||||
|
use garage_rpc::membership::System;
|
||||||
|
use garage_rpc::rpc_client::*;
|
||||||
|
use garage_rpc::rpc_server::*;
|
||||||
|
|
||||||
|
use crate::data::*;
|
||||||
|
use crate::replication::*;
|
||||||
|
use crate::schema::*;
|
||||||
|
|
||||||
|
/// Maximum number of entries taken from the GC todo tree in one `gc_loop_iter` pass.
const TABLE_GC_BATCH_SIZE: usize = 1024;
/// Timeout applied to each round of GC RPCs sent by `try_send_and_delete`.
const TABLE_GC_RPC_TIMEOUT: Duration = Duration::from_secs(30);
|
||||||
|
|
||||||
|
/// Garbage collector of a table: processes the entries queued in the table's `gc_todo`
/// tree by pushing them to the other nodes of their partition and then deleting them
/// everywhere.
pub struct TableGC<F: TableSchema, R: TableReplication> {
    // Membership system; provides this node's id and the background worker runner.
    system: Arc<System>,
    // Storage of the table being garbage-collected.
    data: Arc<TableData<F, R>>,

    // Client used to send `GcRPC` messages to other nodes (and to this node through
    // the local handler installed by `register_handler`).
    rpc_client: Arc<RpcClient<GcRPC>>,
}
|
||||||
|
|
||||||
|
/// Messages exchanged between nodes by the table garbage collector.
#[derive(Serialize, Deserialize)]
enum GcRPC {
    /// Serialized entries the receiver must merge into its table (`update_many`).
    Update(Vec<ByteBuf>),
    /// `(tree key, value hash)` pairs: the receiver deletes each entry whose stored
    /// value still has that hash.
    DeleteIfEqualHash(Vec<(ByteBuf, Hash)>),
    /// Positive reply to either request.
    Ok,
}

impl RpcMessage for GcRPC {}
|
||||||
|
|
||||||
|
impl<F, R> TableGC<F, R>
|
||||||
|
where
|
||||||
|
F: TableSchema + 'static,
|
||||||
|
R: TableReplication + 'static,
|
||||||
|
{
|
||||||
|
pub(crate) fn launch(
|
||||||
|
system: Arc<System>,
|
||||||
|
data: Arc<TableData<F, R>>,
|
||||||
|
rpc_server: &mut RpcServer,
|
||||||
|
) -> Arc<Self> {
|
||||||
|
let rpc_path = format!("table_{}/gc", data.name);
|
||||||
|
let rpc_client = system.rpc_client::<GcRPC>(&rpc_path);
|
||||||
|
|
||||||
|
let gc = Arc::new(Self {
|
||||||
|
system: system.clone(),
|
||||||
|
data: data.clone(),
|
||||||
|
rpc_client,
|
||||||
|
});
|
||||||
|
|
||||||
|
gc.register_handler(rpc_server, rpc_path);
|
||||||
|
|
||||||
|
let gc1 = gc.clone();
|
||||||
|
system.background.spawn_worker(
|
||||||
|
format!("GC loop for {}", data.name),
|
||||||
|
move |must_exit: watch::Receiver<bool>| gc1.gc_loop(must_exit),
|
||||||
|
);
|
||||||
|
|
||||||
|
gc
|
||||||
|
}
|
||||||
|
|
||||||
|
async fn gc_loop(self: Arc<Self>, mut must_exit: watch::Receiver<bool>) {
|
||||||
|
while !*must_exit.borrow() {
|
||||||
|
match self.gc_loop_iter().await {
|
||||||
|
Ok(true) => {
|
||||||
|
// Stuff was done, loop imediately
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
Ok(false) => {
|
||||||
|
// Nothing was done, sleep for some time (below)
|
||||||
|
}
|
||||||
|
Err(e) => {
|
||||||
|
warn!("({}) Error doing GC: {}", self.data.name, e);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
select! {
|
||||||
|
_ = tokio::time::sleep(Duration::from_secs(10)).fuse() => (),
|
||||||
|
_ = must_exit.changed().fuse() => (),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
async fn gc_loop_iter(&self) -> Result<bool, Error> {
|
||||||
|
let mut entries = vec![];
|
||||||
|
let mut excluded = vec![];
|
||||||
|
|
||||||
|
for item in self.data.gc_todo.iter() {
|
||||||
|
let (k, vhash) = item?;
|
||||||
|
|
||||||
|
let vhash = Hash::try_from(&vhash[..]).unwrap();
|
||||||
|
|
||||||
|
let v_opt = self
|
||||||
|
.data
|
||||||
|
.store
|
||||||
|
.get(&k[..])?
|
||||||
|
.filter(|v| blake2sum(&v[..]) == vhash);
|
||||||
|
|
||||||
|
if let Some(v) = v_opt {
|
||||||
|
entries.push((ByteBuf::from(k.to_vec()), vhash, ByteBuf::from(v.to_vec())));
|
||||||
|
if entries.len() >= TABLE_GC_BATCH_SIZE {
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
excluded.push((k, vhash));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
for (k, vhash) in excluded {
|
||||||
|
self.todo_remove_if_equal(&k[..], vhash)?;
|
||||||
|
}
|
||||||
|
|
||||||
|
if entries.len() == 0 {
|
||||||
|
// Nothing to do in this iteration
|
||||||
|
return Ok(false);
|
||||||
|
}
|
||||||
|
|
||||||
|
debug!("({}) GC: doing {} items", self.data.name, entries.len());
|
||||||
|
|
||||||
|
let mut partitions = HashMap::new();
|
||||||
|
for (k, vhash, v) in entries {
|
||||||
|
let pkh = Hash::try_from(&k[..32]).unwrap();
|
||||||
|
let mut nodes = self.data.replication.write_nodes(&pkh);
|
||||||
|
nodes.retain(|x| *x != self.system.id);
|
||||||
|
nodes.sort();
|
||||||
|
|
||||||
|
if !partitions.contains_key(&nodes) {
|
||||||
|
partitions.insert(nodes.clone(), vec![]);
|
||||||
|
}
|
||||||
|
partitions.get_mut(&nodes).unwrap().push((k, vhash, v));
|
||||||
|
}
|
||||||
|
|
||||||
|
let resps = join_all(
|
||||||
|
partitions
|
||||||
|
.into_iter()
|
||||||
|
.map(|(nodes, items)| self.try_send_and_delete(nodes, items)),
|
||||||
|
)
|
||||||
|
.await;
|
||||||
|
|
||||||
|
let mut errs = vec![];
|
||||||
|
for resp in resps {
|
||||||
|
if let Err(e) = resp {
|
||||||
|
errs.push(e);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if errs.is_empty() {
|
||||||
|
Ok(true)
|
||||||
|
} else {
|
||||||
|
Err(Error::Message(errs.into_iter().map(|x| format!("{}", x)).collect::<Vec<_>>().join(", ")))
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
async fn try_send_and_delete(
|
||||||
|
&self,
|
||||||
|
nodes: Vec<UUID>,
|
||||||
|
items: Vec<(ByteBuf, Hash, ByteBuf)>,
|
||||||
|
) -> Result<(), Error> {
|
||||||
|
let n_items = items.len();
|
||||||
|
|
||||||
|
let mut updates = vec![];
|
||||||
|
let mut deletes = vec![];
|
||||||
|
for (k, vhash, v) in items {
|
||||||
|
updates.push(v);
|
||||||
|
deletes.push((k, vhash));
|
||||||
|
}
|
||||||
|
|
||||||
|
self.rpc_client
|
||||||
|
.try_call_many(
|
||||||
|
&nodes[..],
|
||||||
|
GcRPC::Update(updates),
|
||||||
|
RequestStrategy::with_quorum(nodes.len()).with_timeout(TABLE_GC_RPC_TIMEOUT),
|
||||||
|
)
|
||||||
|
.await?;
|
||||||
|
|
||||||
|
info!(
|
||||||
|
"({}) GC: {} items successfully pushed, will try to delete.",
|
||||||
|
self.data.name, n_items
|
||||||
|
);
|
||||||
|
|
||||||
|
self.rpc_client
|
||||||
|
.try_call_many(
|
||||||
|
&nodes[..],
|
||||||
|
GcRPC::DeleteIfEqualHash(deletes.clone()),
|
||||||
|
RequestStrategy::with_quorum(nodes.len()).with_timeout(TABLE_GC_RPC_TIMEOUT),
|
||||||
|
)
|
||||||
|
.await?;
|
||||||
|
|
||||||
|
for (k, vhash) in deletes {
|
||||||
|
self.data.delete_if_equal_hash(&k[..], vhash)?;
|
||||||
|
self.todo_remove_if_equal(&k[..], vhash)?;
|
||||||
|
}
|
||||||
|
|
||||||
|
Ok(())
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Removes `key` from the GC todo tree, but only if it is still associated with `vhash`.
///
/// The outcome of the compare-and-swap itself is deliberately discarded (`let _`):
/// only store errors are propagated.
fn todo_remove_if_equal(&self, key: &[u8], vhash: Hash) -> Result<(), Error> {
    let _ = self
        .data
        .gc_todo
        .compare_and_swap::<_, _, Vec<u8>>(key, Some(vhash), None)?;
    Ok(())
}
|
||||||
|
|
||||||
|
// ---- RPC HANDLER ----
|
||||||
|
|
||||||
|
/// Installs `handle_rpc` as the handler of `GcRPC` messages, both on `rpc_server` under
/// `path` and as the RPC client's local handler for this node's id.
fn register_handler(self: &Arc<Self>, rpc_server: &mut RpcServer, path: String) {
    // Each handler closure owns its own `Arc` and clones it once per message, so that
    // the returned future does not borrow from the closure.
    let self2 = self.clone();
    rpc_server.add_handler::<GcRPC, _, _>(path, move |msg, _addr| {
        let self2 = self2.clone();
        async move { self2.handle_rpc(&msg).await }
    });

    let self2 = self.clone();
    self.rpc_client
        .set_local_handler(self.system.id, move |msg| {
            let self2 = self2.clone();
            async move { self2.handle_rpc(&msg).await }
        });
}
|
||||||
|
|
||||||
|
async fn handle_rpc(self: &Arc<Self>, message: &GcRPC) -> Result<GcRPC, Error> {
|
||||||
|
match message {
|
||||||
|
GcRPC::Update(items) => {
|
||||||
|
self.data.update_many(items)?;
|
||||||
|
Ok(GcRPC::Ok)
|
||||||
|
}
|
||||||
|
GcRPC::DeleteIfEqualHash(items) => {
|
||||||
|
for (key, vhash) in items.iter() {
|
||||||
|
self.data.delete_if_equal_hash(&key[..], *vhash)?;
|
||||||
|
self.todo_remove_if_equal(&key[..], *vhash)?;
|
||||||
|
}
|
||||||
|
Ok(GcRPC::Ok)
|
||||||
|
}
|
||||||
|
_ => Err(Error::Message(format!("Unexpected GC RPC"))),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
|
@ -7,10 +7,12 @@ pub mod crdt;
|
||||||
pub mod schema;
|
pub mod schema;
|
||||||
pub mod util;
|
pub mod util;
|
||||||
|
|
||||||
|
pub mod data;
|
||||||
|
pub mod gc;
|
||||||
|
pub mod merkle;
|
||||||
|
pub mod replication;
|
||||||
|
pub mod sync;
|
||||||
pub mod table;
|
pub mod table;
|
||||||
pub mod table_fullcopy;
|
|
||||||
pub mod table_sharded;
|
|
||||||
pub mod table_sync;
|
|
||||||
|
|
||||||
pub use schema::*;
|
pub use schema::*;
|
||||||
pub use table::*;
|
pub use table::*;
|
||||||
|
|
454
src/table/merkle.rs
Normal file
|
@ -0,0 +1,454 @@
|
||||||
|
use std::sync::Arc;
|
||||||
|
use std::time::Duration;
|
||||||
|
|
||||||
|
use futures::select;
|
||||||
|
use futures_util::future::*;
|
||||||
|
use log::{debug, warn};
|
||||||
|
use serde::{Deserialize, Serialize};
|
||||||
|
use sled::transaction::{
|
||||||
|
ConflictableTransactionError, ConflictableTransactionResult, TransactionalTree,
|
||||||
|
};
|
||||||
|
use tokio::sync::watch;
|
||||||
|
|
||||||
|
use garage_util::background::BackgroundRunner;
|
||||||
|
use garage_util::data::*;
|
||||||
|
use garage_util::error::Error;
|
||||||
|
|
||||||
|
use garage_rpc::ring::*;
|
||||||
|
|
||||||
|
use crate::data::*;
|
||||||
|
use crate::replication::*;
|
||||||
|
use crate::schema::*;
|
||||||
|
|
||||||
|
// This module partitions the data in 2**16 partitions, based on the top
|
||||||
|
// 16 bits (two bytes) of item's partition keys' hashes.
|
||||||
|
// It builds one Merkle tree for each of these 2**16 partitions.
|
||||||
|
|
||||||
|
pub struct MerkleUpdater<F: TableSchema, R: TableReplication> {
|
||||||
|
data: Arc<TableData<F, R>>,
|
||||||
|
|
||||||
|
// Content of the todo tree: items where
|
||||||
|
// - key = the key of an item in the main table, ie hash(partition_key)+sort_key
|
||||||
|
// - value = the hash of the full serialized item, if present,
|
||||||
|
// or an empty vec if item is absent (deleted)
|
||||||
|
// Fields in data:
|
||||||
|
// pub(crate) merkle_todo: sled::Tree,
|
||||||
|
// pub(crate) merkle_todo_notify: Notify,
|
||||||
|
|
||||||
|
// Content of the merkle tree: items where
|
||||||
|
// - key = .bytes() for MerkleNodeKey
|
||||||
|
// - value = serialization of a MerkleNode, assumed to be MerkleNode::empty if not found
|
||||||
|
// Field in data:
|
||||||
|
// pub(crate) merkle_tree: sled::Tree,
|
||||||
|
empty_node_hash: Hash,
|
||||||
|
}
|
||||||
|
|
||||||
|
#[derive(Clone, Debug, Serialize, Deserialize)]
|
||||||
|
pub struct MerkleNodeKey {
|
||||||
|
// partition number
|
||||||
|
pub partition: Partition,
|
||||||
|
|
||||||
|
// prefix: a prefix for the hash of full keys, i.e. hash(hash(partition_key)+sort_key)
|
||||||
|
#[serde(with = "serde_bytes")]
|
||||||
|
pub prefix: Vec<u8>,
|
||||||
|
}
|
||||||
|
|
||||||
|
#[derive(PartialEq, Eq, Debug, Serialize, Deserialize)]
|
||||||
|
pub enum MerkleNode {
|
||||||
|
// The empty Merkle node
|
||||||
|
Empty,
|
||||||
|
|
||||||
|
// An intermediate Merkle tree node for a prefix
|
||||||
|
// Contains the hashes of the 256 possible next prefixes
|
||||||
|
Intermediate(Vec<(u8, Hash)>),
|
||||||
|
|
||||||
|
// A final node for an item
|
||||||
|
// Contains the full key of the item and the hash of the value
|
||||||
|
Leaf(Vec<u8>, Hash),
|
||||||
|
}
|
||||||
|
|
||||||
|
impl<F, R> MerkleUpdater<F, R>
|
||||||
|
where
|
||||||
|
F: TableSchema + 'static,
|
||||||
|
R: TableReplication + 'static,
|
||||||
|
{
|
||||||
|
pub(crate) fn launch(background: &BackgroundRunner, data: Arc<TableData<F, R>>) -> Arc<Self> {
|
||||||
|
let empty_node_hash = blake2sum(&rmp_to_vec_all_named(&MerkleNode::Empty).unwrap()[..]);
|
||||||
|
|
||||||
|
let ret = Arc::new(Self {
|
||||||
|
data,
|
||||||
|
empty_node_hash,
|
||||||
|
});
|
||||||
|
|
||||||
|
let ret2 = ret.clone();
|
||||||
|
background.spawn_worker(
|
||||||
|
format!("Merkle tree updater for {}", ret.data.name),
|
||||||
|
|must_exit: watch::Receiver<bool>| ret2.updater_loop(must_exit),
|
||||||
|
);
|
||||||
|
|
||||||
|
ret
|
||||||
|
}
|
||||||
|
|
||||||
|
async fn updater_loop(self: Arc<Self>, mut must_exit: watch::Receiver<bool>) {
|
||||||
|
while !*must_exit.borrow() {
|
||||||
|
if let Some(x) = self.data.merkle_todo.iter().next() {
|
||||||
|
match x {
|
||||||
|
Ok((key, valhash)) => {
|
||||||
|
if let Err(e) = self.update_item(&key[..], &valhash[..]) {
|
||||||
|
warn!(
|
||||||
|
"({}) Error while updating Merkle tree item: {}",
|
||||||
|
self.data.name, e
|
||||||
|
);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
Err(e) => {
|
||||||
|
warn!(
|
||||||
|
"({}) Error while iterating on Merkle todo tree: {}",
|
||||||
|
self.data.name, e
|
||||||
|
);
|
||||||
|
tokio::time::sleep(Duration::from_secs(10)).await;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
select! {
|
||||||
|
_ = self.data.merkle_todo_notify.notified().fuse() => (),
|
||||||
|
_ = must_exit.changed().fuse() => (),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
fn update_item(&self, k: &[u8], vhash_by: &[u8]) -> Result<(), Error> {
|
||||||
|
let khash = blake2sum(k);
|
||||||
|
|
||||||
|
let new_vhash = if vhash_by.len() == 0 {
|
||||||
|
None
|
||||||
|
} else {
|
||||||
|
Some(Hash::try_from(&vhash_by[..]).unwrap())
|
||||||
|
};
|
||||||
|
|
||||||
|
let key = MerkleNodeKey {
|
||||||
|
partition: self
|
||||||
|
.data
|
||||||
|
.replication
|
||||||
|
.partition_of(&Hash::try_from(&k[0..32]).unwrap()),
|
||||||
|
prefix: vec![],
|
||||||
|
};
|
||||||
|
self.data
|
||||||
|
.merkle_tree
|
||||||
|
.transaction(|tx| self.update_item_rec(tx, k, &khash, &key, new_vhash))?;
|
||||||
|
|
||||||
|
let deleted = self
|
||||||
|
.data
|
||||||
|
.merkle_todo
|
||||||
|
.compare_and_swap::<_, _, Vec<u8>>(k, Some(vhash_by), None)?
|
||||||
|
.is_ok();
|
||||||
|
|
||||||
|
if !deleted {
|
||||||
|
debug!(
|
||||||
|
"({}) Item not deleted from Merkle todo because it changed: {:?}",
|
||||||
|
self.data.name, k
|
||||||
|
);
|
||||||
|
}
|
||||||
|
Ok(())
|
||||||
|
}
|
||||||
|
|
||||||
|
fn update_item_rec(
|
||||||
|
&self,
|
||||||
|
tx: &TransactionalTree,
|
||||||
|
k: &[u8],
|
||||||
|
khash: &Hash,
|
||||||
|
key: &MerkleNodeKey,
|
||||||
|
new_vhash: Option<Hash>,
|
||||||
|
) -> ConflictableTransactionResult<Option<Hash>, Error> {
|
||||||
|
let i = key.prefix.len();
|
||||||
|
|
||||||
|
// Read node at current position (defined by the prefix stored in key)
|
||||||
|
// Calculate an update to apply to this node
|
||||||
|
// This update is an Option<_>, so that it is None if the update is a no-op
|
||||||
|
// and we can thus skip recalculating and re-storing everything
|
||||||
|
let mutate = match self.read_node_txn(tx, &key)? {
|
||||||
|
MerkleNode::Empty => {
|
||||||
|
if let Some(vhv) = new_vhash {
|
||||||
|
Some(MerkleNode::Leaf(k.to_vec(), vhv))
|
||||||
|
} else {
|
||||||
|
// Nothing to do, keep empty node
|
||||||
|
None
|
||||||
|
}
|
||||||
|
}
|
||||||
|
MerkleNode::Intermediate(mut children) => {
|
||||||
|
let key2 = key.next_key(khash);
|
||||||
|
if let Some(subhash) = self.update_item_rec(tx, k, khash, &key2, new_vhash)? {
|
||||||
|
// Subtree changed, update this node as well
|
||||||
|
if subhash == self.empty_node_hash {
|
||||||
|
intermediate_rm_child(&mut children, key2.prefix[i]);
|
||||||
|
} else {
|
||||||
|
intermediate_set_child(&mut children, key2.prefix[i], subhash);
|
||||||
|
}
|
||||||
|
|
||||||
|
if children.len() == 0 {
|
||||||
|
// should not happen
|
||||||
|
warn!(
|
||||||
|
"({}) Replacing intermediate node with empty node, should not happen.",
|
||||||
|
self.data.name
|
||||||
|
);
|
||||||
|
Some(MerkleNode::Empty)
|
||||||
|
} else if children.len() == 1 {
|
||||||
|
// We now have a single node (case when the update deleted one of only two
|
||||||
|
// children). If that node is a leaf, move it to this level.
|
||||||
|
let key_sub = key.add_byte(children[0].0);
|
||||||
|
let subnode = self.read_node_txn(tx, &key_sub)?;
|
||||||
|
match subnode {
|
||||||
|
MerkleNode::Empty => {
|
||||||
|
warn!("({}) Single subnode in tree is empty Merkle node", self.data.name);
|
||||||
|
Some(MerkleNode::Empty)
|
||||||
|
}
|
||||||
|
MerkleNode::Intermediate(_) => {
|
||||||
|
Some(MerkleNode::Intermediate(children))
|
||||||
|
}
|
||||||
|
x @ MerkleNode::Leaf(_, _) => {
|
||||||
|
tx.remove(key_sub.encode())?;
|
||||||
|
Some(x)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
Some(MerkleNode::Intermediate(children))
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
// Subtree not changed, nothing to do
|
||||||
|
None
|
||||||
|
}
|
||||||
|
}
|
||||||
|
MerkleNode::Leaf(exlf_k, exlf_vhash) => {
|
||||||
|
if exlf_k == k {
|
||||||
|
// This leaf is for the same key that the one we are updating
|
||||||
|
match new_vhash {
|
||||||
|
Some(vhv) if vhv == exlf_vhash => None,
|
||||||
|
Some(vhv) => Some(MerkleNode::Leaf(k.to_vec(), vhv)),
|
||||||
|
None => Some(MerkleNode::Empty),
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
// This is an only leaf for another key
|
||||||
|
if new_vhash.is_some() {
|
||||||
|
// Move that other key to a subnode, create another subnode for our
|
||||||
|
// insertion and replace current node by an intermediary node
|
||||||
|
let mut int = vec![];
|
||||||
|
|
||||||
|
let exlf_khash = blake2sum(&exlf_k[..]);
|
||||||
|
assert_eq!(khash.as_slice()[..i], exlf_khash.as_slice()[..i]);
|
||||||
|
|
||||||
|
{
|
||||||
|
let exlf_subkey = key.next_key(&exlf_khash);
|
||||||
|
let exlf_sub_hash = self.update_item_rec(tx, &exlf_k[..], &exlf_khash, &exlf_subkey, Some(exlf_vhash))?.unwrap();
|
||||||
|
intermediate_set_child(&mut int, exlf_subkey.prefix[i], exlf_sub_hash);
|
||||||
|
assert_eq!(int.len(), 1);
|
||||||
|
}
|
||||||
|
|
||||||
|
{
|
||||||
|
let key2 = key.next_key(khash);
|
||||||
|
let subhash = self.update_item_rec(tx, k, khash, &key2, new_vhash)?.unwrap();
|
||||||
|
intermediate_set_child(&mut int, key2.prefix[i], subhash);
|
||||||
|
if exlf_khash.as_slice()[i] == khash.as_slice()[i] {
|
||||||
|
assert_eq!(int.len(), 1);
|
||||||
|
} else {
|
||||||
|
assert_eq!(int.len(), 2);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
Some(MerkleNode::Intermediate(int))
|
||||||
|
} else {
|
||||||
|
// Nothing to do, we don't want to insert this value because it is None,
|
||||||
|
// and we don't want to change the other value because it's for something
|
||||||
|
// else
|
||||||
|
None
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
if let Some(new_node) = mutate {
|
||||||
|
let hash = self.put_node_txn(tx, &key, &new_node)?;
|
||||||
|
Ok(Some(hash))
|
||||||
|
} else {
|
||||||
|
Ok(None)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Merkle tree node manipulation
|
||||||
|
|
||||||
|
fn read_node_txn(
|
||||||
|
&self,
|
||||||
|
tx: &TransactionalTree,
|
||||||
|
k: &MerkleNodeKey,
|
||||||
|
) -> ConflictableTransactionResult<MerkleNode, Error> {
|
||||||
|
let ent = tx.get(k.encode())?;
|
||||||
|
MerkleNode::decode_opt(ent).map_err(ConflictableTransactionError::Abort)
|
||||||
|
}
|
||||||
|
|
||||||
|
fn put_node_txn(
|
||||||
|
&self,
|
||||||
|
tx: &TransactionalTree,
|
||||||
|
k: &MerkleNodeKey,
|
||||||
|
v: &MerkleNode,
|
||||||
|
) -> ConflictableTransactionResult<Hash, Error> {
|
||||||
|
trace!("Put Merkle node: {:?} => {:?}", k, v);
|
||||||
|
if *v == MerkleNode::Empty {
|
||||||
|
tx.remove(k.encode())?;
|
||||||
|
Ok(self.empty_node_hash)
|
||||||
|
} else {
|
||||||
|
let vby = rmp_to_vec_all_named(v)
|
||||||
|
.map_err(|e| ConflictableTransactionError::Abort(e.into()))?;
|
||||||
|
let rethash = blake2sum(&vby[..]);
|
||||||
|
tx.insert(k.encode(), vby)?;
|
||||||
|
Ok(rethash)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Access a node in the Merkle tree, used by the sync protocol
|
||||||
|
pub(crate) fn read_node(&self, k: &MerkleNodeKey) -> Result<MerkleNode, Error> {
|
||||||
|
let ent = self.data.merkle_tree.get(k.encode())?;
|
||||||
|
MerkleNode::decode_opt(ent)
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn merkle_tree_len(&self) -> usize {
|
||||||
|
self.data.merkle_tree.len()
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn todo_len(&self) -> usize {
|
||||||
|
self.data.merkle_todo.len()
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
impl MerkleNodeKey {
|
||||||
|
fn encode(&self) -> Vec<u8> {
|
||||||
|
let mut ret = Vec::with_capacity(2 + self.prefix.len());
|
||||||
|
ret.extend(&u16::to_be_bytes(self.partition)[..]);
|
||||||
|
ret.extend(&self.prefix[..]);
|
||||||
|
ret
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn next_key(&self, h: &Hash) -> Self {
|
||||||
|
assert_eq!(h.as_slice()[0..self.prefix.len()], self.prefix[..]);
|
||||||
|
let mut s2 = self.clone();
|
||||||
|
s2.prefix.push(h.as_slice()[self.prefix.len()]);
|
||||||
|
s2
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn add_byte(&self, b: u8) -> Self {
|
||||||
|
let mut s2 = self.clone();
|
||||||
|
s2.prefix.push(b);
|
||||||
|
s2
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
impl MerkleNode {
|
||||||
|
fn decode_opt(ent: Option<sled::IVec>) -> Result<Self, Error> {
|
||||||
|
match ent {
|
||||||
|
None => Ok(MerkleNode::Empty),
|
||||||
|
Some(v) => Ok(rmp_serde::decode::from_read_ref::<_, MerkleNode>(&v[..])?),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn is_empty(&self) -> bool {
|
||||||
|
*self == MerkleNode::Empty
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Sets the hash of child `pos` in the child list `ch`.
///
/// Scans for the first entry whose byte is greater than or equal to `pos`:
/// an equal entry has its hash overwritten, a greater one gets the new entry
/// inserted in front of it, and if there is no such entry the new one is
/// appended.
fn intermediate_set_child(ch: &mut Vec<(u8, Hash)>, pos: u8, v: Hash) {
    match ch.iter().position(|(byte, _)| *byte >= pos) {
        Some(idx) if ch[idx].0 == pos => ch[idx].1 = v,
        Some(idx) => ch.insert(idx, (pos, v)),
        None => ch.push((pos, v)),
    }
}
|
||||||
|
|
||||||
|
fn intermediate_rm_child(ch: &mut Vec<(u8, Hash)>, pos: u8) {
|
||||||
|
for i in 0..ch.len() {
|
||||||
|
if ch[i].0 == pos {
|
||||||
|
ch.remove(i);
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Exercises intermediate_set_child / intermediate_rm_child: insertion at the
// end, at the front and in the middle, overwriting, and removal of present
// and absent entries.
#[test]
fn test_intermediate_aux() {
    // Hash made of 32 copies of the same byte.
    let h = |b: u8| -> Hash { [b; 32].into() };

    let mut v = vec![];

    intermediate_set_child(&mut v, 12u8, h(12));
    assert_eq!(v, vec![(12u8, h(12))]);

    intermediate_set_child(&mut v, 42u8, h(42));
    assert_eq!(v, vec![(12u8, h(12)), (42u8, h(42))]);

    intermediate_set_child(&mut v, 4u8, h(4));
    assert_eq!(v, vec![(4u8, h(4)), (12u8, h(12)), (42u8, h(42))]);

    intermediate_set_child(&mut v, 12u8, h(8));
    assert_eq!(v, vec![(4u8, h(4)), (12u8, h(8)), (42u8, h(42))]);

    intermediate_set_child(&mut v, 6u8, h(6));
    assert_eq!(
        v,
        vec![(4u8, h(4)), (6u8, h(6)), (12u8, h(8)), (42u8, h(42))]
    );

    intermediate_rm_child(&mut v, 42u8);
    assert_eq!(v, vec![(4u8, h(4)), (6u8, h(6)), (12u8, h(8))]);

    intermediate_rm_child(&mut v, 11u8);
    assert_eq!(v, vec![(4u8, h(4)), (6u8, h(6)), (12u8, h(8))]);

    intermediate_rm_child(&mut v, 6u8);
    assert_eq!(v, vec![(4u8, h(4)), (12u8, h(8))]);

    intermediate_set_child(&mut v, 6u8, h(7));
    assert_eq!(v, vec![(4u8, h(4)), (6u8, h(7)), (12u8, h(8))]);
}
|
51
src/table/replication/fullcopy.rs
Normal file
|
@ -0,0 +1,51 @@
|
||||||
|
use std::sync::Arc;
|
||||||
|
|
||||||
|
use garage_rpc::membership::System;
|
||||||
|
use garage_rpc::ring::*;
|
||||||
|
use garage_util::data::*;
|
||||||
|
|
||||||
|
use crate::replication::*;
|
||||||
|
|
||||||
|
#[derive(Clone)]
|
||||||
|
pub struct TableFullReplication {
|
||||||
|
pub system: Arc<System>,
|
||||||
|
pub max_faults: usize,
|
||||||
|
}
|
||||||
|
|
||||||
|
impl TableReplication for TableFullReplication {
|
||||||
|
// Full replication schema: all nodes store everything
|
||||||
|
// Writes are disseminated in an epidemic manner in the network
|
||||||
|
|
||||||
|
// Advantage: do all reads locally, extremely fast
|
||||||
|
// Inconvenient: only suitable to reasonably small tables
|
||||||
|
|
||||||
|
fn read_nodes(&self, _hash: &Hash) -> Vec<UUID> {
|
||||||
|
vec![self.system.id]
|
||||||
|
}
|
||||||
|
fn read_quorum(&self) -> usize {
|
||||||
|
1
|
||||||
|
}
|
||||||
|
|
||||||
|
fn write_nodes(&self, _hash: &Hash) -> Vec<UUID> {
|
||||||
|
let ring = self.system.ring.borrow();
|
||||||
|
ring.config.members.keys().cloned().collect::<Vec<_>>()
|
||||||
|
}
|
||||||
|
fn write_quorum(&self) -> usize {
|
||||||
|
let nmembers = self.system.ring.borrow().config.members.len();
|
||||||
|
if nmembers > self.max_faults {
|
||||||
|
nmembers - self.max_faults
|
||||||
|
} else {
|
||||||
|
1
|
||||||
|
}
|
||||||
|
}
|
||||||
|
fn max_write_errors(&self) -> usize {
|
||||||
|
self.max_faults
|
||||||
|
}
|
||||||
|
|
||||||
|
fn partition_of(&self, _hash: &Hash) -> Partition {
|
||||||
|
0u16
|
||||||
|
}
|
||||||
|
fn partitions(&self) -> Vec<(Partition, Hash)> {
|
||||||
|
vec![(0u16, [0u8; 32].into())]
|
||||||
|
}
|
||||||
|
}
|
6
src/table/replication/mod.rs
Normal file
|
@ -0,0 +1,6 @@
|
||||||
|
mod parameters;
|
||||||
|
|
||||||
|
pub mod fullcopy;
|
||||||
|
pub mod sharded;
|
||||||
|
|
||||||
|
pub use parameters::*;
|
21
src/table/replication/parameters.rs
Normal file
|
@ -0,0 +1,21 @@
|
||||||
|
use garage_rpc::ring::*;
|
||||||
|
|
||||||
|
use garage_util::data::*;
|
||||||
|
|
||||||
|
pub trait TableReplication: Send + Sync {
|
||||||
|
// See examples in table_sharded.rs and table_fullcopy.rs
|
||||||
|
// To understand various replication methods
|
||||||
|
|
||||||
|
// Which nodes to send reads from
|
||||||
|
fn read_nodes(&self, hash: &Hash) -> Vec<UUID>;
|
||||||
|
fn read_quorum(&self) -> usize;
|
||||||
|
|
||||||
|
// Which nodes to send writes to
|
||||||
|
fn write_nodes(&self, hash: &Hash) -> Vec<UUID>;
|
||||||
|
fn write_quorum(&self) -> usize;
|
||||||
|
fn max_write_errors(&self) -> usize;
|
||||||
|
|
||||||
|
// Accessing partitions, for Merkle tree & sync
|
||||||
|
fn partition_of(&self, hash: &Hash) -> Partition;
|
||||||
|
fn partitions(&self) -> Vec<(Partition, Hash)>;
|
||||||
|
}
|
|
@ -1,11 +1,14 @@
|
||||||
|
use std::sync::Arc;
|
||||||
|
|
||||||
use garage_rpc::membership::System;
|
use garage_rpc::membership::System;
|
||||||
use garage_rpc::ring::Ring;
|
use garage_rpc::ring::*;
|
||||||
use garage_util::data::*;
|
use garage_util::data::*;
|
||||||
|
|
||||||
use crate::*;
|
use crate::replication::*;
|
||||||
|
|
||||||
#[derive(Clone)]
|
#[derive(Clone)]
|
||||||
pub struct TableShardedReplication {
|
pub struct TableShardedReplication {
|
||||||
|
pub system: Arc<System>,
|
||||||
pub replication_factor: usize,
|
pub replication_factor: usize,
|
||||||
pub read_quorum: usize,
|
pub read_quorum: usize,
|
||||||
pub write_quorum: usize,
|
pub write_quorum: usize,
|
||||||
|
@ -19,35 +22,29 @@ impl TableReplication for TableShardedReplication {
|
||||||
// - reads are done on all of the nodes that replicate the data
|
// - reads are done on all of the nodes that replicate the data
|
||||||
// - writes as well
|
// - writes as well
|
||||||
|
|
||||||
fn read_nodes(&self, hash: &Hash, system: &System) -> Vec<UUID> {
|
fn read_nodes(&self, hash: &Hash) -> Vec<UUID> {
|
||||||
let ring = system.ring.borrow().clone();
|
let ring = self.system.ring.borrow().clone();
|
||||||
ring.walk_ring(&hash, self.replication_factor)
|
ring.walk_ring(&hash, self.replication_factor)
|
||||||
}
|
}
|
||||||
fn read_quorum(&self) -> usize {
|
fn read_quorum(&self) -> usize {
|
||||||
self.read_quorum
|
self.read_quorum
|
||||||
}
|
}
|
||||||
|
|
||||||
fn write_nodes(&self, hash: &Hash, system: &System) -> Vec<UUID> {
|
fn write_nodes(&self, hash: &Hash) -> Vec<UUID> {
|
||||||
let ring = system.ring.borrow().clone();
|
let ring = self.system.ring.borrow();
|
||||||
ring.walk_ring(&hash, self.replication_factor)
|
ring.walk_ring(&hash, self.replication_factor)
|
||||||
}
|
}
|
||||||
fn write_quorum(&self, _system: &System) -> usize {
|
fn write_quorum(&self) -> usize {
|
||||||
self.write_quorum
|
self.write_quorum
|
||||||
}
|
}
|
||||||
fn max_write_errors(&self) -> usize {
|
fn max_write_errors(&self) -> usize {
|
||||||
self.replication_factor - self.write_quorum
|
self.replication_factor - self.write_quorum
|
||||||
}
|
}
|
||||||
|
|
||||||
fn replication_nodes(&self, hash: &Hash, ring: &Ring) -> Vec<UUID> {
|
fn partition_of(&self, hash: &Hash) -> Partition {
|
||||||
ring.walk_ring(&hash, self.replication_factor)
|
self.system.ring.borrow().partition_of(hash)
|
||||||
}
|
}
|
||||||
fn split_points(&self, ring: &Ring) -> Vec<Hash> {
|
fn partitions(&self) -> Vec<(Partition, Hash)> {
|
||||||
let mut ret = vec![];
|
self.system.ring.borrow().partitions()
|
||||||
|
|
||||||
for entry in ring.ring.iter() {
|
|
||||||
ret.push(entry.location);
|
|
||||||
}
|
|
||||||
ret.push([0xFFu8; 32].into());
|
|
||||||
ret
|
|
||||||
}
|
}
|
||||||
}
|
}
|
|
@ -2,13 +2,15 @@ use serde::{Deserialize, Serialize};
|
||||||
|
|
||||||
use garage_util::data::*;
|
use garage_util::data::*;
|
||||||
|
|
||||||
|
use crate::crdt::CRDT;
|
||||||
|
|
||||||
pub trait PartitionKey {
|
pub trait PartitionKey {
|
||||||
fn hash(&self) -> Hash;
|
fn hash(&self) -> Hash;
|
||||||
}
|
}
|
||||||
|
|
||||||
impl PartitionKey for String {
|
impl PartitionKey for String {
|
||||||
fn hash(&self) -> Hash {
|
fn hash(&self) -> Hash {
|
||||||
sha256sum(self.as_bytes())
|
blake2sum(self.as_bytes())
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -35,12 +37,14 @@ impl SortKey for Hash {
|
||||||
}
|
}
|
||||||
|
|
||||||
pub trait Entry<P: PartitionKey, S: SortKey>:
|
pub trait Entry<P: PartitionKey, S: SortKey>:
|
||||||
PartialEq + Clone + Serialize + for<'de> Deserialize<'de> + Send + Sync
|
CRDT + PartialEq + Clone + Serialize + for<'de> Deserialize<'de> + Send + Sync
|
||||||
{
|
{
|
||||||
fn partition_key(&self) -> &P;
|
fn partition_key(&self) -> &P;
|
||||||
fn sort_key(&self) -> &S;
|
fn sort_key(&self) -> &S;
|
||||||
|
|
||||||
fn merge(&mut self, other: &Self);
|
fn is_tombstone(&self) -> bool {
|
||||||
|
false
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
pub trait TableSchema: Send + Sync {
|
pub trait TableSchema: Send + Sync {
|
||||||
|
|
614
src/table/sync.rs
Normal file
|
@ -0,0 +1,614 @@
|
||||||
|
use std::collections::VecDeque;
|
||||||
|
use std::sync::{Arc, Mutex};
|
||||||
|
use std::time::{Duration, Instant};
|
||||||
|
|
||||||
|
use futures::select;
|
||||||
|
use futures_util::future::*;
|
||||||
|
use futures_util::stream::*;
|
||||||
|
use rand::Rng;
|
||||||
|
use serde::{Deserialize, Serialize};
|
||||||
|
use serde_bytes::ByteBuf;
|
||||||
|
use tokio::sync::{mpsc, watch};
|
||||||
|
|
||||||
|
use garage_util::data::*;
|
||||||
|
use garage_util::error::Error;
|
||||||
|
|
||||||
|
use garage_rpc::membership::System;
|
||||||
|
use garage_rpc::ring::*;
|
||||||
|
use garage_rpc::rpc_client::*;
|
||||||
|
use garage_rpc::rpc_server::*;
|
||||||
|
|
||||||
|
use crate::data::*;
|
||||||
|
use crate::merkle::*;
|
||||||
|
use crate::replication::*;
|
||||||
|
use crate::*;
|
||||||
|
|
||||||
|
const TABLE_SYNC_RPC_TIMEOUT: Duration = Duration::from_secs(30);
|
||||||
|
|
||||||
|
// Do anti-entropy every 10 minutes
|
||||||
|
const ANTI_ENTROPY_INTERVAL: Duration = Duration::from_secs(10 * 60);
|
||||||
|
|
||||||
|
pub struct TableSyncer<F: TableSchema, R: TableReplication> {
|
||||||
|
system: Arc<System>,
|
||||||
|
data: Arc<TableData<F, R>>,
|
||||||
|
merkle: Arc<MerkleUpdater<F, R>>,
|
||||||
|
|
||||||
|
todo: Mutex<SyncTodo>,
|
||||||
|
rpc_client: Arc<RpcClient<SyncRPC>>,
|
||||||
|
}
|
||||||
|
|
||||||
|
#[derive(Serialize, Deserialize)]
|
||||||
|
pub(crate) enum SyncRPC {
|
||||||
|
RootCkHash(Partition, Hash),
|
||||||
|
RootCkDifferent(bool),
|
||||||
|
GetNode(MerkleNodeKey),
|
||||||
|
Node(MerkleNodeKey, MerkleNode),
|
||||||
|
Items(Vec<Arc<ByteBuf>>),
|
||||||
|
Ok,
|
||||||
|
}
|
||||||
|
|
||||||
|
impl RpcMessage for SyncRPC {}
|
||||||
|
|
||||||
|
struct SyncTodo {
|
||||||
|
todo: Vec<TodoPartition>,
|
||||||
|
}
|
||||||
|
|
||||||
|
#[derive(Debug, Clone)]
|
||||||
|
struct TodoPartition {
|
||||||
|
partition: Partition,
|
||||||
|
begin: Hash,
|
||||||
|
end: Hash,
|
||||||
|
|
||||||
|
// Are we a node that stores this partition or not?
|
||||||
|
retain: bool,
|
||||||
|
}
|
||||||
|
|
||||||
|
impl<F, R> TableSyncer<F, R>
|
||||||
|
where
|
||||||
|
F: TableSchema + 'static,
|
||||||
|
R: TableReplication + 'static,
|
||||||
|
{
|
||||||
|
pub(crate) fn launch(
|
||||||
|
system: Arc<System>,
|
||||||
|
data: Arc<TableData<F, R>>,
|
||||||
|
merkle: Arc<MerkleUpdater<F, R>>,
|
||||||
|
rpc_server: &mut RpcServer,
|
||||||
|
) -> Arc<Self> {
|
||||||
|
let rpc_path = format!("table_{}/sync", data.name);
|
||||||
|
let rpc_client = system.rpc_client::<SyncRPC>(&rpc_path);
|
||||||
|
|
||||||
|
let todo = SyncTodo { todo: vec![] };
|
||||||
|
|
||||||
|
let syncer = Arc::new(Self {
|
||||||
|
system: system.clone(),
|
||||||
|
data: data.clone(),
|
||||||
|
merkle,
|
||||||
|
todo: Mutex::new(todo),
|
||||||
|
rpc_client,
|
||||||
|
});
|
||||||
|
|
||||||
|
syncer.register_handler(rpc_server, rpc_path);
|
||||||
|
|
||||||
|
let (busy_tx, busy_rx) = mpsc::unbounded_channel();
|
||||||
|
|
||||||
|
let s1 = syncer.clone();
|
||||||
|
system.background.spawn_worker(
|
||||||
|
format!("table sync watcher for {}", data.name),
|
||||||
|
move |must_exit: watch::Receiver<bool>| s1.watcher_task(must_exit, busy_rx),
|
||||||
|
);
|
||||||
|
|
||||||
|
let s2 = syncer.clone();
|
||||||
|
system.background.spawn_worker(
|
||||||
|
format!("table syncer for {}", data.name),
|
||||||
|
move |must_exit: watch::Receiver<bool>| s2.syncer_task(must_exit, busy_tx),
|
||||||
|
);
|
||||||
|
|
||||||
|
let s3 = syncer.clone();
|
||||||
|
tokio::spawn(async move {
|
||||||
|
tokio::time::sleep(Duration::from_secs(20)).await;
|
||||||
|
s3.add_full_sync();
|
||||||
|
});
|
||||||
|
|
||||||
|
syncer
|
||||||
|
}
|
||||||
|
|
||||||
|
fn register_handler(self: &Arc<Self>, rpc_server: &mut RpcServer, path: String) {
|
||||||
|
let self2 = self.clone();
|
||||||
|
rpc_server.add_handler::<SyncRPC, _, _>(path, move |msg, _addr| {
|
||||||
|
let self2 = self2.clone();
|
||||||
|
async move { self2.handle_rpc(&msg).await }
|
||||||
|
});
|
||||||
|
|
||||||
|
let self2 = self.clone();
|
||||||
|
self.rpc_client
|
||||||
|
.set_local_handler(self.system.id, move |msg| {
|
||||||
|
let self2 = self2.clone();
|
||||||
|
async move { self2.handle_rpc(&msg).await }
|
||||||
|
});
|
||||||
|
}
|
||||||
|
|
||||||
|
async fn watcher_task(
|
||||||
|
self: Arc<Self>,
|
||||||
|
mut must_exit: watch::Receiver<bool>,
|
||||||
|
mut busy_rx: mpsc::UnboundedReceiver<bool>,
|
||||||
|
) {
|
||||||
|
let mut prev_ring: Arc<Ring> = self.system.ring.borrow().clone();
|
||||||
|
let mut ring_recv: watch::Receiver<Arc<Ring>> = self.system.ring.clone();
|
||||||
|
let mut nothing_to_do_since = Some(Instant::now());
|
||||||
|
|
||||||
|
while !*must_exit.borrow() {
|
||||||
|
select! {
|
||||||
|
_ = ring_recv.changed().fuse() => {
|
||||||
|
let new_ring = ring_recv.borrow();
|
||||||
|
if !Arc::ptr_eq(&new_ring, &prev_ring) {
|
||||||
|
debug!("({}) Ring changed, adding full sync to syncer todo list", self.data.name);
|
||||||
|
self.add_full_sync();
|
||||||
|
prev_ring = new_ring.clone();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
busy_opt = busy_rx.recv().fuse() => {
|
||||||
|
if let Some(busy) = busy_opt {
|
||||||
|
if busy {
|
||||||
|
nothing_to_do_since = None;
|
||||||
|
} else {
|
||||||
|
if nothing_to_do_since.is_none() {
|
||||||
|
nothing_to_do_since = Some(Instant::now());
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
_ = must_exit.changed().fuse() => (),
|
||||||
|
_ = tokio::time::sleep(Duration::from_secs(1)).fuse() => {
|
||||||
|
if nothing_to_do_since.map(|t| Instant::now() - t >= ANTI_ENTROPY_INTERVAL).unwrap_or(false) {
|
||||||
|
nothing_to_do_since = None;
|
||||||
|
debug!("({}) Interval passed, adding full sync to syncer todo list", self.data.name);
|
||||||
|
self.add_full_sync();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn add_full_sync(&self) {
|
||||||
|
self.todo
|
||||||
|
.lock()
|
||||||
|
.unwrap()
|
||||||
|
.add_full_sync(&self.data, &self.system);
|
||||||
|
}
|
||||||
|
|
||||||
|
async fn syncer_task(
|
||||||
|
self: Arc<Self>,
|
||||||
|
mut must_exit: watch::Receiver<bool>,
|
||||||
|
busy_tx: mpsc::UnboundedSender<bool>,
|
||||||
|
) {
|
||||||
|
while !*must_exit.borrow() {
|
||||||
|
let task = self.todo.lock().unwrap().pop_task();
|
||||||
|
if let Some(partition) = task {
|
||||||
|
busy_tx.send(true).unwrap();
|
||||||
|
let res = self
|
||||||
|
.clone()
|
||||||
|
.sync_partition(&partition, &mut must_exit)
|
||||||
|
.await;
|
||||||
|
if let Err(e) = res {
|
||||||
|
warn!(
|
||||||
|
"({}) Error while syncing {:?}: {}",
|
||||||
|
self.data.name, partition, e
|
||||||
|
);
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
busy_tx.send(false).unwrap();
|
||||||
|
tokio::time::sleep(Duration::from_secs(1)).await;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
async fn sync_partition(
|
||||||
|
self: Arc<Self>,
|
||||||
|
partition: &TodoPartition,
|
||||||
|
must_exit: &mut watch::Receiver<bool>,
|
||||||
|
) -> Result<(), Error> {
|
||||||
|
if partition.retain {
|
||||||
|
let my_id = self.system.id;
|
||||||
|
|
||||||
|
let nodes = self
|
||||||
|
.data
|
||||||
|
.replication
|
||||||
|
.write_nodes(&partition.begin)
|
||||||
|
.into_iter()
|
||||||
|
.filter(|node| *node != my_id)
|
||||||
|
.collect::<Vec<_>>();
|
||||||
|
|
||||||
|
debug!(
|
||||||
|
"({}) Syncing {:?} with {:?}...",
|
||||||
|
self.data.name, partition, nodes
|
||||||
|
);
|
||||||
|
let mut sync_futures = nodes
|
||||||
|
.iter()
|
||||||
|
.map(|node| {
|
||||||
|
self.clone()
|
||||||
|
.do_sync_with(partition.clone(), *node, must_exit.clone())
|
||||||
|
})
|
||||||
|
.collect::<FuturesUnordered<_>>();
|
||||||
|
|
||||||
|
let mut n_errors = 0;
|
||||||
|
while let Some(r) = sync_futures.next().await {
|
||||||
|
if let Err(e) = r {
|
||||||
|
n_errors += 1;
|
||||||
|
warn!("({}) Sync error: {}", self.data.name, e);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if n_errors > self.data.replication.max_write_errors() {
|
||||||
|
return Err(Error::Message(format!(
|
||||||
|
"Sync failed with too many nodes (should have been: {:?}).",
|
||||||
|
nodes
|
||||||
|
)));
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
self.offload_partition(&partition.begin, &partition.end, must_exit)
|
||||||
|
.await?;
|
||||||
|
}
|
||||||
|
|
||||||
|
Ok(())
|
||||||
|
}
|
||||||
|
|
||||||
|
// Offload partition: this partition is not something we are storing,
|
||||||
|
// so send it out to all other nodes that store it and delete items locally.
|
||||||
|
// We don't bother checking if the remote nodes already have the items,
|
||||||
|
// we just batch-send everything. Offloading isn't supposed to happen very often.
|
||||||
|
// If any of the nodes that are supposed to store the items is unable to
|
||||||
|
// save them, we interrupt the process.
|
||||||
|
async fn offload_partition(
|
||||||
|
self: &Arc<Self>,
|
||||||
|
begin: &Hash,
|
||||||
|
end: &Hash,
|
||||||
|
must_exit: &mut watch::Receiver<bool>,
|
||||||
|
) -> Result<(), Error> {
|
||||||
|
let mut counter: usize = 0;
|
||||||
|
|
||||||
|
while !*must_exit.borrow() {
|
||||||
|
let mut items = Vec::new();
|
||||||
|
|
||||||
|
for item in self.data.store.range(begin.to_vec()..end.to_vec()) {
|
||||||
|
let (key, value) = item?;
|
||||||
|
items.push((key.to_vec(), Arc::new(ByteBuf::from(value.as_ref()))));
|
||||||
|
|
||||||
|
if items.len() >= 1024 {
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if items.len() > 0 {
|
||||||
|
let nodes = self
|
||||||
|
.data
|
||||||
|
.replication
|
||||||
|
.write_nodes(&begin)
|
||||||
|
.into_iter()
|
||||||
|
.collect::<Vec<_>>();
|
||||||
|
if nodes.contains(&self.system.id) {
|
||||||
|
warn!(
|
||||||
|
"({}) Interrupting offload as partitions seem to have changed",
|
||||||
|
self.data.name
|
||||||
|
);
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
if nodes.len() < self.data.replication.write_quorum() {
|
||||||
|
return Err(Error::Message(format!(
|
||||||
|
"Not offloading as we don't have a quorum of nodes to write to."
|
||||||
|
)));
|
||||||
|
}
|
||||||
|
|
||||||
|
counter += 1;
|
||||||
|
info!(
|
||||||
|
"({}) Offloading {} items from {:?}..{:?} ({})",
|
||||||
|
self.data.name,
|
||||||
|
items.len(),
|
||||||
|
begin,
|
||||||
|
end,
|
||||||
|
counter
|
||||||
|
);
|
||||||
|
self.offload_items(&items, &nodes[..]).await?;
|
||||||
|
} else {
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
Ok(())
|
||||||
|
}
|
||||||
|
|
||||||
|
async fn offload_items(
|
||||||
|
self: &Arc<Self>,
|
||||||
|
items: &Vec<(Vec<u8>, Arc<ByteBuf>)>,
|
||||||
|
nodes: &[UUID],
|
||||||
|
) -> Result<(), Error> {
|
||||||
|
let values = items.iter().map(|(_k, v)| v.clone()).collect::<Vec<_>>();
|
||||||
|
|
||||||
|
self.rpc_client
|
||||||
|
.try_call_many(
|
||||||
|
&nodes[..],
|
||||||
|
SyncRPC::Items(values),
|
||||||
|
RequestStrategy::with_quorum(nodes.len()).with_timeout(TABLE_SYNC_RPC_TIMEOUT),
|
||||||
|
)
|
||||||
|
.await?;
|
||||||
|
|
||||||
|
// All remote nodes have written those items, now we can delete them locally
|
||||||
|
let mut not_removed = 0;
|
||||||
|
for (k, v) in items.iter() {
|
||||||
|
if !self.data.delete_if_equal(&k[..], &v[..])? {
|
||||||
|
not_removed += 1;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if not_removed > 0 {
|
||||||
|
debug!("({}) {} items not removed during offload because they changed in between (trying again...)", self.data.name, not_removed);
|
||||||
|
}
|
||||||
|
|
||||||
|
Ok(())
|
||||||
|
}
|
||||||
|
|
||||||
|
// ======= SYNCHRONIZATION PROCEDURE -- DRIVER SIDE ======
|
||||||
|
// The driver side is only concerned with sending out the items it has
|
||||||
|
// and the other side might not have. Receiving items that differ from one
|
||||||
|
// side to the other will happen when the other side syncs with us,
|
||||||
|
// which they also do regularly.
|
||||||
|
|
||||||
|
fn get_root_ck(&self, partition: Partition) -> Result<(MerkleNodeKey, MerkleNode), Error> {
|
||||||
|
let key = MerkleNodeKey {
|
||||||
|
partition,
|
||||||
|
prefix: vec![],
|
||||||
|
};
|
||||||
|
let node = self.merkle.read_node(&key)?;
|
||||||
|
Ok((key, node))
|
||||||
|
}
|
||||||
|
|
||||||
|
async fn do_sync_with(
|
||||||
|
self: Arc<Self>,
|
||||||
|
partition: TodoPartition,
|
||||||
|
who: UUID,
|
||||||
|
must_exit: watch::Receiver<bool>,
|
||||||
|
) -> Result<(), Error> {
|
||||||
|
let (root_ck_key, root_ck) = self.get_root_ck(partition.partition)?;
|
||||||
|
if root_ck.is_empty() {
|
||||||
|
debug!(
|
||||||
|
"({}) Sync {:?} with {:?}: partition is empty.",
|
||||||
|
self.data.name, partition, who
|
||||||
|
);
|
||||||
|
return Ok(());
|
||||||
|
}
|
||||||
|
let root_ck_hash = hash_of::<MerkleNode>(&root_ck)?;
|
||||||
|
|
||||||
|
// Check if they have the same root checksum
|
||||||
|
// If so, do nothing.
|
||||||
|
let root_resp = self
|
||||||
|
.rpc_client
|
||||||
|
.call(
|
||||||
|
who,
|
||||||
|
SyncRPC::RootCkHash(partition.partition, root_ck_hash),
|
||||||
|
TABLE_SYNC_RPC_TIMEOUT,
|
||||||
|
)
|
||||||
|
.await?;
|
||||||
|
|
||||||
|
let mut todo = match root_resp {
|
||||||
|
SyncRPC::RootCkDifferent(false) => {
|
||||||
|
debug!(
|
||||||
|
"({}) Sync {:?} with {:?}: no difference",
|
||||||
|
self.data.name, partition, who
|
||||||
|
);
|
||||||
|
return Ok(());
|
||||||
|
}
|
||||||
|
SyncRPC::RootCkDifferent(true) => VecDeque::from(vec![root_ck_key]),
|
||||||
|
x => {
|
||||||
|
return Err(Error::Message(format!(
|
||||||
|
"Invalid respone to RootCkHash RPC: {}",
|
||||||
|
debug_serialize(x)
|
||||||
|
)));
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
let mut todo_items = vec![];
|
||||||
|
|
||||||
|
while !todo.is_empty() && !*must_exit.borrow() {
|
||||||
|
let key = todo.pop_front().unwrap();
|
||||||
|
let node = self.merkle.read_node(&key)?;
|
||||||
|
|
||||||
|
match node {
|
||||||
|
MerkleNode::Empty => {
|
||||||
|
// They have items we don't have.
|
||||||
|
// We don't request those items from them, they will send them.
|
||||||
|
// We only bother with pushing items that differ
|
||||||
|
}
|
||||||
|
MerkleNode::Leaf(ik, ivhash) => {
|
||||||
|
// Just send that item directly
|
||||||
|
if let Some(val) = self.data.store.get(&ik[..])? {
|
||||||
|
if blake2sum(&val[..]) != ivhash {
|
||||||
|
warn!("({}) Hashes differ between stored value and Merkle tree, key: {:?} (if your server is very busy, don't worry, this happens when the Merkle tree can't be updated fast enough)", self.data.name, ik);
|
||||||
|
}
|
||||||
|
todo_items.push(val.to_vec());
|
||||||
|
} else {
|
||||||
|
warn!("({}) Item from Merkle tree not found in store: {:?} (if your server is very busy, don't worry, this happens when the Merkle tree can't be updated fast enough)", self.data.name, ik);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
MerkleNode::Intermediate(l) => {
|
||||||
|
// Get Merkle node for this tree position at remote node
|
||||||
|
// and compare it with local node
|
||||||
|
let remote_node = match self
|
||||||
|
.rpc_client
|
||||||
|
.call(who, SyncRPC::GetNode(key.clone()), TABLE_SYNC_RPC_TIMEOUT)
|
||||||
|
.await?
|
||||||
|
{
|
||||||
|
SyncRPC::Node(_, node) => node,
|
||||||
|
x => {
|
||||||
|
return Err(Error::Message(format!(
|
||||||
|
"Invalid respone to GetNode RPC: {}",
|
||||||
|
debug_serialize(x)
|
||||||
|
)));
|
||||||
|
}
|
||||||
|
};
|
||||||
|
let int_l2 = match remote_node {
|
||||||
|
// If they have an intermediate node at this tree position,
|
||||||
|
// we can compare them to find differences
|
||||||
|
MerkleNode::Intermediate(l2) => l2,
|
||||||
|
// Otherwise, treat it as if they have nothing for this subtree,
|
||||||
|
// which will have the consequence of sending them everything
|
||||||
|
_ => vec![],
|
||||||
|
};
|
||||||
|
|
||||||
|
let join = join_ordered(&l[..], &int_l2[..]);
|
||||||
|
for (p, v1, v2) in join.into_iter() {
|
||||||
|
let diff = match (v1, v2) {
|
||||||
|
(Some(_), None) | (None, Some(_)) => true,
|
||||||
|
(Some(a), Some(b)) => a != b,
|
||||||
|
_ => false,
|
||||||
|
};
|
||||||
|
if diff {
|
||||||
|
todo.push_back(key.add_byte(*p));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if todo_items.len() >= 256 {
|
||||||
|
self.send_items(who, std::mem::replace(&mut todo_items, vec![]))
|
||||||
|
.await?;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if !todo_items.is_empty() {
|
||||||
|
self.send_items(who, todo_items).await?;
|
||||||
|
}
|
||||||
|
|
||||||
|
Ok(())
|
||||||
|
}
|
||||||
|
|
||||||
|
async fn send_items(&self, who: UUID, item_value_list: Vec<Vec<u8>>) -> Result<(), Error> {
|
||||||
|
info!(
|
||||||
|
"({}) Sending {} items to {:?}",
|
||||||
|
self.data.name,
|
||||||
|
item_value_list.len(),
|
||||||
|
who
|
||||||
|
);
|
||||||
|
|
||||||
|
let values = item_value_list
|
||||||
|
.into_iter()
|
||||||
|
.map(|x| Arc::new(ByteBuf::from(x)))
|
||||||
|
.collect::<Vec<_>>();
|
||||||
|
|
||||||
|
let rpc_resp = self
|
||||||
|
.rpc_client
|
||||||
|
.call(who, SyncRPC::Items(values), TABLE_SYNC_RPC_TIMEOUT)
|
||||||
|
.await?;
|
||||||
|
if let SyncRPC::Ok = rpc_resp {
|
||||||
|
Ok(())
|
||||||
|
} else {
|
||||||
|
Err(Error::Message(format!(
|
||||||
|
"Unexpected response to RPC Update: {}",
|
||||||
|
debug_serialize(&rpc_resp)
|
||||||
|
)))
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// ======= SYNCHRONIZATION PROCEDURE -- RECEIVER SIDE ======
|
||||||
|
|
||||||
|
async fn handle_rpc(self: &Arc<Self>, message: &SyncRPC) -> Result<SyncRPC, Error> {
|
||||||
|
match message {
|
||||||
|
SyncRPC::RootCkHash(range, h) => {
|
||||||
|
let (_root_ck_key, root_ck) = self.get_root_ck(*range)?;
|
||||||
|
let hash = hash_of::<MerkleNode>(&root_ck)?;
|
||||||
|
Ok(SyncRPC::RootCkDifferent(hash != *h))
|
||||||
|
}
|
||||||
|
SyncRPC::GetNode(k) => {
|
||||||
|
let node = self.merkle.read_node(&k)?;
|
||||||
|
Ok(SyncRPC::Node(k.clone(), node))
|
||||||
|
}
|
||||||
|
SyncRPC::Items(items) => {
|
||||||
|
self.data.update_many(items)?;
|
||||||
|
Ok(SyncRPC::Ok)
|
||||||
|
}
|
||||||
|
_ => Err(Error::Message(format!("Unexpected sync RPC"))),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
impl SyncTodo {
|
||||||
|
fn add_full_sync<F: TableSchema, R: TableReplication>(
|
||||||
|
&mut self,
|
||||||
|
data: &TableData<F, R>,
|
||||||
|
system: &System,
|
||||||
|
) {
|
||||||
|
let my_id = system.id;
|
||||||
|
|
||||||
|
self.todo.clear();
|
||||||
|
|
||||||
|
let partitions = data.replication.partitions();
|
||||||
|
|
||||||
|
for i in 0..partitions.len() {
|
||||||
|
let begin = partitions[i].1;
|
||||||
|
|
||||||
|
let end = if i + 1 < partitions.len() {
|
||||||
|
partitions[i + 1].1
|
||||||
|
} else {
|
||||||
|
[0xFFu8; 32].into()
|
||||||
|
};
|
||||||
|
|
||||||
|
let nodes = data.replication.write_nodes(&begin);
|
||||||
|
|
||||||
|
let retain = nodes.contains(&my_id);
|
||||||
|
if !retain {
|
||||||
|
// Check if we have some data to send, otherwise skip
|
||||||
|
if data.store.range(begin..end).next().is_none() {
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
self.todo.push(TodoPartition {
|
||||||
|
partition: partitions[i].0,
|
||||||
|
begin,
|
||||||
|
end,
|
||||||
|
retain,
|
||||||
|
});
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
fn pop_task(&mut self) -> Option<TodoPartition> {
|
||||||
|
if self.todo.is_empty() {
|
||||||
|
return None;
|
||||||
|
}
|
||||||
|
|
||||||
|
let i = rand::thread_rng().gen_range(0..self.todo.len());
|
||||||
|
if i == self.todo.len() - 1 {
|
||||||
|
self.todo.pop()
|
||||||
|
} else {
|
||||||
|
let replacement = self.todo.pop().unwrap();
|
||||||
|
let ret = std::mem::replace(&mut self.todo[i], replacement);
|
||||||
|
Some(ret)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
fn hash_of<T: Serialize>(x: &T) -> Result<Hash, Error> {
|
||||||
|
Ok(blake2sum(&rmp_to_vec_all_named(x)?[..]))
|
||||||
|
}
|
||||||
|
|
||||||
|
// Full outer join of two key-sorted slices.
//
// Both `x` and `y` must be sorted by key in ascending order. The result has
// one entry per distinct key, in ascending order: the key, the value from
// `x` if the key is present there, and the value from `y` if present there.
fn join_ordered<'a, K: Ord + Eq, V1, V2>(
    x: &'a [(K, V1)],
    y: &'a [(K, V2)],
) -> Vec<(&'a K, Option<&'a V1>, Option<&'a V2>)> {
    // At least max(len) entries are produced; more when keys don't overlap.
    let mut ret = Vec::with_capacity(x.len().max(y.len()));
    let mut i = 0;
    let mut j = 0;

    // Merge while both sides still have entries.
    while i < x.len() && j < y.len() {
        let (kx, vx) = &x[i];
        let (ky, vy) = &y[j];
        match kx.cmp(ky) {
            std::cmp::Ordering::Equal => {
                ret.push((kx, Some(vx), Some(vy)));
                i += 1;
                j += 1;
            }
            std::cmp::Ordering::Less => {
                ret.push((kx, Some(vx), None));
                i += 1;
            }
            std::cmp::Ordering::Greater => {
                ret.push((ky, None, Some(vy)));
                j += 1;
            }
        }
    }

    // At most one of these tails is non-empty.
    ret.extend(x[i..].iter().map(|(k, v)| (k, Some(v), None)));
    ret.extend(y[j..].iter().map(|(k, v)| (k, None, Some(v))));

    ret
}
|
|
@ -2,9 +2,6 @@ use std::collections::{BTreeMap, HashMap};
|
||||||
use std::sync::Arc;
|
use std::sync::Arc;
|
||||||
use std::time::Duration;
|
use std::time::Duration;
|
||||||
|
|
||||||
use log::warn;
|
|
||||||
|
|
||||||
use arc_swap::ArcSwapOption;
|
|
||||||
use futures::stream::*;
|
use futures::stream::*;
|
||||||
use serde::{Deserialize, Serialize};
|
use serde::{Deserialize, Serialize};
|
||||||
use serde_bytes::ByteBuf;
|
use serde_bytes::ByteBuf;
|
||||||
|
@ -13,25 +10,25 @@ use garage_util::data::*;
|
||||||
use garage_util::error::Error;
|
use garage_util::error::Error;
|
||||||
|
|
||||||
use garage_rpc::membership::System;
|
use garage_rpc::membership::System;
|
||||||
use garage_rpc::ring::Ring;
|
|
||||||
use garage_rpc::rpc_client::*;
|
use garage_rpc::rpc_client::*;
|
||||||
use garage_rpc::rpc_server::*;
|
use garage_rpc::rpc_server::*;
|
||||||
|
|
||||||
|
use crate::crdt::CRDT;
|
||||||
|
use crate::data::*;
|
||||||
|
use crate::gc::*;
|
||||||
|
use crate::merkle::*;
|
||||||
|
use crate::replication::*;
|
||||||
use crate::schema::*;
|
use crate::schema::*;
|
||||||
use crate::table_sync::*;
|
use crate::sync::*;
|
||||||
|
|
||||||
const TABLE_RPC_TIMEOUT: Duration = Duration::from_secs(10);
|
const TABLE_RPC_TIMEOUT: Duration = Duration::from_secs(10);
|
||||||
|
|
||||||
pub struct Table<F: TableSchema, R: TableReplication> {
|
pub struct Table<F: TableSchema, R: TableReplication> {
|
||||||
pub instance: F,
|
|
||||||
pub replication: R,
|
|
||||||
|
|
||||||
pub name: String,
|
|
||||||
pub(crate) rpc_client: Arc<RpcClient<TableRPC<F>>>,
|
|
||||||
|
|
||||||
pub system: Arc<System>,
|
pub system: Arc<System>,
|
||||||
pub store: sled::Tree,
|
pub data: Arc<TableData<F, R>>,
|
||||||
pub syncer: ArcSwapOption<TableSyncer<F, R>>,
|
pub merkle_updater: Arc<MerkleUpdater<F, R>>,
|
||||||
|
pub syncer: Arc<TableSyncer<F, R>>,
|
||||||
|
rpc_client: Arc<RpcClient<TableRPC<F>>>,
|
||||||
}
|
}
|
||||||
|
|
||||||
#[derive(Serialize, Deserialize)]
|
#[derive(Serialize, Deserialize)]
|
||||||
|
@ -45,30 +42,10 @@ pub(crate) enum TableRPC<F: TableSchema> {
|
||||||
ReadRange(F::P, Option<F::S>, Option<F::Filter>, usize),
|
ReadRange(F::P, Option<F::S>, Option<F::Filter>, usize),
|
||||||
|
|
||||||
Update(Vec<Arc<ByteBuf>>),
|
Update(Vec<Arc<ByteBuf>>),
|
||||||
|
|
||||||
SyncRPC(SyncRPC),
|
|
||||||
}
|
}
|
||||||
|
|
||||||
impl<F: TableSchema> RpcMessage for TableRPC<F> {}
|
impl<F: TableSchema> RpcMessage for TableRPC<F> {}
|
||||||
|
|
||||||
pub trait TableReplication: Send + Sync {
|
|
||||||
// See examples in table_sharded.rs and table_fullcopy.rs
|
|
||||||
// To understand various replication methods
|
|
||||||
|
|
||||||
// Which nodes to send reads from
|
|
||||||
fn read_nodes(&self, hash: &Hash, system: &System) -> Vec<UUID>;
|
|
||||||
fn read_quorum(&self) -> usize;
|
|
||||||
|
|
||||||
// Which nodes to send writes to
|
|
||||||
fn write_nodes(&self, hash: &Hash, system: &System) -> Vec<UUID>;
|
|
||||||
fn write_quorum(&self, system: &System) -> usize;
|
|
||||||
fn max_write_errors(&self) -> usize;
|
|
||||||
|
|
||||||
// Which are the nodes that do actually replicate the data
|
|
||||||
fn replication_nodes(&self, hash: &Hash, ring: &Ring) -> Vec<UUID>;
|
|
||||||
fn split_points(&self, ring: &Ring) -> Vec<Hash>;
|
|
||||||
}
|
|
||||||
|
|
||||||
impl<F, R> Table<F, R>
|
impl<F, R> Table<F, R>
|
||||||
where
|
where
|
||||||
F: TableSchema + 'static,
|
F: TableSchema + 'static,
|
||||||
|
@ -76,7 +53,7 @@ where
|
||||||
{
|
{
|
||||||
// =============== PUBLIC INTERFACE FUNCTIONS (new, insert, get, etc) ===============
|
// =============== PUBLIC INTERFACE FUNCTIONS (new, insert, get, etc) ===============
|
||||||
|
|
||||||
pub async fn new(
|
pub fn new(
|
||||||
instance: F,
|
instance: F,
|
||||||
replication: R,
|
replication: R,
|
||||||
system: Arc<System>,
|
system: Arc<System>,
|
||||||
|
@ -84,31 +61,37 @@ where
|
||||||
name: String,
|
name: String,
|
||||||
rpc_server: &mut RpcServer,
|
rpc_server: &mut RpcServer,
|
||||||
) -> Arc<Self> {
|
) -> Arc<Self> {
|
||||||
let store = db.open_tree(&name).expect("Unable to open DB tree");
|
|
||||||
|
|
||||||
let rpc_path = format!("table_{}", name);
|
let rpc_path = format!("table_{}", name);
|
||||||
let rpc_client = system.rpc_client::<TableRPC<F>>(&rpc_path);
|
let rpc_client = system.rpc_client::<TableRPC<F>>(&rpc_path);
|
||||||
|
|
||||||
let table = Arc::new(Self {
|
let data = TableData::new(system.clone(), name, instance, replication, db);
|
||||||
instance,
|
|
||||||
replication,
|
|
||||||
name,
|
|
||||||
rpc_client,
|
|
||||||
system,
|
|
||||||
store,
|
|
||||||
syncer: ArcSwapOption::from(None),
|
|
||||||
});
|
|
||||||
table.clone().register_handler(rpc_server, rpc_path);
|
|
||||||
|
|
||||||
let syncer = TableSyncer::launch(table.clone()).await;
|
let merkle_updater = MerkleUpdater::launch(&system.background, data.clone());
|
||||||
table.syncer.swap(Some(syncer));
|
|
||||||
|
let syncer = TableSyncer::launch(
|
||||||
|
system.clone(),
|
||||||
|
data.clone(),
|
||||||
|
merkle_updater.clone(),
|
||||||
|
rpc_server,
|
||||||
|
);
|
||||||
|
TableGC::launch(system.clone(), data.clone(), rpc_server);
|
||||||
|
|
||||||
|
let table = Arc::new(Self {
|
||||||
|
system,
|
||||||
|
data,
|
||||||
|
merkle_updater,
|
||||||
|
syncer,
|
||||||
|
rpc_client,
|
||||||
|
});
|
||||||
|
|
||||||
|
table.clone().register_handler(rpc_server, rpc_path);
|
||||||
|
|
||||||
table
|
table
|
||||||
}
|
}
|
||||||
|
|
||||||
pub async fn insert(&self, e: &F::E) -> Result<(), Error> {
|
pub async fn insert(&self, e: &F::E) -> Result<(), Error> {
|
||||||
let hash = e.partition_key().hash();
|
let hash = e.partition_key().hash();
|
||||||
let who = self.replication.write_nodes(&hash, &self.system);
|
let who = self.data.replication.write_nodes(&hash);
|
||||||
//eprintln!("insert who: {:?}", who);
|
//eprintln!("insert who: {:?}", who);
|
||||||
|
|
||||||
let e_enc = Arc::new(ByteBuf::from(rmp_to_vec_all_named(e)?));
|
let e_enc = Arc::new(ByteBuf::from(rmp_to_vec_all_named(e)?));
|
||||||
|
@ -118,7 +101,7 @@ where
|
||||||
.try_call_many(
|
.try_call_many(
|
||||||
&who[..],
|
&who[..],
|
||||||
rpc,
|
rpc,
|
||||||
RequestStrategy::with_quorum(self.replication.write_quorum(&self.system))
|
RequestStrategy::with_quorum(self.data.replication.write_quorum())
|
||||||
.with_timeout(TABLE_RPC_TIMEOUT),
|
.with_timeout(TABLE_RPC_TIMEOUT),
|
||||||
)
|
)
|
||||||
.await?;
|
.await?;
|
||||||
|
@ -130,7 +113,7 @@ where
|
||||||
|
|
||||||
for entry in entries.iter() {
|
for entry in entries.iter() {
|
||||||
let hash = entry.partition_key().hash();
|
let hash = entry.partition_key().hash();
|
||||||
let who = self.replication.write_nodes(&hash, &self.system);
|
let who = self.data.replication.write_nodes(&hash);
|
||||||
let e_enc = Arc::new(ByteBuf::from(rmp_to_vec_all_named(entry)?));
|
let e_enc = Arc::new(ByteBuf::from(rmp_to_vec_all_named(entry)?));
|
||||||
for node in who {
|
for node in who {
|
||||||
if !call_list.contains_key(&node) {
|
if !call_list.contains_key(&node) {
|
||||||
|
@ -154,7 +137,7 @@ where
|
||||||
errors.push(e);
|
errors.push(e);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
if errors.len() > self.replication.max_write_errors() {
|
if errors.len() > self.data.replication.max_write_errors() {
|
||||||
Err(Error::Message("Too many errors".into()))
|
Err(Error::Message("Too many errors".into()))
|
||||||
} else {
|
} else {
|
||||||
Ok(())
|
Ok(())
|
||||||
|
@ -167,7 +150,7 @@ where
|
||||||
sort_key: &F::S,
|
sort_key: &F::S,
|
||||||
) -> Result<Option<F::E>, Error> {
|
) -> Result<Option<F::E>, Error> {
|
||||||
let hash = partition_key.hash();
|
let hash = partition_key.hash();
|
||||||
let who = self.replication.read_nodes(&hash, &self.system);
|
let who = self.data.replication.read_nodes(&hash);
|
||||||
//eprintln!("get who: {:?}", who);
|
//eprintln!("get who: {:?}", who);
|
||||||
|
|
||||||
let rpc = TableRPC::<F>::ReadEntry(partition_key.clone(), sort_key.clone());
|
let rpc = TableRPC::<F>::ReadEntry(partition_key.clone(), sort_key.clone());
|
||||||
|
@ -176,7 +159,7 @@ where
|
||||||
.try_call_many(
|
.try_call_many(
|
||||||
&who[..],
|
&who[..],
|
||||||
rpc,
|
rpc,
|
||||||
RequestStrategy::with_quorum(self.replication.read_quorum())
|
RequestStrategy::with_quorum(self.data.replication.read_quorum())
|
||||||
.with_timeout(TABLE_RPC_TIMEOUT)
|
.with_timeout(TABLE_RPC_TIMEOUT)
|
||||||
.interrupt_after_quorum(true),
|
.interrupt_after_quorum(true),
|
||||||
)
|
)
|
||||||
|
@ -187,7 +170,7 @@ where
|
||||||
for resp in resps {
|
for resp in resps {
|
||||||
if let TableRPC::ReadEntryResponse(value) = resp {
|
if let TableRPC::ReadEntryResponse(value) = resp {
|
||||||
if let Some(v_bytes) = value {
|
if let Some(v_bytes) = value {
|
||||||
let v = self.decode_entry(v_bytes.as_slice())?;
|
let v = self.data.decode_entry(v_bytes.as_slice())?;
|
||||||
ret = match ret {
|
ret = match ret {
|
||||||
None => Some(v),
|
None => Some(v),
|
||||||
Some(mut x) => {
|
Some(mut x) => {
|
||||||
|
@ -223,7 +206,7 @@ where
|
||||||
limit: usize,
|
limit: usize,
|
||||||
) -> Result<Vec<F::E>, Error> {
|
) -> Result<Vec<F::E>, Error> {
|
||||||
let hash = partition_key.hash();
|
let hash = partition_key.hash();
|
||||||
let who = self.replication.read_nodes(&hash, &self.system);
|
let who = self.data.replication.read_nodes(&hash);
|
||||||
|
|
||||||
let rpc = TableRPC::<F>::ReadRange(partition_key.clone(), begin_sort_key, filter, limit);
|
let rpc = TableRPC::<F>::ReadRange(partition_key.clone(), begin_sort_key, filter, limit);
|
||||||
|
|
||||||
|
@ -232,7 +215,7 @@ where
|
||||||
.try_call_many(
|
.try_call_many(
|
||||||
&who[..],
|
&who[..],
|
||||||
rpc,
|
rpc,
|
||||||
RequestStrategy::with_quorum(self.replication.read_quorum())
|
RequestStrategy::with_quorum(self.data.replication.read_quorum())
|
||||||
.with_timeout(TABLE_RPC_TIMEOUT)
|
.with_timeout(TABLE_RPC_TIMEOUT)
|
||||||
.interrupt_after_quorum(true),
|
.interrupt_after_quorum(true),
|
||||||
)
|
)
|
||||||
|
@ -243,8 +226,8 @@ where
|
||||||
for resp in resps {
|
for resp in resps {
|
||||||
if let TableRPC::Update(entries) = resp {
|
if let TableRPC::Update(entries) = resp {
|
||||||
for entry_bytes in entries.iter() {
|
for entry_bytes in entries.iter() {
|
||||||
let entry = self.decode_entry(entry_bytes.as_slice())?;
|
let entry = self.data.decode_entry(entry_bytes.as_slice())?;
|
||||||
let entry_key = self.tree_key(entry.partition_key(), entry.sort_key());
|
let entry_key = self.data.tree_key(entry.partition_key(), entry.sort_key());
|
||||||
match ret.remove(&entry_key) {
|
match ret.remove(&entry_key) {
|
||||||
None => {
|
None => {
|
||||||
ret.insert(entry_key, Some(entry));
|
ret.insert(entry_key, Some(entry));
|
||||||
|
@ -313,146 +296,18 @@ where
|
||||||
async fn handle(self: &Arc<Self>, msg: &TableRPC<F>) -> Result<TableRPC<F>, Error> {
|
async fn handle(self: &Arc<Self>, msg: &TableRPC<F>) -> Result<TableRPC<F>, Error> {
|
||||||
match msg {
|
match msg {
|
||||||
TableRPC::ReadEntry(key, sort_key) => {
|
TableRPC::ReadEntry(key, sort_key) => {
|
||||||
let value = self.handle_read_entry(key, sort_key)?;
|
let value = self.data.read_entry(key, sort_key)?;
|
||||||
Ok(TableRPC::ReadEntryResponse(value))
|
Ok(TableRPC::ReadEntryResponse(value))
|
||||||
}
|
}
|
||||||
TableRPC::ReadRange(key, begin_sort_key, filter, limit) => {
|
TableRPC::ReadRange(key, begin_sort_key, filter, limit) => {
|
||||||
let values = self.handle_read_range(key, begin_sort_key, filter, *limit)?;
|
let values = self.data.read_range(key, begin_sort_key, filter, *limit)?;
|
||||||
Ok(TableRPC::Update(values))
|
Ok(TableRPC::Update(values))
|
||||||
}
|
}
|
||||||
TableRPC::Update(pairs) => {
|
TableRPC::Update(pairs) => {
|
||||||
self.handle_update(pairs).await?;
|
self.data.update_many(pairs)?;
|
||||||
Ok(TableRPC::Ok)
|
Ok(TableRPC::Ok)
|
||||||
}
|
}
|
||||||
TableRPC::SyncRPC(rpc) => {
|
|
||||||
let syncer = self.syncer.load_full().unwrap();
|
|
||||||
let response = syncer
|
|
||||||
.handle_rpc(rpc, self.system.background.stop_signal.clone())
|
|
||||||
.await?;
|
|
||||||
Ok(TableRPC::SyncRPC(response))
|
|
||||||
}
|
|
||||||
_ => Err(Error::BadRPC(format!("Unexpected table RPC"))),
|
_ => Err(Error::BadRPC(format!("Unexpected table RPC"))),
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
fn handle_read_entry(&self, p: &F::P, s: &F::S) -> Result<Option<ByteBuf>, Error> {
|
|
||||||
let tree_key = self.tree_key(p, s);
|
|
||||||
if let Some(bytes) = self.store.get(&tree_key)? {
|
|
||||||
Ok(Some(ByteBuf::from(bytes.to_vec())))
|
|
||||||
} else {
|
|
||||||
Ok(None)
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
fn handle_read_range(
|
|
||||||
&self,
|
|
||||||
p: &F::P,
|
|
||||||
s: &Option<F::S>,
|
|
||||||
filter: &Option<F::Filter>,
|
|
||||||
limit: usize,
|
|
||||||
) -> Result<Vec<Arc<ByteBuf>>, Error> {
|
|
||||||
let partition_hash = p.hash();
|
|
||||||
let first_key = match s {
|
|
||||||
None => partition_hash.to_vec(),
|
|
||||||
Some(sk) => self.tree_key(p, sk),
|
|
||||||
};
|
|
||||||
let mut ret = vec![];
|
|
||||||
for item in self.store.range(first_key..) {
|
|
||||||
let (key, value) = item?;
|
|
||||||
if &key[..32] != partition_hash.as_slice() {
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
let keep = match filter {
|
|
||||||
None => true,
|
|
||||||
Some(f) => {
|
|
||||||
let entry = self.decode_entry(value.as_ref())?;
|
|
||||||
F::matches_filter(&entry, f)
|
|
||||||
}
|
|
||||||
};
|
|
||||||
if keep {
|
|
||||||
ret.push(Arc::new(ByteBuf::from(value.as_ref())));
|
|
||||||
}
|
|
||||||
if ret.len() >= limit {
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
Ok(ret)
|
|
||||||
}
|
|
||||||
|
|
||||||
pub async fn handle_update(self: &Arc<Self>, entries: &[Arc<ByteBuf>]) -> Result<(), Error> {
|
|
||||||
let syncer = self.syncer.load_full().unwrap();
|
|
||||||
|
|
||||||
for update_bytes in entries.iter() {
|
|
||||||
let update = self.decode_entry(update_bytes.as_slice())?;
|
|
||||||
|
|
||||||
let tree_key = self.tree_key(update.partition_key(), update.sort_key());
|
|
||||||
|
|
||||||
let (old_entry, new_entry) = self.store.transaction(|db| {
|
|
||||||
let (old_entry, new_entry) = match db.get(&tree_key)? {
|
|
||||||
Some(prev_bytes) => {
|
|
||||||
let old_entry = self
|
|
||||||
.decode_entry(&prev_bytes)
|
|
||||||
.map_err(sled::transaction::ConflictableTransactionError::Abort)?;
|
|
||||||
let mut new_entry = old_entry.clone();
|
|
||||||
new_entry.merge(&update);
|
|
||||||
(Some(old_entry), new_entry)
|
|
||||||
}
|
|
||||||
None => (None, update.clone()),
|
|
||||||
};
|
|
||||||
|
|
||||||
let new_bytes = rmp_to_vec_all_named(&new_entry)
|
|
||||||
.map_err(Error::RMPEncode)
|
|
||||||
.map_err(sled::transaction::ConflictableTransactionError::Abort)?;
|
|
||||||
db.insert(tree_key.clone(), new_bytes)?;
|
|
||||||
Ok((old_entry, new_entry))
|
|
||||||
})?;
|
|
||||||
|
|
||||||
if old_entry.as_ref() != Some(&new_entry) {
|
|
||||||
self.instance.updated(old_entry, Some(new_entry));
|
|
||||||
syncer.invalidate(&tree_key[..]);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
Ok(())
|
|
||||||
}
|
|
||||||
|
|
||||||
pub(crate) fn delete_if_equal(self: &Arc<Self>, k: &[u8], v: &[u8]) -> Result<bool, Error> {
|
|
||||||
let removed = self.store.transaction(|txn| {
|
|
||||||
if let Some(cur_v) = txn.get(k)? {
|
|
||||||
if cur_v == v {
|
|
||||||
txn.remove(k)?;
|
|
||||||
return Ok(true);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
Ok(false)
|
|
||||||
})?;
|
|
||||||
if removed {
|
|
||||||
let old_entry = self.decode_entry(v)?;
|
|
||||||
self.instance.updated(Some(old_entry), None);
|
|
||||||
self.syncer.load_full().unwrap().invalidate(k);
|
|
||||||
}
|
|
||||||
Ok(removed)
|
|
||||||
}
|
|
||||||
|
|
||||||
fn tree_key(&self, p: &F::P, s: &F::S) -> Vec<u8> {
|
|
||||||
let mut ret = p.hash().to_vec();
|
|
||||||
ret.extend(s.sort_key());
|
|
||||||
ret
|
|
||||||
}
|
|
||||||
|
|
||||||
fn decode_entry(&self, bytes: &[u8]) -> Result<F::E, Error> {
|
|
||||||
match rmp_serde::decode::from_read_ref::<_, F::E>(bytes) {
|
|
||||||
Ok(x) => Ok(x),
|
|
||||||
Err(e) => match F::try_migrate(bytes) {
|
|
||||||
Some(x) => Ok(x),
|
|
||||||
None => {
|
|
||||||
warn!("Unable to decode entry of {}: {}", self.name, e);
|
|
||||||
for line in hexdump::hexdump_iter(bytes) {
|
|
||||||
debug!("{}", line);
|
|
||||||
}
|
|
||||||
Err(e.into())
|
|
||||||
}
|
|
||||||
},
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
|
@ -1,59 +0,0 @@
|
||||||
use std::sync::Arc;
|
|
||||||
|
|
||||||
use garage_rpc::membership::System;
|
|
||||||
use garage_rpc::ring::Ring;
|
|
||||||
use garage_util::data::*;
|
|
||||||
|
|
||||||
use crate::*;
|
|
||||||
|
|
||||||
#[derive(Clone)]
|
|
||||||
pub struct TableFullReplication {
|
|
||||||
pub max_faults: usize,
|
|
||||||
}
|
|
||||||
|
|
||||||
#[derive(Clone)]
|
|
||||||
struct Neighbors {
|
|
||||||
ring: Arc<Ring>,
|
|
||||||
neighbors: Vec<UUID>,
|
|
||||||
}
|
|
||||||
|
|
||||||
impl TableFullReplication {
|
|
||||||
pub fn new(max_faults: usize) -> Self {
|
|
||||||
TableFullReplication { max_faults }
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
impl TableReplication for TableFullReplication {
|
|
||||||
// Full replication schema: all nodes store everything
|
|
||||||
// Writes are disseminated in an epidemic manner in the network
|
|
||||||
|
|
||||||
// Advantage: do all reads locally, extremely fast
|
|
||||||
// Inconvenient: only suitable to reasonably small tables
|
|
||||||
|
|
||||||
fn read_nodes(&self, _hash: &Hash, system: &System) -> Vec<UUID> {
|
|
||||||
vec![system.id]
|
|
||||||
}
|
|
||||||
fn read_quorum(&self) -> usize {
|
|
||||||
1
|
|
||||||
}
|
|
||||||
|
|
||||||
fn write_nodes(&self, hash: &Hash, system: &System) -> Vec<UUID> {
|
|
||||||
self.replication_nodes(hash, system.ring.borrow().as_ref())
|
|
||||||
}
|
|
||||||
fn write_quorum(&self, system: &System) -> usize {
|
|
||||||
system.ring.borrow().config.members.len() - self.max_faults
|
|
||||||
}
|
|
||||||
fn max_write_errors(&self) -> usize {
|
|
||||||
self.max_faults
|
|
||||||
}
|
|
||||||
|
|
||||||
fn replication_nodes(&self, _hash: &Hash, ring: &Ring) -> Vec<UUID> {
|
|
||||||
ring.config.members.keys().cloned().collect::<Vec<_>>()
|
|
||||||
}
|
|
||||||
fn split_points(&self, _ring: &Ring) -> Vec<Hash> {
|
|
||||||
let mut ret = vec![];
|
|
||||||
ret.push([0u8; 32].into());
|
|
||||||
ret.push([0xFFu8; 32].into());
|
|
||||||
ret
|
|
||||||
}
|
|
||||||
}
|
|
|
@ -1,891 +0,0 @@
|
||||||
use rand::Rng;
|
|
||||||
use std::collections::{BTreeMap, VecDeque};
|
|
||||||
use std::sync::{Arc, Mutex};
|
|
||||||
use std::time::{Duration, Instant};
|
|
||||||
|
|
||||||
use futures::future::join_all;
|
|
||||||
use futures::{pin_mut, select};
|
|
||||||
use futures_util::future::*;
|
|
||||||
use futures_util::stream::*;
|
|
||||||
use serde::{Deserialize, Serialize};
|
|
||||||
use serde_bytes::ByteBuf;
|
|
||||||
use tokio::sync::{mpsc, watch};
|
|
||||||
|
|
||||||
use garage_rpc::ring::Ring;
|
|
||||||
use garage_util::data::*;
|
|
||||||
use garage_util::error::Error;
|
|
||||||
|
|
||||||
use crate::*;
|
|
||||||
|
|
||||||
const MAX_DEPTH: usize = 16;
|
|
||||||
const SCAN_INTERVAL: Duration = Duration::from_secs(3600);
|
|
||||||
const CHECKSUM_CACHE_TIMEOUT: Duration = Duration::from_secs(1800);
|
|
||||||
const TABLE_SYNC_RPC_TIMEOUT: Duration = Duration::from_secs(30);
|
|
||||||
|
|
||||||
pub struct TableSyncer<F: TableSchema, R: TableReplication> {
|
|
||||||
table: Arc<Table<F, R>>,
|
|
||||||
todo: Mutex<SyncTodo>,
|
|
||||||
cache: Vec<Mutex<BTreeMap<SyncRange, RangeChecksumCache>>>,
|
|
||||||
}
|
|
||||||
|
|
||||||
#[derive(Serialize, Deserialize)]
|
|
||||||
pub(crate) enum SyncRPC {
|
|
||||||
GetRootChecksumRange(Hash, Hash),
|
|
||||||
RootChecksumRange(SyncRange),
|
|
||||||
Checksums(Vec<RangeChecksum>),
|
|
||||||
Difference(Vec<SyncRange>, Vec<Arc<ByteBuf>>),
|
|
||||||
}
|
|
||||||
|
|
||||||
struct SyncTodo {
|
|
||||||
todo: Vec<TodoPartition>,
|
|
||||||
}
|
|
||||||
|
|
||||||
// One unit of work for the syncer: a range of hashes to sync or to offload.
#[derive(Debug, Clone)]
|
|
||||||
struct TodoPartition {
|
|
||||||
// The partition consists of the hashes between `begin` (included) and `end` (excluded)
|
|
||||||
begin: Hash,
|
|
||||||
end: Hash,
|
|
||||||
|
|
||||||
// Are we a node that stores this partition or not?
// If true the partition is synced with the other nodes, otherwise it is offloaded.
|
|
||||||
retain: bool,
|
|
||||||
}
|
|
||||||
|
|
||||||
// A SyncRange defines a query on the dataset stored by a node, in the following way:
|
|
||||||
// - all items whose keys are >= `begin`
|
|
||||||
// - stopping at the first item whose key hash has at least `level` leading zero bytes (excluded)
|
|
||||||
// - except that the first item of the range is always included, even if its key hash has that many leading zero bytes
|
|
||||||
// - and stopping at `end` (excluded) if such an item is not found
|
|
||||||
// The checksum itself does not store all of the items in the database, only the hashes of the "sub-ranges"
|
|
||||||
// i.e. of ranges of level `level-1` that cover the same range
|
|
||||||
// (ranges of level 0 do not exist and their hash is simply the hash of the first item >= begin)
|
|
||||||
// See RangeChecksum for the struct that stores this information.
|
|
||||||
#[derive(Hash, PartialEq, Eq, Debug, Clone, Serialize, Deserialize)]
|
|
||||||
pub(crate) struct SyncRange {
|
|
||||||
// First key of the range (included).
begin: Vec<u8>,
|
|
||||||
// Key at which the range stops at the latest (excluded); left empty for level-0 (single item) ranges.
end: Vec<u8>,
|
|
||||||
// Number of leading zero bytes a key hash must have to terminate the range; 0 denotes a single item.
level: usize,
|
|
||||||
}
|
|
||||||
|
|
||||||
impl std::cmp::PartialOrd for SyncRange {
|
|
||||||
fn partial_cmp(&self, other: &Self) -> Option<std::cmp::Ordering> {
|
|
||||||
Some(self.cmp(other))
|
|
||||||
}
|
|
||||||
}
|
|
||||||
impl std::cmp::Ord for SyncRange {
|
|
||||||
fn cmp(&self, other: &Self) -> std::cmp::Ordering {
|
|
||||||
self.begin
|
|
||||||
.cmp(&other.begin)
|
|
||||||
.then(self.level.cmp(&other.level))
|
|
||||||
.then(self.end.cmp(&other.end))
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// The checksum of a SyncRange: the list of its sub-ranges with their hashes.
#[derive(Debug, Clone, Serialize, Deserialize)]
|
|
||||||
pub(crate) struct RangeChecksum {
|
|
||||||
// The range this checksum was computed for.
bounds: SyncRange,
|
|
||||||
// Sub-ranges (or single items, at level 0) found in the range, each with its hash.
children: Vec<(SyncRange, Hash)>,
|
|
||||||
// Key of the item at which the range was cut off, or None if the scan was not cut off at an item.
found_limit: Option<Vec<u8>>,
|
|
||||||
|
|
||||||
// Not serialized; set to the current instant when a value is deserialized.
#[serde(skip, default = "std::time::Instant::now")]
|
|
||||||
// When the checksum was computed (for level >= 2, no later than the oldest cached sub-checksum used).
time: Instant,
|
|
||||||
}
|
|
||||||
|
|
||||||
// Condensed form of a RangeChecksum, as stored in the per-level caches.
#[derive(Debug, Clone)]
|
|
||||||
struct RangeChecksumCache {
|
|
||||||
// Hash of the serialized RangeChecksum.
hash: Option<Hash>, // None if no children
|
|
||||||
// Copied from RangeChecksum::found_limit.
found_limit: Option<Vec<u8>>,
|
|
||||||
// Copied from RangeChecksum::time; used to expire the entry.
time: Instant,
|
|
||||||
}
|
|
||||||
|
|
||||||
impl<F, R> TableSyncer<F, R>
|
|
||||||
where
|
|
||||||
F: TableSchema + 'static,
|
|
||||||
R: TableReplication + 'static,
|
|
||||||
{
|
|
||||||
pub(crate) async fn launch(table: Arc<Table<F, R>>) -> Arc<Self> {
|
|
||||||
let todo = SyncTodo { todo: Vec::new() };
|
|
||||||
let syncer = Arc::new(TableSyncer {
|
|
||||||
table: table.clone(),
|
|
||||||
todo: Mutex::new(todo),
|
|
||||||
cache: (0..MAX_DEPTH)
|
|
||||||
.map(|_| Mutex::new(BTreeMap::new()))
|
|
||||||
.collect::<Vec<_>>(),
|
|
||||||
});
|
|
||||||
|
|
||||||
let (busy_tx, busy_rx) = mpsc::unbounded_channel();
|
|
||||||
|
|
||||||
let s1 = syncer.clone();
|
|
||||||
table
|
|
||||||
.system
|
|
||||||
.background
|
|
||||||
.spawn_worker(
|
|
||||||
format!("table sync watcher for {}", table.name),
|
|
||||||
move |must_exit: watch::Receiver<bool>| s1.watcher_task(must_exit, busy_rx),
|
|
||||||
)
|
|
||||||
.await;
|
|
||||||
|
|
||||||
let s2 = syncer.clone();
|
|
||||||
table
|
|
||||||
.system
|
|
||||||
.background
|
|
||||||
.spawn_worker(
|
|
||||||
format!("table syncer for {}", table.name),
|
|
||||||
move |must_exit: watch::Receiver<bool>| s2.syncer_task(must_exit, busy_tx),
|
|
||||||
)
|
|
||||||
.await;
|
|
||||||
|
|
||||||
let s3 = syncer.clone();
|
|
||||||
tokio::spawn(async move {
|
|
||||||
tokio::time::delay_for(Duration::from_secs(20)).await;
|
|
||||||
s3.add_full_scan().await;
|
|
||||||
});
|
|
||||||
|
|
||||||
syncer
|
|
||||||
}
|
|
||||||
|
|
||||||
async fn watcher_task(
|
|
||||||
self: Arc<Self>,
|
|
||||||
mut must_exit: watch::Receiver<bool>,
|
|
||||||
mut busy_rx: mpsc::UnboundedReceiver<bool>,
|
|
||||||
) -> Result<(), Error> {
|
|
||||||
let mut prev_ring: Arc<Ring> = self.table.system.ring.borrow().clone();
|
|
||||||
let mut ring_recv: watch::Receiver<Arc<Ring>> = self.table.system.ring.clone();
|
|
||||||
let mut nothing_to_do_since = Some(Instant::now());
|
|
||||||
|
|
||||||
while !*must_exit.borrow() {
|
|
||||||
let s_ring_recv = ring_recv.recv().fuse();
|
|
||||||
let s_busy = busy_rx.recv().fuse();
|
|
||||||
let s_must_exit = must_exit.recv().fuse();
|
|
||||||
let s_timeout = tokio::time::delay_for(Duration::from_secs(1)).fuse();
|
|
||||||
pin_mut!(s_ring_recv, s_busy, s_must_exit, s_timeout);
|
|
||||||
|
|
||||||
select! {
|
|
||||||
new_ring_r = s_ring_recv => {
|
|
||||||
if let Some(new_ring) = new_ring_r {
|
|
||||||
debug!("({}) Adding ring difference to syncer todo list", self.table.name);
|
|
||||||
self.todo.lock().unwrap().add_ring_difference(&self.table, &prev_ring, &new_ring);
|
|
||||||
prev_ring = new_ring;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
busy_opt = s_busy => {
|
|
||||||
if let Some(busy) = busy_opt {
|
|
||||||
if busy {
|
|
||||||
nothing_to_do_since = None;
|
|
||||||
} else {
|
|
||||||
if nothing_to_do_since.is_none() {
|
|
||||||
nothing_to_do_since = Some(Instant::now());
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
must_exit_v = s_must_exit => {
|
|
||||||
if must_exit_v.unwrap_or(false) {
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
_ = s_timeout => {
|
|
||||||
if nothing_to_do_since.map(|t| Instant::now() - t >= SCAN_INTERVAL).unwrap_or(false) {
|
|
||||||
nothing_to_do_since = None;
|
|
||||||
debug!("({}) Adding full scan to syncer todo list", self.table.name);
|
|
||||||
self.add_full_scan().await;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
Ok(())
|
|
||||||
}
|
|
||||||
|
|
||||||
pub async fn add_full_scan(&self) {
|
|
||||||
self.todo.lock().unwrap().add_full_scan(&self.table);
|
|
||||||
}
|
|
||||||
|
|
||||||
async fn syncer_task(
|
|
||||||
self: Arc<Self>,
|
|
||||||
mut must_exit: watch::Receiver<bool>,
|
|
||||||
busy_tx: mpsc::UnboundedSender<bool>,
|
|
||||||
) -> Result<(), Error> {
|
|
||||||
while !*must_exit.borrow() {
|
|
||||||
let task = self.todo.lock().unwrap().pop_task();
|
|
||||||
if let Some(partition) = task {
|
|
||||||
busy_tx.send(true)?;
|
|
||||||
let res = self
|
|
||||||
.clone()
|
|
||||||
.sync_partition(&partition, &mut must_exit)
|
|
||||||
.await;
|
|
||||||
if let Err(e) = res {
|
|
||||||
warn!(
|
|
||||||
"({}) Error while syncing {:?}: {}",
|
|
||||||
self.table.name, partition, e
|
|
||||||
);
|
|
||||||
}
|
|
||||||
} else {
|
|
||||||
busy_tx.send(false)?;
|
|
||||||
tokio::time::delay_for(Duration::from_secs(1)).await;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
Ok(())
|
|
||||||
}
|
|
||||||
|
|
||||||
async fn sync_partition(
|
|
||||||
self: Arc<Self>,
|
|
||||||
partition: &TodoPartition,
|
|
||||||
must_exit: &mut watch::Receiver<bool>,
|
|
||||||
) -> Result<(), Error> {
|
|
||||||
if partition.retain {
|
|
||||||
let my_id = self.table.system.id;
|
|
||||||
let nodes = self
|
|
||||||
.table
|
|
||||||
.replication
|
|
||||||
.write_nodes(&partition.begin, &self.table.system)
|
|
||||||
.into_iter()
|
|
||||||
.filter(|node| *node != my_id)
|
|
||||||
.collect::<Vec<_>>();
|
|
||||||
|
|
||||||
debug!(
|
|
||||||
"({}) Preparing to sync {:?} with {:?}...",
|
|
||||||
self.table.name, partition, nodes
|
|
||||||
);
|
|
||||||
let root_cks = self.root_checksum(&partition.begin, &partition.end, must_exit)?;
|
|
||||||
|
|
||||||
let mut sync_futures = nodes
|
|
||||||
.iter()
|
|
||||||
.map(|node| {
|
|
||||||
self.clone().do_sync_with(
|
|
||||||
partition.clone(),
|
|
||||||
root_cks.clone(),
|
|
||||||
*node,
|
|
||||||
must_exit.clone(),
|
|
||||||
)
|
|
||||||
})
|
|
||||||
.collect::<FuturesUnordered<_>>();
|
|
||||||
|
|
||||||
let mut n_errors = 0;
|
|
||||||
while let Some(r) = sync_futures.next().await {
|
|
||||||
if let Err(e) = r {
|
|
||||||
n_errors += 1;
|
|
||||||
warn!("({}) Sync error: {}", self.table.name, e);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
if n_errors > self.table.replication.max_write_errors() {
|
|
||||||
return Err(Error::Message(format!(
|
|
||||||
"Sync failed with too many nodes (should have been: {:?}).",
|
|
||||||
nodes
|
|
||||||
)));
|
|
||||||
}
|
|
||||||
} else {
|
|
||||||
self.offload_partition(&partition.begin, &partition.end, must_exit)
|
|
||||||
.await?;
|
|
||||||
}
|
|
||||||
|
|
||||||
Ok(())
|
|
||||||
}
|
|
||||||
|
|
||||||
// Offload partition: this partition is not something we are storing,
|
|
||||||
// so send it out to all other nodes that store it and delete items locally.
|
|
||||||
// We don't bother checking if the remote nodes already have the items,
|
|
||||||
// we just batch-send everything. Offloading isn't supposed to happen very often.
|
|
||||||
// If any of the nodes that are supposed to store the items is unable to
|
|
||||||
// save them, we interrupt the process.
|
|
||||||
async fn offload_partition(
|
|
||||||
self: &Arc<Self>,
|
|
||||||
begin: &Hash,
|
|
||||||
end: &Hash,
|
|
||||||
must_exit: &mut watch::Receiver<bool>,
|
|
||||||
) -> Result<(), Error> {
|
|
||||||
let mut counter: usize = 0;
|
|
||||||
|
|
||||||
while !*must_exit.borrow() {
|
|
||||||
let mut items = Vec::new();
|
|
||||||
|
|
||||||
for item in self.table.store.range(begin.to_vec()..end.to_vec()) {
|
|
||||||
let (key, value) = item?;
|
|
||||||
items.push((key.to_vec(), Arc::new(ByteBuf::from(value.as_ref()))));
|
|
||||||
|
|
||||||
if items.len() >= 1024 {
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
if items.len() > 0 {
|
|
||||||
let nodes = self
|
|
||||||
.table
|
|
||||||
.replication
|
|
||||||
.write_nodes(&begin, &self.table.system)
|
|
||||||
.into_iter()
|
|
||||||
.collect::<Vec<_>>();
|
|
||||||
if nodes.contains(&self.table.system.id) {
|
|
||||||
warn!("Interrupting offload as partitions seem to have changed");
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
|
|
||||||
counter += 1;
|
|
||||||
debug!(
|
|
||||||
"Offloading {} items from {:?}..{:?} ({})",
|
|
||||||
items.len(),
|
|
||||||
begin,
|
|
||||||
end,
|
|
||||||
counter
|
|
||||||
);
|
|
||||||
self.offload_items(&items, &nodes[..]).await?;
|
|
||||||
} else {
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
Ok(())
|
|
||||||
}
|
|
||||||
|
|
||||||
async fn offload_items(
|
|
||||||
self: &Arc<Self>,
|
|
||||||
items: &Vec<(Vec<u8>, Arc<ByteBuf>)>,
|
|
||||||
nodes: &[UUID],
|
|
||||||
) -> Result<(), Error> {
|
|
||||||
let values = items.iter().map(|(_k, v)| v.clone()).collect::<Vec<_>>();
|
|
||||||
let update_msg = Arc::new(TableRPC::<F>::Update(values));
|
|
||||||
|
|
||||||
for res in join_all(nodes.iter().map(|to| {
|
|
||||||
self.table
|
|
||||||
.rpc_client
|
|
||||||
.call_arc(*to, update_msg.clone(), TABLE_SYNC_RPC_TIMEOUT)
|
|
||||||
}))
|
|
||||||
.await
|
|
||||||
{
|
|
||||||
res?;
|
|
||||||
}
|
|
||||||
|
|
||||||
// All remote nodes have written those items, now we can delete them locally
|
|
||||||
let mut not_removed = 0;
|
|
||||||
for (k, v) in items.iter() {
|
|
||||||
if !self.table.delete_if_equal(&k[..], &v[..])? {
|
|
||||||
not_removed += 1;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
if not_removed > 0 {
|
|
||||||
debug!("{} items not removed during offload because they changed in between (trying again...)", not_removed);
|
|
||||||
}
|
|
||||||
|
|
||||||
Ok(())
|
|
||||||
}
|
|
||||||
|
|
||||||
fn root_checksum(
|
|
||||||
self: &Arc<Self>,
|
|
||||||
begin: &Hash,
|
|
||||||
end: &Hash,
|
|
||||||
must_exit: &mut watch::Receiver<bool>,
|
|
||||||
) -> Result<RangeChecksum, Error> {
|
|
||||||
for i in 1..MAX_DEPTH {
|
|
||||||
let rc = self.range_checksum(
|
|
||||||
&SyncRange {
|
|
||||||
begin: begin.to_vec(),
|
|
||||||
end: end.to_vec(),
|
|
||||||
level: i,
|
|
||||||
},
|
|
||||||
must_exit,
|
|
||||||
)?;
|
|
||||||
if rc.found_limit.is_none() {
|
|
||||||
return Ok(rc);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
Err(Error::Message(format!(
|
|
||||||
"Unable to compute root checksum (this should never happen)"
|
|
||||||
)))
|
|
||||||
}
|
|
||||||
|
|
||||||
fn range_checksum(
|
|
||||||
self: &Arc<Self>,
|
|
||||||
range: &SyncRange,
|
|
||||||
must_exit: &mut watch::Receiver<bool>,
|
|
||||||
) -> Result<RangeChecksum, Error> {
|
|
||||||
assert!(range.level != 0);
|
|
||||||
trace!("Call range_checksum {:?}", range);
|
|
||||||
|
|
||||||
if range.level == 1 {
|
|
||||||
let mut children = vec![];
|
|
||||||
for item in self
|
|
||||||
.table
|
|
||||||
.store
|
|
||||||
.range(range.begin.clone()..range.end.clone())
|
|
||||||
{
|
|
||||||
let (key, value) = item?;
|
|
||||||
let key_hash = blake2sum(&key[..]);
|
|
||||||
if children.len() > 0
|
|
||||||
&& key_hash.as_slice()[0..range.level]
|
|
||||||
.iter()
|
|
||||||
.all(|x| *x == 0u8)
|
|
||||||
{
|
|
||||||
trace!(
|
|
||||||
"range_checksum {:?} returning {} items",
|
|
||||||
range,
|
|
||||||
children.len()
|
|
||||||
);
|
|
||||||
return Ok(RangeChecksum {
|
|
||||||
bounds: range.clone(),
|
|
||||||
children,
|
|
||||||
found_limit: Some(key.to_vec()),
|
|
||||||
time: Instant::now(),
|
|
||||||
});
|
|
||||||
}
|
|
||||||
let item_range = SyncRange {
|
|
||||||
begin: key.to_vec(),
|
|
||||||
end: vec![],
|
|
||||||
level: 0,
|
|
||||||
};
|
|
||||||
children.push((item_range, blake2sum(&value[..])));
|
|
||||||
}
|
|
||||||
trace!(
|
|
||||||
"range_checksum {:?} returning {} items",
|
|
||||||
range,
|
|
||||||
children.len()
|
|
||||||
);
|
|
||||||
Ok(RangeChecksum {
|
|
||||||
bounds: range.clone(),
|
|
||||||
children,
|
|
||||||
found_limit: None,
|
|
||||||
time: Instant::now(),
|
|
||||||
})
|
|
||||||
} else {
|
|
||||||
let mut children = vec![];
|
|
||||||
let mut sub_range = SyncRange {
|
|
||||||
begin: range.begin.clone(),
|
|
||||||
end: range.end.clone(),
|
|
||||||
level: range.level - 1,
|
|
||||||
};
|
|
||||||
let mut time = Instant::now();
|
|
||||||
while !*must_exit.borrow() {
|
|
||||||
let sub_ck = self.range_checksum_cached_hash(&sub_range, must_exit)?;
|
|
||||||
|
|
||||||
if let Some(hash) = sub_ck.hash {
|
|
||||||
children.push((sub_range.clone(), hash));
|
|
||||||
if sub_ck.time < time {
|
|
||||||
time = sub_ck.time;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
if sub_ck.found_limit.is_none() || sub_ck.hash.is_none() {
|
|
||||||
trace!(
|
|
||||||
"range_checksum {:?} returning {} items",
|
|
||||||
range,
|
|
||||||
children.len()
|
|
||||||
);
|
|
||||||
return Ok(RangeChecksum {
|
|
||||||
bounds: range.clone(),
|
|
||||||
children,
|
|
||||||
found_limit: None,
|
|
||||||
time,
|
|
||||||
});
|
|
||||||
}
|
|
||||||
let found_limit = sub_ck.found_limit.unwrap();
|
|
||||||
|
|
||||||
let actual_limit_hash = blake2sum(&found_limit[..]);
|
|
||||||
if actual_limit_hash.as_slice()[0..range.level]
|
|
||||||
.iter()
|
|
||||||
.all(|x| *x == 0u8)
|
|
||||||
{
|
|
||||||
trace!(
|
|
||||||
"range_checksum {:?} returning {} items",
|
|
||||||
range,
|
|
||||||
children.len()
|
|
||||||
);
|
|
||||||
return Ok(RangeChecksum {
|
|
||||||
bounds: range.clone(),
|
|
||||||
children,
|
|
||||||
found_limit: Some(found_limit.clone()),
|
|
||||||
time,
|
|
||||||
});
|
|
||||||
}
|
|
||||||
|
|
||||||
sub_range.begin = found_limit;
|
|
||||||
}
|
|
||||||
trace!("range_checksum {:?} exiting due to must_exit", range);
|
|
||||||
Err(Error::Message(format!("Exiting.")))
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
fn range_checksum_cached_hash(
|
|
||||||
self: &Arc<Self>,
|
|
||||||
range: &SyncRange,
|
|
||||||
must_exit: &mut watch::Receiver<bool>,
|
|
||||||
) -> Result<RangeChecksumCache, Error> {
|
|
||||||
{
|
|
||||||
let mut cache = self.cache[range.level].lock().unwrap();
|
|
||||||
if let Some(v) = cache.get(&range) {
|
|
||||||
if Instant::now() - v.time < CHECKSUM_CACHE_TIMEOUT {
|
|
||||||
return Ok(v.clone());
|
|
||||||
}
|
|
||||||
}
|
|
||||||
cache.remove(&range);
|
|
||||||
}
|
|
||||||
|
|
||||||
let v = self.range_checksum(&range, must_exit)?;
|
|
||||||
trace!(
|
|
||||||
"({}) New checksum calculated for {}-{}/{}, {} children",
|
|
||||||
self.table.name,
|
|
||||||
hex::encode(&range.begin)
|
|
||||||
.chars()
|
|
||||||
.take(16)
|
|
||||||
.collect::<String>(),
|
|
||||||
hex::encode(&range.end).chars().take(16).collect::<String>(),
|
|
||||||
range.level,
|
|
||||||
v.children.len()
|
|
||||||
);
|
|
||||||
|
|
||||||
let hash = if v.children.len() > 0 {
|
|
||||||
Some(blake2sum(&rmp_to_vec_all_named(&v)?[..]))
|
|
||||||
} else {
|
|
||||||
None
|
|
||||||
};
|
|
||||||
let cache_entry = RangeChecksumCache {
|
|
||||||
hash,
|
|
||||||
found_limit: v.found_limit,
|
|
||||||
time: v.time,
|
|
||||||
};
|
|
||||||
|
|
||||||
let mut cache = self.cache[range.level].lock().unwrap();
|
|
||||||
cache.insert(range.clone(), cache_entry.clone());
|
|
||||||
Ok(cache_entry)
|
|
||||||
}
|
|
||||||
|
|
||||||
async fn do_sync_with(
|
|
||||||
self: Arc<Self>,
|
|
||||||
partition: TodoPartition,
|
|
||||||
root_ck: RangeChecksum,
|
|
||||||
who: UUID,
|
|
||||||
mut must_exit: watch::Receiver<bool>,
|
|
||||||
) -> Result<(), Error> {
|
|
||||||
let mut todo = VecDeque::new();
|
|
||||||
|
|
||||||
// If their root checksum has level > than us, use that as a reference
|
|
||||||
let root_cks_resp = self
|
|
||||||
.table
|
|
||||||
.rpc_client
|
|
||||||
.call(
|
|
||||||
who,
|
|
||||||
TableRPC::<F>::SyncRPC(SyncRPC::GetRootChecksumRange(
|
|
||||||
partition.begin.clone(),
|
|
||||||
partition.end.clone(),
|
|
||||||
)),
|
|
||||||
TABLE_SYNC_RPC_TIMEOUT,
|
|
||||||
)
|
|
||||||
.await?;
|
|
||||||
if let TableRPC::<F>::SyncRPC(SyncRPC::RootChecksumRange(range)) = root_cks_resp {
|
|
||||||
if range.level > root_ck.bounds.level {
|
|
||||||
let their_root_range_ck = self.range_checksum(&range, &mut must_exit)?;
|
|
||||||
todo.push_back(their_root_range_ck);
|
|
||||||
} else {
|
|
||||||
todo.push_back(root_ck);
|
|
||||||
}
|
|
||||||
} else {
|
|
||||||
return Err(Error::Message(format!(
|
|
||||||
"Invalid respone to GetRootChecksumRange RPC: {}",
|
|
||||||
debug_serialize(root_cks_resp)
|
|
||||||
)));
|
|
||||||
}
|
|
||||||
|
|
||||||
while !todo.is_empty() && !*must_exit.borrow() {
|
|
||||||
let total_children = todo.iter().map(|x| x.children.len()).fold(0, |x, y| x + y);
|
|
||||||
trace!(
|
|
||||||
"({}) Sync with {:?}: {} ({}) remaining",
|
|
||||||
self.table.name,
|
|
||||||
who,
|
|
||||||
todo.len(),
|
|
||||||
total_children
|
|
||||||
);
|
|
||||||
|
|
||||||
let step_size = std::cmp::min(16, todo.len());
|
|
||||||
let step = todo.drain(..step_size).collect::<Vec<_>>();
|
|
||||||
|
|
||||||
let rpc_resp = self
|
|
||||||
.table
|
|
||||||
.rpc_client
|
|
||||||
.call(
|
|
||||||
who,
|
|
||||||
TableRPC::<F>::SyncRPC(SyncRPC::Checksums(step)),
|
|
||||||
TABLE_SYNC_RPC_TIMEOUT,
|
|
||||||
)
|
|
||||||
.await?;
|
|
||||||
if let TableRPC::<F>::SyncRPC(SyncRPC::Difference(mut diff_ranges, diff_items)) =
|
|
||||||
rpc_resp
|
|
||||||
{
|
|
||||||
if diff_ranges.len() > 0 || diff_items.len() > 0 {
|
|
||||||
info!(
|
|
||||||
"({}) Sync with {:?}: difference {} ranges, {} items",
|
|
||||||
self.table.name,
|
|
||||||
who,
|
|
||||||
diff_ranges.len(),
|
|
||||||
diff_items.len()
|
|
||||||
);
|
|
||||||
}
|
|
||||||
let mut items_to_send = vec![];
|
|
||||||
for differing in diff_ranges.drain(..) {
|
|
||||||
if differing.level == 0 {
|
|
||||||
items_to_send.push(differing.begin);
|
|
||||||
} else {
|
|
||||||
let checksum = self.range_checksum(&differing, &mut must_exit)?;
|
|
||||||
todo.push_back(checksum);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
if diff_items.len() > 0 {
|
|
||||||
self.table.handle_update(&diff_items[..]).await?;
|
|
||||||
}
|
|
||||||
if items_to_send.len() > 0 {
|
|
||||||
self.send_items(who, items_to_send).await?;
|
|
||||||
}
|
|
||||||
} else {
|
|
||||||
return Err(Error::Message(format!(
|
|
||||||
"Unexpected response to sync RPC checksums: {}",
|
|
||||||
debug_serialize(&rpc_resp)
|
|
||||||
)));
|
|
||||||
}
|
|
||||||
}
|
|
||||||
Ok(())
|
|
||||||
}
|
|
||||||
|
|
||||||
async fn send_items(&self, who: UUID, item_list: Vec<Vec<u8>>) -> Result<(), Error> {
|
|
||||||
info!(
|
|
||||||
"({}) Sending {} items to {:?}",
|
|
||||||
self.table.name,
|
|
||||||
item_list.len(),
|
|
||||||
who
|
|
||||||
);
|
|
||||||
|
|
||||||
let mut values = vec![];
|
|
||||||
for item in item_list.iter() {
|
|
||||||
if let Some(v) = self.table.store.get(&item[..])? {
|
|
||||||
values.push(Arc::new(ByteBuf::from(v.as_ref())));
|
|
||||||
}
|
|
||||||
}
|
|
||||||
let rpc_resp = self
|
|
||||||
.table
|
|
||||||
.rpc_client
|
|
||||||
.call(who, TableRPC::<F>::Update(values), TABLE_SYNC_RPC_TIMEOUT)
|
|
||||||
.await?;
|
|
||||||
if let TableRPC::<F>::Ok = rpc_resp {
|
|
||||||
Ok(())
|
|
||||||
} else {
|
|
||||||
Err(Error::Message(format!(
|
|
||||||
"Unexpected response to RPC Update: {}",
|
|
||||||
debug_serialize(&rpc_resp)
|
|
||||||
)))
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
pub(crate) async fn handle_rpc(
|
|
||||||
self: &Arc<Self>,
|
|
||||||
message: &SyncRPC,
|
|
||||||
mut must_exit: watch::Receiver<bool>,
|
|
||||||
) -> Result<SyncRPC, Error> {
|
|
||||||
match message {
|
|
||||||
SyncRPC::GetRootChecksumRange(begin, end) => {
|
|
||||||
let root_cks = self.root_checksum(&begin, &end, &mut must_exit)?;
|
|
||||||
Ok(SyncRPC::RootChecksumRange(root_cks.bounds))
|
|
||||||
}
|
|
||||||
SyncRPC::Checksums(checksums) => {
|
|
||||||
self.handle_checksums_rpc(&checksums[..], &mut must_exit)
|
|
||||||
.await
|
|
||||||
}
|
|
||||||
_ => Err(Error::Message(format!("Unexpected sync RPC"))),
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
async fn handle_checksums_rpc(
|
|
||||||
self: &Arc<Self>,
|
|
||||||
checksums: &[RangeChecksum],
|
|
||||||
must_exit: &mut watch::Receiver<bool>,
|
|
||||||
) -> Result<SyncRPC, Error> {
|
|
||||||
let mut ret_ranges = vec![];
|
|
||||||
let mut ret_items = vec![];
|
|
||||||
|
|
||||||
for their_ckr in checksums.iter() {
|
|
||||||
let our_ckr = self.range_checksum(&their_ckr.bounds, must_exit)?;
|
|
||||||
for (their_range, their_hash) in their_ckr.children.iter() {
|
|
||||||
let differs = match our_ckr
|
|
||||||
.children
|
|
||||||
.binary_search_by(|(our_range, _)| our_range.cmp(&their_range))
|
|
||||||
{
|
|
||||||
Err(_) => {
|
|
||||||
if their_range.level >= 1 {
|
|
||||||
let cached_hash =
|
|
||||||
self.range_checksum_cached_hash(&their_range, must_exit)?;
|
|
||||||
cached_hash.hash.map(|h| h != *their_hash).unwrap_or(true)
|
|
||||||
} else {
|
|
||||||
true
|
|
||||||
}
|
|
||||||
}
|
|
||||||
Ok(i) => our_ckr.children[i].1 != *their_hash,
|
|
||||||
};
|
|
||||||
if differs {
|
|
||||||
ret_ranges.push(their_range.clone());
|
|
||||||
if their_range.level == 0 {
|
|
||||||
if let Some(item_bytes) =
|
|
||||||
self.table.store.get(their_range.begin.as_slice())?
|
|
||||||
{
|
|
||||||
ret_items.push(Arc::new(ByteBuf::from(item_bytes.to_vec())));
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
for (our_range, _hash) in our_ckr.children.iter() {
|
|
||||||
if let Some(their_found_limit) = &their_ckr.found_limit {
|
|
||||||
if our_range.begin.as_slice() > their_found_limit.as_slice() {
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
let not_present = our_ckr
|
|
||||||
.children
|
|
||||||
.binary_search_by(|(their_range, _)| their_range.cmp(&our_range))
|
|
||||||
.is_err();
|
|
||||||
if not_present {
|
|
||||||
if our_range.level > 0 {
|
|
||||||
ret_ranges.push(our_range.clone());
|
|
||||||
}
|
|
||||||
if our_range.level == 0 {
|
|
||||||
if let Some(item_bytes) =
|
|
||||||
self.table.store.get(our_range.begin.as_slice())?
|
|
||||||
{
|
|
||||||
ret_items.push(Arc::new(ByteBuf::from(item_bytes.to_vec())));
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
let n_checksums = checksums
|
|
||||||
.iter()
|
|
||||||
.map(|x| x.children.len())
|
|
||||||
.fold(0, |x, y| x + y);
|
|
||||||
if ret_ranges.len() > 0 || ret_items.len() > 0 {
|
|
||||||
trace!(
|
|
||||||
"({}) Checksum comparison RPC: {} different + {} items for {} received",
|
|
||||||
self.table.name,
|
|
||||||
ret_ranges.len(),
|
|
||||||
ret_items.len(),
|
|
||||||
n_checksums
|
|
||||||
);
|
|
||||||
}
|
|
||||||
Ok(SyncRPC::Difference(ret_ranges, ret_items))
|
|
||||||
}
|
|
||||||
|
|
||||||
pub(crate) fn invalidate(self: &Arc<Self>, item_key: &[u8]) {
|
|
||||||
for i in 1..MAX_DEPTH {
|
|
||||||
let needle = SyncRange {
|
|
||||||
begin: item_key.to_vec(),
|
|
||||||
end: vec![],
|
|
||||||
level: i,
|
|
||||||
};
|
|
||||||
let mut cache = self.cache[i].lock().unwrap();
|
|
||||||
if let Some(cache_entry) = cache.range(..=needle).rev().next() {
|
|
||||||
if cache_entry.0.begin[..] <= *item_key && cache_entry.0.end[..] > *item_key {
|
|
||||||
let index = cache_entry.0.clone();
|
|
||||||
drop(cache_entry);
|
|
||||||
cache.remove(&index);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
impl SyncTodo {
|
|
||||||
fn add_full_scan<F: TableSchema, R: TableReplication>(&mut self, table: &Table<F, R>) {
|
|
||||||
let my_id = table.system.id;
|
|
||||||
|
|
||||||
self.todo.clear();
|
|
||||||
|
|
||||||
let ring = table.system.ring.borrow().clone();
|
|
||||||
let split_points = table.replication.split_points(&ring);
|
|
||||||
|
|
||||||
for i in 0..split_points.len() - 1 {
|
|
||||||
let begin = split_points[i];
|
|
||||||
let end = split_points[i + 1];
|
|
||||||
let nodes = table.replication.replication_nodes(&begin, &ring);
|
|
||||||
|
|
||||||
let retain = nodes.contains(&my_id);
|
|
||||||
if !retain {
|
|
||||||
// Check if we have some data to send, otherwise skip
|
|
||||||
if table.store.range(begin..end).next().is_none() {
|
|
||||||
continue;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
self.todo.push(TodoPartition { begin, end, retain });
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
fn add_ring_difference<F: TableSchema, R: TableReplication>(
|
|
||||||
&mut self,
|
|
||||||
table: &Table<F, R>,
|
|
||||||
old_ring: &Ring,
|
|
||||||
new_ring: &Ring,
|
|
||||||
) {
|
|
||||||
let my_id = table.system.id;
|
|
||||||
|
|
||||||
// If it is us who are entering or leaving the system,
|
|
||||||
// initiate a full sync instead of incremental sync
|
|
||||||
if old_ring.config.members.contains_key(&my_id)
|
|
||||||
!= new_ring.config.members.contains_key(&my_id)
|
|
||||||
{
|
|
||||||
self.add_full_scan(table);
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
|
|
||||||
let mut all_points = None
|
|
||||||
.into_iter()
|
|
||||||
.chain(table.replication.split_points(old_ring).drain(..))
|
|
||||||
.chain(table.replication.split_points(new_ring).drain(..))
|
|
||||||
.chain(self.todo.iter().map(|x| x.begin))
|
|
||||||
.chain(self.todo.iter().map(|x| x.end))
|
|
||||||
.collect::<Vec<_>>();
|
|
||||||
all_points.sort();
|
|
||||||
all_points.dedup();
|
|
||||||
|
|
||||||
let mut old_todo = std::mem::replace(&mut self.todo, vec![]);
|
|
||||||
old_todo.sort_by(|x, y| x.begin.cmp(&y.begin));
|
|
||||||
let mut new_todo = vec![];
|
|
||||||
|
|
||||||
for i in 0..all_points.len() - 1 {
|
|
||||||
let begin = all_points[i];
|
|
||||||
let end = all_points[i + 1];
|
|
||||||
let was_ours = table
|
|
||||||
.replication
|
|
||||||
.replication_nodes(&begin, &old_ring)
|
|
||||||
.contains(&my_id);
|
|
||||||
let is_ours = table
|
|
||||||
.replication
|
|
||||||
.replication_nodes(&begin, &new_ring)
|
|
||||||
.contains(&my_id);
|
|
||||||
|
|
||||||
let was_todo = match old_todo.binary_search_by(|x| x.begin.cmp(&begin)) {
|
|
||||||
Ok(_) => true,
|
|
||||||
Err(j) => {
|
|
||||||
(j > 0 && old_todo[j - 1].begin < end && begin < old_todo[j - 1].end)
|
|
||||||
|| (j < old_todo.len()
|
|
||||||
&& old_todo[j].begin < end && begin < old_todo[j].end)
|
|
||||||
}
|
|
||||||
};
|
|
||||||
if was_todo || (is_ours && !was_ours) || (was_ours && !is_ours) {
|
|
||||||
new_todo.push(TodoPartition {
|
|
||||||
begin,
|
|
||||||
end,
|
|
||||||
retain: is_ours,
|
|
||||||
});
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
self.todo = new_todo;
|
|
||||||
}
|
|
||||||
|
|
||||||
fn pop_task(&mut self) -> Option<TodoPartition> {
|
|
||||||
if self.todo.is_empty() {
|
|
||||||
return None;
|
|
||||||
}
|
|
||||||
|
|
||||||
let i = rand::thread_rng().gen_range::<usize, _, _>(0, self.todo.len());
|
|
||||||
if i == self.todo.len() - 1 {
|
|
||||||
self.todo.pop()
|
|
||||||
} else {
|
|
||||||
let replacement = self.todo.pop().unwrap();
|
|
||||||
let ret = std::mem::replace(&mut self.todo[i], replacement);
|
|
||||||
Some(ret)
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
|
@ -13,29 +13,26 @@ path = "lib.rs"
|
||||||
# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
|
# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
|
||||||
|
|
||||||
[dependencies]
|
[dependencies]
|
||||||
rand = "0.7"
|
rand = "0.8"
|
||||||
hex = "0.3"
|
hex = "0.4"
|
||||||
sha2 = "0.8"
|
sha2 = "0.9"
|
||||||
blake2 = "0.9"
|
blake2 = "0.9"
|
||||||
err-derive = "0.2.3"
|
err-derive = "0.3"
|
||||||
log = "0.4"
|
log = "0.4"
|
||||||
fasthash = "0.4"
|
fasthash = "0.4"
|
||||||
|
|
||||||
sled = "0.34"
|
sled = "0.34"
|
||||||
|
|
||||||
toml = "0.5"
|
toml = "0.5"
|
||||||
rmp-serde = "0.14.3"
|
rmp-serde = "0.15"
|
||||||
serde = { version = "1.0", default-features = false, features = ["derive", "rc"] }
|
serde = { version = "1.0", default-features = false, features = ["derive", "rc"] }
|
||||||
serde_json = "1.0"
|
serde_json = "1.0"
|
||||||
|
chrono = "0.4"
|
||||||
|
|
||||||
futures = "0.3"
|
futures = "0.3"
|
||||||
futures-util = "0.3"
|
tokio = { version = "1.0", default-features = false, features = ["rt", "rt-multi-thread", "io-util", "net", "time", "macros", "sync", "signal", "fs"] }
|
||||||
tokio = { version = "0.2", default-features = false, features = ["rt-core", "rt-threaded", "io-driver", "net", "tcp", "time", "macros", "sync", "signal", "fs"] }
|
|
||||||
|
|
||||||
http = "0.2"
|
http = "0.2"
|
||||||
hyper = "0.13"
|
hyper = "0.14"
|
||||||
rustls = "0.17"
|
rustls = "0.19"
|
||||||
webpki = "0.21"
|
webpki = "0.21"
|
||||||
|
|
||||||
roxmltree = "0.11"
|
|
||||||
|
|
||||||
|
|
|
@ -1,12 +1,11 @@
|
||||||
use core::future::Future;
|
use core::future::Future;
|
||||||
use std::pin::Pin;
|
use std::pin::Pin;
|
||||||
|
|
||||||
use futures::future::join_all;
|
|
||||||
use futures::select;
|
|
||||||
use futures_util::future::*;
|
|
||||||
use std::sync::Arc;
|
use std::sync::Arc;
|
||||||
use tokio::sync::Mutex;
|
use std::time::Duration;
|
||||||
use tokio::sync::{mpsc, watch, Notify};
|
|
||||||
|
use futures::future::*;
|
||||||
|
use futures::select;
|
||||||
|
use tokio::sync::{mpsc, watch, Mutex};
|
||||||
|
|
||||||
use crate::error::Error;
|
use crate::error::Error;
|
||||||
|
|
||||||
|
@ -14,54 +13,106 @@ type JobOutput = Result<(), Error>;
|
||||||
type Job = Pin<Box<dyn Future<Output = JobOutput> + Send>>;
|
type Job = Pin<Box<dyn Future<Output = JobOutput> + Send>>;
|
||||||
|
|
||||||
pub struct BackgroundRunner {
|
pub struct BackgroundRunner {
|
||||||
n_runners: usize,
|
|
||||||
pub stop_signal: watch::Receiver<bool>,
|
pub stop_signal: watch::Receiver<bool>,
|
||||||
|
|
||||||
queue_in: mpsc::UnboundedSender<(Job, bool)>,
|
queue_in: mpsc::UnboundedSender<(Job, bool)>,
|
||||||
queue_out: Mutex<mpsc::UnboundedReceiver<(Job, bool)>>,
|
worker_in: mpsc::UnboundedSender<tokio::task::JoinHandle<()>>,
|
||||||
job_notify: Notify,
|
|
||||||
|
|
||||||
workers: Mutex<Vec<tokio::task::JoinHandle<()>>>,
|
|
||||||
}
|
}
|
||||||
|
|
||||||
impl BackgroundRunner {
|
impl BackgroundRunner {
|
||||||
pub fn new(n_runners: usize, stop_signal: watch::Receiver<bool>) -> Arc<Self> {
|
pub fn new(
|
||||||
|
n_runners: usize,
|
||||||
|
stop_signal: watch::Receiver<bool>,
|
||||||
|
) -> (Arc<Self>, tokio::task::JoinHandle<()>) {
|
||||||
|
let (worker_in, mut worker_out) = mpsc::unbounded_channel();
|
||||||
|
|
||||||
|
let stop_signal_2 = stop_signal.clone();
|
||||||
|
let await_all_done = tokio::spawn(async move {
|
||||||
|
loop {
|
||||||
|
let wkr = {
|
||||||
|
select! {
|
||||||
|
item = worker_out.recv().fuse() => {
|
||||||
|
match item {
|
||||||
|
Some(x) => x,
|
||||||
|
None => break,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
_ = tokio::time::sleep(Duration::from_secs(5)).fuse() => {
|
||||||
|
if *stop_signal_2.borrow() {
|
||||||
|
break;
|
||||||
|
} else {
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
};
|
||||||
|
if let Err(e) = wkr.await {
|
||||||
|
error!("Error while awaiting for worker: {}", e);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
});
|
||||||
|
|
||||||
let (queue_in, queue_out) = mpsc::unbounded_channel();
|
let (queue_in, queue_out) = mpsc::unbounded_channel();
|
||||||
Arc::new(Self {
|
let queue_out = Arc::new(Mutex::new(queue_out));
|
||||||
n_runners,
|
|
||||||
|
for i in 0..n_runners {
|
||||||
|
let queue_out = queue_out.clone();
|
||||||
|
let stop_signal = stop_signal.clone();
|
||||||
|
|
||||||
|
worker_in
|
||||||
|
.send(tokio::spawn(async move {
|
||||||
|
loop {
|
||||||
|
let (job, cancellable) = {
|
||||||
|
select! {
|
||||||
|
item = wait_job(&queue_out).fuse() => match item {
|
||||||
|
// We received a task, process it
|
||||||
|
Some(x) => x,
|
||||||
|
// We received a signal that no more tasks will ever be sent
|
||||||
|
// because the sending side was dropped. Exit now.
|
||||||
|
None => break,
|
||||||
|
},
|
||||||
|
_ = tokio::time::sleep(Duration::from_secs(5)).fuse() => {
|
||||||
|
if *stop_signal.borrow() {
|
||||||
|
// Nothing has been going on for 5 secs, and we are shutting
|
||||||
|
// down. Exit now.
|
||||||
|
break;
|
||||||
|
} else {
|
||||||
|
// Nothing is going on but we don't want to exit.
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
};
|
||||||
|
if cancellable && *stop_signal.borrow() {
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
if let Err(e) = job.await {
|
||||||
|
error!("Job failed: {}", e)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
info!("Background worker {} exiting", i);
|
||||||
|
}))
|
||||||
|
.unwrap();
|
||||||
|
}
|
||||||
|
|
||||||
|
let bgrunner = Arc::new(Self {
|
||||||
stop_signal,
|
stop_signal,
|
||||||
queue_in,
|
queue_in,
|
||||||
queue_out: Mutex::new(queue_out),
|
worker_in,
|
||||||
job_notify: Notify::new(),
|
});
|
||||||
workers: Mutex::new(Vec::new()),
|
(bgrunner, await_all_done)
|
||||||
})
|
|
||||||
}
|
|
||||||
|
|
||||||
pub async fn run(self: Arc<Self>) {
|
|
||||||
let mut workers = self.workers.lock().await;
|
|
||||||
for i in 0..self.n_runners {
|
|
||||||
workers.push(tokio::spawn(self.clone().runner(i)));
|
|
||||||
}
|
|
||||||
drop(workers);
|
|
||||||
|
|
||||||
let mut stop_signal = self.stop_signal.clone();
|
|
||||||
while let Some(exit_now) = stop_signal.recv().await {
|
|
||||||
if exit_now {
|
|
||||||
let mut workers = self.workers.lock().await;
|
|
||||||
let workers_vec = workers.drain(..).collect::<Vec<_>>();
|
|
||||||
join_all(workers_vec).await;
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Spawn a task to be run in background
|
||||||
pub fn spawn<T>(&self, job: T)
|
pub fn spawn<T>(&self, job: T)
|
||||||
where
|
where
|
||||||
T: Future<Output = JobOutput> + Send + 'static,
|
T: Future<Output = JobOutput> + Send + 'static,
|
||||||
{
|
{
|
||||||
let boxed: Job = Box::pin(job);
|
let boxed: Job = Box::pin(job);
|
||||||
let _: Result<_, _> = self.queue_in.clone().send((boxed, false));
|
self.queue_in
|
||||||
self.job_notify.notify();
|
.send((boxed, false))
|
||||||
|
.map_err(|_| "could not put job in queue")
|
||||||
|
.unwrap();
|
||||||
}
|
}
|
||||||
|
|
||||||
pub fn spawn_cancellable<T>(&self, job: T)
|
pub fn spawn_cancellable<T>(&self, job: T)
|
||||||
|
@ -69,56 +120,30 @@ impl BackgroundRunner {
|
||||||
T: Future<Output = JobOutput> + Send + 'static,
|
T: Future<Output = JobOutput> + Send + 'static,
|
||||||
{
|
{
|
||||||
let boxed: Job = Box::pin(job);
|
let boxed: Job = Box::pin(job);
|
||||||
let _: Result<_, _> = self.queue_in.clone().send((boxed, true));
|
self.queue_in
|
||||||
self.job_notify.notify();
|
.send((boxed, true))
|
||||||
|
.map_err(|_| "could not put job in queue")
|
||||||
|
.unwrap();
|
||||||
}
|
}
|
||||||
|
|
||||||
pub async fn spawn_worker<F, T>(&self, name: String, worker: F)
|
pub fn spawn_worker<F, T>(&self, name: String, worker: F)
|
||||||
where
|
where
|
||||||
F: FnOnce(watch::Receiver<bool>) -> T + Send + 'static,
|
F: FnOnce(watch::Receiver<bool>) -> T + Send + 'static,
|
||||||
T: Future<Output = JobOutput> + Send + 'static,
|
T: Future<Output = ()> + Send + 'static,
|
||||||
{
|
{
|
||||||
let mut workers = self.workers.lock().await;
|
|
||||||
let stop_signal = self.stop_signal.clone();
|
let stop_signal = self.stop_signal.clone();
|
||||||
workers.push(tokio::spawn(async move {
|
let task = tokio::spawn(async move {
|
||||||
if let Err(e) = worker(stop_signal).await {
|
info!("Worker started: {}", name);
|
||||||
error!("Worker stopped with error: {}, error: {}", name, e);
|
worker(stop_signal).await;
|
||||||
} else {
|
info!("Worker exited: {}", name);
|
||||||
info!("Worker exited successfully: {}", name);
|
});
|
||||||
}
|
self.worker_in
|
||||||
}));
|
.send(task)
|
||||||
}
|
.map_err(|_| "could not put job in queue")
|
||||||
|
.unwrap();
|
||||||
async fn runner(self: Arc<Self>, i: usize) {
|
|
||||||
let mut stop_signal = self.stop_signal.clone();
|
|
||||||
loop {
|
|
||||||
let must_exit: bool = *stop_signal.borrow();
|
|
||||||
if let Some(job) = self.dequeue_job(must_exit).await {
|
|
||||||
if let Err(e) = job.await {
|
|
||||||
error!("Job failed: {}", e)
|
|
||||||
}
|
|
||||||
} else {
|
|
||||||
if must_exit {
|
|
||||||
info!("Background runner {} exiting", i);
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
select! {
|
|
||||||
_ = self.job_notify.notified().fuse() => (),
|
|
||||||
_ = stop_signal.recv().fuse() => (),
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
async fn dequeue_job(&self, must_exit: bool) -> Option<Job> {
|
async fn wait_job(q: &Mutex<mpsc::UnboundedReceiver<(Job, bool)>>) -> Option<(Job, bool)> {
|
||||||
let mut queue = self.queue_out.lock().await;
|
q.lock().await.recv().await
|
||||||
while let Ok((job, cancellable)) = queue.try_recv() {
|
|
||||||
if cancellable && must_exit {
|
|
||||||
continue;
|
|
||||||
} else {
|
|
||||||
return Some(job);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
None
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
|
@ -2,7 +2,6 @@ use rand::Rng;
|
||||||
use serde::de::{self, Visitor};
|
use serde::de::{self, Visitor};
|
||||||
use serde::{Deserialize, Deserializer, Serialize, Serializer};
|
use serde::{Deserialize, Deserializer, Serialize, Serializer};
|
||||||
use std::fmt;
|
use std::fmt;
|
||||||
use std::time::{SystemTime, UNIX_EPOCH};
|
|
||||||
|
|
||||||
#[derive(Default, PartialOrd, Ord, Clone, Hash, PartialEq, Copy)]
|
#[derive(Default, PartialOrd, Ord, Clone, Hash, PartialEq, Copy)]
|
||||||
pub struct FixedBytes32([u8; 32]);
|
pub struct FixedBytes32([u8; 32]);
|
||||||
|
@ -71,6 +70,14 @@ impl FixedBytes32 {
|
||||||
pub fn to_vec(&self) -> Vec<u8> {
|
pub fn to_vec(&self) -> Vec<u8> {
|
||||||
self.0.to_vec()
|
self.0.to_vec()
|
||||||
}
|
}
|
||||||
|
pub fn try_from(by: &[u8]) -> Option<Self> {
|
||||||
|
if by.len() != 32 {
|
||||||
|
return None;
|
||||||
|
}
|
||||||
|
let mut ret = [0u8; 32];
|
||||||
|
ret.copy_from_slice(by);
|
||||||
|
Some(Self(ret))
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
pub type UUID = FixedBytes32;
|
pub type UUID = FixedBytes32;
|
||||||
|
@ -80,9 +87,9 @@ pub fn sha256sum(data: &[u8]) -> Hash {
|
||||||
use sha2::{Digest, Sha256};
|
use sha2::{Digest, Sha256};
|
||||||
|
|
||||||
let mut hasher = Sha256::new();
|
let mut hasher = Sha256::new();
|
||||||
hasher.input(data);
|
hasher.update(data);
|
||||||
let mut hash = [0u8; 32];
|
let mut hash = [0u8; 32];
|
||||||
hash.copy_from_slice(&hasher.result()[..]);
|
hash.copy_from_slice(&hasher.finalize()[..]);
|
||||||
hash.into()
|
hash.into()
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -111,13 +118,6 @@ pub fn gen_uuid() -> UUID {
|
||||||
rand::thread_rng().gen::<[u8; 32]>().into()
|
rand::thread_rng().gen::<[u8; 32]>().into()
|
||||||
}
|
}
|
||||||
|
|
||||||
pub fn now_msec() -> u64 {
|
|
||||||
SystemTime::now()
|
|
||||||
.duration_since(UNIX_EPOCH)
|
|
||||||
.expect("Fix your clock :o")
|
|
||||||
.as_millis() as u64
|
|
||||||
}
|
|
||||||
|
|
||||||
// RMP serialization with names of fields and variants
|
// RMP serialization with names of fields and variants
|
||||||
|
|
||||||
pub fn rmp_to_vec_all_named<T>(val: &T) -> Result<Vec<u8>, rmp_serde::encode::Error>
|
pub fn rmp_to_vec_all_named<T>(val: &T) -> Result<Vec<u8>, rmp_serde::encode::Error>
|
||||||
|
|
|
@ -8,16 +8,22 @@ use crate::data::*;
|
||||||
pub enum RPCError {
|
pub enum RPCError {
|
||||||
#[error(display = "Node is down: {:?}.", _0)]
|
#[error(display = "Node is down: {:?}.", _0)]
|
||||||
NodeDown(UUID),
|
NodeDown(UUID),
|
||||||
|
|
||||||
#[error(display = "Timeout: {}", _0)]
|
#[error(display = "Timeout: {}", _0)]
|
||||||
Timeout(#[error(source)] tokio::time::Elapsed),
|
Timeout(#[error(source)] tokio::time::error::Elapsed),
|
||||||
|
|
||||||
#[error(display = "HTTP error: {}", _0)]
|
#[error(display = "HTTP error: {}", _0)]
|
||||||
HTTP(#[error(source)] http::Error),
|
HTTP(#[error(source)] http::Error),
|
||||||
|
|
||||||
#[error(display = "Hyper error: {}", _0)]
|
#[error(display = "Hyper error: {}", _0)]
|
||||||
Hyper(#[error(source)] hyper::Error),
|
Hyper(#[error(source)] hyper::Error),
|
||||||
|
|
||||||
#[error(display = "Messagepack encode error: {}", _0)]
|
#[error(display = "Messagepack encode error: {}", _0)]
|
||||||
RMPEncode(#[error(source)] rmp_serde::encode::Error),
|
RMPEncode(#[error(source)] rmp_serde::encode::Error),
|
||||||
|
|
||||||
#[error(display = "Messagepack decode error: {}", _0)]
|
#[error(display = "Messagepack decode error: {}", _0)]
|
||||||
RMPDecode(#[error(source)] rmp_serde::decode::Error),
|
RMPDecode(#[error(source)] rmp_serde::decode::Error),
|
||||||
|
|
||||||
#[error(display = "Too many errors: {:?}", _0)]
|
#[error(display = "Too many errors: {:?}", _0)]
|
||||||
TooManyErrors(Vec<String>),
|
TooManyErrors(Vec<String>),
|
||||||
}
|
}
|
||||||
|
|
|
@ -5,3 +5,4 @@ pub mod background;
|
||||||
pub mod config;
|
pub mod config;
|
||||||
pub mod data;
|
pub mod data;
|
||||||
pub mod error;
|
pub mod error;
|
||||||
|
pub mod time;
|
||||||
|
|
16
src/util/time.rs
Normal file
|
@ -0,0 +1,16 @@
|
||||||
|
use chrono::{SecondsFormat, TimeZone, Utc};
|
||||||
|
use std::time::{SystemTime, UNIX_EPOCH};
|
||||||
|
|
||||||
|
pub fn now_msec() -> u64 {
|
||||||
|
SystemTime::now()
|
||||||
|
.duration_since(UNIX_EPOCH)
|
||||||
|
.expect("Fix your clock :o")
|
||||||
|
.as_millis() as u64
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn msec_to_rfc3339(msecs: u64) -> String {
|
||||||
|
let secs = msecs as i64 / 1000;
|
||||||
|
let nanos = (msecs as i64 % 1000) as u32 * 1_000_000;
|
||||||
|
let timestamp = Utc.timestamp(secs, nanos);
|
||||||
|
timestamp.to_rfc3339_opts(SecondsFormat::Secs, true)
|
||||||
|
}
|
|
@ -18,11 +18,10 @@ garage_table = { version = "0.1.1", path = "../table" }
|
||||||
garage_model = { version = "0.1.1", path = "../model" }
|
garage_model = { version = "0.1.1", path = "../model" }
|
||||||
garage_api = { version = "0.1.1", path = "../api" }
|
garage_api = { version = "0.1.1", path = "../api" }
|
||||||
|
|
||||||
err-derive = "0.2.3"
|
err-derive = "0.3"
|
||||||
log = "0.4"
|
log = "0.4"
|
||||||
futures = "0.3"
|
futures = "0.3"
|
||||||
http = "0.2"
|
http = "0.2"
|
||||||
hyper = "0.13"
|
hyper = "0.14"
|
||||||
percent-encoding = "2.1.0"
|
percent-encoding = "2.1.0"
|
||||||
roxmltree = "0.11"
|
|
||||||
idna = "0.2"
|
idna = "0.2"
|
||||||
|
|