Improve how node roles are assigned in Garage
- change the terminology: the network configuration becomes the "role table", and the configuration of a node becomes the node's "role"
- modify the role table in two steps: changes are first staged in a CRDT data structure; then, once the user is happy with them, they can commit them all at once (or revert them)
- update documentation
- fix tests
- implement a smarter partition assignment algorithm

This patch breaks the format of the network configuration: when migrating, the cluster will be in a state where no roles are assigned. All roles must be re-assigned and committed at once. This migration should not pose an issue.
This commit is contained in: parent 53888995bd, commit c94406f428.

42 changed files with 1430 additions and 557 deletions.
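The two-step layout workflow introduced by this change looks like this from the CLI, using the commands documented in the pages modified below (a minimal sketch; the node ID prefix, zone, capacity, tag and version number are illustrative):

```bash
# Stage a role change for a node; nothing is applied to the cluster yet
garage layout assign -z par1 -c 10 -t mercury 563e

# Inspect the currently applied layout and the staged changes
garage layout show

# Commit all staged changes as the next layout version, or discard them
garage layout apply --version 1
# garage layout revert --version 1
```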
Cargo.lock: 15 changes (generated)

````diff
@@ -379,7 +379,7 @@ dependencies = [
 
 [[package]]
 name = "garage"
-version = "0.4.0"
+version = "0.5.0"
 dependencies = [
  "async-trait",
  "bytes 1.1.0",
@@ -408,7 +408,7 @@ dependencies = [
 
 [[package]]
 name = "garage_api"
-version = "0.4.0"
+version = "0.5.0"
 dependencies = [
  "base64",
  "bytes 1.1.0",
@@ -440,7 +440,7 @@ dependencies = [
 
 [[package]]
 name = "garage_model"
-version = "0.4.0"
+version = "0.5.0"
 dependencies = [
  "arc-swap",
  "async-trait",
@@ -462,7 +462,7 @@ dependencies = [
 
 [[package]]
 name = "garage_rpc"
-version = "0.4.0"
+version = "0.5.0"
 dependencies = [
  "arc-swap",
  "async-trait",
@@ -479,6 +479,7 @@ dependencies = [
  "rand",
  "rmp-serde 0.15.5",
  "serde",
+ "serde_bytes",
  "serde_json",
  "tokio",
  "tokio-stream",
@@ -486,7 +487,7 @@ dependencies = [
 
 [[package]]
 name = "garage_table"
-version = "0.4.0"
+version = "0.5.0"
 dependencies = [
  "async-trait",
  "bytes 1.1.0",
@@ -506,7 +507,7 @@ dependencies = [
 
 [[package]]
 name = "garage_util"
-version = "0.4.0"
+version = "0.5.0"
 dependencies = [
  "blake2",
  "chrono",
@@ -530,7 +531,7 @@ dependencies = [
 
 [[package]]
 name = "garage_web"
-version = "0.4.0"
+version = "0.5.0"
 dependencies = [
  "err-derive 0.3.0",
  "futures",
````
Cargo.nix: 83 changes

````diff
@@ -40,13 +40,13 @@ in
 {
   cargo2nixVersion = "0.9.0";
   workspace = {
-    garage_util = rustPackages.unknown.garage_util."0.4.0";
-    garage_rpc = rustPackages.unknown.garage_rpc."0.4.0";
-    garage_table = rustPackages.unknown.garage_table."0.4.0";
-    garage_model = rustPackages.unknown.garage_model."0.4.0";
-    garage_api = rustPackages.unknown.garage_api."0.4.0";
-    garage_web = rustPackages.unknown.garage_web."0.4.0";
-    garage = rustPackages.unknown.garage."0.4.0";
+    garage_util = rustPackages.unknown.garage_util."0.5.0";
+    garage_rpc = rustPackages.unknown.garage_rpc."0.5.0";
+    garage_table = rustPackages.unknown.garage_table."0.5.0";
+    garage_model = rustPackages.unknown.garage_model."0.5.0";
+    garage_api = rustPackages.unknown.garage_api."0.5.0";
+    garage_web = rustPackages.unknown.garage_web."0.5.0";
+    garage = rustPackages.unknown.garage."0.5.0";
   };
   "registry+https://github.com/rust-lang/crates.io-index".aho-corasick."0.7.18" = overridableMkRustCrate (profileName: rec {
     name = "aho-corasick";
@@ -246,7 +246,7 @@ in
     registry = "registry+https://github.com/rust-lang/crates.io-index";
     src = fetchCratesIo { inherit name version; sha256 = "95059428f66df56b63431fdb4e1947ed2190586af5c5a8a8b71122bdf5a7f469"; };
     dependencies = {
-      ${ if hostPlatform.config == "aarch64-apple-darwin" || hostPlatform.parsed.cpu.name == "aarch64" && hostPlatform.parsed.kernel.name == "linux" then "libc" else null } = rustPackages."registry+https://github.com/rust-lang/crates.io-index".libc."0.2.103" { inherit profileName; };
+      ${ if hostPlatform.parsed.cpu.name == "aarch64" && hostPlatform.parsed.kernel.name == "linux" || hostPlatform.config == "aarch64-apple-darwin" then "libc" else null } = rustPackages."registry+https://github.com/rust-lang/crates.io-index".libc."0.2.103" { inherit profileName; };
     };
   });
 
@@ -606,9 +606,9 @@ in
     };
   });
 
-  "unknown".garage."0.4.0" = overridableMkRustCrate (profileName: rec {
+  "unknown".garage."0.5.0" = overridableMkRustCrate (profileName: rec {
     name = "garage";
-    version = "0.4.0";
+    version = "0.5.0";
     registry = "unknown";
     src = fetchCrateLocal (workspaceSrc + "/src/garage");
     dependencies = {
@@ -616,12 +616,12 @@
       bytes = rustPackages."registry+https://github.com/rust-lang/crates.io-index".bytes."1.1.0" { inherit profileName; };
       futures = rustPackages."registry+https://github.com/rust-lang/crates.io-index".futures."0.3.17" { inherit profileName; };
       futures_util = rustPackages."registry+https://github.com/rust-lang/crates.io-index".futures-util."0.3.17" { inherit profileName; };
-      garage_api = rustPackages."unknown".garage_api."0.4.0" { inherit profileName; };
-      garage_model = rustPackages."unknown".garage_model."0.4.0" { inherit profileName; };
-      garage_rpc = rustPackages."unknown".garage_rpc."0.4.0" { inherit profileName; };
-      garage_table = rustPackages."unknown".garage_table."0.4.0" { inherit profileName; };
-      garage_util = rustPackages."unknown".garage_util."0.4.0" { inherit profileName; };
-      garage_web = rustPackages."unknown".garage_web."0.4.0" { inherit profileName; };
+      garage_api = rustPackages."unknown".garage_api."0.5.0" { inherit profileName; };
+      garage_model = rustPackages."unknown".garage_model."0.5.0" { inherit profileName; };
+      garage_rpc = rustPackages."unknown".garage_rpc."0.5.0" { inherit profileName; };
+      garage_table = rustPackages."unknown".garage_table."0.5.0" { inherit profileName; };
+      garage_util = rustPackages."unknown".garage_util."0.5.0" { inherit profileName; };
+      garage_web = rustPackages."unknown".garage_web."0.5.0" { inherit profileName; };
       git_version = rustPackages."registry+https://github.com/rust-lang/crates.io-index".git-version."0.3.5" { inherit profileName; };
       hex = rustPackages."registry+https://github.com/rust-lang/crates.io-index".hex."0.4.3" { inherit profileName; };
       sodiumoxide = rustPackages."registry+https://github.com/rust-lang/crates.io-index".kuska-sodiumoxide."0.2.5-0" { inherit profileName; };
@@ -638,9 +638,9 @@
     };
   });
 
-  "unknown".garage_api."0.4.0" = overridableMkRustCrate (profileName: rec {
+  "unknown".garage_api."0.5.0" = overridableMkRustCrate (profileName: rec {
     name = "garage_api";
-    version = "0.4.0";
+    version = "0.5.0";
     registry = "unknown";
     src = fetchCrateLocal (workspaceSrc + "/src/api");
     dependencies = {
@@ -651,9 +651,9 @@
      err_derive = buildRustPackages."registry+https://github.com/rust-lang/crates.io-index".err-derive."0.3.0" { profileName = "__noProfile"; };
      futures = rustPackages."registry+https://github.com/rust-lang/crates.io-index".futures."0.3.17" { inherit profileName; };
      futures_util = rustPackages."registry+https://github.com/rust-lang/crates.io-index".futures-util."0.3.17" { inherit profileName; };
-      garage_model = rustPackages."unknown".garage_model."0.4.0" { inherit profileName; };
-      garage_table = rustPackages."unknown".garage_table."0.4.0" { inherit profileName; };
-      garage_util = rustPackages."unknown".garage_util."0.4.0" { inherit profileName; };
+      garage_model = rustPackages."unknown".garage_model."0.5.0" { inherit profileName; };
+      garage_table = rustPackages."unknown".garage_table."0.5.0" { inherit profileName; };
+      garage_util = rustPackages."unknown".garage_util."0.5.0" { inherit profileName; };
       hex = rustPackages."registry+https://github.com/rust-lang/crates.io-index".hex."0.4.3" { inherit profileName; };
       hmac = rustPackages."registry+https://github.com/rust-lang/crates.io-index".hmac."0.10.1" { inherit profileName; };
       http = rustPackages."registry+https://github.com/rust-lang/crates.io-index".http."0.2.5" { inherit profileName; };
@@ -673,9 +673,9 @@
     };
   });
 
-  "unknown".garage_model."0.4.0" = overridableMkRustCrate (profileName: rec {
+  "unknown".garage_model."0.5.0" = overridableMkRustCrate (profileName: rec {
     name = "garage_model";
-    version = "0.4.0";
+    version = "0.5.0";
     registry = "unknown";
     src = fetchCrateLocal (workspaceSrc + "/src/model");
     dependencies = {
@@ -683,9 +683,9 @@
       async_trait = buildRustPackages."registry+https://github.com/rust-lang/crates.io-index".async-trait."0.1.51" { profileName = "__noProfile"; };
       futures = rustPackages."registry+https://github.com/rust-lang/crates.io-index".futures."0.3.17" { inherit profileName; };
       futures_util = rustPackages."registry+https://github.com/rust-lang/crates.io-index".futures-util."0.3.17" { inherit profileName; };
-      garage_rpc = rustPackages."unknown".garage_rpc."0.4.0" { inherit profileName; };
-      garage_table = rustPackages."unknown".garage_table."0.4.0" { inherit profileName; };
-      garage_util = rustPackages."unknown".garage_util."0.4.0" { inherit profileName; };
+      garage_rpc = rustPackages."unknown".garage_rpc."0.5.0" { inherit profileName; };
+      garage_table = rustPackages."unknown".garage_table."0.5.0" { inherit profileName; };
+      garage_util = rustPackages."unknown".garage_util."0.5.0" { inherit profileName; };
       hex = rustPackages."registry+https://github.com/rust-lang/crates.io-index".hex."0.4.3" { inherit profileName; };
       log = rustPackages."registry+https://github.com/rust-lang/crates.io-index".log."0.4.14" { inherit profileName; };
       netapp = rustPackages."registry+https://github.com/rust-lang/crates.io-index".netapp."0.3.0" { inherit profileName; };
@@ -698,9 +698,9 @@
     };
   });
 
-  "unknown".garage_rpc."0.4.0" = overridableMkRustCrate (profileName: rec {
+  "unknown".garage_rpc."0.5.0" = overridableMkRustCrate (profileName: rec {
     name = "garage_rpc";
-    version = "0.4.0";
+    version = "0.5.0";
     registry = "unknown";
     src = fetchCrateLocal (workspaceSrc + "/src/rpc");
     dependencies = {
@@ -709,7 +709,7 @@
       bytes = rustPackages."registry+https://github.com/rust-lang/crates.io-index".bytes."1.1.0" { inherit profileName; };
       futures = rustPackages."registry+https://github.com/rust-lang/crates.io-index".futures."0.3.17" { inherit profileName; };
       futures_util = rustPackages."registry+https://github.com/rust-lang/crates.io-index".futures-util."0.3.17" { inherit profileName; };
-      garage_util = rustPackages."unknown".garage_util."0.4.0" { inherit profileName; };
+      garage_util = rustPackages."unknown".garage_util."0.5.0" { inherit profileName; };
       gethostname = rustPackages."registry+https://github.com/rust-lang/crates.io-index".gethostname."0.2.1" { inherit profileName; };
       hex = rustPackages."registry+https://github.com/rust-lang/crates.io-index".hex."0.4.3" { inherit profileName; };
       hyper = rustPackages."registry+https://github.com/rust-lang/crates.io-index".hyper."0.14.13" { inherit profileName; };
@@ -719,15 +719,16 @@
       rand = rustPackages."registry+https://github.com/rust-lang/crates.io-index".rand."0.8.4" { inherit profileName; };
       rmp_serde = rustPackages."registry+https://github.com/rust-lang/crates.io-index".rmp-serde."0.15.5" { inherit profileName; };
       serde = rustPackages."registry+https://github.com/rust-lang/crates.io-index".serde."1.0.130" { inherit profileName; };
+      serde_bytes = rustPackages."registry+https://github.com/rust-lang/crates.io-index".serde_bytes."0.11.5" { inherit profileName; };
       serde_json = rustPackages."registry+https://github.com/rust-lang/crates.io-index".serde_json."1.0.68" { inherit profileName; };
       tokio = rustPackages."registry+https://github.com/rust-lang/crates.io-index".tokio."1.12.0" { inherit profileName; };
       tokio_stream = rustPackages."registry+https://github.com/rust-lang/crates.io-index".tokio-stream."0.1.7" { inherit profileName; };
     };
   });
 
-  "unknown".garage_table."0.4.0" = overridableMkRustCrate (profileName: rec {
+  "unknown".garage_table."0.5.0" = overridableMkRustCrate (profileName: rec {
     name = "garage_table";
-    version = "0.4.0";
+    version = "0.5.0";
     registry = "unknown";
     src = fetchCrateLocal (workspaceSrc + "/src/table");
     dependencies = {
@@ -735,8 +736,8 @@
       bytes = rustPackages."registry+https://github.com/rust-lang/crates.io-index".bytes."1.1.0" { inherit profileName; };
       futures = rustPackages."registry+https://github.com/rust-lang/crates.io-index".futures."0.3.17" { inherit profileName; };
       futures_util = rustPackages."registry+https://github.com/rust-lang/crates.io-index".futures-util."0.3.17" { inherit profileName; };
-      garage_rpc = rustPackages."unknown".garage_rpc."0.4.0" { inherit profileName; };
-      garage_util = rustPackages."unknown".garage_util."0.4.0" { inherit profileName; };
+      garage_rpc = rustPackages."unknown".garage_rpc."0.5.0" { inherit profileName; };
+      garage_util = rustPackages."unknown".garage_util."0.5.0" { inherit profileName; };
       hexdump = rustPackages."registry+https://github.com/rust-lang/crates.io-index".hexdump."0.1.1" { inherit profileName; };
       log = rustPackages."registry+https://github.com/rust-lang/crates.io-index".log."0.4.14" { inherit profileName; };
       rand = rustPackages."registry+https://github.com/rust-lang/crates.io-index".rand."0.8.4" { inherit profileName; };
@@ -748,9 +749,9 @@
     };
   });
 
-  "unknown".garage_util."0.4.0" = overridableMkRustCrate (profileName: rec {
+  "unknown".garage_util."0.5.0" = overridableMkRustCrate (profileName: rec {
     name = "garage_util";
-    version = "0.4.0";
+    version = "0.5.0";
     registry = "unknown";
     src = fetchCrateLocal (workspaceSrc + "/src/util");
     dependencies = {
@@ -775,18 +776,18 @@
     };
   });
 
-  "unknown".garage_web."0.4.0" = overridableMkRustCrate (profileName: rec {
+  "unknown".garage_web."0.5.0" = overridableMkRustCrate (profileName: rec {
     name = "garage_web";
-    version = "0.4.0";
+    version = "0.5.0";
     registry = "unknown";
     src = fetchCrateLocal (workspaceSrc + "/src/web");
     dependencies = {
       err_derive = buildRustPackages."registry+https://github.com/rust-lang/crates.io-index".err-derive."0.3.0" { profileName = "__noProfile"; };
       futures = rustPackages."registry+https://github.com/rust-lang/crates.io-index".futures."0.3.17" { inherit profileName; };
-      garage_api = rustPackages."unknown".garage_api."0.4.0" { inherit profileName; };
-      garage_model = rustPackages."unknown".garage_model."0.4.0" { inherit profileName; };
-      garage_table = rustPackages."unknown".garage_table."0.4.0" { inherit profileName; };
-      garage_util = rustPackages."unknown".garage_util."0.4.0" { inherit profileName; };
+      garage_api = rustPackages."unknown".garage_api."0.5.0" { inherit profileName; };
+      garage_model = rustPackages."unknown".garage_model."0.5.0" { inherit profileName; };
+      garage_table = rustPackages."unknown".garage_table."0.5.0" { inherit profileName; };
+      garage_util = rustPackages."unknown".garage_util."0.5.0" { inherit profileName; };
       http = rustPackages."registry+https://github.com/rust-lang/crates.io-index".http."0.2.5" { inherit profileName; };
       hyper = rustPackages."registry+https://github.com/rust-lang/crates.io-index".hyper."0.14.13" { inherit profileName; };
       log = rustPackages."registry+https://github.com/rust-lang/crates.io-index".log."0.4.14" { inherit profileName; };
````
README.md: 12 changes

````diff
@@ -3,10 +3,18 @@ Garage [![Build Status](https://drone.deuxfleurs.fr/api/badges/Deuxfleurs/garage
 
 <p align="center" style="text-align:center;">
 <a href="https://garagehq.deuxfleurs.fr">
-<img alt="Garage logo" src="doc/logo/garage.png" height="200" />
+<img alt="Garage logo" src="https://garagehq.deuxfleurs.fr/img/logo.svg" height="200" />
 </a>
 </p>
 
+<p align="center" style="text-align:center;">
+[ <strong><a href="https://garagehq.deuxfleurs.fr/">Website and documentation</a></strong>
+| <a href="https://garagehq.deuxfleurs.fr/_releases.html">Binary releases</a>
+| <a href="https://git.deuxfleurs.fr/Deuxfleurs/garage">Git repository</a>
+| <a href="https://matrix.to/#/%23garage:deuxfleurs.fr">Matrix channel</a>
+]
+</p>
+
 Garage is a lightweight S3-compatible distributed object store, with the following goals:
 
 - As self-contained as possible
@@ -22,5 +30,3 @@ Non-goals include:
 - Erasure coding (our replication model is simply to copy the data as is on several nodes, in different datacenters if possible)
 
 Our main use case is to provide a distributed storage layer for small-scale self hosted services such as [Deuxfleurs](https://deuxfleurs.fr).
-
-**[Go to the documentation](https://garagehq.deuxfleurs.fr)**
````
doc/book/src/SUMMARY.md

````diff
@@ -5,12 +5,12 @@
 - [Quick start](./quick_start/index.md)
 
 - [Cookbook](./cookbook/index.md)
+  - [Multi-node deployment](./cookbook/real_world.md)
   - [Building from source](./cookbook/from_source.md)
   - [Integration with systemd](./cookbook/systemd.md)
   - [Gateways](./cookbook/gateways.md)
   - [Exposing buckets as websites](./cookbook/exposing_websites.md)
   - [Configuring a reverse proxy](./cookbook/reverse_proxy.md)
-  - [Production Deployment](./cookbook/real_world.md)
   - [Recovering from failures](./cookbook/recovering.md)
 
 - [Integrations](./connect/index.md)
@@ -25,6 +25,7 @@
 
 - [Reference Manual](./reference_manual/index.md)
   - [Garage configuration file](./reference_manual/configuration.md)
+  - [Cluster layout management](./reference_manual/layout.md)
   - [Garage CLI](./reference_manual/cli.md)
   - [S3 compatibility status](./reference_manual/s3_compatibility.md)
````
doc/book/src/cookbook/gateways.md

````diff
@@ -21,7 +21,9 @@ Currently it will not work with minio client. Follow issue [#64](https://git.deu
 The instructions are similar to a regular node, the only option that is different is while configuring the node, you must set the `--gateway` parameter:
 
 ```bash
-garage node configure --gateway --tag gw1 xxxx
+garage layout assign --gateway --tag gw1 <node_id>
+garage layout show    # review the changes you are making
+garage layout apply   # once satisfied, apply the changes
 ```
 
 Then use `http://localhost:3900` when a S3 endpoint is required:
@@ -29,3 +31,9 @@ Then use `http://localhost:3900` when a S3 endpoint is required:
 ```bash
 aws --endpoint-url http://127.0.0.1:3900 s3 ls
 ```
+
+If a newly added gateway node seems to not be working, do a full table resync to ensure that bucket and key list are correctly propagated:
+
+```bash
+garage repair -a --yes tables
+```
````
doc/book/src/cookbook/real_world.md

````diff
@@ -41,15 +41,15 @@ For our example, we will suppose the following infrastructure with IPv6 connecti
 
 ## Get a Docker image
 
-Our docker image is currently named `lxpz/garage_amd64` and is stored on the [Docker Hub](https://hub.docker.com/r/lxpz/garage_amd64/tags?page=1&ordering=last_updated).
+Our docker image is currently named `dxflrs/amd64_garage` and is stored on the [Docker Hub](https://hub.docker.com/r/dxflrs/amd64_garage/tags?page=1&ordering=last_updated).
 We encourage you to use a fixed tag (eg. `v0.4.0`) and not the `latest` tag.
 For this example, we will use the latest published version at the time of the writing which is `v0.4.0` but it's up to you
-to check [the most recent versions on the Docker Hub](https://hub.docker.com/r/lxpz/garage_amd64/tags?page=1&ordering=last_updated).
+to check [the most recent versions on the Docker Hub](https://hub.docker.com/r/dxflrs/amd64_garage/tags?page=1&ordering=last_updated).
 
 For example:
 
 ```
-sudo docker pull lxpz/garage_amd64:v0.4.0
+sudo docker pull dxflrs/amd64_garage:v0.4.0
 ```
 
 ## Deploying and configuring Garage
@@ -144,7 +144,7 @@ At this point, nodes are not yet talking to one another.
 Your output should therefore look like follows:
 
 ```
-Mercury$ garage node-id
+Mercury$ garage status
 ==== HEALTHY NODES ====
 ID                 Hostname  Address           Tag  Zone  Capacity
 563e1ac825ee3323…  Mercury   [fc00:1::1]:3901  NO ROLE ASSIGNED
@@ -157,14 +157,14 @@ When your Garage nodes first start, they will generate a local node identifier
 (based on a public/private key pair).
 
 To obtain the node identifier of a node, once it is generated,
-run `garage node-id`.
+run `garage node id`.
 This will print keys as follows:
 
 ```bash
-Mercury$ garage node-id
+Mercury$ garage node id
 563e1ac825ee3323aa441e72c26d1030d6d4414aeb3dd25287c531e7fc2bc95d@[fc00:1::1]:3901
 
-Venus$ garage node-id
+Venus$ garage node id
 86f0f26ae4afbd59aaf9cfb059eefac844951efd5b8caeec0d53f4ed6c85f332@[fc00:1::2]:3901
 
 etc.
@@ -191,20 +191,22 @@ ID Hostname Address Tag Zone Capa
 212f7572f0c89da9… Mars      [fc00:F::1]:3901  NO ROLE ASSIGNED
 ```
 
-## Giving roles to nodes
+## Creating a cluster layout
 
 We will now inform Garage of the disk space available on each node of the cluster
 as well as the zone (e.g. datacenter) in which each machine is located.
+This information is called the **cluster layout** and consists
+of a role that is assigned to each active cluster node.
 
 For our example, we will suppose we have the following infrastructure
 (Capacity, Identifier and Zone are specific values to Garage described in the following):
 
 | Location | Name    | Disk Space | `Capacity` | `Identifier` | `Zone`       |
 |----------|---------|------------|------------|--------------|--------------|
-| Paris    | Mercury | 1 To       | `2`        | `563e`       | `par1`       |
-| Paris    | Venus   | 2 To       | `4`        | `86f0`       | `par1`       |
-| London   | Earth   | 2 To       | `4`        | `6814`       | `lon1`       |
-| Brussels | Mars    | 1.5 To     | `3`        | `212f`       | `bru1`       |
+| Paris    | Mercury | 1 To       | `10`       | `563e`       | `par1`       |
+| Paris    | Venus   | 2 To       | `20`       | `86f0`       | `par1`       |
+| London   | Earth   | 2 To       | `20`       | `6814`       | `lon1`       |
+| Brussels | Mars    | 1.5 To     | `15`       | `212f`       | `bru1`       |
 
 #### Node identifiers
 
@@ -239,13 +241,9 @@ in order to provide high availability despite failure of a zone.
 
 Garage reasons on an abstract metric about disk storage that is named the *capacity* of a node.
 The capacity configured in Garage must be proportional to the disk space dedicated to the node.
-Due to the way the Garage allocation algorithm works, capacity values must
-be **integers**, and must be **as small as possible**, for instance with
-1 representing the size of your smallest server.
 
-Here we chose that 1 unit of capacity = 0.5 To, so that we can express servers of size
-1 To and 2 To, as wel as the intermediate size 1.5 To, with the integer values 2, 4 and
-3 respectively (see table above).
+Capacity values must be **integers** but can be given any signification.
+Here we chose that 1 unit of capacity = 100 GB.
 
 Note that the amount of data stored by Garage on each server may not be strictly proportional to
 its capacity value, as Garage will priorize having 3 copies of data in different zones,
@@ -257,13 +255,29 @@ have 66% chance of being stored by Venus and 33% chance of being stored by Mercu
 
 Given the information above, we will configure our cluster as follow:
 
-```
-garage node configure -z par1 -c 2 -t mercury 563e
-garage node configure -z par1 -c 4 -t venus 86f0
-garage node configure -z lon1 -c 4 -t earth 6814
-garage node configure -z bru1 -c 3 -t mars 212f
+```bash
+garage layout assign -z par1 -c 10 -t mercury 563e
+garage layout assign -z par1 -c 20 -t venus 86f0
+garage layout assign -z lon1 -c 20 -t earth 6814
+garage layout assign -z bru1 -c 15 -t mars 212f
 ```
+
+At this point, the changes in the cluster layout have not yet been applied.
+To show the new layout that will be applied, call:
+
+```bash
+garage layout show
+```
+
+Once you are satisfied with your new layout, apply it with:
+
+```bash
+garage layout apply
+```
+
+**WARNING:** if you want to use the layout modification commands in a script,
+make sure to read [this page](/reference_manual/layout.html) first.
 
 
 ## Using your Garage cluster
 
````
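As a quick sanity check of the proportionality rule quoted in the hunks above (the 66%/33% figures for the `par1` zone), here is the arithmetic with the capacities from the new table; illustrative only:

```bash
# Venus has capacity 20 and Mercury 10 in zone par1, so a given copy of a
# data block lands on Venus with probability 20/(20+10) = 2/3 and on
# Mercury with probability 1/3, i.e. roughly the 66% / 33% quoted above.
echo $(( 20 * 100 / (20 + 10) ))   # prints 66
```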
doc/book/src/cookbook/recovering.md

````diff
@@ -28,8 +28,10 @@ and you should instead use one of the methods detailed in the next sections.
 Removing a node is done with the following command:
 
-```
-garage node remove --yes <node_id>
+```bash
+garage layout remove <node_id>
+garage layout show    # review the changes you are making
+garage layout apply   # once satisfied, apply the changes
 ```
 
 (you can get the `node_id` of the failed node by running `garage status`)
@@ -50,7 +52,7 @@ We just need to tell Garage to get back all the data blocks and store them on th
 First, set up a new HDD to store Garage's data directory on the failed node, and restart Garage using
 the existing configuration. Then, run:
 
-```
+```bash
 garage repair -a --yes blocks
 ```
 
@@ -58,7 +60,7 @@ This will re-synchronize blocks of data that are missing to the new HDD, reading
 
 You can check on the advancement of this process by doing the following command:
 
-```
+```bash
 garage stats -a
 ```
 
@@ -94,9 +96,11 @@ The ID of the lost node should be shown in `garage status` in the section for di
 
 Then, replace the broken node by the new one, using:
 
-```
-garage node configure --replace <old_node_id> \
-    -c <capacity> -z <zone> -t <node_tag> <new_node_id>
+```bash
+garage layout assign <new_node_id> --replace <old_node_id> \
+    -c <capacity> -z <zone> -t <node_tag>
+garage layout show    # review the changes you are making
+garage layout apply   # once satisfied, apply the changes
 ```
 
 Garage will then start synchronizing all required data on the new node.
````
doc/book/src/intro.md

````diff
@@ -18,10 +18,18 @@ This very website is hosted using Garage. In other words: the doc is the PoC!
 
 # The Garage Geo-Distributed Data Store
 
-Garage is a lightweight geo-distributed data store.
-It comes from the observation that despite numerous object stores
-many people have broken data management policies (backup/replication on a single site or none at all).
-To promote better data management policies, we focused on the following **desirable properties**:
+Garage is a lightweight geo-distributed data store that implements the
+[Amazon S3](https://docs.aws.amazon.com/AmazonS3/latest/API/Welcome.html)
+object storage protocole. It enables applications to store large blobs such
+as pictures, video, images, documents, etc., in a redundant multi-node
+setting. S3 is versatile enough to also be used to publish a static
+website.
+
+Garage comes from the observation that despite the numerous existing
+implementation of object stores, many people have broken data management
+policies (backup/replication on a single site or none at all). To promote
+better data management policies, we focused on the following **desirable
+properties**:
 
 - **Self-contained & lightweight**: works everywhere and integrates well in existing environments to target [hyperconverged infrastructures](https://en.wikipedia.org/wiki/Hyper-converged_infrastructure).
 - **Highly resilient**: highly resilient to network failures, network latency, disk failures, sysadmin failures.
@@ -32,26 +40,19 @@ We also noted that the pursuit of some other goals are detrimental to our initia
 The following has been identified as **non-goals** (if these points matter to you, you should not use Garage):
 
 - **Extreme performances**: high performances constrain a lot the design and the infrastructure; we seek performances through minimalism only.
-- **Feature extensiveness**: complete implementation of the S3 API or any other API to make garage a drop-in replacement is not targeted as it could lead to decisions impacting our desirable properties.
+- **Feature extensiveness**: complete implementation of the S3 API or any other API to make Garage a drop-in replacement is not targeted as it could lead to decisions impacting our desirable properties.
 - **Storage optimizations**: erasure coding or any other coding technique both increase the difficulty of placing data and synchronizing; we limit ourselves to duplication.
 - **POSIX/Filesystem compatibility**: we do not aim at being POSIX compatible or to emulate any kind of filesystem. Indeed, in a distributed environment, such synchronizations are translated in network messages that impose severe constraints on the deployment.
 
-## Supported and planned protocols
-
-Garage speaks (or will speak) the following protocols:
-
-- [S3](https://docs.aws.amazon.com/AmazonS3/latest/API/Welcome.html) - *SUPPORTED* - Enable applications to store large blobs such as pictures, video, images, documents, etc. S3 is versatile enough to also be used to publish a static website.
-- [IMAP](https://github.com/go-pluto/pluto) - *PLANNED* - email storage is quite complex to get good performances.
-To keep performances optimal, most IMAP servers only support on-disk storage.
-We plan to add logic to Garage to make it a viable solution for email storage.
-- *More to come*
-
 ## Use Cases
 
-**[Deuxfleurs](https://deuxfleurs.fr):** Garage is used by Deuxfleurs which is a non-profit hosting organization.
-Especially, it is used to host their main website, this documentation and some of its members' blogs.
-Additionally, Garage is used as a [backend for Nextcloud](https://docs.nextcloud.com/server/20/admin_manual/configuration_files/primary_storage.html).
-Deuxfleurs also plans to use Garage as their [Matrix's media backend](https://github.com/matrix-org/synapse-s3-storage-provider) and as the backend of [OCIS](https://github.com/owncloud/ocis).
+**[Deuxfleurs](https://deuxfleurs.fr):** Garage is used by Deuxfleurs which
+is a non-profit hosting organization. Especially, it is used to host their
+main website, this documentation and some of its members' blogs.
+Deuxfleurs also uses Garage as their [Matrix's media
+backend](https://github.com/matrix-org/synapse-s3-storage-provider).
+Deuxfleurs also uses it in its continuous integration platform to store
+Drone's job logs and a Nix binary cache.
 
 *Are you using Garage? [Open a pull request](https://git.deuxfleurs.fr/Deuxfleurs/garage/) to add your organization here!*
````
doc/book/src/quick_start/index.md

````diff
@@ -6,22 +6,23 @@ and how to interact with it.
 Our goal is to introduce you to Garage's workflows.
 Following this guide is recommended before moving on to
-[configuring a real-world deployment](../cookbook/real_world.md).
+[configuring a multi-node cluster](../cookbook/real_world.md).
 
-Note that this kind of deployment should not be used in production, as it provides
-no redundancy for your data!
+Note that this kind of deployment should not be used in production,
+as it provides no redundancy for your data!
 
 ## Get a binary
 
 Download the latest Garage binary from the release pages on our repository:
 
-<https://git.deuxfleurs.fr/Deuxfleurs/garage/releases>
+<https://garagehq.deuxfleurs.fr/_releases.html>
 
 Place this binary somewhere in your `$PATH` so that you can invoke the `garage`
 command directly (for instance you can copy the binary in `/usr/local/bin`
 or in `~/.local/bin`).
 
 If a binary of the last version is not available for your architecture,
+or if you want a build customized for your system,
 you can [build Garage from source](../cookbook/from_source.md).
 
 
@@ -109,9 +110,9 @@ ID Hostname Address Tag Zone Capacit
 563e1ac825ee3323…  linuxbox  127.0.0.1:3901  NO ROLE ASSIGNED
 ```
 
-## Configuring your Garage node
+## Creating a cluster layout
 
-Configuring the nodes in a Garage deployment means informing Garage
+Creating a cluster layout for a Garage deployment means informing Garage
 of the disk space available on each node of the cluster
 as well as the zone (e.g. datacenter) each machine is located in.
 
@@ -119,14 +120,18 @@ For our test deployment, we are using only one node. The way in which we configu
 it does not matter, you can simply write:
 
 ```bash
-garage node configure -z dc1 -c 1 <node_id>
+garage layout assign -z dc1 -c 1 <node_id>
 ```
 
 where `<node_id>` corresponds to the identifier of the node shown by `garage status` (first column).
 You can enter simply a prefix of that identifier.
-For instance here you could write just `garage node configure -z dc1 -c 1 563e`.
+For instance here you could write just `garage layout assign -z dc1 -c 1 563e`.
 
+The layout then has to be applied to the cluster, using:
+
+```bash
+garage layout apply
+```
 
 ## Creating buckets and keys
 
@@ -197,7 +202,7 @@ Now that we have a bucket and a key, we need to give permissions to the key on t
 ```
 garage bucket allow \
   --read \
-  --write
+  --write \
   nextcloud-bucket \
   --key nextcloud-app-key
 ```
@@ -270,5 +275,5 @@ The following tools can also be used to send and recieve files from/to Garage:
 - [Cyberduck](https://cyberduck.io/)
 - [`s3cmd`](https://s3tools.org/s3cmd)
 
-Refer to the ["configuring clients"](../cookbook/clients.md) page to learn how to configure
-these clients to interact with a Garage server.
+Refer to the ["Integrations" section](../connect/index.md) to learn how to
+configure application and command line utilities to integrate with Garage.
````
doc/book/src/reference_manual/configuration.md

````diff
@@ -133,9 +133,9 @@ These peer identifiers have the following syntax:
 
 In the case where `rpc_public_addr` is correctly specified in the
 configuration file, the full identifier of a node including IP and port can
-be obtained by running `garage node-id` and then included directly in the
+be obtained by running `garage node id` and then included directly in the
 `bootstrap_peers` list of other nodes. Otherwise, only the node's public
-key will be returned by `garage node-id` and you will have to add the IP
+key will be returned by `garage node id` and you will have to add the IP
 yourself.
 
 #### `consul_host` and `consul_service_name`
````
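Reusing the sample identifier from the deployment guide changed earlier in this commit, the workflow described in this hunk looks like the following sketch (the identifier and address are the documentation's example values):

```bash
# On a node with rpc_public_addr set in its configuration file, this
# prints the full identifier, including IP and port:
garage node id
# 563e1ac825ee3323aa441e72c26d1030d6d4414aeb3dd25287c531e7fc2bc95d@[fc00:1::1]:3901

# That string can be pasted as-is into the `bootstrap_peers` list in the
# configuration file of the other nodes.
```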
doc/book/src/reference_manual/layout.md: 74 additions (new file)

````diff
@@ -0,0 +1,74 @@
+# Creating and updating a cluster layout
+
+The cluster layout in Garage is a table that assigns to each node a role in
+the cluster. The role of a node in Garage can either be a storage node with
+a certain capacity, or a gateway node that does not store data and is only
+used as an API entry point for faster cluster access.
+An introduction to building cluster layouts can be found in the [production deployment](/cookbook/real_world.md) page.
+
+## How cluster layouts work in Garage
+
+In Garage, a cluster layout is composed of the following components:
+
+- a table of roles assigned to nodes
+- a version number
+
+Garage nodes will always use the cluster layout with the highest version number.
+
+Garage nodes also maintain and synchronize between them a set of proposed role
+changes that haven't yet been applied. These changes will be applied (or
+canceled) in the next version of the layout
+
+The following commands insert modifications to the set of proposed role changes
+for the next layout version (but they do not create the new layout immediately):
+
+```bash
+garage layout assign [...]
+garage layout remove [...]
+```
+
+The following command can be used to inspect the layout that is currently set in the cluster
+and the changes proposed for the next layout version, if any:
+
+```bash
+garage layout show
+```
+
+The following commands create a new layout with the specified version number,
+that either takes into account the proposed changes or cancels them:
+
+```bash
+garage layout apply --version <new_version_number>
+garage layout revert --version <new_version_number>
+```
+
+The version number of the new layout to create must be 1 + the version number
+of the previous layout that existed in the cluster. The `apply` and `revert`
+commands will fail otherwise.
+
+## Warnings about Garage cluster layout management
+
+**Warning: never make several calls to `garage layout apply` or `garage layout
+revert` with the same value of the `--version` flag. Doing so can lead to the
+creation of several different layouts with the same version number, in which
+case your Garage cluster will become inconsistent until fixed.** If a call to
+`garage layout apply` or `garage layout revert` has failed and `garage layout
+show` indicates that a new layout with the given version number has not been
+set in the cluster, then it is fine to call the command again with the same
+version number.
+
+If you are using the `garage` CLI by typing individual commands in your
+shell, you shouldn't have much issues as long as you run commands one after
+the other and take care of checking the output of `garage layout show`
+before applying any changes.
+
+If you are using the `garage` CLI to script layout changes, follow the following recommendations:
+
+- Make all of your `garage` CLI calls to the same RPC host. Do not use the
+  `garage` CLI to connect to individual nodes to send them each a piece of the
+  layout changes you are making, as the changes propagate asynchronously
+  between nodes and might not all be taken into account at the time when the
+  new layout is applied.
+
+- **Only call `garage layout apply` once**, and call it **strictly after** all
+  of the `layout assign` and `layout remove` commands have returned.
````
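Following the recommendations at the end of this new page, a scripted layout change might take the shape sketched below; the configuration path, node IDs and version number are placeholders:

```bash
#!/bin/sh
set -e  # abort on the first failed command

# Send every call to the same RPC host: all commands here go through the
# same configuration file, hence the same node.
garage -c /etc/garage.toml layout assign -z dc1 -c 10 <node_id_1>
garage -c /etc/garage.toml layout assign -z dc2 -c 10 <node_id_2>
garage -c /etc/garage.toml layout remove <old_node_id>

# Review the staged changes, then call `layout apply` exactly once,
# strictly after all of the assign/remove commands have returned.
garage -c /etc/garage.toml layout show
garage -c /etc/garage.toml layout apply --version 2
```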
doc/book/src/working_documents/load_balancing.md

````diff
@@ -1,5 +1,7 @@
 # Load Balancing Data (planned for version 0.2)
 
+**This is being yet improved in release 0.5. The working document has not been updated yet, it still only applies to Garage 0.2 through 0.4.**
+
 I have conducted a quick study of different methods to load-balance data over different Garage nodes using consistent hashing.
 
 ## Requirements
````
script/dev-cluster.sh

````diff
@@ -69,7 +69,7 @@ done
 sleep 3
 # Establish connections between nodes
 for count in $(seq 1 3); do
-	NODE=$(garage -c /tmp/config.$count.toml node-id -q)
+	NODE=$(garage -c /tmp/config.$count.toml node id -q)
 	for count2 in $(seq 1 3); do
 		garage -c /tmp/config.$count2.toml node connect $NODE
 	done
````
|
@ -25,6 +25,7 @@ garage -c /tmp/config.1.toml status \
|
||||||
| grep 'NO ROLE' \
|
| grep 'NO ROLE' \
|
||||||
| grep -Po '^[0-9a-f]+' \
|
| grep -Po '^[0-9a-f]+' \
|
||||||
| while read id; do
|
| while read id; do
|
||||||
garage -c /tmp/config.1.toml node configure -z dc1 -c 1 $id
|
garage -c /tmp/config.1.toml layout assign $id -z dc1 -c 1
|
||||||
done
|
done
|
||||||
|
|
||||||
|
garage -c /tmp/config.1.toml layout apply --version 1
|
||||||
|
|
|
@@ -116,11 +116,11 @@ if [ -z "$SKIP_AWS" ]; then
 	echo "🧪 Website Testing"
 	echo "<h1>hello world</h1>" > /tmp/garage-index.html
 	aws s3 cp /tmp/garage-index.html s3://eprouvette/index.html
-	[ `curl -s -o /dev/null -w "%{http_code}" --header "Host: eprouvette.garage.tld" http://127.0.0.1:3923/ ` == 404 ]
+	[ `curl -s -o /dev/null -w "%{http_code}" --header "Host: eprouvette.garage.tld" http://127.0.0.1:3921/ ` == 404 ]
 	garage -c /tmp/config.1.toml bucket website --allow eprouvette
-	[ `curl -s -o /dev/null -w "%{http_code}" --header "Host: eprouvette.garage.tld" http://127.0.0.1:3923/ ` == 200 ]
+	[ `curl -s -o /dev/null -w "%{http_code}" --header "Host: eprouvette.garage.tld" http://127.0.0.1:3921/ ` == 200 ]
 	garage -c /tmp/config.1.toml bucket website --deny eprouvette
-	[ `curl -s -o /dev/null -w "%{http_code}" --header "Host: eprouvette.garage.tld" http://127.0.0.1:3923/ ` == 404 ]
+	[ `curl -s -o /dev/null -w "%{http_code}" --header "Host: eprouvette.garage.tld" http://127.0.0.1:3921/ ` == 404 ]
 	aws s3 rm s3://eprouvette/index.html
 	rm /tmp/garage-index.html
 fi
@@ -1,11 +1,12 @@
 [package]
 name = "garage_api"
-version = "0.4.0"
+version = "0.5.0"
 authors = ["Alex Auvolat <alex@adnab.me>"]
 edition = "2018"
 license = "AGPL-3.0"
 description = "S3 API server crate for the Garage object store"
 repository = "https://git.deuxfleurs.fr/Deuxfleurs/garage"
+readme = "../../README.md"

 [lib]
 path = "lib.rs"
@@ -13,9 +14,9 @@ path = "lib.rs"
 # See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html

 [dependencies]
-garage_model = { version = "0.4.0", path = "../model" }
-garage_table = { version = "0.4.0", path = "../table" }
-garage_util = { version = "0.4.0", path = "../util" }
+garage_model = { version = "0.5.0", path = "../model" }
+garage_table = { version = "0.5.0", path = "../table" }
+garage_util = { version = "0.5.0", path = "../util" }

 base64 = "0.13"
 bytes = "1.0"
@@ -1,11 +1,12 @@
 [package]
 name = "garage"
-version = "0.4.0"
+version = "0.5.0"
 authors = ["Alex Auvolat <alex@adnab.me>"]
 edition = "2018"
 license = "AGPL-3.0"
 description = "Garage, an S3-compatible distributed object store for self-hosted deployments"
 repository = "https://git.deuxfleurs.fr/Deuxfleurs/garage"
+readme = "../../README.md"

 [[bin]]
 name = "garage"
@@ -14,12 +15,12 @@ path = "main.rs"
 # See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html

 [dependencies]
-garage_api = { version = "0.4.0", path = "../api" }
-garage_model = { version = "0.4.0", path = "../model" }
-garage_rpc = { version = "0.4.0", path = "../rpc" }
-garage_table = { version = "0.4.0", path = "../table" }
-garage_util = { version = "0.4.0", path = "../util" }
-garage_web = { version = "0.4.0", path = "../web" }
+garage_api = { version = "0.5.0", path = "../api" }
+garage_model = { version = "0.5.0", path = "../model" }
+garage_rpc = { version = "0.5.0", path = "../rpc" }
+garage_table = { version = "0.5.0", path = "../table" }
+garage_util = { version = "0.5.0", path = "../util" }
+garage_web = { version = "0.5.0", path = "../web" }

 bytes = "1.0"
 git-version = "0.3.4"
@@ -339,7 +339,7 @@ impl AdminRpcHandler {

 		let mut failures = vec![];
 		let ring = self.garage.system.ring.borrow().clone();
-		for node in ring.config.members.keys() {
+		for node in ring.layout.node_ids().iter() {
 			let node = (*node).into();
 			let resp = self
 				.endpoint
@@ -383,7 +383,7 @@ impl AdminRpcHandler {
 		let mut ret = String::new();
 		let ring = self.garage.system.ring.borrow().clone();

-		for node in ring.config.members.keys() {
+		for node in ring.layout.node_ids().iter() {
 			let mut opt = opt.clone();
 			opt.all_nodes = false;
@@ -2,7 +2,7 @@ use std::collections::HashSet;

 use garage_util::error::*;

-use garage_rpc::ring::*;
+use garage_rpc::layout::*;
 use garage_rpc::system::*;
 use garage_rpc::*;

@@ -20,11 +20,8 @@ pub async fn cli_command_dispatch(
 		Command::Node(NodeOperation::Connect(connect_opt)) => {
 			cmd_connect(system_rpc_endpoint, rpc_host, connect_opt).await
 		}
-		Command::Node(NodeOperation::Configure(configure_opt)) => {
-			cmd_configure(system_rpc_endpoint, rpc_host, configure_opt).await
-		}
-		Command::Node(NodeOperation::Remove(remove_opt)) => {
-			cmd_remove(system_rpc_endpoint, rpc_host, remove_opt).await
-		}
+		Command::Layout(layout_opt) => {
+			cli_layout_command_dispatch(layout_opt, system_rpc_endpoint, rpc_host).await
+		}
 		Command::Bucket(bo) => {
 			cmd_admin(admin_rpc_endpoint, rpc_host, AdminRpc::BucketOperation(bo)).await
@@ -48,56 +45,60 @@ pub async fn cmd_status(rpc_cli: &Endpoint<SystemRpc, ()>, rpc_host: NodeID) ->
 		SystemRpc::ReturnKnownNodes(nodes) => nodes,
 		resp => return Err(Error::Message(format!("Invalid RPC response: {:?}", resp))),
 	};
-	let config = match rpc_cli
-		.call(&rpc_host, &SystemRpc::PullConfig, PRIO_NORMAL)
-		.await??
-	{
-		SystemRpc::AdvertiseConfig(cfg) => cfg,
-		resp => return Err(Error::Message(format!("Invalid RPC response: {:?}", resp))),
-	};
+	let layout = fetch_layout(rpc_cli, rpc_host).await?;

 	println!("==== HEALTHY NODES ====");
-	let mut healthy_nodes = vec!["ID\tHostname\tAddress\tTag\tZone\tCapacity".to_string()];
+	let mut healthy_nodes = vec!["ID\tHostname\tAddress\tTags\tZone\tCapacity".to_string()];
 	for adv in status.iter().filter(|adv| adv.is_up) {
-		if let Some(cfg) = config.members.get(&adv.id) {
-			healthy_nodes.push(format!(
-				"{id:?}\t{host}\t{addr}\t[{tag}]\t{zone}\t{capacity}",
-				id = adv.id,
-				host = adv.status.hostname,
-				addr = adv.addr,
-				tag = cfg.tag,
-				zone = cfg.zone,
-				capacity = cfg.capacity_string(),
-			));
-		} else {
-			healthy_nodes.push(format!(
-				"{id:?}\t{h}\t{addr}\tNO ROLE ASSIGNED",
-				id = adv.id,
-				h = adv.status.hostname,
-				addr = adv.addr,
-			));
+		match layout.roles.get(&adv.id) {
+			Some(NodeRoleV(Some(cfg))) => {
+				healthy_nodes.push(format!(
+					"{id:?}\t{host}\t{addr}\t[{tags}]\t{zone}\t{capacity}",
+					id = adv.id,
+					host = adv.status.hostname,
+					addr = adv.addr,
+					tags = cfg.tags.join(","),
+					zone = cfg.zone,
+					capacity = cfg.capacity_string(),
+				));
+			}
+			_ => {
+				let new_role = match layout.staging.get(&adv.id) {
+					Some(NodeRoleV(Some(_))) => "(pending)",
+					_ => "NO ROLE ASSIGNED",
+				};
+				healthy_nodes.push(format!(
+					"{id:?}\t{h}\t{addr}\t{new_role}",
+					id = adv.id,
+					h = adv.status.hostname,
+					addr = adv.addr,
+					new_role = new_role,
+				));
+			}
 		}
 	}
 	format_table(healthy_nodes);

 	let status_keys = status.iter().map(|adv| adv.id).collect::<HashSet<_>>();
 	let failure_case_1 = status.iter().any(|adv| !adv.is_up);
-	let failure_case_2 = config
-		.members
+	let failure_case_2 = layout
+		.roles
+		.items()
 		.iter()
-		.any(|(id, _)| !status_keys.contains(id));
+		.filter(|(_, _, v)| v.0.is_some())
+		.any(|(id, _, _)| !status_keys.contains(id));
 	if failure_case_1 || failure_case_2 {
 		println!("\n==== FAILED NODES ====");
 		let mut failed_nodes =
-			vec!["ID\tHostname\tAddress\tTag\tZone\tCapacity\tLast seen".to_string()];
+			vec!["ID\tHostname\tAddress\tTags\tZone\tCapacity\tLast seen".to_string()];
 		for adv in status.iter().filter(|adv| !adv.is_up) {
-			if let Some(cfg) = config.members.get(&adv.id) {
+			if let Some(NodeRoleV(Some(cfg))) = layout.roles.get(&adv.id) {
 				failed_nodes.push(format!(
-					"{id:?}\t{host}\t{addr}\t[{tag}]\t{zone}\t{capacity}\t{last_seen}",
+					"{id:?}\t{host}\t{addr}\t[{tags}]\t{zone}\t{capacity}\t{last_seen}",
 					id = adv.id,
 					host = adv.status.hostname,
 					addr = adv.addr,
-					tag = cfg.tag,
+					tags = cfg.tags.join(","),
 					zone = cfg.zone,
 					capacity = cfg.capacity_string(),
 					last_seen = adv
@@ -107,20 +108,28 @@ pub async fn cmd_status(rpc_cli: &Endpoint<SystemRpc, ()>, rpc_host: NodeID) ->
 				));
 			}
 		}
-		for (id, cfg) in config.members.iter() {
-			if !status_keys.contains(id) {
-				failed_nodes.push(format!(
-					"{id:?}\t??\t??\t[{tag}]\t{zone}\t{capacity}\tnever seen",
-					id = id,
-					tag = cfg.tag,
-					zone = cfg.zone,
-					capacity = cfg.capacity_string(),
-				));
+		for (id, _, role_v) in layout.roles.items().iter() {
+			if let NodeRoleV(Some(cfg)) = role_v {
+				if !status_keys.contains(id) {
+					failed_nodes.push(format!(
+						"{id:?}\t??\t??\t[{tags}]\t{zone}\t{capacity}\tnever seen",
+						id = id,
+						tags = cfg.tags.join(","),
+						zone = cfg.zone,
+						capacity = cfg.capacity_string(),
+					));
+				}
 			}
 		}
 		format_table(failed_nodes);
 	}

+	if print_staging_role_changes(&layout) {
+		println!();
+		println!("Please use `garage layout show` to check the proposed new layout and apply it.");
+		println!();
+	}
+
 	Ok(())
 }

@@ -141,115 +150,6 @@ pub async fn cmd_connect(
 	}
 }

-pub async fn cmd_configure(
-	rpc_cli: &Endpoint<SystemRpc, ()>,
-	rpc_host: NodeID,
-	args: ConfigureNodeOpt,
-) -> Result<(), Error> {
-	let status = match rpc_cli
-		.call(&rpc_host, &SystemRpc::GetKnownNodes, PRIO_NORMAL)
-		.await??
-	{
-		SystemRpc::ReturnKnownNodes(nodes) => nodes,
-		resp => return Err(Error::Message(format!("Invalid RPC response: {:?}", resp))),
-	};
-
-	let added_node = find_matching_node(status.iter().map(|adv| adv.id), &args.node_id)?;
-
-	let mut config = match rpc_cli
-		.call(&rpc_host, &SystemRpc::PullConfig, PRIO_NORMAL)
-		.await??
-	{
-		SystemRpc::AdvertiseConfig(cfg) => cfg,
-		resp => return Err(Error::Message(format!("Invalid RPC response: {:?}", resp))),
-	};
-
-	for replaced in args.replace.iter() {
-		let replaced_node = find_matching_node(config.members.keys().cloned(), replaced)?;
-		if config.members.remove(&replaced_node).is_none() {
-			return Err(Error::Message(format!(
-				"Cannot replace node {:?} as it is not in current configuration",
-				replaced_node
-			)));
-		}
-	}
-
-	if args.capacity.is_some() && args.gateway {
-		return Err(Error::Message(
-			"-c and -g are mutually exclusive, please configure node either with c>0 to act as a storage node or with -g to act as a gateway node".into()));
-	}
-	if args.capacity == Some(0) {
-		return Err(Error::Message("Invalid capacity value: 0".into()));
-	}
-
-	let new_entry = match config.members.get(&added_node) {
-		None => {
-			let capacity = match args.capacity {
-				Some(c) => Some(c),
-				None if args.gateway => None,
-				_ => return Err(Error::Message(
-					"Please specify a capacity with the -c flag, or set node explicitly as gateway with -g".into())),
-			};
-			NetworkConfigEntry {
-				zone: args.zone.ok_or("Please specifiy a zone with the -z flag")?,
-				capacity,
-				tag: args.tag.unwrap_or_default(),
-			}
-		}
-		Some(old) => {
-			let capacity = match args.capacity {
-				Some(c) => Some(c),
-				None if args.gateway => None,
-				_ => old.capacity,
-			};
-			NetworkConfigEntry {
-				zone: args.zone.unwrap_or_else(|| old.zone.to_string()),
-				capacity,
-				tag: args.tag.unwrap_or_else(|| old.tag.to_string()),
-			}
-		}
-	};
-
-	config.members.insert(added_node, new_entry);
-	config.version += 1;
-
-	rpc_cli
-		.call(&rpc_host, &SystemRpc::AdvertiseConfig(config), PRIO_NORMAL)
-		.await??;
-	Ok(())
-}
-
-pub async fn cmd_remove(
-	rpc_cli: &Endpoint<SystemRpc, ()>,
-	rpc_host: NodeID,
-	args: RemoveNodeOpt,
-) -> Result<(), Error> {
-	let mut config = match rpc_cli
-		.call(&rpc_host, &SystemRpc::PullConfig, PRIO_NORMAL)
-		.await??
-	{
-		SystemRpc::AdvertiseConfig(cfg) => cfg,
-		resp => return Err(Error::Message(format!("Invalid RPC response: {:?}", resp))),
-	};
-
-	let deleted_node = find_matching_node(config.members.keys().cloned(), &args.node_id)?;
-
-	if !args.yes {
-		return Err(Error::Message(format!(
-			"Add the flag --yes to really remove {:?} from the cluster",
-			deleted_node
-		)));
-	}
-
-	config.members.remove(&deleted_node);
-	config.version += 1;
-
-	rpc_cli
-		.call(&rpc_host, &SystemRpc::AdvertiseConfig(config), PRIO_NORMAL)
-		.await??;
-	Ok(())
-}
-
 pub async fn cmd_admin(
 	rpc_cli: &Endpoint<AdminRpc, ()>,
 	rpc_host: NodeID,
@@ -283,5 +183,3 @@ pub async fn cmd_admin(
 	}
 	Ok(())
 }
-
-// --- Utility functions ----
340 src/garage/cli/layout.rs Normal file

@@ -0,0 +1,340 @@
+use garage_util::crdt::Crdt;
+use garage_util::data::*;
+use garage_util::error::*;
+
+use garage_rpc::layout::*;
+use garage_rpc::system::*;
+use garage_rpc::*;
+
+use crate::cli::*;
+
+pub async fn cli_layout_command_dispatch(
+	cmd: LayoutOperation,
+	system_rpc_endpoint: &Endpoint<SystemRpc, ()>,
+	rpc_host: NodeID,
+) -> Result<(), Error> {
+	match cmd {
+		LayoutOperation::Assign(configure_opt) => {
+			cmd_assign_role(system_rpc_endpoint, rpc_host, configure_opt).await
+		}
+		LayoutOperation::Remove(remove_opt) => {
+			cmd_remove_role(system_rpc_endpoint, rpc_host, remove_opt).await
+		}
+		LayoutOperation::Show => cmd_show_layout(system_rpc_endpoint, rpc_host).await,
+		LayoutOperation::Apply(apply_opt) => {
+			cmd_apply_layout(system_rpc_endpoint, rpc_host, apply_opt).await
+		}
+		LayoutOperation::Revert(revert_opt) => {
+			cmd_revert_layout(system_rpc_endpoint, rpc_host, revert_opt).await
+		}
+	}
+}
+
+pub async fn cmd_assign_role(
+	rpc_cli: &Endpoint<SystemRpc, ()>,
+	rpc_host: NodeID,
+	args: AssignRoleOpt,
+) -> Result<(), Error> {
+	let status = match rpc_cli
+		.call(&rpc_host, &SystemRpc::GetKnownNodes, PRIO_NORMAL)
+		.await??
+	{
+		SystemRpc::ReturnKnownNodes(nodes) => nodes,
+		resp => return Err(Error::Message(format!("Invalid RPC response: {:?}", resp))),
+	};
+
+	let added_node = find_matching_node(status.iter().map(|adv| adv.id), &args.node_id)?;
+
+	let mut layout = fetch_layout(rpc_cli, rpc_host).await?;
+
+	let mut roles = layout.roles.clone();
+	roles.merge(&layout.staging);
+
+	for replaced in args.replace.iter() {
+		let replaced_node = find_matching_node(layout.node_ids().iter().cloned(), replaced)?;
+		match roles.get(&replaced_node) {
+			Some(NodeRoleV(Some(_))) => {
+				layout
+					.staging
+					.merge(&roles.update_mutator(replaced_node, NodeRoleV(None)));
+			}
+			_ => {
+				return Err(Error::Message(format!(
+					"Cannot replace node {:?} as it is not currently in planned layout",
+					replaced_node
+				)));
+			}
+		}
+	}
+
+	if args.capacity.is_some() && args.gateway {
+		return Err(Error::Message(
+			"-c and -g are mutually exclusive, please configure node either with c>0 to act as a storage node or with -g to act as a gateway node".into()));
+	}
+	if args.capacity == Some(0) {
+		return Err(Error::Message("Invalid capacity value: 0".into()));
+	}
+
+	let new_entry = match roles.get(&added_node) {
+		Some(NodeRoleV(Some(old))) => {
+			let capacity = match args.capacity {
+				Some(c) => Some(c),
+				None if args.gateway => None,
+				None => old.capacity,
+			};
+			let tags = if args.tags.is_empty() {
+				old.tags.clone()
+			} else {
+				args.tags
+			};
+			NodeRole {
+				zone: args.zone.unwrap_or_else(|| old.zone.to_string()),
+				capacity,
+				tags,
+			}
+		}
+		_ => {
+			let capacity = match args.capacity {
+				Some(c) => Some(c),
+				None if args.gateway => None,
+				None => return Err(Error::Message(
+					"Please specify a capacity with the -c flag, or set node explicitly as gateway with -g".into())),
+			};
+			NodeRole {
+				zone: args.zone.ok_or("Please specify a zone with the -z flag")?,
+				capacity,
+				tags: args.tags,
+			}
+		}
+	};
+
+	layout
+		.staging
+		.merge(&roles.update_mutator(added_node, NodeRoleV(Some(new_entry))));
+
+	send_layout(rpc_cli, rpc_host, layout).await?;
+
+	println!("Role change is staged but not yet committed.");
+	println!("Use `garage layout show` to view staged role changes,");
+	println!("and `garage layout apply` to enact staged changes.");
+	Ok(())
+}
+
+pub async fn cmd_remove_role(
+	rpc_cli: &Endpoint<SystemRpc, ()>,
+	rpc_host: NodeID,
+	args: RemoveRoleOpt,
+) -> Result<(), Error> {
+	let mut layout = fetch_layout(rpc_cli, rpc_host).await?;
+
+	let mut roles = layout.roles.clone();
+	roles.merge(&layout.staging);
+
+	let deleted_node =
+		find_matching_node(roles.items().iter().map(|(id, _, _)| *id), &args.node_id)?;
+
+	layout
+		.staging
+		.merge(&roles.update_mutator(deleted_node, NodeRoleV(None)));
+
+	send_layout(rpc_cli, rpc_host, layout).await?;
+
+	println!("Role removal is staged but not yet committed.");
+	println!("Use `garage layout show` to view staged role changes,");
+	println!("and `garage layout apply` to enact staged changes.");
+	Ok(())
+}
+
+pub async fn cmd_show_layout(
+	rpc_cli: &Endpoint<SystemRpc, ()>,
+	rpc_host: NodeID,
+) -> Result<(), Error> {
+	let mut layout = fetch_layout(rpc_cli, rpc_host).await?;
+
+	println!("==== CURRENT CLUSTER LAYOUT ====");
+	if !print_cluster_layout(&layout) {
+		println!("No nodes currently have a role in the cluster.");
+		println!("See `garage status` to view available nodes.");
+	}
+	println!();
+	println!("Current cluster layout version: {}", layout.version);
+
+	if print_staging_role_changes(&layout) {
+		layout.roles.merge(&layout.staging);
+
+		println!();
+		println!("==== NEW CLUSTER LAYOUT AFTER APPLYING CHANGES ====");
+		if !print_cluster_layout(&layout) {
+			println!("No nodes have a role in the new layout.");
+		}
+		println!();
+
+		// this will print the stats of what partitions
+		// will move around when we apply
+		if layout.calculate_partition_assignation() {
+			println!("To enact the staged role changes, type:");
+			println!();
+			println!("    garage layout apply --version {}", layout.version + 1);
+			println!();
+			println!(
+				"You can also revert all proposed changes with: garage layout revert --version {}",
+				layout.version + 1
+			);
+		} else {
+			println!("Not enough nodes have an assigned role to maintain enough copies of data.");
+			println!("This new layout cannot yet be applied.");
+		}
+	}
+
+	Ok(())
+}
+
+pub async fn cmd_apply_layout(
+	rpc_cli: &Endpoint<SystemRpc, ()>,
+	rpc_host: NodeID,
+	apply_opt: ApplyLayoutOpt,
+) -> Result<(), Error> {
+	let mut layout = fetch_layout(rpc_cli, rpc_host).await?;
+
+	match apply_opt.version {
+		None => {
+			println!("Please pass the --version flag to ensure that you are writing the correct version of the cluster layout.");
+			println!("To know the correct value of the --version flag, invoke `garage layout show` and review the proposed changes.");
+			return Err(Error::Message("--version flag is missing".into()));
+		}
+		Some(v) => {
+			if v != layout.version + 1 {
+				return Err(Error::Message("Invalid value of --version flag".into()));
+			}
+		}
+	}
+
+	layout.roles.merge(&layout.staging);
+
+	if !layout.calculate_partition_assignation() {
+		return Err(Error::Message("Could not calculate new assignation of partitions to nodes. This can happen if there are less nodes than the desired number of copies of your data (see the replication_mode configuration parameter).".into()));
+	}
+
+	layout.staging.clear();
+	layout.staging_hash = blake2sum(&rmp_to_vec_all_named(&layout.staging).unwrap()[..]);
+
+	layout.version += 1;
+
+	send_layout(rpc_cli, rpc_host, layout).await?;
+
+	println!("New cluster layout with updated role assignation has been applied in cluster.");
+	println!("Data will now be moved around between nodes accordingly.");
+
+	Ok(())
+}
+
+pub async fn cmd_revert_layout(
+	rpc_cli: &Endpoint<SystemRpc, ()>,
+	rpc_host: NodeID,
+	revert_opt: RevertLayoutOpt,
+) -> Result<(), Error> {
+	let mut layout = fetch_layout(rpc_cli, rpc_host).await?;
+
+	match revert_opt.version {
+		None => {
+			println!("Please pass the --version flag to ensure that you are writing the correct version of the cluster layout.");
+			println!("To know the correct value of the --version flag, invoke `garage layout show` and review the proposed changes.");
+			return Err(Error::Message("--version flag is missing".into()));
+		}
+		Some(v) => {
+			if v != layout.version + 1 {
+				return Err(Error::Message("Invalid value of --version flag".into()));
+			}
+		}
+	}
+
+	layout.staging.clear();
+	layout.staging_hash = blake2sum(&rmp_to_vec_all_named(&layout.staging).unwrap()[..]);
+
+	layout.version += 1;
+
+	send_layout(rpc_cli, rpc_host, layout).await?;
+
+	println!("All proposed role changes in cluster layout have been canceled.");
+	Ok(())
+}
+
+// --- utility ---
+
+pub async fn fetch_layout(
+	rpc_cli: &Endpoint<SystemRpc, ()>,
+	rpc_host: NodeID,
+) -> Result<ClusterLayout, Error> {
+	match rpc_cli
+		.call(&rpc_host, &SystemRpc::PullClusterLayout, PRIO_NORMAL)
+		.await??
+	{
+		SystemRpc::AdvertiseClusterLayout(t) => Ok(t),
+		resp => Err(Error::Message(format!("Invalid RPC response: {:?}", resp))),
+	}
+}
+
+pub async fn send_layout(
+	rpc_cli: &Endpoint<SystemRpc, ()>,
+	rpc_host: NodeID,
+	layout: ClusterLayout,
+) -> Result<(), Error> {
+	rpc_cli
+		.call(
+			&rpc_host,
+			&SystemRpc::AdvertiseClusterLayout(layout),
+			PRIO_NORMAL,
+		)
+		.await??;
+	Ok(())
+}
+
+pub fn print_cluster_layout(layout: &ClusterLayout) -> bool {
+	let mut table = vec!["ID\tTags\tZone\tCapacity".to_string()];
+	for (id, _, role) in layout.roles.items().iter() {
+		let role = match &role.0 {
+			Some(r) => r,
+			_ => continue,
+		};
+		let tags = role.tags.join(",");
+		table.push(format!(
+			"{:?}\t{}\t{}\t{}",
+			id,
+			tags,
+			role.zone,
+			role.capacity_string()
+		));
+	}
+	if table.len() == 1 {
+		false
+	} else {
+		format_table(table);
+		true
+	}
+}
+
+pub fn print_staging_role_changes(layout: &ClusterLayout) -> bool {
+	if !layout.staging.items().is_empty() {
+		println!();
+		println!("==== STAGED ROLE CHANGES ====");
+		let mut table = vec!["ID\tTags\tZone\tCapacity".to_string()];
+		for (id, _, role) in layout.staging.items().iter() {
+			if let Some(role) = &role.0 {
+				let tags = role.tags.join(",");
+				table.push(format!(
+					"{:?}\t{}\t{}\t{}",
+					id,
+					tags,
+					role.zone,
+					role.capacity_string()
+				));
+			} else {
+				table.push(format!("{:?}\tREMOVED", id));
+			}
+		}
+		format_table(table);
+		true
+	} else {
+		false
+	}
+}
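Aside: the stage-then-commit workflow implemented above rests on last-writer-wins merge semantics. The following self-contained sketch (a simplified toy `LwwMap`, not the actual `garage_util::crdt` implementation) illustrates why merging the staging map into a copy of the role table, as `cmd_assign_role` does, is idempotent and order-independent:

```rust
use std::collections::BTreeMap;

/// Simplified last-writer-wins map: each key stores (timestamp, value),
/// and merging keeps the entry with the highest timestamp. This mirrors
/// the idea of garage_util::crdt::LwwMap, not its actual code.
#[derive(Clone, Debug, PartialEq)]
struct LwwMap<K: Ord + Clone, V: Clone> {
	entries: BTreeMap<K, (u64, V)>,
}

impl<K: Ord + Clone, V: Clone> LwwMap<K, V> {
	fn new() -> Self {
		Self { entries: BTreeMap::new() }
	}
	fn insert(&mut self, k: K, ts: u64, v: V) {
		self.entries.insert(k, (ts, v));
	}
	/// CRDT merge: for each key, keep the value with the largest timestamp.
	fn merge(&mut self, other: &Self) {
		for (k, (ts, v)) in other.entries.iter() {
			match self.entries.get(k) {
				Some((my_ts, _)) if my_ts >= ts => {}
				_ => {
					self.entries.insert(k.clone(), (*ts, v.clone()));
				}
			}
		}
	}
}

fn main() {
	// Roles committed in the current layout.
	let mut roles = LwwMap::new();
	roles.insert("node1", 1, Some("zone=dc1 capacity=10"));

	// Staged changes: node1 is re-assigned, node2 is added.
	let mut staging = LwwMap::new();
	staging.insert("node1", 2, Some("zone=dc1 capacity=20"));
	staging.insert("node2", 2, Some("zone=dc2 capacity=10"));

	// Merging staging into a copy of roles previews the next layout;
	// merging a second time (or in any order, on any node) changes nothing.
	let mut preview = roles.clone();
	preview.merge(&staging);
	let mut preview2 = preview.clone();
	preview2.merge(&staging);
	assert_eq!(preview, preview2);
}
```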
@@ -1,9 +1,11 @@
 pub(crate) mod cmd;
 pub(crate) mod init;
+pub(crate) mod layout;
 pub(crate) mod structs;
 pub(crate) mod util;

 pub(crate) use cmd::*;
 pub(crate) use init::*;
+pub(crate) use layout::*;
 pub(crate) use structs::*;
 pub(crate) use util::*;
@@ -8,23 +8,23 @@ pub enum Command {
 	#[structopt(name = "server")]
 	Server,

-	/// Print identifier (public key) of this Garage node
-	#[structopt(name = "node-id")]
-	NodeId(NodeIdOpt),
-
 	/// Get network status
 	#[structopt(name = "status")]
 	Status,

-	/// Garage node operations
+	/// Operations on individual Garage nodes
 	#[structopt(name = "node")]
 	Node(NodeOperation),

-	/// Bucket operations
+	/// Operations on the assignation of node roles in the cluster layout
+	#[structopt(name = "layout")]
+	Layout(LayoutOperation),
+
+	/// Operations on buckets
 	#[structopt(name = "bucket")]
 	Bucket(BucketOperation),

-	/// Key operations
+	/// Operations on S3 access keys
 	#[structopt(name = "key")]
 	Key(KeyOperation),

@@ -39,17 +39,13 @@ pub enum Command {

 #[derive(StructOpt, Debug)]
 pub enum NodeOperation {
+	/// Print identifier (public key) of this Garage node
+	#[structopt(name = "id")]
+	NodeId(NodeIdOpt),
+
 	/// Connect to Garage node that is currently isolated from the system
 	#[structopt(name = "connect")]
 	Connect(ConnectNodeOpt),
-
-	/// Configure Garage node
-	#[structopt(name = "configure")]
-	Configure(ConfigureNodeOpt),
-
-	/// Remove Garage node from cluster
-	#[structopt(name = "remove")]
-	Remove(RemoveNodeOpt),
 }

 #[derive(StructOpt, Debug)]
@@ -67,8 +63,31 @@ pub struct ConnectNodeOpt {
 }

 #[derive(StructOpt, Debug)]
-pub struct ConfigureNodeOpt {
-	/// Node to configure (prefix of hexadecimal node id)
+pub enum LayoutOperation {
+	/// Assign role to Garage node
+	#[structopt(name = "assign")]
+	Assign(AssignRoleOpt),
+
+	/// Remove role from Garage cluster node
+	#[structopt(name = "remove")]
+	Remove(RemoveRoleOpt),
+
+	/// Show roles currently assigned to nodes and changes staged for commit
+	#[structopt(name = "show")]
+	Show,
+
+	/// Apply staged changes to cluster layout
+	#[structopt(name = "apply")]
+	Apply(ApplyLayoutOpt),
+
+	/// Revert staged changes to cluster layout
+	#[structopt(name = "revert")]
+	Revert(RevertLayoutOpt),
+}
+
+#[derive(StructOpt, Debug)]
+pub struct AssignRoleOpt {
+	/// Node to which to assign role (prefix of hexadecimal node id)
 	pub(crate) node_id: String,

 	/// Location (zone or datacenter) of the node
@@ -83,9 +102,9 @@ pub struct ConfigureNodeOpt {
 	#[structopt(short = "g", long = "gateway")]
 	pub(crate) gateway: bool,

-	/// Optional node tag
+	/// Optional tags to add to node
 	#[structopt(short = "t", long = "tag")]
-	pub(crate) tag: Option<String>,
+	pub(crate) tags: Vec<String>,

 	/// Replaced node(s): list of node IDs that will be removed from the current cluster
 	#[structopt(long = "replace")]
@@ -93,13 +112,24 @@ pub struct ConfigureNodeOpt {
 }

 #[derive(StructOpt, Debug)]
-pub struct RemoveNodeOpt {
-	/// Node to configure (prefix of hexadecimal node id)
+pub struct RemoveRoleOpt {
+	/// Node whose role to remove (prefix of hexadecimal node id)
 	pub(crate) node_id: String,
+}

-	/// If this flag is not given, the node won't be removed
-	#[structopt(long = "yes")]
-	pub(crate) yes: bool,
+#[derive(StructOpt, Debug)]
+pub struct ApplyLayoutOpt {
+	/// Version number of new configuration: this command will fail if
+	/// it is not exactly 1 + the previous configuration's version
+	#[structopt(long = "version")]
+	pub(crate) version: Option<u64>,
+}
+
+#[derive(StructOpt, Debug)]
+pub struct RevertLayoutOpt {
+	/// Version number of old configuration to which to revert
+	#[structopt(long = "version")]
+	pub(crate) version: Option<u64>,
 }

 #[derive(Serialize, Deserialize, StructOpt, Debug)]
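Since the node tag option is now a `Vec<String>`, structopt accepts the `-t` flag multiple times. A minimal sketch of that behavior (a hypothetical standalone parser, assuming structopt 0.3 as used by Garage):

```rust
use structopt::StructOpt;

/// Hypothetical miniature of AssignRoleOpt, just to show that a Vec<String>
/// field lets the -t/--tag flag be passed several times.
#[derive(StructOpt, Debug)]
struct TagsOpt {
	#[structopt(short = "t", long = "tag")]
	tags: Vec<String>,
}

fn main() {
	// Equivalent of: garage layout assign -t fast -t ssd <node_id>
	let opt = TagsOpt::from_iter(vec!["prog", "-t", "fast", "-t", "ssd"]);
	assert_eq!(opt.tags, vec!["fast".to_string(), "ssd".to_string()]);
	println!("{:?}", opt.tags);
}
```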
@@ -70,7 +70,9 @@ async fn main() {

 		server::run_server(opt.config_file).await
 	}
-	Command::NodeId(node_id_opt) => node_id_command(opt.config_file, node_id_opt.quiet),
+	Command::Node(NodeOperation::NodeId(node_id_opt)) => {
+		node_id_command(opt.config_file, node_id_opt.quiet)
+	}
 	_ => cli_command(opt).await,
 };
@@ -1,11 +1,12 @@
 [package]
 name = "garage_model"
-version = "0.4.0"
+version = "0.5.0"
 authors = ["Alex Auvolat <alex@adnab.me>"]
 edition = "2018"
 license = "AGPL-3.0"
 description = "Core data model for the Garage object store"
 repository = "https://git.deuxfleurs.fr/Deuxfleurs/garage"
+readme = "../../README.md"

 [lib]
 path = "lib.rs"
@@ -13,9 +14,9 @@ path = "lib.rs"
 # See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html

 [dependencies]
-garage_rpc = { version = "0.4.0", path = "../rpc" }
-garage_table = { version = "0.4.0", path = "../table" }
-garage_util = { version = "0.4.0", path = "../util" }
+garage_rpc = { version = "0.5.0", path = "../rpc" }
+garage_table = { version = "0.5.0", path = "../table" }
+garage_util = { version = "0.5.0", path = "../util" }

 async-trait = "0.1.7"
 arc-swap = "1.0"
@@ -1,11 +1,12 @@
 [package]
 name = "garage_rpc"
-version = "0.4.0"
+version = "0.5.0"
 authors = ["Alex Auvolat <alex@adnab.me>"]
 edition = "2018"
 license = "AGPL-3.0"
 description = "Cluster membership management and RPC protocol for the Garage object store"
 repository = "https://git.deuxfleurs.fr/Deuxfleurs/garage"
+readme = "../../README.md"

 [lib]
 path = "lib.rs"
@@ -13,7 +14,7 @@ path = "lib.rs"
 # See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html

 [dependencies]
-garage_util = { version = "0.4.0", path = "../util" }
+garage_util = { version = "0.5.0", path = "../util" }

 arc-swap = "1.0"
 bytes = "1.0"
@@ -26,6 +27,7 @@ sodiumoxide = { version = "0.2.5-0", package = "kuska-sodiumoxide" }
 async-trait = "0.1.7"
 rmp-serde = "0.15"
 serde = { version = "1.0", default-features = false, features = ["derive", "rc"] }
+serde_bytes = "0.11"
 serde_json = "1.0"

 futures = "0.3"
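The new `serde_bytes` dependency supports the `#[serde(with = "serde_bytes")]` attribute on `ring_assignation_data` in the new `src/rpc/layout.rs` below: it makes rmp-serde encode the `Vec<u8>` as a single MessagePack binary blob instead of an array of individual integers. A minimal sketch of the effect, using a hypothetical struct (not Garage code):

```rust
use serde::{Deserialize, Serialize};

/// Hypothetical example struct: the same Vec<u8> payload, serialized two ways.
#[derive(Serialize, Deserialize)]
struct PlainBytes {
	data: Vec<u8>,
}

#[derive(Serialize, Deserialize)]
struct CompactBytes {
	// Tells serde to treat the field as an opaque byte string, which
	// MessagePack encodes as a `bin` value instead of an integer array.
	#[serde(with = "serde_bytes")]
	data: Vec<u8>,
}

fn main() {
	// Bytes >= 128 take two bytes each as MessagePack integers, so the
	// array form is roughly twice the size of the bin form here.
	let payload = vec![0xAAu8; 256];
	let plain = rmp_serde::to_vec_named(&PlainBytes { data: payload.clone() }).unwrap();
	let compact = rmp_serde::to_vec_named(&CompactBytes { data: payload }).unwrap();
	assert!(compact.len() < plain.len());
	println!("plain: {} bytes, compact: {} bytes", plain.len(), compact.len());
}
```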
579
src/rpc/layout.rs
Normal file
579
src/rpc/layout.rs
Normal file
|
@ -0,0 +1,579 @@
|
||||||
|
use std::cmp::Ordering;
|
||||||
|
use std::collections::{HashMap, HashSet};
|
||||||
|
|
||||||
|
use serde::{Deserialize, Serialize};
|
||||||
|
|
||||||
|
use garage_util::crdt::{AutoCrdt, Crdt, LwwMap};
|
||||||
|
use garage_util::data::*;
|
||||||
|
|
||||||
|
use crate::ring::*;
|
||||||
|
|
||||||
|
/// The layout of the cluster, i.e. the list of roles
|
||||||
|
/// which are assigned to each cluster node
|
||||||
|
#[derive(Clone, Debug, Serialize, Deserialize)]
|
||||||
|
pub struct ClusterLayout {
|
||||||
|
pub version: u64,
|
||||||
|
|
||||||
|
pub replication_factor: usize,
|
||||||
|
pub roles: LwwMap<Uuid, NodeRoleV>,
|
||||||
|
|
||||||
|
/// node_id_vec: a vector of node IDs with a role assigned
|
||||||
|
/// in the system (this includes gateway nodes).
|
||||||
|
/// The order here is different than the vec stored by `roles`, because:
|
||||||
|
/// 1. non-gateway nodes are first so that they have lower numbers
|
||||||
|
/// 2. nodes that don't have a role are excluded (but they need to
|
||||||
|
/// stay in the CRDT as tombstones)
|
||||||
|
pub node_id_vec: Vec<Uuid>,
|
||||||
|
/// the assignation of data partitions to node, the values
|
||||||
|
/// are indices in node_id_vec
|
||||||
|
#[serde(with = "serde_bytes")]
|
||||||
|
pub ring_assignation_data: Vec<CompactNodeType>,
|
||||||
|
|
||||||
|
/// Role changes which are staged for the next version of the layout
|
||||||
|
pub staging: LwwMap<Uuid, NodeRoleV>,
|
||||||
|
pub staging_hash: Hash,
|
||||||
|
}
|
||||||
|
|
||||||
|
#[derive(PartialEq, Eq, PartialOrd, Ord, Clone, Debug, Serialize, Deserialize)]
|
||||||
|
pub struct NodeRoleV(pub Option<NodeRole>);
|
||||||
|
|
||||||
|
impl AutoCrdt for NodeRoleV {
|
||||||
|
const WARN_IF_DIFFERENT: bool = true;
|
||||||
|
}
|
||||||
|
|
||||||
|
/// The user-assigned roles of cluster nodes
|
||||||
|
#[derive(PartialEq, Eq, PartialOrd, Ord, Clone, Debug, Serialize, Deserialize)]
|
||||||
|
pub struct NodeRole {
|
||||||
|
/// Datacenter at which this entry belong. This information might be used to perform a better
|
||||||
|
/// geodistribution
|
||||||
|
pub zone: String,
|
||||||
|
/// The (relative) capacity of the node
|
||||||
|
/// If this is set to None, the node does not participate in storing data for the system
|
||||||
|
/// and is only active as an API gateway to other nodes
|
||||||
|
pub capacity: Option<u32>,
|
||||||
|
/// A set of tags to recognize the node
|
||||||
|
pub tags: Vec<String>,
|
||||||
|
}
|
||||||
|
|
||||||
|
impl NodeRole {
|
||||||
|
pub fn capacity_string(&self) -> String {
|
||||||
|
match self.capacity {
|
||||||
|
Some(c) => format!("{}", c),
|
||||||
|
None => "gateway".to_string(),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
impl ClusterLayout {
|
||||||
|
pub fn new(replication_factor: usize) -> Self {
|
||||||
|
let empty_lwwmap = LwwMap::new();
|
||||||
|
let empty_lwwmap_hash = blake2sum(&rmp_to_vec_all_named(&empty_lwwmap).unwrap()[..]);
|
||||||
|
|
||||||
|
ClusterLayout {
|
||||||
|
version: 0,
|
||||||
|
replication_factor,
|
||||||
|
roles: LwwMap::new(),
|
||||||
|
node_id_vec: Vec::new(),
|
||||||
|
ring_assignation_data: Vec::new(),
|
||||||
|
staging: empty_lwwmap,
|
||||||
|
staging_hash: empty_lwwmap_hash,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn merge(&mut self, other: &ClusterLayout) -> bool {
|
||||||
|
match other.version.cmp(&self.version) {
|
||||||
|
Ordering::Greater => {
|
||||||
|
*self = other.clone();
|
||||||
|
true
|
||||||
|
}
|
||||||
|
Ordering::Equal => {
|
||||||
|
self.staging.merge(&other.staging);
|
||||||
|
|
||||||
|
let new_staging_hash = blake2sum(&rmp_to_vec_all_named(&self.staging).unwrap()[..]);
|
||||||
|
let changed = new_staging_hash != self.staging_hash;
|
||||||
|
|
||||||
|
self.staging_hash = new_staging_hash;
|
||||||
|
|
||||||
|
changed
|
||||||
|
}
|
||||||
|
Ordering::Less => false,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Returns a list of IDs of nodes that currently have
|
||||||
|
/// a role in the cluster
|
||||||
|
pub fn node_ids(&self) -> &[Uuid] {
|
||||||
|
&self.node_id_vec[..]
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn num_nodes(&self) -> usize {
|
||||||
|
self.node_id_vec.len()
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Returns the role of a node in the layout
|
||||||
|
pub fn node_role(&self, node: &Uuid) -> Option<&NodeRole> {
|
||||||
|
match self.roles.get(node) {
|
||||||
|
Some(NodeRoleV(Some(v))) => Some(v),
|
||||||
|
_ => None,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Check a cluster layout for internal consistency
|
||||||
|
/// returns true if consistent, false if error
|
||||||
|
pub fn check(&self) -> bool {
|
||||||
|
// Check that the hash of the staging data is correct
|
||||||
|
let staging_hash = blake2sum(&rmp_to_vec_all_named(&self.staging).unwrap()[..]);
|
||||||
|
if staging_hash != self.staging_hash {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Check that node_id_vec contains the correct list of nodes
|
||||||
|
let mut expected_nodes = self
|
||||||
|
.roles
|
||||||
|
.items()
|
||||||
|
.iter()
|
||||||
|
.filter(|(_, _, v)| v.0.is_some())
|
||||||
|
.map(|(id, _, _)| *id)
|
||||||
|
.collect::<Vec<_>>();
|
||||||
|
expected_nodes.sort();
|
||||||
|
let mut node_id_vec = self.node_id_vec.clone();
|
||||||
|
node_id_vec.sort();
|
||||||
|
if expected_nodes != node_id_vec {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Check that the assignation data has the correct length
|
||||||
|
if self.ring_assignation_data.len() != (1 << PARTITION_BITS) * self.replication_factor {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Check that the assigned nodes are correct identifiers
|
||||||
|
// of nodes that are assigned a role
|
||||||
|
// and that role is not the role of a gateway nodes
|
||||||
|
for x in self.ring_assignation_data.iter() {
|
||||||
|
if *x as usize >= self.node_id_vec.len() {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
let node = self.node_id_vec[*x as usize];
|
||||||
|
match self.roles.get(&node) {
|
||||||
|
Some(NodeRoleV(Some(x))) if x.capacity.is_some() => (),
|
||||||
|
_ => return false,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
true
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Calculate an assignation of partitions to nodes
|
||||||
|
pub fn calculate_partition_assignation(&mut self) -> bool {
|
||||||
|
let (configured_nodes, zones) = self.configured_nodes_and_zones();
|
||||||
|
let n_zones = zones.len();
|
||||||
|
|
||||||
|
println!("Calculating updated partition assignation, this may take some time...");
|
||||||
|
println!();
|
||||||
|
|
||||||
|
let old_partitions = self.parse_assignation_data();
|
||||||
|
|
||||||
|
let mut partitions = old_partitions.clone();
|
||||||
|
for part in partitions.iter_mut() {
|
||||||
|
part.nodes
|
||||||
|
.retain(|(_, info)| info.map(|x| x.capacity.is_some()).unwrap_or(false));
|
||||||
|
}
|
||||||
|
|
||||||
|
// When nodes are removed, or when bootstraping an assignation from
|
||||||
|
// scratch for a new cluster, the old partitions will have holes (or be empty).
|
||||||
|
// Here we add more nodes to make a complete (sub-optimal) assignation,
|
||||||
|
// using an initial partition assignation that is calculated using the multi-dc maglev trick
|
||||||
|
match self.initial_partition_assignation() {
|
||||||
|
Some(initial_partitions) => {
|
||||||
|
for (part, ipart) in partitions.iter_mut().zip(initial_partitions.iter()) {
|
||||||
|
for (id, info) in ipart.nodes.iter() {
|
||||||
|
if part.nodes.len() < self.replication_factor {
|
||||||
|
part.add(part.nodes.len() + 1, n_zones, id, info.unwrap());
|
||||||
|
}
|
||||||
|
}
|
||||||
|
assert!(part.nodes.len() == self.replication_factor);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
None => {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Calculate how many partitions each node should ideally store,
|
||||||
|
// and how many partitions they are storing with the current assignation
|
||||||
|
// This defines our target for which we will optimize in the following loop.
|
||||||
|
let total_capacity = configured_nodes
|
||||||
|
.iter()
|
||||||
|
.map(|(_, info)| info.capacity.unwrap_or(0))
|
||||||
|
.sum::<u32>() as usize;
|
||||||
|
let total_partitions = self.replication_factor * (1 << PARTITION_BITS);
|
||||||
|
let target_partitions_per_node = configured_nodes
|
||||||
|
.iter()
|
||||||
|
.map(|(id, info)| {
|
||||||
|
(
|
||||||
|
*id,
|
||||||
|
info.capacity.unwrap_or(0) as usize * total_partitions / total_capacity,
|
||||||
|
)
|
||||||
|
})
|
||||||
|
.collect::<HashMap<&Uuid, usize>>();
|
||||||
|
|
||||||
|
let mut partitions_per_node = self.partitions_per_node(&partitions[..]);
|
||||||
|
|
||||||
|
println!("Target number of partitions per node:");
|
||||||
|
for (node, npart) in target_partitions_per_node.iter() {
|
||||||
|
println!("{:?}\t{}", node, npart);
|
||||||
|
}
|
||||||
|
println!();
|
||||||
|
|
||||||
|
// Shuffle partitions between nodes so that nodes will reach (or better approach)
|
||||||
|
// their target number of stored partitions
|
||||||
|
loop {
|
||||||
|
let mut option = None;
|
||||||
|
for (i, part) in partitions.iter_mut().enumerate() {
|
||||||
|
for (irm, (idrm, _)) in part.nodes.iter().enumerate() {
|
||||||
|
let suprm = partitions_per_node.get(*idrm).cloned().unwrap_or(0) as i32
|
||||||
|
- target_partitions_per_node.get(*idrm).cloned().unwrap_or(0) as i32;
|
||||||
|
|
||||||
|
for (idadd, infoadd) in configured_nodes.iter() {
|
||||||
|
// skip replacing a node by itself
|
||||||
|
// and skip replacing by gateway nodes
|
||||||
|
if idadd == idrm || infoadd.capacity.is_none() {
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
|
||||||
|
let supadd = partitions_per_node.get(*idadd).cloned().unwrap_or(0) as i32
|
||||||
|
- target_partitions_per_node.get(*idadd).cloned().unwrap_or(0) as i32;
|
||||||
|
|
||||||
|
// We want to try replacing node idrm by node idadd
|
||||||
|
// if that brings us close to our goal.
|
||||||
|
let square = |i: i32| i * i;
|
||||||
|
let oldcost = square(suprm) + square(supadd);
|
||||||
|
let newcost = square(suprm - 1) + square(supadd + 1);
|
||||||
|
if newcost >= oldcost {
|
||||||
|
// not closer to our goal
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
let gain = oldcost - newcost;
|
||||||
|
|
||||||
|
let mut newpart = part.clone();
|
||||||
|
|
||||||
|
newpart.nodes.remove(irm);
|
||||||
|
if !newpart.add(newpart.nodes.len() + 1, n_zones, idadd, infoadd) {
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
assert!(newpart.nodes.len() == self.replication_factor);
|
||||||
|
|
||||||
|
if !old_partitions[i]
|
||||||
|
.is_valid_transition_to(&newpart, self.replication_factor)
|
||||||
|
{
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
|
||||||
|
if option
|
||||||
|
.as_ref()
|
||||||
|
.map(|(old_gain, _, _, _, _)| gain > *old_gain)
|
||||||
|
.unwrap_or(true)
|
||||||
|
{
|
||||||
|
option = Some((gain, i, idadd, idrm, newpart));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if let Some((_gain, i, idadd, idrm, newpart)) = option {
|
||||||
|
*partitions_per_node.entry(idadd).or_insert(0) += 1;
|
||||||
|
*partitions_per_node.get_mut(idrm).unwrap() -= 1;
|
||||||
|
partitions[i] = newpart;
|
||||||
|
} else {
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Check we completed the assignation correctly
|
||||||
|
// (this is a set of checks for the algorithm's consistency)
|
||||||
|
assert!(partitions.len() == (1 << PARTITION_BITS));
|
||||||
|
assert!(partitions
|
||||||
|
.iter()
|
||||||
|
.all(|p| p.nodes.len() == self.replication_factor));
|
||||||
|
|
||||||
|
let new_partitions_per_node = self.partitions_per_node(&partitions[..]);
|
||||||
|
assert!(new_partitions_per_node == partitions_per_node);
|
||||||
|
|
||||||
|
// Show statistics
|
||||||
|
println!("New number of partitions per node:");
|
||||||
|
for (node, npart) in partitions_per_node.iter() {
|
||||||
|
println!("{:?}\t{}", node, npart);
|
||||||
|
}
|
||||||
|
println!();
|
||||||
|
|
||||||
|
let mut diffcount = HashMap::new();
|
||||||
|
for (oldpart, newpart) in old_partitions.iter().zip(partitions.iter()) {
|
||||||
|
let nminus = oldpart.txtplus(newpart);
|
||||||
|
let nplus = newpart.txtplus(oldpart);
|
||||||
|
if nminus != "[...]" || nplus != "[...]" {
|
||||||
|
let tup = (nminus, nplus);
|
||||||
|
*diffcount.entry(tup).or_insert(0) += 1;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if diffcount.is_empty() {
|
||||||
|
println!("No data will be moved between nodes.");
|
||||||
|
} else {
|
||||||
|
let mut diffcount = diffcount.into_iter().collect::<Vec<_>>();
|
||||||
|
diffcount.sort();
|
||||||
|
println!("Number of partitions that move:");
|
||||||
|
for ((nminus, nplus), npart) in diffcount {
|
||||||
|
println!("\t{}\t{} -> {}", npart, nminus, nplus);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
println!();
|
||||||
|
|
||||||
|
// Calculate and save new assignation data
|
||||||
|
let (nodes, assignation_data) =
|
||||||
|
self.compute_assignation_data(&configured_nodes[..], &partitions[..]);
|
||||||
|
|
||||||
|
self.node_id_vec = nodes;
|
||||||
|
self.ring_assignation_data = assignation_data;
|
||||||
|
|
||||||
|
true
|
||||||
|
}
|
||||||
|
|
||||||
|
fn initial_partition_assignation(&self) -> Option<Vec<PartitionAss<'_>>> {
|
||||||
|
let (configured_nodes, zones) = self.configured_nodes_and_zones();
|
||||||
|
let n_zones = zones.len();
|
||||||
|
|
||||||
|
// Create a vector of partition indices (0 to 2**PARTITION_BITS-1)
|
||||||
|
let partitions_idx = (0usize..(1usize << PARTITION_BITS)).collect::<Vec<_>>();
|
||||||
|
|
||||||
|
// Prepare ring
|
||||||
|
let mut partitions: Vec<PartitionAss> = partitions_idx
|
||||||
|
.iter()
|
||||||
|
.map(|_i| PartitionAss::new())
|
||||||
|
.collect::<Vec<_>>();
|
||||||
|
|
||||||
|
// Create MagLev priority queues for each node
|
||||||
|
let mut queues = configured_nodes
|
||||||
|
.iter()
|
||||||
|
.filter(|(_id, info)| info.capacity.is_some())
|
||||||
|
.map(|(node_id, node_info)| {
|
||||||
|
let mut parts = partitions_idx
|
||||||
|
.iter()
|
||||||
|
.map(|i| {
|
||||||
|
let part_data =
|
||||||
|
[&u16::to_be_bytes(*i as u16)[..], node_id.as_slice()].concat();
|
||||||
|
(*i, fasthash(&part_data[..]))
|
||||||
|
})
|
||||||
|
.collect::<Vec<_>>();
|
||||||
|
parts.sort_by_key(|(_i, h)| *h);
|
||||||
|
let parts_i = parts.iter().map(|(i, _h)| *i).collect::<Vec<_>>();
|
||||||
|
(node_id, node_info, parts_i, 0)
|
||||||
|
})
|
||||||
|
.collect::<Vec<_>>();
|
||||||
|
|
||||||
|
let max_capacity = configured_nodes
|
||||||
|
.iter()
|
||||||
|
.filter_map(|(_, node_info)| node_info.capacity)
|
||||||
|
.fold(0, std::cmp::max);
|
||||||
|
|
||||||
|
// Fill up ring
|
||||||
|
for rep in 0..self.replication_factor {
|
||||||
|
queues.sort_by_key(|(ni, _np, _q, _p)| {
|
||||||
|
let queue_data = [&u16::to_be_bytes(rep as u16)[..], ni.as_slice()].concat();
|
||||||
|
fasthash(&queue_data[..])
|
||||||
|
});
|
||||||
|
|
||||||
|
for (_, _, _, pos) in queues.iter_mut() {
|
||||||
|
*pos = 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
let mut remaining = partitions_idx.len();
|
||||||
|
while remaining > 0 {
|
||||||
|
let remaining0 = remaining;
|
||||||
|
for i_round in 0..max_capacity {
|
||||||
|
for (node_id, node_info, q, pos) in queues.iter_mut() {
|
||||||
|
if i_round >= node_info.capacity.unwrap() {
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
for (pos2, &qv) in q.iter().enumerate().skip(*pos) {
|
||||||
|
if partitions[qv].add(rep + 1, n_zones, node_id, node_info) {
|
||||||
|
remaining -= 1;
|
||||||
|
*pos = pos2 + 1;
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if remaining == remaining0 {
|
||||||
|
// No progress made, exit
|
||||||
|
return None;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
Some(partitions)
|
||||||
|
}

	fn configured_nodes_and_zones(&self) -> (Vec<(&Uuid, &NodeRole)>, HashSet<&str>) {
		let configured_nodes = self
			.roles
			.items()
			.iter()
			.filter(|(_id, _, info)| info.0.is_some())
			.map(|(id, _, info)| (id, info.0.as_ref().unwrap()))
			.collect::<Vec<(&Uuid, &NodeRole)>>();

		let zones = configured_nodes
			.iter()
			.filter(|(_id, info)| info.capacity.is_some())
			.map(|(_id, info)| info.zone.as_str())
			.collect::<HashSet<&str>>();

		(configured_nodes, zones)
	}

	fn compute_assignation_data<'a>(
		&self,
		configured_nodes: &[(&'a Uuid, &'a NodeRole)],
		partitions: &[PartitionAss<'a>],
	) -> (Vec<Uuid>, Vec<CompactNodeType>) {
		assert!(partitions.len() == (1 << PARTITION_BITS));

		// Make a canonical order for nodes
		let mut nodes = configured_nodes
			.iter()
			.filter(|(_id, info)| info.capacity.is_some())
			.map(|(id, _)| **id)
			.collect::<Vec<_>>();
		let nodes_rev = nodes
			.iter()
			.enumerate()
			.map(|(i, id)| (*id, i as CompactNodeType))
			.collect::<HashMap<Uuid, CompactNodeType>>();

		let mut assignation_data = vec![];
		for partition in partitions.iter() {
			assert!(partition.nodes.len() == self.replication_factor);
			for (id, _) in partition.nodes.iter() {
				assignation_data.push(*nodes_rev.get(id).unwrap());
			}
		}

		nodes.extend(
			configured_nodes
				.iter()
				.filter(|(_id, info)| info.capacity.is_none())
				.map(|(id, _)| **id),
		);

		(nodes, assignation_data)
	}
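compute_assignation_data flattens the per-partition node lists into ring_assignation_data, a Vec<CompactNodeType> of exactly replication_factor * 2**PARTITION_BITS entries: the replicas of partition p live at indices p*R .. (p+1)*R. A sketch of the round-trip, with toy sizes (not Garage code):

	// Illustration only: encode/decode of the flat assignation table,
	// with 4 partitions and replication factor 3.
	fn main() {
		let r = 3usize; // replication factor
		let partitions: Vec<Vec<u8>> = vec![
			vec![0, 1, 2],
			vec![1, 2, 3],
			vec![2, 3, 0],
			vec![3, 0, 1],
		];

		// Encode: concatenate each partition's node indices.
		let flat: Vec<u8> = partitions.iter().flatten().copied().collect();
		assert_eq!(flat.len(), r * partitions.len());

		// Decode: the replicas of partition p are flat[p*r..(p+1)*r].
		for (p, nodes) in partitions.iter().enumerate() {
			assert_eq!(&flat[p * r..(p + 1) * r], &nodes[..]);
		}
	}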

	fn parse_assignation_data(&self) -> Vec<PartitionAss<'_>> {
		if self.ring_assignation_data.len() == self.replication_factor * (1 << PARTITION_BITS) {
			// If the previous assignation data is correct, use that
			let mut partitions = vec![];
			for i in 0..(1 << PARTITION_BITS) {
				let mut part = PartitionAss::new();
				for node_i in self.ring_assignation_data
					[i * self.replication_factor..(i + 1) * self.replication_factor]
					.iter()
				{
					let node_id = &self.node_id_vec[*node_i as usize];

					if let Some(NodeRoleV(Some(info))) = self.roles.get(node_id) {
						part.nodes.push((node_id, Some(info)));
					} else {
						part.nodes.push((node_id, None));
					}
				}
				partitions.push(part);
			}
			partitions
		} else {
			// Otherwise start fresh
			(0..(1 << PARTITION_BITS))
				.map(|_| PartitionAss::new())
				.collect()
		}
	}

	fn partitions_per_node<'a>(&self, partitions: &[PartitionAss<'a>]) -> HashMap<&'a Uuid, usize> {
		let mut partitions_per_node = HashMap::<&Uuid, usize>::new();
		for p in partitions.iter() {
			for (id, _) in p.nodes.iter() {
				*partitions_per_node.entry(*id).or_insert(0) += 1;
			}
		}
		partitions_per_node
	}
}

// ---- Internal structs for partition assignation in layout ----

#[derive(Clone)]
struct PartitionAss<'a> {
	nodes: Vec<(&'a Uuid, Option<&'a NodeRole>)>,
}

impl<'a> PartitionAss<'a> {
	fn new() -> Self {
		Self { nodes: Vec::new() }
	}

	fn nplus(&self, other: &PartitionAss<'a>) -> usize {
		self.nodes
			.iter()
			.filter(|x| !other.nodes.contains(x))
			.count()
	}

	fn txtplus(&self, other: &PartitionAss<'a>) -> String {
		let mut nodes = self
			.nodes
			.iter()
			.filter(|x| !other.nodes.contains(x))
			.map(|x| format!("{:?}", x.0))
			.collect::<Vec<_>>();
		nodes.sort();
		if self.nodes.iter().any(|x| other.nodes.contains(x)) {
			nodes.push("...".into());
		}
		format!("[{}]", nodes.join(" "))
	}

	fn is_valid_transition_to(&self, other: &PartitionAss<'a>, replication_factor: usize) -> bool {
		let min_keep_nodes_per_part = (replication_factor + 1) / 2;
		let n_removed = self.nplus(other);

		if self.nodes.len() <= min_keep_nodes_per_part {
			n_removed == 0
		} else {
			n_removed <= self.nodes.len() - min_keep_nodes_per_part
		}
	}
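is_valid_transition_to caps how many replicas of a partition may move in one layout change: with min_keep_nodes_per_part = (R+1)/2 in integer arithmetic, at least ceil(R/2) of the current nodes must stay, so enough replicas of the old assignation survive for reads and repair during the transition. A worked check of that arithmetic (the function below mirrors the rule on plain integers; it is not Garage's API):

	// Mirror of the transition rule above.
	fn transition_ok(kept: usize, current: usize, replication_factor: usize) -> bool {
		let min_keep = (replication_factor + 1) / 2;
		let removed = current - kept;
		if current <= min_keep {
			removed == 0
		} else {
			removed <= current - min_keep
		}
	}

	fn main() {
		// With 3-way replication, min_keep = 2:
		assert!(transition_ok(2, 3, 3)); // moving 1 replica is fine
		assert!(!transition_ok(1, 3, 3)); // moving 2 replicas at once is refused
	}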

	fn add(
		&mut self,
		target_len: usize,
		n_zones: usize,
		node: &'a Uuid,
		role: &'a NodeRole,
	) -> bool {
		if self.nodes.len() != target_len - 1 {
			return false;
		}

		let p_zns = self
			.nodes
			.iter()
			.map(|(_id, info)| info.unwrap().zone.as_str())
			.collect::<HashSet<&str>>();
		if (p_zns.len() < n_zones && !p_zns.contains(&role.zone.as_str()))
			|| (p_zns.len() == n_zones && !self.nodes.iter().any(|(id, _)| *id == node))
		{
			self.nodes.push((node, Some(role)));
			true
		} else {
			false
		}
	}
}
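add() encodes the zone-spreading invariant: while a partition has not yet touched every zone, only a node from a new zone may join; once all zones are represented, any node may join as long as it is not already a replica of that partition. A small sketch of the zone half of the predicate, on plain string zones (hypothetical helper, not Garage code):

	use std::collections::HashSet;

	// Accept a candidate zone for a partition that already spans `present` zones?
	fn zone_ok(present: &HashSet<&str>, n_zones: usize, candidate_zone: &str) -> bool {
		// Either a zone is still missing and the candidate brings a new one,
		// or every zone is already covered (the node-dedup check happens elsewhere).
		(present.len() < n_zones && !present.contains(candidate_zone)) || present.len() == n_zones
	}

	fn main() {
		let present: HashSet<&str> = ["paris"].into_iter().collect();
		assert!(zone_ok(&present, 2, "berlin")); // new zone: accepted
		assert!(!zone_ok(&present, 2, "paris")); // zone already present, one still missing
	}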
src/rpc/lib.rs
@@ -5,6 +5,7 @@ extern crate log;

 mod consul;
+pub mod layout;
 pub mod ring;
 pub mod system;
src/rpc/ring.rs (197 lines changed)
@@ -1,12 +1,11 @@
 //! Module containing types related to computing nodes which should receive a copy of data blocks
 //! and metadata
-use std::collections::{HashMap, HashSet};
 use std::convert::TryInto;

-use serde::{Deserialize, Serialize};

 use garage_util::data::*;

+use crate::layout::ClusterLayout;

 /// A partition id, which is stored on 16 bits
 /// i.e. we have up to 2**16 partitions.
 /// (in practice we have exactly 2**PARTITION_BITS partitions)
@@ -22,47 +21,6 @@ pub const PARTITION_BITS: usize = 8;

 const PARTITION_MASK_U16: u16 = ((1 << PARTITION_BITS) - 1) << (16 - PARTITION_BITS);

-/// The user-defined configuration of the cluster's nodes
-#[derive(Clone, Debug, Serialize, Deserialize)]
-pub struct NetworkConfig {
-	/// Map of each node's id to it's configuration
-	pub members: HashMap<Uuid, NetworkConfigEntry>,
-	/// Version of this config
-	pub version: u64,
-}
-
-impl NetworkConfig {
-	pub(crate) fn new() -> Self {
-		Self {
-			members: HashMap::new(),
-			version: 0,
-		}
-	}
-}
-
-/// The overall configuration of one (possibly remote) node
-#[derive(Clone, Debug, Serialize, Deserialize)]
-pub struct NetworkConfigEntry {
-	/// Datacenter at which this entry belong. This infromation might be used to perform a better
-	/// geodistribution
-	pub zone: String,
-	/// The (relative) capacity of the node
-	/// If this is set to None, the node does not participate in storing data for the system
-	/// and is only active as an API gateway to other nodes
-	pub capacity: Option<u32>,
-	/// A tag to recognize the entry, not used for other things than display
-	pub tag: String,
-}
-
-impl NetworkConfigEntry {
-	pub fn capacity_string(&self) -> String {
-		match self.capacity {
-			Some(c) => format!("{}", c),
-			None => "gateway".to_string(),
-		}
-	}
-}

 /// A ring distributing fairly objects to nodes
 #[derive(Clone)]
 pub struct Ring {
@@ -70,7 +28,7 @@ pub struct Ring {
 	pub replication_factor: usize,

 	/// The network configuration used to generate this ring
-	pub config: NetworkConfig,
+	pub layout: ClusterLayout,

 	// Internal order of nodes used to make a more compact representation of the ring
 	nodes: Vec<Uuid>,
@@ -81,7 +39,7 @@ pub struct Ring {

 // Type to store compactly the id of a node in the system
 // Change this to u16 the day we want to have more than 256 nodes in a cluster
-type CompactNodeType = u8;
+pub type CompactNodeType = u8;

 // The maximum number of times an object might get replicated
 // This must be at least 3 because Garage supports 3-way replication
@@ -102,132 +60,26 @@ struct RingEntry {
 }

 impl Ring {
-	// TODO this function MUST be refactored, it's 100 lines long, with a 50 lines loop, going up to 6
-	// levels of imbrication. It is basically impossible to test, maintain, or understand for an
-	// outsider.
-	pub(crate) fn new(config: NetworkConfig, replication_factor: usize) -> Self {
-		// Create a vector of partition indices (0 to 2**PARTITION_BITS-1)
-		let partitions_idx = (0usize..(1usize << PARTITION_BITS)).collect::<Vec<_>>();
-
-		let zones = config
-			.members
-			.iter()
-			.filter(|(_id, info)| info.capacity.is_some())
-			.map(|(_id, info)| info.zone.as_str())
-			.collect::<HashSet<&str>>();
-		let n_zones = zones.len();
-
-		// Prepare ring
-		let mut partitions: Vec<Vec<(&Uuid, &NetworkConfigEntry)>> = partitions_idx
-			.iter()
-			.map(|_i| Vec::new())
-			.collect::<Vec<_>>();
-
-		// Create MagLev priority queues for each node
-		let mut queues = config
-			.members
-			.iter()
-			.filter(|(_id, info)| info.capacity.is_some())
-			.map(|(node_id, node_info)| {
-				let mut parts = partitions_idx
-					.iter()
-					.map(|i| {
-						let part_data =
-							[&u16::to_be_bytes(*i as u16)[..], node_id.as_slice()].concat();
-						(*i, fasthash(&part_data[..]))
-					})
-					.collect::<Vec<_>>();
-				parts.sort_by_key(|(_i, h)| *h);
-				let parts_i = parts.iter().map(|(i, _h)| *i).collect::<Vec<_>>();
-				(node_id, node_info, parts_i, 0)
-			})
-			.collect::<Vec<_>>();
-
-		let max_capacity = config
-			.members
-			.iter()
-			.filter_map(|(_, node_info)| node_info.capacity)
-			.fold(0, std::cmp::max);
-
-		assert!(replication_factor <= MAX_REPLICATION);
-
-		// Fill up ring
-		for rep in 0..replication_factor {
-			queues.sort_by_key(|(ni, _np, _q, _p)| {
-				let queue_data = [&u16::to_be_bytes(rep as u16)[..], ni.as_slice()].concat();
-				fasthash(&queue_data[..])
-			});
-
-			for (_, _, _, pos) in queues.iter_mut() {
-				*pos = 0;
-			}
-
-			let mut remaining = partitions_idx.len();
-			while remaining > 0 {
-				let remaining0 = remaining;
-				for i_round in 0..max_capacity {
-					for (node_id, node_info, q, pos) in queues.iter_mut() {
-						if i_round >= node_info.capacity.unwrap() {
-							continue;
-						}
-						for (pos2, &qv) in q.iter().enumerate().skip(*pos) {
-							if partitions[qv].len() != rep {
-								continue;
-							}
-							let p_zns = partitions[qv]
-								.iter()
-								.map(|(_id, info)| info.zone.as_str())
-								.collect::<HashSet<&str>>();
-							if (p_zns.len() < n_zones && !p_zns.contains(&node_info.zone.as_str()))
-								|| (p_zns.len() == n_zones
-									&& !partitions[qv].iter().any(|(id, _i)| id == node_id))
-							{
-								partitions[qv].push((node_id, node_info));
-								remaining -= 1;
-								*pos = pos2 + 1;
-								break;
-							}
-						}
-					}
-				}
-				if remaining == remaining0 {
-					// No progress made, exit
-					warn!("Could not build ring, not enough nodes configured.");
-					return Self {
-						replication_factor,
-						config,
-						nodes: vec![],
-						ring: vec![],
-					};
-				}
-			}
-		}
-
-		// Make a canonical order for nodes
-		let nodes = config
-			.members
-			.iter()
-			.filter(|(_id, info)| info.capacity.is_some())
-			.map(|(id, _)| *id)
-			.collect::<Vec<_>>();
-		let nodes_rev = nodes
-			.iter()
-			.enumerate()
-			.map(|(i, id)| (*id, i as CompactNodeType))
-			.collect::<HashMap<Uuid, CompactNodeType>>();
-
-		let ring = partitions
-			.iter()
-			.enumerate()
-			.map(|(i, nodes)| {
+	pub(crate) fn new(layout: ClusterLayout, replication_factor: usize) -> Self {
+		if replication_factor != layout.replication_factor {
+			warn!("Could not build ring: replication factor does not match between local configuration and network role assignation.");
+			return Self::empty(layout, replication_factor);
+		}
+
+		if layout.ring_assignation_data.len() != replication_factor * (1 << PARTITION_BITS) {
+			warn!("Could not build ring: network role assignation data has invalid length");
+			return Self::empty(layout, replication_factor);
+		}
+
+		let nodes = layout.node_id_vec.clone();
+		let ring = (0..(1 << PARTITION_BITS))
+			.map(|i| {
 				let top = (i as u16) << (16 - PARTITION_BITS);
-				let nodes = nodes
-					.iter()
-					.map(|(id, _info)| *nodes_rev.get(id).unwrap())
-					.collect::<Vec<CompactNodeType>>();
-				assert!(nodes.len() == replication_factor);
 				let mut nodes_buf = [0u8; MAX_REPLICATION];
-				nodes_buf[..replication_factor].copy_from_slice(&nodes[..]);
+				nodes_buf[..replication_factor].copy_from_slice(
+					&layout.ring_assignation_data
+						[replication_factor * i..replication_factor * (i + 1)],
+				);
 				RingEntry {
 					hash_prefix: top,
 					nodes_buf,
@@ -237,12 +89,21 @@ impl Ring {

 		Self {
 			replication_factor,
-			config,
+			layout,
 			nodes,
 			ring,
 		}
 	}

+	fn empty(layout: ClusterLayout, replication_factor: usize) -> Self {
+		Self {
+			replication_factor,
+			layout,
+			nodes: vec![],
+			ring: vec![],
+		}
+	}
+
 	/// Get the partition in which data would fall on
 	pub fn partition_of(&self, position: &Hash) -> Partition {
 		let top = u16::from_be_bytes(position.as_slice()[0..2].try_into().unwrap());
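partition_of only looks at the first two bytes of the hash; with PARTITION_BITS = 8 the partition id can be recovered as top >> (16 - PARTITION_BITS), i.e. the top byte (the shift itself is below the cut of this hunk, so the snippet is a sketch of the arithmetic, not a copy of the function):

	const PARTITION_BITS: usize = 8;

	// Partition id = top PARTITION_BITS bits of the first 16 bits of the hash.
	fn partition_of(hash_prefix: [u8; 2]) -> u16 {
		let top = u16::from_be_bytes(hash_prefix);
		top >> (16 - PARTITION_BITS)
	}

	fn main() {
		assert_eq!(partition_of([0x00, 0xff]), 0x00);
		assert_eq!(partition_of([0xab, 0xcd]), 0xab);
	}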
src/rpc/rpc_helper.rs
@@ -225,7 +225,7 @@ impl RpcHelper {
 		// Retrieve some status variables that we will use to sort requests
 		let peer_list = self.0.fullmesh.get_peer_list();
 		let ring: Arc<Ring> = self.0.ring.borrow().clone();
-		let our_zone = match ring.config.members.get(&self.0.our_node_id) {
+		let our_zone = match ring.layout.node_role(&self.0.our_node_id) {
 			Some(pc) => &pc.zone,
 			None => "",
 		};
@@ -238,7 +238,7 @@ impl RpcHelper {
 		// and within a same zone we priorize nodes with the lowest latency.
 		let mut requests = requests
 			.map(|(to, fut)| {
-				let peer_zone = match ring.config.members.get(&to) {
+				let peer_zone = match ring.layout.node_role(&to) {
 					Some(pc) => &pc.zone,
 					None => "",
 				};
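With node_role() in place, the ordering strategy itself is unchanged: peers in the requester's own zone are tried first, and within a zone, peers with the lowest measured latency. A hedged sketch of such a sort key on made-up sample data (illustrative only, not the RpcHelper code):

	fn main() {
		let our_zone = "paris";
		// (peer name, zone, measured latency in ms): hypothetical sample data.
		let mut peers = vec![
			("n1", "berlin", 40),
			("n2", "paris", 12),
			("n3", "paris", 3),
			("n4", "tokyo", 200),
		];
		// false < true, so same-zone peers sort first; ties break on latency.
		peers.sort_by_key(|(_, zone, latency)| (*zone != our_zone, *latency));
		assert_eq!(peers[0].0, "n3");
		assert_eq!(peers[1].0, "n2");
	}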
src/rpc/system.rs
@@ -23,12 +23,13 @@ use netapp::{NetApp, NetworkKey, NodeID, NodeKey};

 use garage_util::background::BackgroundRunner;
 use garage_util::config::Config;
-use garage_util::data::Uuid;
+use garage_util::data::*;
 use garage_util::error::*;
 use garage_util::persister::Persister;
 use garage_util::time::*;

 use crate::consul::*;
+use crate::layout::*;
 use crate::ring::*;
 use crate::rpc_helper::*;

@@ -48,13 +49,13 @@ pub enum SystemRpc {
 	Ok,
 	/// Request to connect to a specific node (in <pubkey>@<host>:<port> format)
 	Connect(String),
-	/// Ask other node its config. Answered with AdvertiseConfig
-	PullConfig,
+	/// Ask other node its cluster layout. Answered with AdvertiseClusterLayout
+	PullClusterLayout,
 	/// Advertise Garage status. Answered with another AdvertiseStatus.
 	/// Exchanged with every node on a regular basis.
 	AdvertiseStatus(NodeStatus),
-	/// Advertisement of nodes config. Sent spontanously or in response to PullConfig
-	AdvertiseConfig(NetworkConfig),
+	/// Advertisement of cluster layout. Sent spontanously or in response to PullClusterLayout
+	AdvertiseClusterLayout(ClusterLayout),
 	/// Get known nodes states
 	GetKnownNodes,
 	/// Return known nodes
@@ -70,7 +71,7 @@ pub struct System {
 	/// The id of this node
 	pub id: Uuid,

-	persist_config: Persister<NetworkConfig>,
+	persist_cluster_layout: Persister<ClusterLayout>,
 	persist_peer_list: Persister<Vec<(Uuid, SocketAddr)>>,

 	local_status: ArcSwap<NodeStatus>,
@@ -103,8 +104,10 @@ pub struct NodeStatus {
 	pub hostname: String,
 	/// Replication factor configured on the node
 	pub replication_factor: usize,
-	/// Configuration version
-	pub config_version: u64,
+	/// Cluster layout version
+	pub cluster_layout_version: u64,
+	/// Hash of cluster layout staging data
+	pub cluster_layout_staging_hash: Hash,
 }

 #[derive(Debug, Clone, Serialize, Deserialize)]
@@ -187,17 +190,17 @@ impl System {
 			gen_node_key(&config.metadata_dir).expect("Unable to read or generate node ID");
 		info!("Node public key: {}", hex::encode(&node_key.public_key()));

-		let persist_config = Persister::new(&config.metadata_dir, "network_config");
+		let persist_cluster_layout = Persister::new(&config.metadata_dir, "cluster_layout");
 		let persist_peer_list = Persister::new(&config.metadata_dir, "peer_list");

-		let net_config = match persist_config.load() {
+		let cluster_layout = match persist_cluster_layout.load() {
 			Ok(x) => x,
 			Err(e) => {
 				info!(
-					"No valid previous network configuration stored ({}), starting fresh.",
+					"No valid previous cluster layout stored ({}), starting fresh.",
 					e
 				);
-				NetworkConfig::new()
+				ClusterLayout::new(replication_factor)
 			}
 		};

@@ -206,10 +209,11 @@ impl System {
 				.into_string()
 				.unwrap_or_else(|_| "<invalid utf-8>".to_string()),
 			replication_factor,
-			config_version: net_config.version,
+			cluster_layout_version: cluster_layout.version,
+			cluster_layout_staging_hash: cluster_layout.staging_hash,
 		};

-		let ring = Ring::new(net_config, replication_factor);
+		let ring = Ring::new(cluster_layout, replication_factor);
 		let (update_ring, ring) = watch::channel(Arc::new(ring));

 		if let Some(addr) = config.rpc_public_addr {
@@ -229,7 +233,7 @@ impl System {

 		let sys = Arc::new(System {
 			id: netapp.id.into(),
-			persist_config,
+			persist_cluster_layout,
 			persist_peer_list,
 			local_status: ArcSwap::new(Arc::new(local_status)),
 			node_status: RwLock::new(HashMap::new()),
@@ -292,12 +296,12 @@ impl System {
 	}

 	/// Save network configuration to disc
-	async fn save_network_config(self: Arc<Self>) -> Result<(), Error> {
+	async fn save_cluster_layout(self: Arc<Self>) -> Result<(), Error> {
 		let ring: Arc<Ring> = self.ring.borrow().clone();
-		self.persist_config
-			.save_async(&ring.config)
+		self.persist_cluster_layout
+			.save_async(&ring.layout)
 			.await
-			.expect("Cannot save current cluster configuration");
+			.expect("Cannot save current cluster layout");
 		Ok(())
 	}

@@ -305,7 +309,8 @@ impl System {
 		let mut new_si: NodeStatus = self.local_status.load().as_ref().clone();

 		let ring = self.ring.borrow();
-		new_si.config_version = ring.config.version;
+		new_si.cluster_layout_version = ring.layout.version;
+		new_si.cluster_layout_staging_hash = ring.layout.staging_hash;
 		self.local_status.swap(Arc::new(new_si));
 	}

@@ -337,9 +342,9 @@ impl System {
 		)));
 	}

-	fn handle_pull_config(&self) -> SystemRpc {
+	fn handle_pull_cluster_layout(&self) -> SystemRpc {
 		let ring = self.ring.borrow().clone();
-		SystemRpc::AdvertiseConfig(ring.config.clone())
+		SystemRpc::AdvertiseClusterLayout(ring.layout.clone())
 	}

 	fn handle_get_known_nodes(&self) -> SystemRpc {
@@ -360,7 +365,8 @@ impl System {
 			.unwrap_or(NodeStatus {
 				hostname: "?".to_string(),
 				replication_factor: 0,
-				config_version: 0,
+				cluster_layout_version: 0,
+				cluster_layout_staging_hash: Hash::from([0u8; 32]),
 			}),
 		})
 		.collect::<Vec<_>>();
@@ -381,10 +387,12 @@ impl System {
 			std::process::exit(1);
 		}

-		if info.config_version > local_info.config_version {
+		if info.cluster_layout_version > local_info.cluster_layout_version
+			|| info.cluster_layout_staging_hash != local_info.cluster_layout_staging_hash
+		{
 			let self2 = self.clone();
 			self.background.spawn_cancellable(async move {
-				self2.pull_config(from).await;
+				self2.pull_cluster_layout(from).await;
 				Ok(())
 			});
 		}
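The broadened condition means a node now pulls a peer's layout not only when the peer has committed a newer version, but also whenever the peers' staged (not yet committed) role changes differ, so staged edits propagate cluster-wide before being committed. The decision, reduced to its two inputs (a sketch with stand-in types, not the NodeStatus structs themselves):

	// (committed layout version, hash of staged changes), as gossiped in node status.
	type LayoutSummary = (u64, [u8; 32]);

	fn should_pull(local: &LayoutSummary, remote: &LayoutSummary) -> bool {
		remote.0 > local.0 || remote.1 != local.1
	}

	fn main() {
		let local = (4u64, [0u8; 32]);
		let mut remote = local;
		assert!(!should_pull(&local, &remote)); // in sync: nothing to do
		remote.1[0] = 1; // peer carries different staged changes
		assert!(should_pull(&local, &remote));
	}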
@@ -397,32 +405,39 @@ impl System {
 		Ok(SystemRpc::Ok)
 	}

-	async fn handle_advertise_config(
+	async fn handle_advertise_cluster_layout(
 		self: Arc<Self>,
-		adv: &NetworkConfig,
+		adv: &ClusterLayout,
 	) -> Result<SystemRpc, Error> {
 		let update_ring = self.update_ring.lock().await;
-		let ring: Arc<Ring> = self.ring.borrow().clone();
+		let mut layout: ClusterLayout = self.ring.borrow().layout.clone();

-		if adv.version > ring.config.version {
-			let ring = Ring::new(adv.clone(), self.replication_factor);
+		let prev_layout_check = layout.check();
+		if layout.merge(adv) {
+			if prev_layout_check && !layout.check() {
+				error!("New cluster layout is invalid, discarding.");
+				return Err(Error::Message(
+					"New cluster layout is invalid, discarding.".into(),
+				));
+			}
+
+			let ring = Ring::new(layout.clone(), self.replication_factor);
 			update_ring.send(Arc::new(ring))?;
 			drop(update_ring);

 			let self2 = self.clone();
-			let adv = adv.clone();
 			self.background.spawn_cancellable(async move {
 				self2
 					.rpc
 					.broadcast(
 						&self2.system_endpoint,
-						SystemRpc::AdvertiseConfig(adv),
+						SystemRpc::AdvertiseClusterLayout(layout),
 						RequestStrategy::with_priority(PRIO_HIGH),
 					)
 					.await;
 				Ok(())
 			});
-			self.background.spawn(self.clone().save_network_config());
+			self.background.spawn(self.clone().save_cluster_layout());
 		}

 		Ok(SystemRpc::Ok)
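The handler is now merge-based rather than newest-version-wins: the advertised layout is merged into the local one as a CRDT, and only if the merge actually changed something is the ring rebuilt, the merged layout re-broadcast, and the result persisted; a merge that would turn a previously valid layout invalid is rejected. The control flow, abstracted over any mergeable type (a sketch, not Garage's Crdt trait):

	// Minimal shape of the merge-then-validate pattern used above.
	trait Mergeable {
		fn merge(&mut self, other: &Self) -> bool; // true iff self changed
		fn check(&self) -> bool; // do the layout invariants still hold?
	}

	// Returns Ok(true) when the caller should rebuild the ring,
	// re-broadcast the merged value and persist it.
	fn on_advertise<T: Mergeable>(local: &mut T, adv: &T) -> Result<bool, String> {
		let was_valid = local.check();
		if local.merge(adv) {
			if was_valid && !local.check() {
				return Err("merged layout is invalid, discarding".into());
			}
			Ok(true)
		} else {
			Ok(false) // nothing changed: no re-broadcast, so gossip converges quietly
		}
	}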
@@ -456,14 +471,15 @@ impl System {
 		};

 		while !*stop_signal.borrow() {
-			let not_configured = self.ring.borrow().config.members.is_empty();
+			let not_configured = !self.ring.borrow().layout.check();
 			let no_peers = self.fullmesh.get_peer_list().len() < self.replication_factor;
+			let expected_n_nodes = self.ring.borrow().layout.num_nodes();
 			let bad_peers = self
 				.fullmesh
 				.get_peer_list()
 				.iter()
 				.filter(|p| p.is_up())
-				.count() != self.ring.borrow().config.members.len();
+				.count() != expected_n_nodes;

 			if not_configured || no_peers || bad_peers {
 				info!("Doing a bootstrap/discovery step (not_configured: {}, no_peers: {}, bad_peers: {})", not_configured, no_peers, bad_peers);
@@ -533,18 +549,18 @@ impl System {
 		self.persist_peer_list.save_async(&peer_list).await
 	}

-	async fn pull_config(self: Arc<Self>, peer: Uuid) {
+	async fn pull_cluster_layout(self: Arc<Self>, peer: Uuid) {
 		let resp = self
 			.rpc
 			.call(
 				&self.system_endpoint,
 				peer,
-				SystemRpc::PullConfig,
+				SystemRpc::PullClusterLayout,
 				RequestStrategy::with_priority(PRIO_HIGH).with_timeout(PING_TIMEOUT),
 			)
 			.await;
-		if let Ok(SystemRpc::AdvertiseConfig(config)) = resp {
-			let _: Result<_, _> = self.handle_advertise_config(&config).await;
+		if let Ok(SystemRpc::AdvertiseClusterLayout(layout)) = resp {
+			let _: Result<_, _> = self.handle_advertise_cluster_layout(&layout).await;
 		}
 	}
 }
@@ -554,9 +570,11 @@ impl EndpointHandler<SystemRpc> for System {
 	async fn handle(self: &Arc<Self>, msg: &SystemRpc, from: NodeID) -> Result<SystemRpc, Error> {
 		match msg {
 			SystemRpc::Connect(node) => self.handle_connect(node).await,
-			SystemRpc::PullConfig => Ok(self.handle_pull_config()),
+			SystemRpc::PullClusterLayout => Ok(self.handle_pull_cluster_layout()),
 			SystemRpc::AdvertiseStatus(adv) => self.handle_advertise_status(from.into(), adv).await,
-			SystemRpc::AdvertiseConfig(adv) => self.clone().handle_advertise_config(adv).await,
+			SystemRpc::AdvertiseClusterLayout(adv) => {
+				self.clone().handle_advertise_cluster_layout(adv).await
+			}
 			SystemRpc::GetKnownNodes => Ok(self.handle_get_known_nodes()),
 			_ => Err(Error::BadRpc("Unexpected RPC message".to_string())),
 		}
src/table/Cargo.toml
@@ -1,11 +1,12 @@
 [package]
 name = "garage_table"
-version = "0.4.0"
+version = "0.5.0"
 authors = ["Alex Auvolat <alex@adnab.me>"]
 edition = "2018"
 license = "AGPL-3.0"
 description = "Table sharding and replication engine (DynamoDB-like) for the Garage object store"
 repository = "https://git.deuxfleurs.fr/Deuxfleurs/garage"
+readme = "../../README.md"

 [lib]
 path = "lib.rs"
@@ -13,8 +14,8 @@ path = "lib.rs"
 # See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html

 [dependencies]
-garage_rpc = { version = "0.4.0", path = "../rpc" }
-garage_util = { version = "0.4.0", path = "../util" }
+garage_rpc = { version = "0.5.0", path = "../rpc" }
+garage_util = { version = "0.5.0", path = "../util" }

 async-trait = "0.1.7"
 bytes = "1.0"
src/table/lib.rs
@@ -4,7 +4,6 @@
 #[macro_use]
 extern crate log;

-pub mod crdt;
 pub mod schema;
 pub mod util;

@@ -18,3 +17,7 @@ pub mod table;
 pub use schema::*;
 pub use table::*;
 pub use util::*;
+
+pub mod crdt {
+	pub use garage_util::crdt::*;
+}
src/table/replication/fullcopy.rs
@@ -28,10 +28,10 @@ impl TableReplication for TableFullReplication {

 	fn write_nodes(&self, _hash: &Hash) -> Vec<Uuid> {
 		let ring = self.system.ring.borrow();
-		ring.config.members.keys().cloned().collect::<Vec<_>>()
+		ring.layout.node_ids().to_vec()
 	}
 	fn write_quorum(&self) -> usize {
-		let nmembers = self.system.ring.borrow().config.members.len();
+		let nmembers = self.system.ring.borrow().layout.node_ids().len();
 		if nmembers > self.max_faults {
 			nmembers - self.max_faults
 		} else {
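For fully-replicated tables the quorum now derives from the layout's node list: with n nodes and a tolerated fault budget of max_faults, writes must reach n - max_faults nodes, so 5 nodes with max_faults = 1 gives a write quorum of 4. A sketch of that arithmetic (the hunk truncates the else branch, so the small-cluster fallback of 1 below is an assumption):

	// Quorum rule mirrored from write_quorum() above.
	fn write_quorum(n_nodes: usize, max_faults: usize) -> usize {
		if n_nodes > max_faults {
			n_nodes - max_faults
		} else {
			1 // assumption: the else branch is not visible in this hunk
		}
	}

	fn main() {
		assert_eq!(write_quorum(5, 1), 4);
		assert_eq!(write_quorum(1, 1), 1);
	}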
src/util/Cargo.toml
@@ -1,11 +1,12 @@
 [package]
 name = "garage_util"
-version = "0.4.0"
+version = "0.5.0"
 authors = ["Alex Auvolat <alex@adnab.me>"]
 edition = "2018"
 license = "AGPL-3.0"
 description = "Utility crate for the Garage object store"
 repository = "https://git.deuxfleurs.fr/Deuxfleurs/garage"
+readme = "../../README.md"

 [lib]
 path = "lib.rs"
src/util/crdt/crdt.rs
@@ -1,4 +1,4 @@
-use garage_util::data::*;
+use crate::data::*;

 /// Definition of a CRDT - all CRDT Rust types implement this.
 ///
src/util/crdt/lww.rs
@@ -1,6 +1,8 @@
+use std::cmp::Ordering;
+
 use serde::{Deserialize, Serialize};

-use garage_util::time::now_msec;
+use crate::time::now_msec;

 use crate::crdt::crdt::*;

@@ -104,11 +106,15 @@ where
 	T: Clone + Crdt,
 {
 	fn merge(&mut self, other: &Self) {
-		if other.ts > self.ts {
-			self.ts = other.ts;
-			self.v = other.v.clone();
-		} else if other.ts == self.ts {
-			self.v.merge(&other.v);
+		match other.ts.cmp(&self.ts) {
+			Ordering::Greater => {
+				self.ts = other.ts;
+				self.v = other.v.clone();
+			}
+			Ordering::Equal => {
+				self.v.merge(&other.v);
+			}
+			Ordering::Less => (),
 		}
 	}
 }
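Rewriting the comparison as a match on Ordering makes the three cases exhaustive: a strictly newer timestamp replaces the value, an equal timestamp merges the two values so that concurrent writes converge deterministically, and an older one is ignored. A toy last-write-wins register showing the same semantics (u64 max as the inner merge; this is not Garage's Lww type):

	use std::cmp::Ordering;

	// Toy last-write-wins register over (timestamp, value).
	#[derive(Clone, Copy, Debug, PartialEq)]
	struct Lww {
		ts: u64,
		v: u64,
	}

	impl Lww {
		fn merge(&mut self, other: &Self) {
			match other.ts.cmp(&self.ts) {
				Ordering::Greater => *self = *other, // newer write wins
				Ordering::Equal => self.v = self.v.max(other.v), // tie: merge values
				Ordering::Less => (), // older write is ignored
			}
		}
	}

	fn main() {
		let mut a = Lww { ts: 10, v: 1 };
		a.merge(&Lww { ts: 12, v: 7 });
		assert_eq!(a, Lww { ts: 12, v: 7 });
		a.merge(&Lww { ts: 12, v: 3 }); // concurrent write: result is order-independent
		assert_eq!(a, Lww { ts: 12, v: 7 });
		a.merge(&Lww { ts: 5, v: 99 }); // stale write: ignored
		assert_eq!(a.v, 7);
	}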
src/util/crdt/lww_map.rs
@@ -1,6 +1,8 @@
+use std::cmp::Ordering;
+
 use serde::{Deserialize, Serialize};

-use garage_util::time::now_msec;
+use crate::time::now_msec;

 use crate::crdt::crdt::*;

@@ -135,11 +137,15 @@ where
 			match self.vals.binary_search_by(|(k2, _, _)| k2.cmp(k)) {
 				Ok(i) => {
 					let (_, ts1, _v1) = &self.vals[i];
-					if ts2 > ts1 {
-						self.vals[i].1 = *ts2;
-						self.vals[i].2 = v2.clone();
-					} else if ts1 == ts2 {
-						self.vals[i].2.merge(v2);
+					match ts2.cmp(ts1) {
+						Ordering::Greater => {
+							self.vals[i].1 = *ts2;
+							self.vals[i].2 = v2.clone();
+						}
+						Ordering::Equal => {
+							self.vals[i].2.merge(v2);
+						}
+						Ordering::Less => (),
 					}
 				}
 				Err(i) => {
src/util/lib.rs
@@ -5,6 +5,7 @@ extern crate log;

 pub mod background;
 pub mod config;
+pub mod crdt;
 pub mod data;
 pub mod error;
 pub mod persister;
src/web/Cargo.toml
@@ -1,11 +1,12 @@
 [package]
 name = "garage_web"
-version = "0.4.0"
+version = "0.5.0"
 authors = ["Alex Auvolat <alex@adnab.me>", "Quentin Dufour <quentin@dufour.io>"]
 edition = "2018"
 license = "AGPL-3.0"
 description = "S3-like website endpoint crate for the Garage object store"
 repository = "https://git.deuxfleurs.fr/Deuxfleurs/garage"
+readme = "../../README.md"

 [lib]
 path = "lib.rs"
@@ -13,10 +14,10 @@ path = "lib.rs"
 # See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html

 [dependencies]
-garage_api = { version = "0.4.0", path = "../api" }
-garage_model = { version = "0.4.0", path = "../model" }
-garage_util = { version = "0.4.0", path = "../util" }
-garage_table = { version = "0.4.0", path = "../table" }
+garage_api = { version = "0.5.0", path = "../api" }
+garage_model = { version = "0.5.0", path = "../model" }
+garage_util = { version = "0.5.0", path = "../util" }
+garage_table = { version = "0.5.0", path = "../table" }

 err-derive = "0.3"
 log = "0.4"