improve how nodes roles are assigned in garage #152
15
Cargo.lock
generated
|
@ -379,7 +379,7 @@ dependencies = [
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "garage"
|
name = "garage"
|
||||||
version = "0.4.0"
|
version = "0.5.0"
|
||||||
dependencies = [
|
dependencies = [
|
||||||
"async-trait",
|
"async-trait",
|
||||||
"bytes 1.1.0",
|
"bytes 1.1.0",
|
||||||
|
@ -408,7 +408,7 @@ dependencies = [
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "garage_api"
|
name = "garage_api"
|
||||||
version = "0.4.0"
|
version = "0.5.0"
|
||||||
dependencies = [
|
dependencies = [
|
||||||
"base64",
|
"base64",
|
||||||
"bytes 1.1.0",
|
"bytes 1.1.0",
|
||||||
|
@ -440,7 +440,7 @@ dependencies = [
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "garage_model"
|
name = "garage_model"
|
||||||
version = "0.4.0"
|
version = "0.5.0"
|
||||||
dependencies = [
|
dependencies = [
|
||||||
"arc-swap",
|
"arc-swap",
|
||||||
"async-trait",
|
"async-trait",
|
||||||
|
@ -462,7 +462,7 @@ dependencies = [
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "garage_rpc"
|
name = "garage_rpc"
|
||||||
version = "0.4.0"
|
version = "0.5.0"
|
||||||
dependencies = [
|
dependencies = [
|
||||||
"arc-swap",
|
"arc-swap",
|
||||||
"async-trait",
|
"async-trait",
|
||||||
|
@ -479,6 +479,7 @@ dependencies = [
|
||||||
"rand",
|
"rand",
|
||||||
"rmp-serde 0.15.5",
|
"rmp-serde 0.15.5",
|
||||||
"serde",
|
"serde",
|
||||||
|
"serde_bytes",
|
||||||
"serde_json",
|
"serde_json",
|
||||||
"tokio",
|
"tokio",
|
||||||
"tokio-stream",
|
"tokio-stream",
|
||||||
|
@ -486,7 +487,7 @@ dependencies = [
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "garage_table"
|
name = "garage_table"
|
||||||
version = "0.4.0"
|
version = "0.5.0"
|
||||||
dependencies = [
|
dependencies = [
|
||||||
"async-trait",
|
"async-trait",
|
||||||
"bytes 1.1.0",
|
"bytes 1.1.0",
|
||||||
|
@ -506,7 +507,7 @@ dependencies = [
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "garage_util"
|
name = "garage_util"
|
||||||
version = "0.4.0"
|
version = "0.5.0"
|
||||||
dependencies = [
|
dependencies = [
|
||||||
"blake2",
|
"blake2",
|
||||||
"chrono",
|
"chrono",
|
||||||
|
@ -530,7 +531,7 @@ dependencies = [
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "garage_web"
|
name = "garage_web"
|
||||||
version = "0.4.0"
|
version = "0.5.0"
|
||||||
dependencies = [
|
dependencies = [
|
||||||
"err-derive 0.3.0",
|
"err-derive 0.3.0",
|
||||||
"futures",
|
"futures",
|
||||||
|
|
83
Cargo.nix
|
@ -40,13 +40,13 @@ in
|
||||||
{
|
{
|
||||||
cargo2nixVersion = "0.9.0";
|
cargo2nixVersion = "0.9.0";
|
||||||
workspace = {
|
workspace = {
|
||||||
garage_util = rustPackages.unknown.garage_util."0.4.0";
|
garage_util = rustPackages.unknown.garage_util."0.5.0";
|
||||||
garage_rpc = rustPackages.unknown.garage_rpc."0.4.0";
|
garage_rpc = rustPackages.unknown.garage_rpc."0.5.0";
|
||||||
garage_table = rustPackages.unknown.garage_table."0.4.0";
|
garage_table = rustPackages.unknown.garage_table."0.5.0";
|
||||||
garage_model = rustPackages.unknown.garage_model."0.4.0";
|
garage_model = rustPackages.unknown.garage_model."0.5.0";
|
||||||
garage_api = rustPackages.unknown.garage_api."0.4.0";
|
garage_api = rustPackages.unknown.garage_api."0.5.0";
|
||||||
garage_web = rustPackages.unknown.garage_web."0.4.0";
|
garage_web = rustPackages.unknown.garage_web."0.5.0";
|
||||||
garage = rustPackages.unknown.garage."0.4.0";
|
garage = rustPackages.unknown.garage."0.5.0";
|
||||||
};
|
};
|
||||||
"registry+https://github.com/rust-lang/crates.io-index".aho-corasick."0.7.18" = overridableMkRustCrate (profileName: rec {
|
"registry+https://github.com/rust-lang/crates.io-index".aho-corasick."0.7.18" = overridableMkRustCrate (profileName: rec {
|
||||||
name = "aho-corasick";
|
name = "aho-corasick";
|
||||||
|
@ -246,7 +246,7 @@ in
|
||||||
registry = "registry+https://github.com/rust-lang/crates.io-index";
|
registry = "registry+https://github.com/rust-lang/crates.io-index";
|
||||||
src = fetchCratesIo { inherit name version; sha256 = "95059428f66df56b63431fdb4e1947ed2190586af5c5a8a8b71122bdf5a7f469"; };
|
src = fetchCratesIo { inherit name version; sha256 = "95059428f66df56b63431fdb4e1947ed2190586af5c5a8a8b71122bdf5a7f469"; };
|
||||||
dependencies = {
|
dependencies = {
|
||||||
${ if hostPlatform.config == "aarch64-apple-darwin" || hostPlatform.parsed.cpu.name == "aarch64" && hostPlatform.parsed.kernel.name == "linux" then "libc" else null } = rustPackages."registry+https://github.com/rust-lang/crates.io-index".libc."0.2.103" { inherit profileName; };
|
${ if hostPlatform.parsed.cpu.name == "aarch64" && hostPlatform.parsed.kernel.name == "linux" || hostPlatform.config == "aarch64-apple-darwin" then "libc" else null } = rustPackages."registry+https://github.com/rust-lang/crates.io-index".libc."0.2.103" { inherit profileName; };
|
||||||
};
|
};
|
||||||
});
|
});
|
||||||
|
|
||||||
|
@ -606,9 +606,9 @@ in
|
||||||
};
|
};
|
||||||
});
|
});
|
||||||
|
|
||||||
"unknown".garage."0.4.0" = overridableMkRustCrate (profileName: rec {
|
"unknown".garage."0.5.0" = overridableMkRustCrate (profileName: rec {
|
||||||
name = "garage";
|
name = "garage";
|
||||||
version = "0.4.0";
|
version = "0.5.0";
|
||||||
registry = "unknown";
|
registry = "unknown";
|
||||||
src = fetchCrateLocal (workspaceSrc + "/src/garage");
|
src = fetchCrateLocal (workspaceSrc + "/src/garage");
|
||||||
dependencies = {
|
dependencies = {
|
||||||
|
@ -616,12 +616,12 @@ in
|
||||||
bytes = rustPackages."registry+https://github.com/rust-lang/crates.io-index".bytes."1.1.0" { inherit profileName; };
|
bytes = rustPackages."registry+https://github.com/rust-lang/crates.io-index".bytes."1.1.0" { inherit profileName; };
|
||||||
futures = rustPackages."registry+https://github.com/rust-lang/crates.io-index".futures."0.3.17" { inherit profileName; };
|
futures = rustPackages."registry+https://github.com/rust-lang/crates.io-index".futures."0.3.17" { inherit profileName; };
|
||||||
futures_util = rustPackages."registry+https://github.com/rust-lang/crates.io-index".futures-util."0.3.17" { inherit profileName; };
|
futures_util = rustPackages."registry+https://github.com/rust-lang/crates.io-index".futures-util."0.3.17" { inherit profileName; };
|
||||||
garage_api = rustPackages."unknown".garage_api."0.4.0" { inherit profileName; };
|
garage_api = rustPackages."unknown".garage_api."0.5.0" { inherit profileName; };
|
||||||
garage_model = rustPackages."unknown".garage_model."0.4.0" { inherit profileName; };
|
garage_model = rustPackages."unknown".garage_model."0.5.0" { inherit profileName; };
|
||||||
garage_rpc = rustPackages."unknown".garage_rpc."0.4.0" { inherit profileName; };
|
garage_rpc = rustPackages."unknown".garage_rpc."0.5.0" { inherit profileName; };
|
||||||
garage_table = rustPackages."unknown".garage_table."0.4.0" { inherit profileName; };
|
garage_table = rustPackages."unknown".garage_table."0.5.0" { inherit profileName; };
|
||||||
garage_util = rustPackages."unknown".garage_util."0.4.0" { inherit profileName; };
|
garage_util = rustPackages."unknown".garage_util."0.5.0" { inherit profileName; };
|
||||||
garage_web = rustPackages."unknown".garage_web."0.4.0" { inherit profileName; };
|
garage_web = rustPackages."unknown".garage_web."0.5.0" { inherit profileName; };
|
||||||
git_version = rustPackages."registry+https://github.com/rust-lang/crates.io-index".git-version."0.3.5" { inherit profileName; };
|
git_version = rustPackages."registry+https://github.com/rust-lang/crates.io-index".git-version."0.3.5" { inherit profileName; };
|
||||||
hex = rustPackages."registry+https://github.com/rust-lang/crates.io-index".hex."0.4.3" { inherit profileName; };
|
hex = rustPackages."registry+https://github.com/rust-lang/crates.io-index".hex."0.4.3" { inherit profileName; };
|
||||||
sodiumoxide = rustPackages."registry+https://github.com/rust-lang/crates.io-index".kuska-sodiumoxide."0.2.5-0" { inherit profileName; };
|
sodiumoxide = rustPackages."registry+https://github.com/rust-lang/crates.io-index".kuska-sodiumoxide."0.2.5-0" { inherit profileName; };
|
||||||
|
@ -638,9 +638,9 @@ in
|
||||||
};
|
};
|
||||||
});
|
});
|
||||||
|
|
||||||
"unknown".garage_api."0.4.0" = overridableMkRustCrate (profileName: rec {
|
"unknown".garage_api."0.5.0" = overridableMkRustCrate (profileName: rec {
|
||||||
name = "garage_api";
|
name = "garage_api";
|
||||||
version = "0.4.0";
|
version = "0.5.0";
|
||||||
registry = "unknown";
|
registry = "unknown";
|
||||||
src = fetchCrateLocal (workspaceSrc + "/src/api");
|
src = fetchCrateLocal (workspaceSrc + "/src/api");
|
||||||
dependencies = {
|
dependencies = {
|
||||||
|
@ -651,9 +651,9 @@ in
|
||||||
err_derive = buildRustPackages."registry+https://github.com/rust-lang/crates.io-index".err-derive."0.3.0" { profileName = "__noProfile"; };
|
err_derive = buildRustPackages."registry+https://github.com/rust-lang/crates.io-index".err-derive."0.3.0" { profileName = "__noProfile"; };
|
||||||
futures = rustPackages."registry+https://github.com/rust-lang/crates.io-index".futures."0.3.17" { inherit profileName; };
|
futures = rustPackages."registry+https://github.com/rust-lang/crates.io-index".futures."0.3.17" { inherit profileName; };
|
||||||
futures_util = rustPackages."registry+https://github.com/rust-lang/crates.io-index".futures-util."0.3.17" { inherit profileName; };
|
futures_util = rustPackages."registry+https://github.com/rust-lang/crates.io-index".futures-util."0.3.17" { inherit profileName; };
|
||||||
garage_model = rustPackages."unknown".garage_model."0.4.0" { inherit profileName; };
|
garage_model = rustPackages."unknown".garage_model."0.5.0" { inherit profileName; };
|
||||||
garage_table = rustPackages."unknown".garage_table."0.4.0" { inherit profileName; };
|
garage_table = rustPackages."unknown".garage_table."0.5.0" { inherit profileName; };
|
||||||
garage_util = rustPackages."unknown".garage_util."0.4.0" { inherit profileName; };
|
garage_util = rustPackages."unknown".garage_util."0.5.0" { inherit profileName; };
|
||||||
hex = rustPackages."registry+https://github.com/rust-lang/crates.io-index".hex."0.4.3" { inherit profileName; };
|
hex = rustPackages."registry+https://github.com/rust-lang/crates.io-index".hex."0.4.3" { inherit profileName; };
|
||||||
hmac = rustPackages."registry+https://github.com/rust-lang/crates.io-index".hmac."0.10.1" { inherit profileName; };
|
hmac = rustPackages."registry+https://github.com/rust-lang/crates.io-index".hmac."0.10.1" { inherit profileName; };
|
||||||
http = rustPackages."registry+https://github.com/rust-lang/crates.io-index".http."0.2.5" { inherit profileName; };
|
http = rustPackages."registry+https://github.com/rust-lang/crates.io-index".http."0.2.5" { inherit profileName; };
|
||||||
|
@ -673,9 +673,9 @@ in
|
||||||
};
|
};
|
||||||
});
|
});
|
||||||
|
|
||||||
"unknown".garage_model."0.4.0" = overridableMkRustCrate (profileName: rec {
|
"unknown".garage_model."0.5.0" = overridableMkRustCrate (profileName: rec {
|
||||||
name = "garage_model";
|
name = "garage_model";
|
||||||
version = "0.4.0";
|
version = "0.5.0";
|
||||||
registry = "unknown";
|
registry = "unknown";
|
||||||
src = fetchCrateLocal (workspaceSrc + "/src/model");
|
src = fetchCrateLocal (workspaceSrc + "/src/model");
|
||||||
dependencies = {
|
dependencies = {
|
||||||
|
@ -683,9 +683,9 @@ in
|
||||||
async_trait = buildRustPackages."registry+https://github.com/rust-lang/crates.io-index".async-trait."0.1.51" { profileName = "__noProfile"; };
|
async_trait = buildRustPackages."registry+https://github.com/rust-lang/crates.io-index".async-trait."0.1.51" { profileName = "__noProfile"; };
|
||||||
futures = rustPackages."registry+https://github.com/rust-lang/crates.io-index".futures."0.3.17" { inherit profileName; };
|
futures = rustPackages."registry+https://github.com/rust-lang/crates.io-index".futures."0.3.17" { inherit profileName; };
|
||||||
futures_util = rustPackages."registry+https://github.com/rust-lang/crates.io-index".futures-util."0.3.17" { inherit profileName; };
|
futures_util = rustPackages."registry+https://github.com/rust-lang/crates.io-index".futures-util."0.3.17" { inherit profileName; };
|
||||||
garage_rpc = rustPackages."unknown".garage_rpc."0.4.0" { inherit profileName; };
|
garage_rpc = rustPackages."unknown".garage_rpc."0.5.0" { inherit profileName; };
|
||||||
garage_table = rustPackages."unknown".garage_table."0.4.0" { inherit profileName; };
|
garage_table = rustPackages."unknown".garage_table."0.5.0" { inherit profileName; };
|
||||||
garage_util = rustPackages."unknown".garage_util."0.4.0" { inherit profileName; };
|
garage_util = rustPackages."unknown".garage_util."0.5.0" { inherit profileName; };
|
||||||
hex = rustPackages."registry+https://github.com/rust-lang/crates.io-index".hex."0.4.3" { inherit profileName; };
|
hex = rustPackages."registry+https://github.com/rust-lang/crates.io-index".hex."0.4.3" { inherit profileName; };
|
||||||
log = rustPackages."registry+https://github.com/rust-lang/crates.io-index".log."0.4.14" { inherit profileName; };
|
log = rustPackages."registry+https://github.com/rust-lang/crates.io-index".log."0.4.14" { inherit profileName; };
|
||||||
netapp = rustPackages."registry+https://github.com/rust-lang/crates.io-index".netapp."0.3.0" { inherit profileName; };
|
netapp = rustPackages."registry+https://github.com/rust-lang/crates.io-index".netapp."0.3.0" { inherit profileName; };
|
||||||
|
@ -698,9 +698,9 @@ in
|
||||||
};
|
};
|
||||||
});
|
});
|
||||||
|
|
||||||
"unknown".garage_rpc."0.4.0" = overridableMkRustCrate (profileName: rec {
|
"unknown".garage_rpc."0.5.0" = overridableMkRustCrate (profileName: rec {
|
||||||
name = "garage_rpc";
|
name = "garage_rpc";
|
||||||
version = "0.4.0";
|
version = "0.5.0";
|
||||||
registry = "unknown";
|
registry = "unknown";
|
||||||
src = fetchCrateLocal (workspaceSrc + "/src/rpc");
|
src = fetchCrateLocal (workspaceSrc + "/src/rpc");
|
||||||
dependencies = {
|
dependencies = {
|
||||||
|
@ -709,7 +709,7 @@ in
|
||||||
bytes = rustPackages."registry+https://github.com/rust-lang/crates.io-index".bytes."1.1.0" { inherit profileName; };
|
bytes = rustPackages."registry+https://github.com/rust-lang/crates.io-index".bytes."1.1.0" { inherit profileName; };
|
||||||
futures = rustPackages."registry+https://github.com/rust-lang/crates.io-index".futures."0.3.17" { inherit profileName; };
|
futures = rustPackages."registry+https://github.com/rust-lang/crates.io-index".futures."0.3.17" { inherit profileName; };
|
||||||
futures_util = rustPackages."registry+https://github.com/rust-lang/crates.io-index".futures-util."0.3.17" { inherit profileName; };
|
futures_util = rustPackages."registry+https://github.com/rust-lang/crates.io-index".futures-util."0.3.17" { inherit profileName; };
|
||||||
garage_util = rustPackages."unknown".garage_util."0.4.0" { inherit profileName; };
|
garage_util = rustPackages."unknown".garage_util."0.5.0" { inherit profileName; };
|
||||||
gethostname = rustPackages."registry+https://github.com/rust-lang/crates.io-index".gethostname."0.2.1" { inherit profileName; };
|
gethostname = rustPackages."registry+https://github.com/rust-lang/crates.io-index".gethostname."0.2.1" { inherit profileName; };
|
||||||
hex = rustPackages."registry+https://github.com/rust-lang/crates.io-index".hex."0.4.3" { inherit profileName; };
|
hex = rustPackages."registry+https://github.com/rust-lang/crates.io-index".hex."0.4.3" { inherit profileName; };
|
||||||
hyper = rustPackages."registry+https://github.com/rust-lang/crates.io-index".hyper."0.14.13" { inherit profileName; };
|
hyper = rustPackages."registry+https://github.com/rust-lang/crates.io-index".hyper."0.14.13" { inherit profileName; };
|
||||||
|
@ -719,15 +719,16 @@ in
|
||||||
rand = rustPackages."registry+https://github.com/rust-lang/crates.io-index".rand."0.8.4" { inherit profileName; };
|
rand = rustPackages."registry+https://github.com/rust-lang/crates.io-index".rand."0.8.4" { inherit profileName; };
|
||||||
rmp_serde = rustPackages."registry+https://github.com/rust-lang/crates.io-index".rmp-serde."0.15.5" { inherit profileName; };
|
rmp_serde = rustPackages."registry+https://github.com/rust-lang/crates.io-index".rmp-serde."0.15.5" { inherit profileName; };
|
||||||
serde = rustPackages."registry+https://github.com/rust-lang/crates.io-index".serde."1.0.130" { inherit profileName; };
|
serde = rustPackages."registry+https://github.com/rust-lang/crates.io-index".serde."1.0.130" { inherit profileName; };
|
||||||
|
serde_bytes = rustPackages."registry+https://github.com/rust-lang/crates.io-index".serde_bytes."0.11.5" { inherit profileName; };
|
||||||
serde_json = rustPackages."registry+https://github.com/rust-lang/crates.io-index".serde_json."1.0.68" { inherit profileName; };
|
serde_json = rustPackages."registry+https://github.com/rust-lang/crates.io-index".serde_json."1.0.68" { inherit profileName; };
|
||||||
tokio = rustPackages."registry+https://github.com/rust-lang/crates.io-index".tokio."1.12.0" { inherit profileName; };
|
tokio = rustPackages."registry+https://github.com/rust-lang/crates.io-index".tokio."1.12.0" { inherit profileName; };
|
||||||
tokio_stream = rustPackages."registry+https://github.com/rust-lang/crates.io-index".tokio-stream."0.1.7" { inherit profileName; };
|
tokio_stream = rustPackages."registry+https://github.com/rust-lang/crates.io-index".tokio-stream."0.1.7" { inherit profileName; };
|
||||||
};
|
};
|
||||||
});
|
});
|
||||||
|
|
||||||
"unknown".garage_table."0.4.0" = overridableMkRustCrate (profileName: rec {
|
"unknown".garage_table."0.5.0" = overridableMkRustCrate (profileName: rec {
|
||||||
name = "garage_table";
|
name = "garage_table";
|
||||||
version = "0.4.0";
|
version = "0.5.0";
|
||||||
registry = "unknown";
|
registry = "unknown";
|
||||||
src = fetchCrateLocal (workspaceSrc + "/src/table");
|
src = fetchCrateLocal (workspaceSrc + "/src/table");
|
||||||
dependencies = {
|
dependencies = {
|
||||||
|
@ -735,8 +736,8 @@ in
|
||||||
bytes = rustPackages."registry+https://github.com/rust-lang/crates.io-index".bytes."1.1.0" { inherit profileName; };
|
bytes = rustPackages."registry+https://github.com/rust-lang/crates.io-index".bytes."1.1.0" { inherit profileName; };
|
||||||
futures = rustPackages."registry+https://github.com/rust-lang/crates.io-index".futures."0.3.17" { inherit profileName; };
|
futures = rustPackages."registry+https://github.com/rust-lang/crates.io-index".futures."0.3.17" { inherit profileName; };
|
||||||
futures_util = rustPackages."registry+https://github.com/rust-lang/crates.io-index".futures-util."0.3.17" { inherit profileName; };
|
futures_util = rustPackages."registry+https://github.com/rust-lang/crates.io-index".futures-util."0.3.17" { inherit profileName; };
|
||||||
garage_rpc = rustPackages."unknown".garage_rpc."0.4.0" { inherit profileName; };
|
garage_rpc = rustPackages."unknown".garage_rpc."0.5.0" { inherit profileName; };
|
||||||
garage_util = rustPackages."unknown".garage_util."0.4.0" { inherit profileName; };
|
garage_util = rustPackages."unknown".garage_util."0.5.0" { inherit profileName; };
|
||||||
hexdump = rustPackages."registry+https://github.com/rust-lang/crates.io-index".hexdump."0.1.1" { inherit profileName; };
|
hexdump = rustPackages."registry+https://github.com/rust-lang/crates.io-index".hexdump."0.1.1" { inherit profileName; };
|
||||||
log = rustPackages."registry+https://github.com/rust-lang/crates.io-index".log."0.4.14" { inherit profileName; };
|
log = rustPackages."registry+https://github.com/rust-lang/crates.io-index".log."0.4.14" { inherit profileName; };
|
||||||
rand = rustPackages."registry+https://github.com/rust-lang/crates.io-index".rand."0.8.4" { inherit profileName; };
|
rand = rustPackages."registry+https://github.com/rust-lang/crates.io-index".rand."0.8.4" { inherit profileName; };
|
||||||
|
@ -748,9 +749,9 @@ in
|
||||||
};
|
};
|
||||||
});
|
});
|
||||||
|
|
||||||
"unknown".garage_util."0.4.0" = overridableMkRustCrate (profileName: rec {
|
"unknown".garage_util."0.5.0" = overridableMkRustCrate (profileName: rec {
|
||||||
name = "garage_util";
|
name = "garage_util";
|
||||||
version = "0.4.0";
|
version = "0.5.0";
|
||||||
registry = "unknown";
|
registry = "unknown";
|
||||||
src = fetchCrateLocal (workspaceSrc + "/src/util");
|
src = fetchCrateLocal (workspaceSrc + "/src/util");
|
||||||
dependencies = {
|
dependencies = {
|
||||||
|
@ -775,18 +776,18 @@ in
|
||||||
};
|
};
|
||||||
});
|
});
|
||||||
|
|
||||||
"unknown".garage_web."0.4.0" = overridableMkRustCrate (profileName: rec {
|
"unknown".garage_web."0.5.0" = overridableMkRustCrate (profileName: rec {
|
||||||
name = "garage_web";
|
name = "garage_web";
|
||||||
version = "0.4.0";
|
version = "0.5.0";
|
||||||
registry = "unknown";
|
registry = "unknown";
|
||||||
src = fetchCrateLocal (workspaceSrc + "/src/web");
|
src = fetchCrateLocal (workspaceSrc + "/src/web");
|
||||||
dependencies = {
|
dependencies = {
|
||||||
err_derive = buildRustPackages."registry+https://github.com/rust-lang/crates.io-index".err-derive."0.3.0" { profileName = "__noProfile"; };
|
err_derive = buildRustPackages."registry+https://github.com/rust-lang/crates.io-index".err-derive."0.3.0" { profileName = "__noProfile"; };
|
||||||
futures = rustPackages."registry+https://github.com/rust-lang/crates.io-index".futures."0.3.17" { inherit profileName; };
|
futures = rustPackages."registry+https://github.com/rust-lang/crates.io-index".futures."0.3.17" { inherit profileName; };
|
||||||
garage_api = rustPackages."unknown".garage_api."0.4.0" { inherit profileName; };
|
garage_api = rustPackages."unknown".garage_api."0.5.0" { inherit profileName; };
|
||||||
garage_model = rustPackages."unknown".garage_model."0.4.0" { inherit profileName; };
|
garage_model = rustPackages."unknown".garage_model."0.5.0" { inherit profileName; };
|
||||||
garage_table = rustPackages."unknown".garage_table."0.4.0" { inherit profileName; };
|
garage_table = rustPackages."unknown".garage_table."0.5.0" { inherit profileName; };
|
||||||
garage_util = rustPackages."unknown".garage_util."0.4.0" { inherit profileName; };
|
garage_util = rustPackages."unknown".garage_util."0.5.0" { inherit profileName; };
|
||||||
http = rustPackages."registry+https://github.com/rust-lang/crates.io-index".http."0.2.5" { inherit profileName; };
|
http = rustPackages."registry+https://github.com/rust-lang/crates.io-index".http."0.2.5" { inherit profileName; };
|
||||||
hyper = rustPackages."registry+https://github.com/rust-lang/crates.io-index".hyper."0.14.13" { inherit profileName; };
|
hyper = rustPackages."registry+https://github.com/rust-lang/crates.io-index".hyper."0.14.13" { inherit profileName; };
|
||||||
log = rustPackages."registry+https://github.com/rust-lang/crates.io-index".log."0.4.14" { inherit profileName; };
|
log = rustPackages."registry+https://github.com/rust-lang/crates.io-index".log."0.4.14" { inherit profileName; };
|
||||||
|
|
12
README.md
|
@ -3,10 +3,18 @@ Garage [![Build Status](https://drone.deuxfleurs.fr/api/badges/Deuxfleurs/garage
|
||||||
|
|
||||||
<p align="center" style="text-align:center;">
|
<p align="center" style="text-align:center;">
|
||||||
<a href="https://garagehq.deuxfleurs.fr">
|
<a href="https://garagehq.deuxfleurs.fr">
|
||||||
<img alt="Garage logo" src="doc/logo/garage.png" height="200" />
|
<img alt="Garage logo" src="https://garagehq.deuxfleurs.fr/img/logo.svg" height="200" />
|
||||||
</a>
|
</a>
|
||||||
</p>
|
</p>
|
||||||
|
|
||||||
|
<p align="center" style="text-align:center;">
|
||||||
|
[ <strong><a href="https://garagehq.deuxfleurs.fr/">Website and documentation</a></strong>
|
||||||
|
| <a href="https://garagehq.deuxfleurs.fr/_releases.html">Binary releases</a>
|
||||||
|
| <a href="https://git.deuxfleurs.fr/Deuxfleurs/garage">Git repository</a>
|
||||||
|
| <a href="https://matrix.to/#/%23garage:deuxfleurs.fr">Matrix channel</a>
|
||||||
|
]
|
||||||
|
</p>
|
||||||
|
|
||||||
Garage is a lightweight S3-compatible distributed object store, with the following goals:
|
Garage is a lightweight S3-compatible distributed object store, with the following goals:
|
||||||
|
|
||||||
- As self-contained as possible
|
- As self-contained as possible
|
||||||
|
@ -22,5 +30,3 @@ Non-goals include:
|
||||||
- Erasure coding (our replication model is simply to copy the data as is on several nodes, in different datacenters if possible)
|
- Erasure coding (our replication model is simply to copy the data as is on several nodes, in different datacenters if possible)
|
||||||
|
|
||||||
Our main use case is to provide a distributed storage layer for small-scale self hosted services such as [Deuxfleurs](https://deuxfleurs.fr).
|
Our main use case is to provide a distributed storage layer for small-scale self hosted services such as [Deuxfleurs](https://deuxfleurs.fr).
|
||||||
|
|
||||||
**[Go to the documentation](https://garagehq.deuxfleurs.fr)**
|
|
||||||
|
|
|
@ -5,12 +5,12 @@
|
||||||
- [Quick start](./quick_start/index.md)
|
- [Quick start](./quick_start/index.md)
|
||||||
|
|
||||||
- [Cookbook](./cookbook/index.md)
|
- [Cookbook](./cookbook/index.md)
|
||||||
|
- [Multi-node deployment](./cookbook/real_world.md)
|
||||||
|
|||||||
- [Building from source](./cookbook/from_source.md)
|
- [Building from source](./cookbook/from_source.md)
|
||||||
- [Integration with systemd](./cookbook/systemd.md)
|
- [Integration with systemd](./cookbook/systemd.md)
|
||||||
- [Gateways](./cookbook/gateways.md)
|
- [Gateways](./cookbook/gateways.md)
|
||||||
- [Exposing buckets as websites](./cookbook/exposing_websites.md)
|
- [Exposing buckets as websites](./cookbook/exposing_websites.md)
|
||||||
- [Configuring a reverse proxy](./cookbook/reverse_proxy.md)
|
- [Configuring a reverse proxy](./cookbook/reverse_proxy.md)
|
||||||
- [Production Deployment](./cookbook/real_world.md)
|
|
||||||
- [Recovering from failures](./cookbook/recovering.md)
|
- [Recovering from failures](./cookbook/recovering.md)
|
||||||
|
|
||||||
- [Integrations](./connect/index.md)
|
- [Integrations](./connect/index.md)
|
||||||
|
@ -25,6 +25,7 @@
|
||||||
|
|
||||||
- [Reference Manual](./reference_manual/index.md)
|
- [Reference Manual](./reference_manual/index.md)
|
||||||
- [Garage configuration file](./reference_manual/configuration.md)
|
- [Garage configuration file](./reference_manual/configuration.md)
|
||||||
|
- [Cluster layout management](./reference_manual/layout.md)
|
||||||
- [Garage CLI](./reference_manual/cli.md)
|
- [Garage CLI](./reference_manual/cli.md)
|
||||||
- [S3 compatibility status](./reference_manual/s3_compatibility.md)
|
- [S3 compatibility status](./reference_manual/s3_compatibility.md)
|
||||||
|
|
||||||
|
|
|
@ -21,7 +21,9 @@ Currently it will not work with minio client. Follow issue [#64](https://git.deu
|
||||||
The instructions are similar to a regular node, the only option that is different is while configuring the node, you must set the `--gateway` parameter:
|
The instructions are similar to a regular node, the only option that is different is while configuring the node, you must set the `--gateway` parameter:
|
||||||
|
|
||||||
```bash
|
```bash
|
||||||
garage node configure --gateway --tag gw1 xxxx
|
garage layout assign --gateway --tag gw1 <node_id>
|
||||||
|
garage layout show # review the changes you are making
|
||||||
|
garage layout apply # once satisfied, apply the changes
|
||||||
```
|
```
|
||||||
|
|
||||||
Then use `http://localhost:3900` when a S3 endpoint is required:
|
Then use `http://localhost:3900` when a S3 endpoint is required:
|
||||||
|
@ -29,3 +31,9 @@ Then use `http://localhost:3900` when a S3 endpoint is required:
|
||||||
```bash
|
```bash
|
||||||
aws --endpoint-url http://127.0.0.1:3900 s3 ls
|
aws --endpoint-url http://127.0.0.1:3900 s3 ls
|
||||||
```
|
```
|
||||||
|
|
||||||
|
If a newly added gateway node seems to not be working, do a full table resync to ensure that bucket and key list are correctly propagated:
|
||||||
quentin
commented
What are the conditions that require to manually trigger a resync? What are the conditions that require to manually trigger a resync?
Is it only in case of a bug?
lx
commented
The tables should be resynced regularly so if you just let nodes do their thing, it will eventually work. What can happen is that a node that is added does not receive the content of these tables before the first resync, so for some time the gateway node might be inoperable. (this is probably worthy of opening an issue) The tables should be resynced regularly so if you just let nodes do their thing, it will eventually work. What can happen is that a node that is added does not receive the content of these tables before the first resync, so for some time the gateway node might be inoperable. (this is probably worthy of opening an issue)
|
|||||||
|
|
||||||
|
```bash
|
||||||
|
garage repair -a --yes tables
|
||||||
|
```
|
||||||
|
|
|
@ -41,15 +41,15 @@ For our example, we will suppose the following infrastructure with IPv6 connecti
|
||||||
|
|
||||||
## Get a Docker image
|
## Get a Docker image
|
||||||
|
|
||||||
Our docker image is currently named `lxpz/garage_amd64` and is stored on the [Docker Hub](https://hub.docker.com/r/lxpz/garage_amd64/tags?page=1&ordering=last_updated).
|
Our docker image is currently named `dxflrs/amd64_garage` and is stored on the [Docker Hub](https://hub.docker.com/r/dxflrs/amd64_garage/tags?page=1&ordering=last_updated).
|
||||||
We encourage you to use a fixed tag (eg. `v0.4.0`) and not the `latest` tag.
|
We encourage you to use a fixed tag (eg. `v0.4.0`) and not the `latest` tag.
|
||||||
For this example, we will use the latest published version at the time of the writing which is `v0.4.0` but it's up to you
|
For this example, we will use the latest published version at the time of the writing which is `v0.4.0` but it's up to you
|
||||||
to check [the most recent versions on the Docker Hub](https://hub.docker.com/r/lxpz/garage_amd64/tags?page=1&ordering=last_updated).
|
to check [the most recent versions on the Docker Hub](https://hub.docker.com/r/dxflrs/amd64_garage/tags?page=1&ordering=last_updated).
|
||||||
|
|
||||||
For example:
|
For example:
|
||||||
|
|
||||||
```
|
```
|
||||||
sudo docker pull lxpz/garage_amd64:v0.4.0
|
sudo docker pull dxflrs/amd64_garage:v0.4.0
|
||||||
```
|
```
|
||||||
|
|
||||||
## Deploying and configuring Garage
|
## Deploying and configuring Garage
|
||||||
|
@ -144,7 +144,7 @@ At this point, nodes are not yet talking to one another.
|
||||||
Your output should therefore look like follows:
|
Your output should therefore look like follows:
|
||||||
|
|
||||||
```
|
```
|
||||||
Mercury$ garage node-id
|
Mercury$ garage status
|
||||||
==== HEALTHY NODES ====
|
==== HEALTHY NODES ====
|
||||||
ID Hostname Address Tag Zone Capacity
|
ID Hostname Address Tag Zone Capacity
|
||||||
563e1ac825ee3323… Mercury [fc00:1::1]:3901 NO ROLE ASSIGNED
|
563e1ac825ee3323… Mercury [fc00:1::1]:3901 NO ROLE ASSIGNED
|
||||||
|
@ -157,14 +157,14 @@ When your Garage nodes first start, they will generate a local node identifier
|
||||||
(based on a public/private key pair).
|
(based on a public/private key pair).
|
||||||
|
|
||||||
To obtain the node identifier of a node, once it is generated,
|
To obtain the node identifier of a node, once it is generated,
|
||||||
run `garage node-id`.
|
run `garage node id`.
|
||||||
This will print keys as follows:
|
This will print keys as follows:
|
||||||
|
|
||||||
```bash
|
```bash
|
||||||
Mercury$ garage node-id
|
Mercury$ garage node id
|
||||||
563e1ac825ee3323aa441e72c26d1030d6d4414aeb3dd25287c531e7fc2bc95d@[fc00:1::1]:3901
|
563e1ac825ee3323aa441e72c26d1030d6d4414aeb3dd25287c531e7fc2bc95d@[fc00:1::1]:3901
|
||||||
|
|
||||||
Venus$ garage node-id
|
Venus$ garage node id
|
||||||
86f0f26ae4afbd59aaf9cfb059eefac844951efd5b8caeec0d53f4ed6c85f332@[fc00:1::2]:3901
|
86f0f26ae4afbd59aaf9cfb059eefac844951efd5b8caeec0d53f4ed6c85f332@[fc00:1::2]:3901
|
||||||
|
|
||||||
etc.
|
etc.
|
||||||
|
@ -191,20 +191,22 @@ ID Hostname Address Tag Zone Capa
|
||||||
212f7572f0c89da9… Mars [fc00:F::1]:3901 NO ROLE ASSIGNED
|
212f7572f0c89da9… Mars [fc00:F::1]:3901 NO ROLE ASSIGNED
|
||||||
```
|
```
|
||||||
|
|
||||||
## Giving roles to nodes
|
## Creating a cluster layout
|
||||||
|
|
||||||
We will now inform Garage of the disk space available on each node of the cluster
|
We will now inform Garage of the disk space available on each node of the cluster
|
||||||
as well as the zone (e.g. datacenter) in which each machine is located.
|
as well as the zone (e.g. datacenter) in which each machine is located.
|
||||||
|
This information is called the **cluster layout** and consists
|
||||||
|
of a role that is assigned to each active cluster node.
|
||||||
|
|
||||||
For our example, we will suppose we have the following infrastructure
|
For our example, we will suppose we have the following infrastructure
|
||||||
(Capacity, Identifier and Zone are specific values to Garage described in the following):
|
(Capacity, Identifier and Zone are specific values to Garage described in the following):
|
||||||
|
|
||||||
| Location | Name | Disk Space | `Capacity` | `Identifier` | `Zone` |
|
| Location | Name | Disk Space | `Capacity` | `Identifier` | `Zone` |
|
||||||
|----------|---------|------------|------------|--------------|--------------|
|
|----------|---------|------------|------------|--------------|--------------|
|
||||||
| Paris | Mercury | 1 To | `2` | `563e` | `par1` |
|
| Paris | Mercury | 1 To | `10` | `563e` | `par1` |
|
||||||
| Paris | Venus | 2 To | `4` | `86f0` | `par1` |
|
| Paris | Venus | 2 To | `20` | `86f0` | `par1` |
|
||||||
| London | Earth | 2 To | `4` | `6814` | `lon1` |
|
| London | Earth | 2 To | `20` | `6814` | `lon1` |
|
||||||
| Brussels | Mars | 1.5 To | `3` | `212f` | `bru1` |
|
| Brussels | Mars | 1.5 To | `15` | `212f` | `bru1` |
|
||||||
|
|
||||||
#### Node identifiers
|
#### Node identifiers
|
||||||
|
|
||||||
|
@ -239,13 +241,9 @@ in order to provide high availability despite failure of a zone.
|
||||||
|
|
||||||
Garage reasons on an abstract metric about disk storage that is named the *capacity* of a node.
|
Garage reasons on an abstract metric about disk storage that is named the *capacity* of a node.
|
||||||
The capacity configured in Garage must be proportional to the disk space dedicated to the node.
|
The capacity configured in Garage must be proportional to the disk space dedicated to the node.
|
||||||
Due to the way the Garage allocation algorithm works, capacity values must
|
|
||||||
be **integers**, and must be **as small as possible**, for instance with
|
|
||||||
1 representing the size of your smallest server.
|
|
||||||
|
|
||||||
Here we chose that 1 unit of capacity = 0.5 To, so that we can express servers of size
|
Capacity values must be **integers** but can be given any signification.
|
||||||
1 To and 2 To, as wel as the intermediate size 1.5 To, with the integer values 2, 4 and
|
Here we chose that 1 unit of capacity = 100 GB.
|
||||||
3 respectively (see table above).
|
|
||||||
|
|
||||||
Note that the amount of data stored by Garage on each server may not be strictly proportional to
|
Note that the amount of data stored by Garage on each server may not be strictly proportional to
|
||||||
its capacity value, as Garage will priorize having 3 copies of data in different zones,
|
its capacity value, as Garage will priorize having 3 copies of data in different zones,
|
||||||
|
@ -257,13 +255,29 @@ have 66% chance of being stored by Venus and 33% chance of being stored by Mercu
|
||||||
|
|
||||||
Given the information above, we will configure our cluster as follow:
|
Given the information above, we will configure our cluster as follow:
|
||||||
|
|
||||||
|
```bash
|
||||||
|
garage layout assign -z par1 -c 10 -t mercury 563e
|
||||||
|
garage layout assign -z par1 -c 20 -t venus 86f0
|
||||||
|
garage layout assign -z lon1 -c 20 -t earth 6814
|
||||||
|
garage layout assign -z bru1 -c 15 -t mars 212f
|
||||||
```
|
```
|
||||||
garage node configure -z par1 -c 2 -t mercury 563e
|
|
||||||
garage node configure -z par1 -c 4 -t venus 86f0
|
At this point, the changes in the cluster layout have not yet been applied.
|
||||||
garage node configure -z lon1 -c 4 -t earth 6814
|
To show the new layout that will be applied, call:
|
||||||
garage node configure -z bru1 -c 3 -t mars 212f
|
|
||||||
|
```bash
|
||||||
|
garage layout show
|
||||||
```
|
```
|
||||||
|
|
||||||
|
Once you are satisfied with your new layout, apply it with:
|
||||||
|
|
||||||
|
```bash
|
||||||
|
garage layout apply
|
||||||
|
```
|
||||||
|
|
||||||
|
**WARNING:** if you want to use the layout modification commands in a script,
|
||||||
|
make sure to read [this page](/reference_manual/layout.html) first.
|
||||||
|
|
||||||
|
|
||||||
## Using your Garage cluster
|
## Using your Garage cluster
|
||||||
|
|
||||||
|
|
|
@ -28,8 +28,10 @@ and you should instead use one of the methods detailed in the next sections.
|
||||||
|
|
||||||
Removing a node is done with the following command:
|
Removing a node is done with the following command:
|
||||||
|
|
||||||
```
|
```bash
|
||||||
garage node remove --yes <node_id>
|
garage layout remove <node_id>
|
||||||
|
garage layout show # review the changes you are making
|
||||||
|
garage layout apply # once satisfied, apply the changes
|
||||||
```
|
```
|
||||||
|
|
||||||
(you can get the `node_id` of the failed node by running `garage status`)
|
(you can get the `node_id` of the failed node by running `garage status`)
|
||||||
|
@ -50,7 +52,7 @@ We just need to tell Garage to get back all the data blocks and store them on th
|
||||||
First, set up a new HDD to store Garage's data directory on the failed node, and restart Garage using
|
First, set up a new HDD to store Garage's data directory on the failed node, and restart Garage using
|
||||||
the existing configuration. Then, run:
|
the existing configuration. Then, run:
|
||||||
|
|
||||||
```
|
```bash
|
||||||
garage repair -a --yes blocks
|
garage repair -a --yes blocks
|
||||||
```
|
```
|
||||||
|
|
||||||
|
@ -58,7 +60,7 @@ This will re-synchronize blocks of data that are missing to the new HDD, reading
|
||||||
|
|
||||||
You can check on the advancement of this process by doing the following command:
|
You can check on the advancement of this process by doing the following command:
|
||||||
|
|
||||||
```
|
```bash
|
||||||
garage stats -a
|
garage stats -a
|
||||||
```
|
```
|
||||||
|
|
||||||
|
@ -94,9 +96,11 @@ The ID of the lost node should be shown in `garage status` in the section for di
|
||||||
|
|
||||||
Then, replace the broken node by the new one, using:
|
Then, replace the broken node by the new one, using:
|
||||||
|
|
||||||
```
|
```bash
|
||||||
garage node configure --replace <old_node_id> \
|
garage layout assign <new_node_id> --replace <old_node_id> \
|
||||||
-c <capacity> -z <zone> -t <node_tag> <new_node_id>
|
-c <capacity> -z <zone> -t <node_tag>
|
||||||
|
garage layout show # review the changes you are making
|
||||||
|
garage layout apply # once satisfied, apply the changes
|
||||||
```
|
```
|
||||||
|
|
||||||
Garage will then start synchronizing all required data on the new node.
|
Garage will then start synchronizing all required data on the new node.
|
||||||
|
|
|
@ -18,10 +18,18 @@ This very website is hosted using Garage. In other words: the doc is the PoC!
|
||||||
|
|
||||||
# The Garage Geo-Distributed Data Store
|
# The Garage Geo-Distributed Data Store
|
||||||
|
|
||||||
Garage is a lightweight geo-distributed data store.
|
Garage is a lightweight geo-distributed data store that implements the
|
||||||
It comes from the observation that despite numerous object stores
|
[Amazon S3](https://docs.aws.amazon.com/AmazonS3/latest/API/Welcome.html)
|
||||||
many people have broken data management policies (backup/replication on a single site or none at all).
|
object storage protocole. It enables applications to store large blobs such
|
||||||
To promote better data management policies, we focused on the following **desirable properties**:
|
as pictures, video, images, documents, etc., in a redundant multi-node
|
||||||
|
setting. S3 is versatile enough to also be used to publish a static
|
||||||
|
website.
|
||||||
|
|
||||||
|
Garage comes from the observation that despite the numerous existing
|
||||||
|
implementation of object stores, many people have broken data management
|
||||||
|
policies (backup/replication on a single site or none at all). To promote
|
||||||
|
better data management policies, we focused on the following **desirable
|
||||||
|
properties**:
|
||||||
|
|
||||||
- **Self-contained & lightweight**: works everywhere and integrates well in existing environments to target [hyperconverged infrastructures](https://en.wikipedia.org/wiki/Hyper-converged_infrastructure).
|
- **Self-contained & lightweight**: works everywhere and integrates well in existing environments to target [hyperconverged infrastructures](https://en.wikipedia.org/wiki/Hyper-converged_infrastructure).
|
||||||
- **Highly resilient**: highly resilient to network failures, network latency, disk failures, sysadmin failures.
|
- **Highly resilient**: highly resilient to network failures, network latency, disk failures, sysadmin failures.
|
||||||
|
@ -32,26 +40,19 @@ We also noted that the pursuit of some other goals are detrimental to our initia
|
||||||
The following has been identified as **non-goals** (if these points matter to you, you should not use Garage):
|
The following has been identified as **non-goals** (if these points matter to you, you should not use Garage):
|
||||||
|
|
||||||
- **Extreme performances**: high performances constrain a lot the design and the infrastructure; we seek performances through minimalism only.
|
- **Extreme performances**: high performances constrain a lot the design and the infrastructure; we seek performances through minimalism only.
|
||||||
- **Feature extensiveness**: complete implementation of the S3 API or any other API to make garage a drop-in replacement is not targeted as it could lead to decisions impacting our desirable properties.
|
- **Feature extensiveness**: complete implementation of the S3 API or any other API to make Garage a drop-in replacement is not targeted as it could lead to decisions impacting our desirable properties.
|
||||||
- **Storage optimizations**: erasure coding or any other coding technique both increase the difficulty of placing data and synchronizing; we limit ourselves to duplication.
|
- **Storage optimizations**: erasure coding or any other coding technique both increase the difficulty of placing data and synchronizing; we limit ourselves to duplication.
|
||||||
- **POSIX/Filesystem compatibility**: we do not aim at being POSIX compatible or to emulate any kind of filesystem. Indeed, in a distributed environment, such synchronizations are translated in network messages that impose severe constraints on the deployment.
|
- **POSIX/Filesystem compatibility**: we do not aim at being POSIX compatible or to emulate any kind of filesystem. Indeed, in a distributed environment, such synchronizations are translated in network messages that impose severe constraints on the deployment.
|
||||||
|
|
||||||
## Supported and planned protocols
|
|
||||||
|
|
||||||
Garage speaks (or will speak) the following protocols:
|
|
||||||
|
|
||||||
- [S3](https://docs.aws.amazon.com/AmazonS3/latest/API/Welcome.html) - *SUPPORTED* - Enable applications to store large blobs such as pictures, video, images, documents, etc. S3 is versatile enough to also be used to publish a static website.
|
|
||||||
- [IMAP](https://github.com/go-pluto/pluto) - *PLANNED* - email storage is quite complex to get good performances.
|
|
||||||
To keep performances optimal, most IMAP servers only support on-disk storage.
|
|
||||||
We plan to add logic to Garage to make it a viable solution for email storage.
|
|
||||||
- *More to come*
|
|
||||||
|
|
||||||
## Use Cases
|
## Use Cases
|
||||||
|
|
||||||
**[Deuxfleurs](https://deuxfleurs.fr):** Garage is used by Deuxfleurs which is a non-profit hosting organization.
|
**[Deuxfleurs](https://deuxfleurs.fr):** Garage is used by Deuxfleurs which
|
||||||
Especially, it is used to host their main website, this documentation and some of its members' blogs.
|
is a non-profit hosting organization. Especially, it is used to host their
|
||||||
Additionally, Garage is used as a [backend for Nextcloud](https://docs.nextcloud.com/server/20/admin_manual/configuration_files/primary_storage.html).
|
main website, this documentation and some of its members' blogs.
|
||||||
Deuxfleurs also plans to use Garage as their [Matrix's media backend](https://github.com/matrix-org/synapse-s3-storage-provider) and as the backend of [OCIS](https://github.com/owncloud/ocis).
|
Deuxfleurs also uses Garage as their [Matrix's media
|
||||||
|
backend](https://github.com/matrix-org/synapse-s3-storage-provider).
|
||||||
|
Deuxfleurs also uses it in its continuous integration platform to store
|
||||||
|
Drone's job logs and a Nix binary cache.
|
||||||
|
|
||||||
*Are you using Garage? [Open a pull request](https://git.deuxfleurs.fr/Deuxfleurs/garage/) to add your organization here!*
|
*Are you using Garage? [Open a pull request](https://git.deuxfleurs.fr/Deuxfleurs/garage/) to add your organization here!*
|
||||||
|
|
||||||
|
|
|
@ -6,22 +6,23 @@ and how to interact with it.
|
||||||
|
|
||||||
Our goal is to introduce you to Garage's workflows.
|
Our goal is to introduce you to Garage's workflows.
|
||||||
Following this guide is recommended before moving on to
|
Following this guide is recommended before moving on to
|
||||||
[configuring a real-world deployment](../cookbook/real_world.md).
|
[configuring a multi-node cluster](../cookbook/real_world.md).
|
||||||
|
|
||||||
Note that this kind of deployment should not be used in production, as it provides
|
Note that this kind of deployment should not be used in production,
|
||||||
no redundancy for your data!
|
as it provides no redundancy for your data!
|
||||||
|
|
||||||
## Get a binary
|
## Get a binary
|
||||||
|
|
||||||
Download the latest Garage binary from the release pages on our repository:
|
Download the latest Garage binary from the release pages on our repository:
|
||||||
|
|
||||||
<https://git.deuxfleurs.fr/Deuxfleurs/garage/releases>
|
<https://garagehq.deuxfleurs.fr/_releases.html>
|
||||||
|
|
||||||
Place this binary somewhere in your `$PATH` so that you can invoke the `garage`
|
Place this binary somewhere in your `$PATH` so that you can invoke the `garage`
|
||||||
command directly (for instance you can copy the binary in `/usr/local/bin`
|
command directly (for instance you can copy the binary in `/usr/local/bin`
|
||||||
or in `~/.local/bin`).
|
or in `~/.local/bin`).
|
||||||
|
|
||||||
If a binary of the last version is not available for your architecture,
|
If a binary of the last version is not available for your architecture,
|
||||||
|
or if you want a build customized for your system,
|
||||||
you can [build Garage from source](../cookbook/from_source.md).
|
you can [build Garage from source](../cookbook/from_source.md).
|
||||||
|
|
||||||
|
|
||||||
|
@ -109,9 +110,9 @@ ID Hostname Address Tag Zone Capacit
|
||||||
563e1ac825ee3323… linuxbox 127.0.0.1:3901 NO ROLE ASSIGNED
|
563e1ac825ee3323… linuxbox 127.0.0.1:3901 NO ROLE ASSIGNED
|
||||||
```
|
```
|
||||||
|
|
||||||
## Configuring your Garage node
|
## Creating a cluster layout
|
||||||
|
|
||||||
Configuring the nodes in a Garage deployment means informing Garage
|
Creating a cluster layout for a Garage deployment means informing Garage
|
||||||
of the disk space available on each node of the cluster
|
of the disk space available on each node of the cluster
|
||||||
as well as the zone (e.g. datacenter) each machine is located in.
|
as well as the zone (e.g. datacenter) each machine is located in.
|
||||||
|
|
||||||
|
@ -119,14 +120,18 @@ For our test deployment, we are using only one node. The way in which we configu
|
||||||
it does not matter, you can simply write:
|
it does not matter, you can simply write:
|
||||||
|
|
||||||
```bash
|
```bash
|
||||||
garage node configure -z dc1 -c 1 <node_id>
|
garage layout assign -z dc1 -c 1 <node_id>
|
||||||
```
|
```
|
||||||
|
|
||||||
where `<node_id>` corresponds to the identifier of the node shown by `garage status` (first column).
|
where `<node_id>` corresponds to the identifier of the node shown by `garage status` (first column).
|
||||||
You can enter simply a prefix of that identifier.
|
You can enter simply a prefix of that identifier.
|
||||||
For instance here you could write just `garage node configure -z dc1 -c 1 563e`.
|
For instance here you could write just `garage layout assign -z dc1 -c 1 563e`.
|
||||||
|
|
||||||
|
The layout then has to be applied to the cluster, using:
|
||||||
|
|
||||||
|
```bash
|
||||||
|
garage layout apply
|
||||||
|
```
|
||||||
|
|
||||||
|
|
||||||
## Creating buckets and keys
|
## Creating buckets and keys
|
||||||
|
@ -197,7 +202,7 @@ Now that we have a bucket and a key, we need to give permissions to the key on t
|
||||||
```
|
```
|
||||||
garage bucket allow \
|
garage bucket allow \
|
||||||
--read \
|
--read \
|
||||||
--write
|
--write \
|
||||||
nextcloud-bucket \
|
nextcloud-bucket \
|
||||||
--key nextcloud-app-key
|
--key nextcloud-app-key
|
||||||
```
|
```
|
||||||
|
@ -270,5 +275,5 @@ The following tools can also be used to send and recieve files from/to Garage:
|
||||||
- [Cyberduck](https://cyberduck.io/)
|
- [Cyberduck](https://cyberduck.io/)
|
||||||
- [`s3cmd`](https://s3tools.org/s3cmd)
|
- [`s3cmd`](https://s3tools.org/s3cmd)
|
||||||
|
|
||||||
Refer to the ["configuring clients"](../cookbook/clients.md) page to learn how to configure
|
Refer to the ["Integrations" section](../connect/index.md) to learn how to
|
||||||
these clients to interact with a Garage server.
|
configure application and command line utilities to integrate with Garage.
|
||||||
|
|
|
@ -133,9 +133,9 @@ These peer identifiers have the following syntax:
|
||||||
|
|
||||||
In the case where `rpc_public_addr` is correctly specified in the
|
In the case where `rpc_public_addr` is correctly specified in the
|
||||||
configuration file, the full identifier of a node including IP and port can
|
configuration file, the full identifier of a node including IP and port can
|
||||||
be obtained by running `garage node-id` and then included directly in the
|
be obtained by running `garage node id` and then included directly in the
|
||||||
`bootstrap_peers` list of other nodes. Otherwise, only the node's public
|
`bootstrap_peers` list of other nodes. Otherwise, only the node's public
|
||||||
key will be returned by `garage node-id` and you will have to add the IP
|
key will be returned by `garage node id` and you will have to add the IP
|
||||||
yourself.
|
yourself.
|
||||||
|
|
||||||
#### `consul_host` and `consul_service_name`
|
#### `consul_host` and `consul_service_name`
|
||||||
|
|
74
doc/book/src/reference_manual/layout.md
Normal file
|
@ -0,0 +1,74 @@
|
||||||
|
# Creating and updating a cluster layout
|
||||||
|
|
||||||
|
The cluster layout in Garage is a table that assigns to each node a role in
|
||||||
|
the cluster. The role of a node in Garage can either be a storage node with
|
||||||
|
a certain capacity, or a gateway node that does not store data and is only
|
||||||
|
used as an API entry point for faster cluster access.
|
||||||
|
An introduction to building cluster layouts can be found in the [production deployment](/cookbook/real_world.md) page.
|
||||||
|
|
||||||
|
## How cluster layouts work in Garage
|
||||||
|
|
||||||
|
In Garage, a cluster layout is composed of the following components:
|
||||||
|
|
||||||
|
- a table of roles assigned to nodes
|
||||||
|
- a version number
|
||||||
|
|
||||||
|
Garage nodes will always use the cluster layout with the highest version number.
|
||||||
|
|
||||||
|
Garage nodes also maintain and synchronize between them a set of proposed role
|
||||||
|
changes that haven't yet been applied. These changes will be applied (or
|
||||||
|
canceled) in the next version of the layout
|
||||||
|
|
||||||
|
The following commands insert modifications to the set of proposed role changes
|
||||||
|
for the next layout version (but they do not create the new layout immediately):
|
||||||
|
|
||||||
|
```bash
|
||||||
|
garage layout assign [...]
|
||||||
|
garage layout remove [...]
|
||||||
|
```
|
||||||
|
|
||||||
|
The following command can be used to inspect the layout that is currently set in the cluster
|
||||||
|
and the changes proposed for the next layout version, if any:
|
||||||
|
|
||||||
|
```bash
|
||||||
|
garage layout show
|
||||||
|
```
|
||||||
|
|
||||||
|
The following commands create a new layout with the specified version number,
|
||||||
|
that either takes into account the proposed changes or cancels them:
|
||||||
|
|
||||||
|
```bash
|
||||||
|
garage layout apply --version <new_version_number>
|
||||||
quentin
commented
For future release, could we consider a system similar to Nomad's one? For future release, could we consider a system similar to Nomad's one?
We could compute an ID for each layout, either random or a hash of the datastructure.
Then, when you run apply, you pass the ID of the layout configuration you want to apply.
This ID could be obtained from `garage layout show`.
It would prevent the following bug: a version number would be necessarily bound to a specific version?
lx
commented
True, but we also need a way to make sure that version numbers are an increasing sequence, otherwise nodes don't have a way to know what is the last version (the one they should use) True, but we also need a way to make sure that version numbers are an increasing sequence, otherwise nodes don't have a way to know what is the last version (the one they should use)
|
|||||||
|
garage layout revert --version <new_version_number>
|
||||||
|
```
|
||||||
|
|
||||||
|
The version number of the new layout to create must be 1 + the version number
|
||||||
|
of the previous layout that existed in the cluster. The `apply` and `revert`
|
||||||
|
commands will fail otherwise.
|
||||||
|
|
||||||
|
## Warnings about Garage cluster layout management
|
||||||
|
|
||||||
|
**Warning: never make several calls to `garage layout apply` or `garage layout
|
||||||
|
revert` with the same value of the `--version` flag. Doing so can lead to the
|
||||||
|
creation of several different layouts with the same version number, in which
|
||||||
|
case your Garage cluster will become inconsistent until fixed.** If a call to
|
||||||
|
`garage layout apply` or `garage layout revert` has failed and `garage layout
|
||||||
|
show` indicates that a new layout with the given version number has not been
|
||||||
|
set in the cluster, then it is fine to call the command again with the same
|
||||||
|
version number.
|
||||||
|
|
||||||
|
If you are using the `garage` CLI by typing individual commands in your
|
||||||
|
shell, you shouldn't have much issues as long as you run commands one after
|
||||||
|
the other and take care of checking the output of `garage layout show`
|
||||||
|
before applying any changes.
|
||||||
|
|
||||||
|
If you are using the `garage` CLI to script layout changes, follow the following recommendations:
|
||||||
|
|
||||||
|
- Make all of your `garage` CLI calls to the same RPC host. Do not use the
|
||||||
|
`garage` CLI to connect to individual nodes to send them each a piece of the
|
||||||
|
layout changes you are making, as the changes propagate asynchronously
|
||||||
|
between nodes and might not all be taken into account at the time when the
|
||||||
|
new layout is applied.
|
||||||
|
|
||||||
|
- **Only call `garage layout apply` once**, and call it **strictly after** all
|
||||||
|
of the `layout assign` and `layout remove` commands have returned.
|
|
@ -1,5 +1,7 @@
|
||||||
# Load Balancing Data (planned for version 0.2)
|
# Load Balancing Data (planned for version 0.2)
|
||||||
|
|
||||||
|
**This is being yet improved in release 0.5. The working document has not been updated yet, it still only applies to Garage 0.2 through 0.4.**
|
||||||
|
|
||||||
I have conducted a quick study of different methods to load-balance data over different Garage nodes using consistent hashing.
|
I have conducted a quick study of different methods to load-balance data over different Garage nodes using consistent hashing.
|
||||||
|
|
||||||
## Requirements
|
## Requirements
|
||||||
|
|
|
@ -69,7 +69,7 @@ done
|
||||||
sleep 3
|
sleep 3
|
||||||
# Establish connections between nodes
|
# Establish connections between nodes
|
||||||
for count in $(seq 1 3); do
|
for count in $(seq 1 3); do
|
||||||
NODE=$(garage -c /tmp/config.$count.toml node-id -q)
|
NODE=$(garage -c /tmp/config.$count.toml node id -q)
|
||||||
for count2 in $(seq 1 3); do
|
for count2 in $(seq 1 3); do
|
||||||
garage -c /tmp/config.$count2.toml node connect $NODE
|
garage -c /tmp/config.$count2.toml node connect $NODE
|
||||||
done
|
done
|
||||||
|
|
|
@ -25,6 +25,7 @@ garage -c /tmp/config.1.toml status \
|
||||||
| grep 'NO ROLE' \
|
| grep 'NO ROLE' \
|
||||||
| grep -Po '^[0-9a-f]+' \
|
| grep -Po '^[0-9a-f]+' \
|
||||||
| while read id; do
|
| while read id; do
|
||||||
garage -c /tmp/config.1.toml node configure -z dc1 -c 1 $id
|
garage -c /tmp/config.1.toml layout assign $id -z dc1 -c 1
|
||||||
done
|
done
|
||||||
|
|
||||||
|
garage -c /tmp/config.1.toml layout apply --version 1
|
||||||
|
|
|
@ -116,11 +116,11 @@ if [ -z "$SKIP_AWS" ]; then
|
||||||
echo "🧪 Website Testing"
|
echo "🧪 Website Testing"
|
||||||
echo "<h1>hello world</h1>" > /tmp/garage-index.html
|
echo "<h1>hello world</h1>" > /tmp/garage-index.html
|
||||||
aws s3 cp /tmp/garage-index.html s3://eprouvette/index.html
|
aws s3 cp /tmp/garage-index.html s3://eprouvette/index.html
|
||||||
[ `curl -s -o /dev/null -w "%{http_code}" --header "Host: eprouvette.garage.tld" http://127.0.0.1:3923/ ` == 404 ]
|
[ `curl -s -o /dev/null -w "%{http_code}" --header "Host: eprouvette.garage.tld" http://127.0.0.1:3921/ ` == 404 ]
|
||||||
garage -c /tmp/config.1.toml bucket website --allow eprouvette
|
garage -c /tmp/config.1.toml bucket website --allow eprouvette
|
||||||
[ `curl -s -o /dev/null -w "%{http_code}" --header "Host: eprouvette.garage.tld" http://127.0.0.1:3923/ ` == 200 ]
|
[ `curl -s -o /dev/null -w "%{http_code}" --header "Host: eprouvette.garage.tld" http://127.0.0.1:3921/ ` == 200 ]
|
||||||
garage -c /tmp/config.1.toml bucket website --deny eprouvette
|
garage -c /tmp/config.1.toml bucket website --deny eprouvette
|
||||||
[ `curl -s -o /dev/null -w "%{http_code}" --header "Host: eprouvette.garage.tld" http://127.0.0.1:3923/ ` == 404 ]
|
[ `curl -s -o /dev/null -w "%{http_code}" --header "Host: eprouvette.garage.tld" http://127.0.0.1:3921/ ` == 404 ]
|
||||||
aws s3 rm s3://eprouvette/index.html
|
aws s3 rm s3://eprouvette/index.html
|
||||||
rm /tmp/garage-index.html
|
rm /tmp/garage-index.html
|
||||||
fi
|
fi
|
||||||
|
|
|
@ -1,11 +1,12 @@
|
||||||
[package]
|
[package]
|
||||||
name = "garage_api"
|
name = "garage_api"
|
||||||
version = "0.4.0"
|
version = "0.5.0"
|
||||||
authors = ["Alex Auvolat <alex@adnab.me>"]
|
authors = ["Alex Auvolat <alex@adnab.me>"]
|
||||||
edition = "2018"
|
edition = "2018"
|
||||||
license = "AGPL-3.0"
|
license = "AGPL-3.0"
|
||||||
description = "S3 API server crate for the Garage object store"
|
description = "S3 API server crate for the Garage object store"
|
||||||
repository = "https://git.deuxfleurs.fr/Deuxfleurs/garage"
|
repository = "https://git.deuxfleurs.fr/Deuxfleurs/garage"
|
||||||
|
readme = "../../README.md"
|
||||||
|
|
||||||
[lib]
|
[lib]
|
||||||
path = "lib.rs"
|
path = "lib.rs"
|
||||||
|
@ -13,9 +14,9 @@ path = "lib.rs"
|
||||||
# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
|
# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
|
||||||
|
|
||||||
[dependencies]
|
[dependencies]
|
||||||
garage_model = { version = "0.4.0", path = "../model" }
|
garage_model = { version = "0.5.0", path = "../model" }
|
||||||
garage_table = { version = "0.4.0", path = "../table" }
|
garage_table = { version = "0.5.0", path = "../table" }
|
||||||
garage_util = { version = "0.4.0", path = "../util" }
|
garage_util = { version = "0.5.0", path = "../util" }
|
||||||
|
|
||||||
base64 = "0.13"
|
base64 = "0.13"
|
||||||
bytes = "1.0"
|
bytes = "1.0"
|
||||||
|
|
|
@ -1,11 +1,12 @@
|
||||||
[package]
|
[package]
|
||||||
name = "garage"
|
name = "garage"
|
||||||
version = "0.4.0"
|
version = "0.5.0"
|
||||||
authors = ["Alex Auvolat <alex@adnab.me>"]
|
authors = ["Alex Auvolat <alex@adnab.me>"]
|
||||||
edition = "2018"
|
edition = "2018"
|
||||||
license = "AGPL-3.0"
|
license = "AGPL-3.0"
|
||||||
description = "Garage, an S3-compatible distributed object store for self-hosted deployments"
|
description = "Garage, an S3-compatible distributed object store for self-hosted deployments"
|
||||||
repository = "https://git.deuxfleurs.fr/Deuxfleurs/garage"
|
repository = "https://git.deuxfleurs.fr/Deuxfleurs/garage"
|
||||||
|
readme = "../../README.md"
|
||||||
|
|
||||||
[[bin]]
|
[[bin]]
|
||||||
name = "garage"
|
name = "garage"
|
||||||
|
@ -14,12 +15,12 @@ path = "main.rs"
|
||||||
# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
|
# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
|
||||||
|
|
||||||
[dependencies]
|
[dependencies]
|
||||||
garage_api = { version = "0.4.0", path = "../api" }
|
garage_api = { version = "0.5.0", path = "../api" }
|
||||||
garage_model = { version = "0.4.0", path = "../model" }
|
garage_model = { version = "0.5.0", path = "../model" }
|
||||||
garage_rpc = { version = "0.4.0", path = "../rpc" }
|
garage_rpc = { version = "0.5.0", path = "../rpc" }
|
||||||
garage_table = { version = "0.4.0", path = "../table" }
|
garage_table = { version = "0.5.0", path = "../table" }
|
||||||
garage_util = { version = "0.4.0", path = "../util" }
|
garage_util = { version = "0.5.0", path = "../util" }
|
||||||
garage_web = { version = "0.4.0", path = "../web" }
|
garage_web = { version = "0.5.0", path = "../web" }
|
||||||
|
|
||||||
bytes = "1.0"
|
bytes = "1.0"
|
||||||
git-version = "0.3.4"
|
git-version = "0.3.4"
|
||||||
|
|
|
@ -339,7 +339,7 @@ impl AdminRpcHandler {
|
||||||
|
|
||||||
let mut failures = vec![];
|
let mut failures = vec![];
|
||||||
let ring = self.garage.system.ring.borrow().clone();
|
let ring = self.garage.system.ring.borrow().clone();
|
||||||
for node in ring.config.members.keys() {
|
for node in ring.layout.node_ids().iter() {
|
||||||
let node = (*node).into();
|
let node = (*node).into();
|
||||||
let resp = self
|
let resp = self
|
||||||
.endpoint
|
.endpoint
|
||||||
|
@ -383,7 +383,7 @@ impl AdminRpcHandler {
|
||||||
let mut ret = String::new();
|
let mut ret = String::new();
|
||||||
let ring = self.garage.system.ring.borrow().clone();
|
let ring = self.garage.system.ring.borrow().clone();
|
||||||
|
|
||||||
for node in ring.config.members.keys() {
|
for node in ring.layout.node_ids().iter() {
|
||||||
let mut opt = opt.clone();
|
let mut opt = opt.clone();
|
||||||
opt.all_nodes = false;
|
opt.all_nodes = false;
|
||||||
|
|
||||||
|
|
|
@ -2,7 +2,7 @@ use std::collections::HashSet;
|
||||||
|
|
||||||
use garage_util::error::*;
|
use garage_util::error::*;
|
||||||
|
|
||||||
use garage_rpc::ring::*;
|
use garage_rpc::layout::*;
|
||||||
use garage_rpc::system::*;
|
use garage_rpc::system::*;
|
||||||
use garage_rpc::*;
|
use garage_rpc::*;
|
||||||
|
|
||||||
|
@ -20,11 +20,8 @@ pub async fn cli_command_dispatch(
|
||||||
Command::Node(NodeOperation::Connect(connect_opt)) => {
|
Command::Node(NodeOperation::Connect(connect_opt)) => {
|
||||||
cmd_connect(system_rpc_endpoint, rpc_host, connect_opt).await
|
cmd_connect(system_rpc_endpoint, rpc_host, connect_opt).await
|
||||||
}
|
}
|
||||||
Command::Node(NodeOperation::Configure(configure_opt)) => {
|
Command::Layout(layout_opt) => {
|
||||||
cmd_configure(system_rpc_endpoint, rpc_host, configure_opt).await
|
cli_layout_command_dispatch(layout_opt, system_rpc_endpoint, rpc_host).await
|
||||||
}
|
|
||||||
Command::Node(NodeOperation::Remove(remove_opt)) => {
|
|
||||||
cmd_remove(system_rpc_endpoint, rpc_host, remove_opt).await
|
|
||||||
}
|
}
|
||||||
Command::Bucket(bo) => {
|
Command::Bucket(bo) => {
|
||||||
cmd_admin(admin_rpc_endpoint, rpc_host, AdminRpc::BucketOperation(bo)).await
|
cmd_admin(admin_rpc_endpoint, rpc_host, AdminRpc::BucketOperation(bo)).await
|
||||||
|
@ -48,56 +45,60 @@ pub async fn cmd_status(rpc_cli: &Endpoint<SystemRpc, ()>, rpc_host: NodeID) ->
|
||||||
SystemRpc::ReturnKnownNodes(nodes) => nodes,
|
SystemRpc::ReturnKnownNodes(nodes) => nodes,
|
||||||
resp => return Err(Error::Message(format!("Invalid RPC response: {:?}", resp))),
|
resp => return Err(Error::Message(format!("Invalid RPC response: {:?}", resp))),
|
||||||
};
|
};
|
||||||
let config = match rpc_cli
|
let layout = fetch_layout(rpc_cli, rpc_host).await?;
|
||||||
.call(&rpc_host, &SystemRpc::PullConfig, PRIO_NORMAL)
|
|
||||||
.await??
|
|
||||||
{
|
|
||||||
SystemRpc::AdvertiseConfig(cfg) => cfg,
|
|
||||||
resp => return Err(Error::Message(format!("Invalid RPC response: {:?}", resp))),
|
|
||||||
};
|
|
||||||
|
|
||||||
println!("==== HEALTHY NODES ====");
|
println!("==== HEALTHY NODES ====");
|
||||||
let mut healthy_nodes = vec!["ID\tHostname\tAddress\tTag\tZone\tCapacity".to_string()];
|
let mut healthy_nodes = vec!["ID\tHostname\tAddress\tTags\tZone\tCapacity".to_string()];
|
||||||
for adv in status.iter().filter(|adv| adv.is_up) {
|
for adv in status.iter().filter(|adv| adv.is_up) {
|
||||||
if let Some(cfg) = config.members.get(&adv.id) {
|
match layout.roles.get(&adv.id) {
|
||||||
healthy_nodes.push(format!(
|
Some(NodeRoleV(Some(cfg))) => {
|
||||||
"{id:?}\t{host}\t{addr}\t[{tag}]\t{zone}\t{capacity}",
|
healthy_nodes.push(format!(
|
||||||
id = adv.id,
|
"{id:?}\t{host}\t{addr}\t[{tags}]\t{zone}\t{capacity}",
|
||||||
host = adv.status.hostname,
|
id = adv.id,
|
||||||
addr = adv.addr,
|
host = adv.status.hostname,
|
||||||
tag = cfg.tag,
|
addr = adv.addr,
|
||||||
zone = cfg.zone,
|
tags = cfg.tags.join(","),
|
||||||
capacity = cfg.capacity_string(),
|
zone = cfg.zone,
|
||||||
));
|
capacity = cfg.capacity_string(),
|
||||||
} else {
|
));
|
||||||
healthy_nodes.push(format!(
|
}
|
||||||
"{id:?}\t{h}\t{addr}\tNO ROLE ASSIGNED",
|
_ => {
|
||||||
id = adv.id,
|
let new_role = match layout.staging.get(&adv.id) {
|
||||||
h = adv.status.hostname,
|
Some(NodeRoleV(Some(_))) => "(pending)",
|
||||||
addr = adv.addr,
|
_ => "NO ROLE ASSIGNED",
|
||||||
));
|
};
|
||||||
|
healthy_nodes.push(format!(
|
||||||
|
"{id:?}\t{h}\t{addr}\t{new_role}",
|
||||||
|
id = adv.id,
|
||||||
|
h = adv.status.hostname,
|
||||||
|
addr = adv.addr,
|
||||||
|
new_role = new_role,
|
||||||
|
));
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
format_table(healthy_nodes);
|
format_table(healthy_nodes);
|
||||||
|
|
||||||
let status_keys = status.iter().map(|adv| adv.id).collect::<HashSet<_>>();
|
let status_keys = status.iter().map(|adv| adv.id).collect::<HashSet<_>>();
|
||||||
let failure_case_1 = status.iter().any(|adv| !adv.is_up);
|
let failure_case_1 = status.iter().any(|adv| !adv.is_up);
|
||||||
let failure_case_2 = config
|
let failure_case_2 = layout
|
||||||
.members
|
.roles
|
||||||
|
.items()
|
||||||
.iter()
|
.iter()
|
||||||
.any(|(id, _)| !status_keys.contains(id));
|
.filter(|(_, _, v)| v.0.is_some())
|
||||||
|
.any(|(id, _, _)| !status_keys.contains(id));
|
||||||
if failure_case_1 || failure_case_2 {
|
if failure_case_1 || failure_case_2 {
|
||||||
println!("\n==== FAILED NODES ====");
|
println!("\n==== FAILED NODES ====");
|
||||||
let mut failed_nodes =
|
let mut failed_nodes =
|
||||||
vec!["ID\tHostname\tAddress\tTag\tZone\tCapacity\tLast seen".to_string()];
|
vec!["ID\tHostname\tAddress\tTags\tZone\tCapacity\tLast seen".to_string()];
|
||||||
for adv in status.iter().filter(|adv| !adv.is_up) {
|
for adv in status.iter().filter(|adv| !adv.is_up) {
|
||||||
if let Some(cfg) = config.members.get(&adv.id) {
|
if let Some(NodeRoleV(Some(cfg))) = layout.roles.get(&adv.id) {
|
||||||
failed_nodes.push(format!(
|
failed_nodes.push(format!(
|
||||||
"{id:?}\t{host}\t{addr}\t[{tag}]\t{zone}\t{capacity}\t{last_seen}",
|
"{id:?}\t{host}\t{addr}\t[{tags}]\t{zone}\t{capacity}\t{last_seen}",
|
||||||
id = adv.id,
|
id = adv.id,
|
||||||
host = adv.status.hostname,
|
host = adv.status.hostname,
|
||||||
addr = adv.addr,
|
addr = adv.addr,
|
||||||
tag = cfg.tag,
|
tags = cfg.tags.join(","),
|
||||||
zone = cfg.zone,
|
zone = cfg.zone,
|
||||||
capacity = cfg.capacity_string(),
|
capacity = cfg.capacity_string(),
|
||||||
last_seen = adv
|
last_seen = adv
|
||||||
|
@ -107,20 +108,28 @@ pub async fn cmd_status(rpc_cli: &Endpoint<SystemRpc, ()>, rpc_host: NodeID) ->
|
||||||
));
|
));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
for (id, cfg) in config.members.iter() {
|
for (id, _, role_v) in layout.roles.items().iter() {
|
||||||
if !status_keys.contains(id) {
|
if let NodeRoleV(Some(cfg)) = role_v {
|
||||||
failed_nodes.push(format!(
|
if !status_keys.contains(id) {
|
||||||
"{id:?}\t??\t??\t[{tag}]\t{zone}\t{capacity}\tnever seen",
|
failed_nodes.push(format!(
|
||||||
id = id,
|
"{id:?}\t??\t??\t[{tags}]\t{zone}\t{capacity}\tnever seen",
|
||||||
tag = cfg.tag,
|
id = id,
|
||||||
zone = cfg.zone,
|
tags = cfg.tags.join(","),
|
||||||
capacity = cfg.capacity_string(),
|
zone = cfg.zone,
|
||||||
));
|
capacity = cfg.capacity_string(),
|
||||||
|
));
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
format_table(failed_nodes);
|
format_table(failed_nodes);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if print_staging_role_changes(&layout) {
|
||||||
|
println!();
|
||||||
|
println!("Please use `garage layout show` to check the proposed new layout and apply it.");
|
||||||
|
println!();
|
||||||
|
}
|
||||||
|
|
||||||
Ok(())
|
Ok(())
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -141,115 +150,6 @@ pub async fn cmd_connect(
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
pub async fn cmd_configure(
|
|
||||||
rpc_cli: &Endpoint<SystemRpc, ()>,
|
|
||||||
rpc_host: NodeID,
|
|
||||||
args: ConfigureNodeOpt,
|
|
||||||
) -> Result<(), Error> {
|
|
||||||
let status = match rpc_cli
|
|
||||||
.call(&rpc_host, &SystemRpc::GetKnownNodes, PRIO_NORMAL)
|
|
||||||
.await??
|
|
||||||
{
|
|
||||||
SystemRpc::ReturnKnownNodes(nodes) => nodes,
|
|
||||||
resp => return Err(Error::Message(format!("Invalid RPC response: {:?}", resp))),
|
|
||||||
};
|
|
||||||
|
|
||||||
let added_node = find_matching_node(status.iter().map(|adv| adv.id), &args.node_id)?;
|
|
||||||
|
|
||||||
let mut config = match rpc_cli
|
|
||||||
.call(&rpc_host, &SystemRpc::PullConfig, PRIO_NORMAL)
|
|
||||||
.await??
|
|
||||||
{
|
|
||||||
SystemRpc::AdvertiseConfig(cfg) => cfg,
|
|
||||||
resp => return Err(Error::Message(format!("Invalid RPC response: {:?}", resp))),
|
|
||||||
};
|
|
||||||
|
|
||||||
for replaced in args.replace.iter() {
|
|
||||||
let replaced_node = find_matching_node(config.members.keys().cloned(), replaced)?;
|
|
||||||
if config.members.remove(&replaced_node).is_none() {
|
|
||||||
return Err(Error::Message(format!(
|
|
||||||
"Cannot replace node {:?} as it is not in current configuration",
|
|
||||||
replaced_node
|
|
||||||
)));
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
if args.capacity.is_some() && args.gateway {
|
|
||||||
return Err(Error::Message(
|
|
||||||
"-c and -g are mutually exclusive, please configure node either with c>0 to act as a storage node or with -g to act as a gateway node".into()));
|
|
||||||
}
|
|
||||||
if args.capacity == Some(0) {
|
|
||||||
return Err(Error::Message("Invalid capacity value: 0".into()));
|
|
||||||
}
|
|
||||||
|
|
||||||
let new_entry = match config.members.get(&added_node) {
|
|
||||||
None => {
|
|
||||||
let capacity = match args.capacity {
|
|
||||||
Some(c) => Some(c),
|
|
||||||
None if args.gateway => None,
|
|
||||||
_ => return Err(Error::Message(
|
|
||||||
"Please specify a capacity with the -c flag, or set node explicitly as gateway with -g".into())),
|
|
||||||
};
|
|
||||||
NetworkConfigEntry {
|
|
||||||
zone: args.zone.ok_or("Please specifiy a zone with the -z flag")?,
|
|
||||||
capacity,
|
|
||||||
tag: args.tag.unwrap_or_default(),
|
|
||||||
}
|
|
||||||
}
|
|
||||||
Some(old) => {
|
|
||||||
let capacity = match args.capacity {
|
|
||||||
Some(c) => Some(c),
|
|
||||||
None if args.gateway => None,
|
|
||||||
_ => old.capacity,
|
|
||||||
};
|
|
||||||
NetworkConfigEntry {
|
|
||||||
zone: args.zone.unwrap_or_else(|| old.zone.to_string()),
|
|
||||||
capacity,
|
|
||||||
tag: args.tag.unwrap_or_else(|| old.tag.to_string()),
|
|
||||||
}
|
|
||||||
}
|
|
||||||
};
|
|
||||||
|
|
||||||
config.members.insert(added_node, new_entry);
|
|
||||||
config.version += 1;
|
|
||||||
|
|
||||||
rpc_cli
|
|
||||||
.call(&rpc_host, &SystemRpc::AdvertiseConfig(config), PRIO_NORMAL)
|
|
||||||
.await??;
|
|
||||||
Ok(())
|
|
||||||
}
|
|
||||||
|
|
||||||
pub async fn cmd_remove(
|
|
||||||
rpc_cli: &Endpoint<SystemRpc, ()>,
|
|
||||||
rpc_host: NodeID,
|
|
||||||
args: RemoveNodeOpt,
|
|
||||||
) -> Result<(), Error> {
|
|
||||||
let mut config = match rpc_cli
|
|
||||||
.call(&rpc_host, &SystemRpc::PullConfig, PRIO_NORMAL)
|
|
||||||
.await??
|
|
||||||
{
|
|
||||||
SystemRpc::AdvertiseConfig(cfg) => cfg,
|
|
||||||
resp => return Err(Error::Message(format!("Invalid RPC response: {:?}", resp))),
|
|
||||||
};
|
|
||||||
|
|
||||||
let deleted_node = find_matching_node(config.members.keys().cloned(), &args.node_id)?;
|
|
||||||
|
|
||||||
if !args.yes {
|
|
||||||
return Err(Error::Message(format!(
|
|
||||||
"Add the flag --yes to really remove {:?} from the cluster",
|
|
||||||
deleted_node
|
|
||||||
)));
|
|
||||||
}
|
|
||||||
|
|
||||||
config.members.remove(&deleted_node);
|
|
||||||
config.version += 1;
|
|
||||||
|
|
||||||
rpc_cli
|
|
||||||
.call(&rpc_host, &SystemRpc::AdvertiseConfig(config), PRIO_NORMAL)
|
|
||||||
.await??;
|
|
||||||
Ok(())
|
|
||||||
}
|
|
||||||
|
|
||||||
pub async fn cmd_admin(
|
pub async fn cmd_admin(
|
||||||
rpc_cli: &Endpoint<AdminRpc, ()>,
|
rpc_cli: &Endpoint<AdminRpc, ()>,
|
||||||
rpc_host: NodeID,
|
rpc_host: NodeID,
|
||||||
|
@ -283,5 +183,3 @@ pub async fn cmd_admin(
|
||||||
}
|
}
|
||||||
Ok(())
|
Ok(())
|
||||||
}
|
}
|
||||||
|
|
||||||
// --- Utility functions ----
|
|
||||||
|
|
340
src/garage/cli/layout.rs
Normal file
|
@ -0,0 +1,340 @@
|
||||||
|
use garage_util::crdt::Crdt;
|
||||||
|
use garage_util::data::*;
|
||||||
|
use garage_util::error::*;
|
||||||
|
|
||||||
|
use garage_rpc::layout::*;
|
||||||
|
use garage_rpc::system::*;
|
||||||
|
use garage_rpc::*;
|
||||||
|
|
||||||
|
use crate::cli::*;
|
||||||
|
|
||||||
|
pub async fn cli_layout_command_dispatch(
|
||||||
|
cmd: LayoutOperation,
|
||||||
|
system_rpc_endpoint: &Endpoint<SystemRpc, ()>,
|
||||||
|
rpc_host: NodeID,
|
||||||
|
) -> Result<(), Error> {
|
||||||
|
match cmd {
|
||||||
|
LayoutOperation::Assign(configure_opt) => {
|
||||||
|
cmd_assign_role(system_rpc_endpoint, rpc_host, configure_opt).await
|
||||||
|
}
|
||||||
|
LayoutOperation::Remove(remove_opt) => {
|
||||||
|
cmd_remove_role(system_rpc_endpoint, rpc_host, remove_opt).await
|
||||||
|
}
|
||||||
|
LayoutOperation::Show => cmd_show_layout(system_rpc_endpoint, rpc_host).await,
|
||||||
|
LayoutOperation::Apply(apply_opt) => {
|
||||||
|
cmd_apply_layout(system_rpc_endpoint, rpc_host, apply_opt).await
|
||||||
|
}
|
||||||
|
LayoutOperation::Revert(revert_opt) => {
|
||||||
|
cmd_revert_layout(system_rpc_endpoint, rpc_host, revert_opt).await
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
pub async fn cmd_assign_role(
|
||||||
|
rpc_cli: &Endpoint<SystemRpc, ()>,
|
||||||
|
rpc_host: NodeID,
|
||||||
|
args: AssignRoleOpt,
|
||||||
|
) -> Result<(), Error> {
|
||||||
|
let status = match rpc_cli
|
||||||
|
.call(&rpc_host, &SystemRpc::GetKnownNodes, PRIO_NORMAL)
|
||||||
|
.await??
|
||||||
|
{
|
||||||
|
SystemRpc::ReturnKnownNodes(nodes) => nodes,
|
||||||
|
resp => return Err(Error::Message(format!("Invalid RPC response: {:?}", resp))),
|
||||||
|
};
|
||||||
|
|
||||||
|
let added_node = find_matching_node(status.iter().map(|adv| adv.id), &args.node_id)?;
|
||||||
|
|
||||||
|
let mut layout = fetch_layout(rpc_cli, rpc_host).await?;
|
||||||
|
|
||||||
|
let mut roles = layout.roles.clone();
|
||||||
|
roles.merge(&layout.staging);
|
||||||
|
|
||||||
|
for replaced in args.replace.iter() {
|
||||||
|
let replaced_node = find_matching_node(layout.node_ids().iter().cloned(), replaced)?;
|
||||||
|
match roles.get(&replaced_node) {
|
||||||
|
Some(NodeRoleV(Some(_))) => {
|
||||||
|
layout
|
||||||
|
.staging
|
||||||
|
.merge(&roles.update_mutator(replaced_node, NodeRoleV(None)));
|
||||||
|
}
|
||||||
|
_ => {
|
||||||
|
return Err(Error::Message(format!(
|
||||||
|
"Cannot replace node {:?} as it is not currently in planned layout",
|
||||||
|
replaced_node
|
||||||
|
)));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if args.capacity.is_some() && args.gateway {
|
||||||
|
return Err(Error::Message(
|
||||||
|
"-c and -g are mutually exclusive, please configure node either with c>0 to act as a storage node or with -g to act as a gateway node".into()));
|
||||||
|
}
|
||||||
|
if args.capacity == Some(0) {
|
||||||
|
return Err(Error::Message("Invalid capacity value: 0".into()));
|
||||||
|
}
|
||||||
|
|
||||||
|
let new_entry = match roles.get(&added_node) {
|
||||||
|
Some(NodeRoleV(Some(old))) => {
|
||||||
|
let capacity = match args.capacity {
|
||||||
|
Some(c) => Some(c),
|
||||||
|
None if args.gateway => None,
|
||||||
|
None => old.capacity,
|
||||||
|
};
|
||||||
|
let tags = if args.tags.is_empty() {
|
||||||
|
old.tags.clone()
|
||||||
|
} else {
|
||||||
|
args.tags
|
||||||
|
};
|
||||||
|
NodeRole {
|
||||||
|
zone: args.zone.unwrap_or_else(|| old.zone.to_string()),
|
||||||
|
capacity,
|
||||||
|
tags,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
_ => {
|
||||||
|
let capacity = match args.capacity {
|
||||||
|
Some(c) => Some(c),
|
||||||
|
None if args.gateway => None,
|
||||||
|
None => return Err(Error::Message(
|
||||||
|
"Please specify a capacity with the -c flag, or set node explicitly as gateway with -g".into())),
|
||||||
|
};
|
||||||
|
NodeRole {
|
||||||
|
zone: args.zone.ok_or("Please specifiy a zone with the -z flag")?,
|
||||||
|
capacity,
|
||||||
|
tags: args.tags,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
layout
|
||||||
|
.staging
|
||||||
|
.merge(&roles.update_mutator(added_node, NodeRoleV(Some(new_entry))));
|
||||||
|
|
||||||
|
send_layout(rpc_cli, rpc_host, layout).await?;
|
||||||
|
|
||||||
|
println!("Role change is staged but not yet commited.");
|
||||||
|
println!("Use `garage layout show` to view staged role changes,");
|
||||||
|
println!("and `garage layout apply` to enact staged changes.");
|
||||||
|
Ok(())
|
||||||
|
}
|
||||||
|
|
||||||
|
pub async fn cmd_remove_role(
|
||||||
|
rpc_cli: &Endpoint<SystemRpc, ()>,
|
||||||
|
rpc_host: NodeID,
|
||||||
|
args: RemoveRoleOpt,
|
||||||
|
) -> Result<(), Error> {
|
||||||
|
let mut layout = fetch_layout(rpc_cli, rpc_host).await?;
|
||||||
|
|
||||||
|
let mut roles = layout.roles.clone();
|
||||||
|
roles.merge(&layout.staging);
|
||||||
|
|
||||||
|
let deleted_node =
|
||||||
|
find_matching_node(roles.items().iter().map(|(id, _, _)| *id), &args.node_id)?;
|
||||||
|
|
||||||
|
layout
|
||||||
|
.staging
|
||||||
|
.merge(&roles.update_mutator(deleted_node, NodeRoleV(None)));
|
||||||
|
|
||||||
|
send_layout(rpc_cli, rpc_host, layout).await?;
|
||||||
|
|
||||||
|
println!("Role removal is staged but not yet commited.");
|
||||||
|
println!("Use `garage layout show` to view staged role changes,");
|
||||||
|
println!("and `garage layout apply` to enact staged changes.");
|
||||||
|
Ok(())
|
||||||
|
}
|
||||||
|
|
||||||
|
pub async fn cmd_show_layout(
|
||||||
|
rpc_cli: &Endpoint<SystemRpc, ()>,
|
||||||
|
rpc_host: NodeID,
|
||||||
|
) -> Result<(), Error> {
|
||||||
|
let mut layout = fetch_layout(rpc_cli, rpc_host).await?;
|
||||||
|
|
||||||
|
println!("==== CURRENT CLUSTER LAYOUT ====");
|
||||||
|
if !print_cluster_layout(&layout) {
|
||||||
|
println!("No nodes currently have a role in the cluster.");
|
||||||
|
println!("See `garage status` to view available nodes.");
|
||||||
|
}
|
||||||
|
println!();
|
||||||
|
println!("Current cluster layout version: {}", layout.version);
|
||||||
|
|
||||||
|
if print_staging_role_changes(&layout) {
|
||||||
|
layout.roles.merge(&layout.staging);
|
||||||
|
|
||||||
|
println!();
|
||||||
|
println!("==== NEW CLUSTER LAYOUT AFTER APPLYING CHANGES ====");
|
||||||
|
if !print_cluster_layout(&layout) {
|
||||||
|
println!("No nodes have a role in the new layout.");
|
||||||
|
}
|
||||||
|
println!();
|
||||||
|
|
||||||
|
// this will print the stats of what partitions
|
||||||
|
// will move around when we apply
|
||||||
|
if layout.calculate_partition_assignation() {
|
||||||
|
println!("To enact the staged role changes, type:");
|
||||||
|
println!();
|
||||||
|
println!(" garage layout apply --version {}", layout.version + 1);
|
||||||
|
println!();
|
||||||
|
println!(
|
||||||
|
"You can also revert all proposed changes with: garage layout revert --version {}",
|
||||||
|
layout.version + 1
|
||||||
|
);
|
||||||
|
} else {
|
||||||
|
println!("Not enough nodes have an assigned role to maintain enough copies of data.");
|
||||||
|
println!("This new layout cannot yet be applied.");
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
Ok(())
|
||||||
|
}
|
||||||
|
|
||||||
|
pub async fn cmd_apply_layout(
|
||||||
|
rpc_cli: &Endpoint<SystemRpc, ()>,
|
||||||
|
rpc_host: NodeID,
|
||||||
|
apply_opt: ApplyLayoutOpt,
|
||||||
|
) -> Result<(), Error> {
|
||||||
|
let mut layout = fetch_layout(rpc_cli, rpc_host).await?;
|
||||||
|
|
||||||
|
match apply_opt.version {
|
||||||
|
None => {
|
||||||
|
println!("Please pass the --version flag to ensure that you are writing the correct version of the cluster layout.");
|
||||||
|
println!("To know the correct value of the --version flag, invoke `garage layout show` and review the proposed changes.");
|
||||||
|
return Err(Error::Message("--version flag is missing".into()));
|
||||||
|
}
|
||||||
|
Some(v) => {
|
||||||
|
if v != layout.version + 1 {
|
||||||
|
return Err(Error::Message("Invalid value of --version flag".into()));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
layout.roles.merge(&layout.staging);
|
||||||
|
|
||||||
|
if !layout.calculate_partition_assignation() {
|
||||||
|
return Err(Error::Message("Could not calculate new assignation of partitions to nodes. This can happen if there are less nodes than the desired number of copies of your data (see the replication_mode configuration parameter).".into()));
|
||||||
|
}
|
||||||
|
|
||||||
|
layout.staging.clear();
|
||||||
|
layout.staging_hash = blake2sum(&rmp_to_vec_all_named(&layout.staging).unwrap()[..]);
|
||||||
|
|
||||||
|
layout.version += 1;
|
||||||
|
|
||||||
|
send_layout(rpc_cli, rpc_host, layout).await?;
|
||||||
|
|
||||||
|
println!("New cluster layout with updated role assignation has been applied in cluster.");
|
||||||
|
println!("Data will now be moved around between nodes accordingly.");
|
||||||
|
|
||||||
|
Ok(())
|
||||||
|
}
|
||||||
|
|
||||||
|
pub async fn cmd_revert_layout(
|
||||||
|
rpc_cli: &Endpoint<SystemRpc, ()>,
|
||||||
|
rpc_host: NodeID,
|
||||||
|
revert_opt: RevertLayoutOpt,
|
||||||
|
) -> Result<(), Error> {
|
||||||
|
let mut layout = fetch_layout(rpc_cli, rpc_host).await?;
|
||||||
|
|
||||||
|
match revert_opt.version {
|
||||||
|
None => {
|
||||||
|
println!("Please pass the --version flag to ensure that you are writing the correct version of the cluster layout.");
|
||||||
|
println!("To know the correct value of the --version flag, invoke `garage layout show` and review the proposed changes.");
|
||||||
|
return Err(Error::Message("--version flag is missing".into()));
|
||||||
|
}
|
||||||
|
Some(v) => {
|
||||||
|
if v != layout.version + 1 {
|
||||||
|
return Err(Error::Message("Invalid value of --version flag".into()));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
layout.staging.clear();
|
||||||
|
layout.staging_hash = blake2sum(&rmp_to_vec_all_named(&layout.staging).unwrap()[..]);
|
||||||
|
|
||||||
|
layout.version += 1;
|
||||||
|
|
||||||
|
send_layout(rpc_cli, rpc_host, layout).await?;
|
||||||
|
|
||||||
|
println!("All proposed role changes in cluster layout have been canceled.");
|
||||||
|
Ok(())
|
||||||
|
}
|
||||||
|
|
||||||
|
// --- utility ---
|
||||||
|
|
||||||
|
pub async fn fetch_layout(
|
||||||
|
rpc_cli: &Endpoint<SystemRpc, ()>,
|
||||||
|
rpc_host: NodeID,
|
||||||
|
) -> Result<ClusterLayout, Error> {
|
||||||
|
match rpc_cli
|
||||||
|
.call(&rpc_host, &SystemRpc::PullClusterLayout, PRIO_NORMAL)
|
||||||
|
.await??
|
||||||
|
{
|
||||||
|
SystemRpc::AdvertiseClusterLayout(t) => Ok(t),
|
||||||
|
resp => Err(Error::Message(format!("Invalid RPC response: {:?}", resp))),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
pub async fn send_layout(
|
||||||
|
rpc_cli: &Endpoint<SystemRpc, ()>,
|
||||||
|
rpc_host: NodeID,
|
||||||
|
layout: ClusterLayout,
|
||||||
|
) -> Result<(), Error> {
|
||||||
|
rpc_cli
|
||||||
|
.call(
|
||||||
|
&rpc_host,
|
||||||
|
&SystemRpc::AdvertiseClusterLayout(layout),
|
||||||
|
PRIO_NORMAL,
|
||||||
|
)
|
||||||
|
.await??;
|
||||||
|
Ok(())
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn print_cluster_layout(layout: &ClusterLayout) -> bool {
|
||||||
|
let mut table = vec!["ID\tTags\tZone\tCapacity".to_string()];
|
||||||
|
for (id, _, role) in layout.roles.items().iter() {
|
||||||
|
let role = match &role.0 {
|
||||||
|
Some(r) => r,
|
||||||
|
_ => continue,
|
||||||
|
};
|
||||||
|
let tags = role.tags.join(",");
|
||||||
|
table.push(format!(
|
||||||
|
"{:?}\t{}\t{}\t{}",
|
||||||
|
id,
|
||||||
|
tags,
|
||||||
|
role.zone,
|
||||||
|
role.capacity_string()
|
||||||
|
));
|
||||||
|
}
|
||||||
|
if table.len() == 1 {
|
||||||
|
false
|
||||||
|
} else {
|
||||||
|
format_table(table);
|
||||||
|
true
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn print_staging_role_changes(layout: &ClusterLayout) -> bool {
|
||||||
|
if !layout.staging.items().is_empty() {
|
||||||
|
println!();
|
||||||
|
println!("==== STAGED ROLE CHANGES ====");
|
||||||
|
let mut table = vec!["ID\tTags\tZone\tCapacity".to_string()];
|
||||||
|
for (id, _, role) in layout.staging.items().iter() {
|
||||||
|
if let Some(role) = &role.0 {
|
||||||
|
let tags = role.tags.join(",");
|
||||||
|
table.push(format!(
|
||||||
|
"{:?}\t{}\t{}\t{}",
|
||||||
|
id,
|
||||||
|
tags,
|
||||||
|
role.zone,
|
||||||
|
role.capacity_string()
|
||||||
|
));
|
||||||
|
} else {
|
||||||
|
table.push(format!("{:?}\tREMOVED", id));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
format_table(table);
|
||||||
|
true
|
||||||
|
} else {
|
||||||
|
false
|
||||||
|
}
|
||||||
|
}
|
|
@ -1,9 +1,11 @@
|
||||||
pub(crate) mod cmd;
|
pub(crate) mod cmd;
|
||||||
pub(crate) mod init;
|
pub(crate) mod init;
|
||||||
|
pub(crate) mod layout;
|
||||||
pub(crate) mod structs;
|
pub(crate) mod structs;
|
||||||
pub(crate) mod util;
|
pub(crate) mod util;
|
||||||
|
|
||||||
pub(crate) use cmd::*;
|
pub(crate) use cmd::*;
|
||||||
pub(crate) use init::*;
|
pub(crate) use init::*;
|
||||||
|
pub(crate) use layout::*;
|
||||||
pub(crate) use structs::*;
|
pub(crate) use structs::*;
|
||||||
pub(crate) use util::*;
|
pub(crate) use util::*;
|
||||||
|
|
|
@ -8,23 +8,23 @@ pub enum Command {
|
||||||
#[structopt(name = "server")]
|
#[structopt(name = "server")]
|
||||||
Server,
|
Server,
|
||||||
|
|
||||||
/// Print identifier (public key) of this Garage node
|
|
||||||
#[structopt(name = "node-id")]
|
|
||||||
NodeId(NodeIdOpt),
|
|
||||||
|
|
||||||
/// Get network status
|
/// Get network status
|
||||||
#[structopt(name = "status")]
|
#[structopt(name = "status")]
|
||||||
Status,
|
Status,
|
||||||
|
|
||||||
/// Garage node operations
|
/// Operations on individual Garage nodes
|
||||||
#[structopt(name = "node")]
|
#[structopt(name = "node")]
|
||||||
Node(NodeOperation),
|
Node(NodeOperation),
|
||||||
|
|
||||||
/// Bucket operations
|
/// Operations on the assignation of node roles in the cluster layout
|
||||||
|
#[structopt(name = "layout")]
|
||||||
|
Layout(LayoutOperation),
|
||||||
|
|
||||||
|
/// Operations on buckets
|
||||||
#[structopt(name = "bucket")]
|
#[structopt(name = "bucket")]
|
||||||
Bucket(BucketOperation),
|
Bucket(BucketOperation),
|
||||||
|
|
||||||
/// Key operations
|
/// Operations on S3 access keys
|
||||||
#[structopt(name = "key")]
|
#[structopt(name = "key")]
|
||||||
Key(KeyOperation),
|
Key(KeyOperation),
|
||||||
|
|
||||||
|
@ -39,17 +39,13 @@ pub enum Command {
|
||||||
|
|
||||||
#[derive(StructOpt, Debug)]
|
#[derive(StructOpt, Debug)]
|
||||||
pub enum NodeOperation {
|
pub enum NodeOperation {
|
||||||
|
/// Print identifier (public key) of this Garage node
|
||||||
|
#[structopt(name = "id")]
|
||||||
|
NodeId(NodeIdOpt),
|
||||||
|
|
||||||
/// Connect to Garage node that is currently isolated from the system
|
/// Connect to Garage node that is currently isolated from the system
|
||||||
#[structopt(name = "connect")]
|
#[structopt(name = "connect")]
|
||||||
Connect(ConnectNodeOpt),
|
Connect(ConnectNodeOpt),
|
||||||
|
|
||||||
/// Configure Garage node
|
|
||||||
#[structopt(name = "configure")]
|
|
||||||
Configure(ConfigureNodeOpt),
|
|
||||||
|
|
||||||
/// Remove Garage node from cluster
|
|
||||||
#[structopt(name = "remove")]
|
|
||||||
Remove(RemoveNodeOpt),
|
|
||||||
}
|
}
|
||||||
|
|
||||||
#[derive(StructOpt, Debug)]
|
#[derive(StructOpt, Debug)]
|
||||||
|
@ -67,8 +63,31 @@ pub struct ConnectNodeOpt {
|
||||||
}
|
}
|
||||||
|
|
||||||
#[derive(StructOpt, Debug)]
|
#[derive(StructOpt, Debug)]
|
||||||
pub struct ConfigureNodeOpt {
|
pub enum LayoutOperation {
|
||||||
/// Node to configure (prefix of hexadecimal node id)
|
/// Assign role to Garage node
|
||||||
|
#[structopt(name = "assign")]
|
||||||
|
Assign(AssignRoleOpt),
|
||||||
|
|
||||||
|
/// Remove role from Garage cluster node
|
||||||
|
#[structopt(name = "remove")]
|
||||||
|
Remove(RemoveRoleOpt),
|
||||||
|
|
||||||
|
/// Show roles currently assigned to nodes and changes staged for commit
|
||||||
|
#[structopt(name = "show")]
|
||||||
|
Show,
|
||||||
|
|
||||||
|
/// Apply staged changes to cluster layout
|
||||||
|
#[structopt(name = "apply")]
|
||||||
|
Apply(ApplyLayoutOpt),
|
||||||
|
|
||||||
|
/// Revert staged changes to cluster layout
|
||||||
|
#[structopt(name = "revert")]
|
||||||
|
Revert(RevertLayoutOpt),
|
||||||
|
}
|
||||||
|
|
||||||
|
#[derive(StructOpt, Debug)]
|
||||||
|
pub struct AssignRoleOpt {
|
||||||
|
/// Node to which to assign role (prefix of hexadecimal node id)
|
||||||
pub(crate) node_id: String,
|
pub(crate) node_id: String,
|
||||||
|
|
||||||
/// Location (zone or datacenter) of the node
|
/// Location (zone or datacenter) of the node
|
||||||
|
@ -83,9 +102,9 @@ pub struct ConfigureNodeOpt {
|
||||||
#[structopt(short = "g", long = "gateway")]
|
#[structopt(short = "g", long = "gateway")]
|
||||||
pub(crate) gateway: bool,
|
pub(crate) gateway: bool,
|
||||||
|
|
||||||
/// Optional node tag
|
/// Optional tags to add to node
|
||||||
#[structopt(short = "t", long = "tag")]
|
#[structopt(short = "t", long = "tag")]
|
||||||
pub(crate) tag: Option<String>,
|
pub(crate) tags: Vec<String>,
|
||||||
|
|
||||||
/// Replaced node(s): list of node IDs that will be removed from the current cluster
|
/// Replaced node(s): list of node IDs that will be removed from the current cluster
|
||||||
#[structopt(long = "replace")]
|
#[structopt(long = "replace")]
|
||||||
|
@ -93,13 +112,24 @@ pub struct ConfigureNodeOpt {
|
||||||
}
|
}
|
||||||
|
|
||||||
#[derive(StructOpt, Debug)]
|
#[derive(StructOpt, Debug)]
|
||||||
pub struct RemoveNodeOpt {
|
pub struct RemoveRoleOpt {
|
||||||
/// Node to configure (prefix of hexadecimal node id)
|
/// Node whose role to remove (prefix of hexadecimal node id)
|
||||||
pub(crate) node_id: String,
|
pub(crate) node_id: String,
|
||||||
|
}
|
||||||
|
|
||||||
/// If this flag is not given, the node won't be removed
|
#[derive(StructOpt, Debug)]
|
||||||
#[structopt(long = "yes")]
|
pub struct ApplyLayoutOpt {
|
||||||
pub(crate) yes: bool,
|
/// Version number of new configuration: this command will fail if
|
||||||
|
/// it is not exactly 1 + the previous configuration's version
|
||||||
|
#[structopt(long = "version")]
|
||||||
|
pub(crate) version: Option<u64>,
|
||||||
|
}
|
||||||
|
|
||||||
|
#[derive(StructOpt, Debug)]
|
||||||
|
pub struct RevertLayoutOpt {
|
||||||
|
/// Version number of old configuration to which to revert
|
||||||
|
#[structopt(long = "version")]
|
||||||
|
pub(crate) version: Option<u64>,
|
||||||
}
|
}
|
||||||
|
|
||||||
#[derive(Serialize, Deserialize, StructOpt, Debug)]
|
#[derive(Serialize, Deserialize, StructOpt, Debug)]
|
||||||
|
|
|
@ -70,7 +70,9 @@ async fn main() {
|
||||||
|
|
||||||
server::run_server(opt.config_file).await
|
server::run_server(opt.config_file).await
|
||||||
}
|
}
|
||||||
Command::NodeId(node_id_opt) => node_id_command(opt.config_file, node_id_opt.quiet),
|
Command::Node(NodeOperation::NodeId(node_id_opt)) => {
|
||||||
|
node_id_command(opt.config_file, node_id_opt.quiet)
|
||||||
|
}
|
||||||
_ => cli_command(opt).await,
|
_ => cli_command(opt).await,
|
||||||
};
|
};
|
||||||
|
|
||||||
|
|
|
@ -1,11 +1,12 @@
|
||||||
[package]
|
[package]
|
||||||
name = "garage_model"
|
name = "garage_model"
|
||||||
version = "0.4.0"
|
version = "0.5.0"
|
||||||
authors = ["Alex Auvolat <alex@adnab.me>"]
|
authors = ["Alex Auvolat <alex@adnab.me>"]
|
||||||
edition = "2018"
|
edition = "2018"
|
||||||
license = "AGPL-3.0"
|
license = "AGPL-3.0"
|
||||||
description = "Core data model for the Garage object store"
|
description = "Core data model for the Garage object store"
|
||||||
repository = "https://git.deuxfleurs.fr/Deuxfleurs/garage"
|
repository = "https://git.deuxfleurs.fr/Deuxfleurs/garage"
|
||||||
|
readme = "../../README.md"
|
||||||
|
|
||||||
[lib]
|
[lib]
|
||||||
path = "lib.rs"
|
path = "lib.rs"
|
||||||
|
@ -13,9 +14,9 @@ path = "lib.rs"
|
||||||
# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
|
# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
|
||||||
|
|
||||||
[dependencies]
|
[dependencies]
|
||||||
garage_rpc = { version = "0.4.0", path = "../rpc" }
|
garage_rpc = { version = "0.5.0", path = "../rpc" }
|
||||||
garage_table = { version = "0.4.0", path = "../table" }
|
garage_table = { version = "0.5.0", path = "../table" }
|
||||||
garage_util = { version = "0.4.0", path = "../util" }
|
garage_util = { version = "0.5.0", path = "../util" }
|
||||||
|
|
||||||
async-trait = "0.1.7"
|
async-trait = "0.1.7"
|
||||||
arc-swap = "1.0"
|
arc-swap = "1.0"
|
||||||
|
|
|
@ -1,11 +1,12 @@
|
||||||
[package]
|
[package]
|
||||||
name = "garage_rpc"
|
name = "garage_rpc"
|
||||||
version = "0.4.0"
|
version = "0.5.0"
|
||||||
authors = ["Alex Auvolat <alex@adnab.me>"]
|
authors = ["Alex Auvolat <alex@adnab.me>"]
|
||||||
edition = "2018"
|
edition = "2018"
|
||||||
license = "AGPL-3.0"
|
license = "AGPL-3.0"
|
||||||
description = "Cluster membership management and RPC protocol for the Garage object store"
|
description = "Cluster membership management and RPC protocol for the Garage object store"
|
||||||
repository = "https://git.deuxfleurs.fr/Deuxfleurs/garage"
|
repository = "https://git.deuxfleurs.fr/Deuxfleurs/garage"
|
||||||
|
readme = "../../README.md"
|
||||||
|
|
||||||
[lib]
|
[lib]
|
||||||
path = "lib.rs"
|
path = "lib.rs"
|
||||||
|
@ -13,7 +14,7 @@ path = "lib.rs"
|
||||||
# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
|
# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
|
||||||
|
|
||||||
[dependencies]
|
[dependencies]
|
||||||
garage_util = { version = "0.4.0", path = "../util" }
|
garage_util = { version = "0.5.0", path = "../util" }
|
||||||
|
|
||||||
arc-swap = "1.0"
|
arc-swap = "1.0"
|
||||||
bytes = "1.0"
|
bytes = "1.0"
|
||||||
|
@ -26,6 +27,7 @@ sodiumoxide = { version = "0.2.5-0", package = "kuska-sodiumoxide" }
|
||||||
async-trait = "0.1.7"
|
async-trait = "0.1.7"
|
||||||
rmp-serde = "0.15"
|
rmp-serde = "0.15"
|
||||||
serde = { version = "1.0", default-features = false, features = ["derive", "rc"] }
|
serde = { version = "1.0", default-features = false, features = ["derive", "rc"] }
|
||||||
|
serde_bytes = "0.11"
|
||||||
serde_json = "1.0"
|
serde_json = "1.0"
|
||||||
|
|
||||||
futures = "0.3"
|
futures = "0.3"
|
||||||
|
|
579
src/rpc/layout.rs
Normal file
|
@ -0,0 +1,579 @@
|
||||||
|
use std::cmp::Ordering;
|
||||||
|
use std::collections::{HashMap, HashSet};
|
||||||
|
|
||||||
|
use serde::{Deserialize, Serialize};
|
||||||
|
|
||||||
|
use garage_util::crdt::{AutoCrdt, Crdt, LwwMap};
|
||||||
|
use garage_util::data::*;
|
||||||
|
|
||||||
|
use crate::ring::*;
|
||||||
|
|
||||||
|
/// The layout of the cluster, i.e. the list of roles
|
||||||
|
/// which are assigned to each cluster node
|
||||||
|
#[derive(Clone, Debug, Serialize, Deserialize)]
|
||||||
|
pub struct ClusterLayout {
|
||||||
|
pub version: u64,
|
||||||
|
|
||||||
|
pub replication_factor: usize,
|
||||||
|
pub roles: LwwMap<Uuid, NodeRoleV>,
|
||||||
|
|
||||||
|
/// node_id_vec: a vector of node IDs with a role assigned
|
||||||
|
/// in the system (this includes gateway nodes).
|
||||||
|
/// The order here is different than the vec stored by `roles`, because:
|
||||||
|
/// 1. non-gateway nodes are first so that they have lower numbers
|
||||||
|
/// 2. nodes that don't have a role are excluded (but they need to
|
||||||
|
/// stay in the CRDT as tombstones)
|
||||||
|
pub node_id_vec: Vec<Uuid>,
|
||||||
|
/// the assignation of data partitions to node, the values
|
||||||
|
/// are indices in node_id_vec
|
||||||
|
#[serde(with = "serde_bytes")]
|
||||||
|
pub ring_assignation_data: Vec<CompactNodeType>,
|
||||||
|
|
||||||
|
/// Role changes which are staged for the next version of the layout
|
||||||
|
pub staging: LwwMap<Uuid, NodeRoleV>,
|
||||||
|
pub staging_hash: Hash,
|
||||||
|
}
|
||||||
|
|
||||||
|
#[derive(PartialEq, Eq, PartialOrd, Ord, Clone, Debug, Serialize, Deserialize)]
|
||||||
|
pub struct NodeRoleV(pub Option<NodeRole>);
|
||||||
|
|
||||||
|
impl AutoCrdt for NodeRoleV {
|
||||||
|
const WARN_IF_DIFFERENT: bool = true;
|
||||||
|
}
|
||||||
|
|
||||||
|
/// The user-assigned roles of cluster nodes
|
||||||
|
#[derive(PartialEq, Eq, PartialOrd, Ord, Clone, Debug, Serialize, Deserialize)]
|
||||||
|
pub struct NodeRole {
|
||||||
|
/// Datacenter at which this entry belong. This information might be used to perform a better
|
||||||
|
/// geodistribution
|
||||||
|
pub zone: String,
|
||||||
|
/// The (relative) capacity of the node
|
||||||
|
/// If this is set to None, the node does not participate in storing data for the system
|
||||||
|
/// and is only active as an API gateway to other nodes
|
||||||
|
pub capacity: Option<u32>,
|
||||||
|
/// A set of tags to recognize the node
|
||||||
|
pub tags: Vec<String>,
|
||||||
|
}
|
||||||
|
|
||||||
|
impl NodeRole {
|
||||||
|
pub fn capacity_string(&self) -> String {
|
||||||
|
match self.capacity {
|
||||||
|
Some(c) => format!("{}", c),
|
||||||
|
None => "gateway".to_string(),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
impl ClusterLayout {
|
||||||
|
pub fn new(replication_factor: usize) -> Self {
|
||||||
|
let empty_lwwmap = LwwMap::new();
|
||||||
|
let empty_lwwmap_hash = blake2sum(&rmp_to_vec_all_named(&empty_lwwmap).unwrap()[..]);
|
||||||
|
|
||||||
|
ClusterLayout {
|
||||||
|
version: 0,
|
||||||
|
replication_factor,
|
||||||
|
roles: LwwMap::new(),
|
||||||
|
node_id_vec: Vec::new(),
|
||||||
|
ring_assignation_data: Vec::new(),
|
||||||
|
staging: empty_lwwmap,
|
||||||
|
staging_hash: empty_lwwmap_hash,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn merge(&mut self, other: &ClusterLayout) -> bool {
|
||||||
|
match other.version.cmp(&self.version) {
|
||||||
|
Ordering::Greater => {
|
||||||
|
*self = other.clone();
|
||||||
|
true
|
||||||
|
}
|
||||||
|
Ordering::Equal => {
|
||||||
|
self.staging.merge(&other.staging);
|
||||||
|
|
||||||
|
let new_staging_hash = blake2sum(&rmp_to_vec_all_named(&self.staging).unwrap()[..]);
|
||||||
|
let changed = new_staging_hash != self.staging_hash;
|
||||||
|
|
||||||
|
self.staging_hash = new_staging_hash;
|
||||||
|
|
||||||
|
changed
|
||||||
|
}
|
||||||
|
Ordering::Less => false,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Returns a list of IDs of nodes that currently have
|
||||||
|
/// a role in the cluster
|
||||||
|
pub fn node_ids(&self) -> &[Uuid] {
|
||||||
|
&self.node_id_vec[..]
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn num_nodes(&self) -> usize {
|
||||||
|
self.node_id_vec.len()
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Returns the role of a node in the layout
|
||||||
|
pub fn node_role(&self, node: &Uuid) -> Option<&NodeRole> {
|
||||||
|
match self.roles.get(node) {
|
||||||
|
Some(NodeRoleV(Some(v))) => Some(v),
|
||||||
|
_ => None,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Check a cluster layout for internal consistency
|
||||||
|
/// returns true if consistent, false if error
|
||||||
|
pub fn check(&self) -> bool {
|
||||||
|
// Check that the hash of the staging data is correct
|
||||||
|
let staging_hash = blake2sum(&rmp_to_vec_all_named(&self.staging).unwrap()[..]);
|
||||||
|
if staging_hash != self.staging_hash {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Check that node_id_vec contains the correct list of nodes
|
||||||
|
let mut expected_nodes = self
|
||||||
|
.roles
|
||||||
|
.items()
|
||||||
|
.iter()
|
||||||
|
.filter(|(_, _, v)| v.0.is_some())
|
||||||
|
.map(|(id, _, _)| *id)
|
||||||
|
.collect::<Vec<_>>();
|
||||||
|
expected_nodes.sort();
|
||||||
|
let mut node_id_vec = self.node_id_vec.clone();
|
||||||
|
node_id_vec.sort();
|
||||||
|
if expected_nodes != node_id_vec {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Check that the assignation data has the correct length
|
||||||
|
if self.ring_assignation_data.len() != (1 << PARTITION_BITS) * self.replication_factor {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Check that the assigned nodes are correct identifiers
|
||||||
|
// of nodes that are assigned a role
|
||||||
|
// and that role is not the role of a gateway nodes
|
||||||
|
for x in self.ring_assignation_data.iter() {
|
||||||
|
if *x as usize >= self.node_id_vec.len() {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
let node = self.node_id_vec[*x as usize];
|
||||||
|
match self.roles.get(&node) {
|
||||||
|
Some(NodeRoleV(Some(x))) if x.capacity.is_some() => (),
|
||||||
|
_ => return false,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
true
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Calculate an assignation of partitions to nodes
|
||||||
quentin
commented
So this is our new stateful partition algorithm, this one being when a modification is operated in the cluster? So this is our new stateful partition algorithm, this one being when a modification is operated in the cluster?
lx
commented
Yes :) Yes :)
|
|||||||
|
pub fn calculate_partition_assignation(&mut self) -> bool {
|
||||||
|
let (configured_nodes, zones) = self.configured_nodes_and_zones();
|
||||||
|
let n_zones = zones.len();
|
||||||
|
|
||||||
|
println!("Calculating updated partition assignation, this may take some time...");
|
||||||
|
println!();
|
||||||
|
|
||||||
|
let old_partitions = self.parse_assignation_data();
|
||||||
|
|
||||||
|
let mut partitions = old_partitions.clone();
|
||||||
|
for part in partitions.iter_mut() {
|
||||||
|
part.nodes
|
||||||
|
.retain(|(_, info)| info.map(|x| x.capacity.is_some()).unwrap_or(false));
|
||||||
|
}
|
||||||
|
|
||||||
|
// When nodes are removed, or when bootstraping an assignation from
|
||||||
|
// scratch for a new cluster, the old partitions will have holes (or be empty).
|
||||||
|
// Here we add more nodes to make a complete (sub-optimal) assignation,
|
||||||
|
// using an initial partition assignation that is calculated using the multi-dc maglev trick
|
||||||
|
match self.initial_partition_assignation() {
|
||||||
|
Some(initial_partitions) => {
|
||||||
|
for (part, ipart) in partitions.iter_mut().zip(initial_partitions.iter()) {
|
||||||
|
for (id, info) in ipart.nodes.iter() {
|
||||||
|
if part.nodes.len() < self.replication_factor {
|
||||||
|
part.add(part.nodes.len() + 1, n_zones, id, info.unwrap());
|
||||||
|
}
|
||||||
|
}
|
||||||
|
assert!(part.nodes.len() == self.replication_factor);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
None => {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Calculate how many partitions each node should ideally store,
|
||||||
|
// and how many partitions they are storing with the current assignation
|
||||||
|
// This defines our target for which we will optimize in the following loop.
|
||||||
|
let total_capacity = configured_nodes
|
||||||
|
.iter()
|
||||||
|
.map(|(_, info)| info.capacity.unwrap_or(0))
|
||||||
|
.sum::<u32>() as usize;
|
||||||
|
let total_partitions = self.replication_factor * (1 << PARTITION_BITS);
|
||||||
|
let target_partitions_per_node = configured_nodes
|
||||||
|
.iter()
|
||||||
|
.map(|(id, info)| {
|
||||||
|
(
|
||||||
|
*id,
|
||||||
|
info.capacity.unwrap_or(0) as usize * total_partitions / total_capacity,
|
||||||
|
)
|
||||||
|
})
|
||||||
|
.collect::<HashMap<&Uuid, usize>>();
|
||||||
|
|
||||||
|
let mut partitions_per_node = self.partitions_per_node(&partitions[..]);
|
||||||
|
|
||||||
|
println!("Target number of partitions per node:");
|
||||||
|
for (node, npart) in target_partitions_per_node.iter() {
|
||||||
|
println!("{:?}\t{}", node, npart);
|
||||||
|
}
|
||||||
|
println!();
|
||||||
quentin
commented
is there any reason you wrote useful with 2 is there any reason you wrote useful with 2 `l` (it seems to be an old way to write it)?
lx
commented
I don't speak English very well, sorry. I don't speak English very well, sorry.
|
|||||||
|
|
||||||
|
// Shuffle partitions between nodes so that nodes will reach (or better approach)
|
||||||
|
// their target number of stored partitions
|
||||||
|
loop {
|
||||||
|
let mut option = None;
|
||||||
|
for (i, part) in partitions.iter_mut().enumerate() {
|
||||||
|
for (irm, (idrm, _)) in part.nodes.iter().enumerate() {
|
||||||
|
let suprm = partitions_per_node.get(*idrm).cloned().unwrap_or(0) as i32
|
||||||
|
- target_partitions_per_node.get(*idrm).cloned().unwrap_or(0) as i32;
|
||||||
|
|
||||||
|
for (idadd, infoadd) in configured_nodes.iter() {
|
||||||
|
// skip replacing a node by itself
|
||||||
|
// and skip replacing by gateway nodes
|
||||||
|
if idadd == idrm || infoadd.capacity.is_none() {
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
|
||||||
|
let supadd = partitions_per_node.get(*idadd).cloned().unwrap_or(0) as i32
|
||||||
|
- target_partitions_per_node.get(*idadd).cloned().unwrap_or(0) as i32;
|
||||||
|
|
||||||
|
// We want to try replacing node idrm by node idadd
|
||||||
|
// if that brings us close to our goal.
|
||||||
|
let square = |i: i32| i * i;
|
||||||
|
let oldcost = square(suprm) + square(supadd);
|
||||||
|
let newcost = square(suprm - 1) + square(supadd + 1);
|
||||||
|
if newcost >= oldcost {
|
||||||
|
// not closer to our goal
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
let gain = oldcost - newcost;
|
||||||
|
|
||||||
|
let mut newpart = part.clone();
|
||||||
|
|
||||||
|
newpart.nodes.remove(irm);
|
||||||
|
if !newpart.add(newpart.nodes.len() + 1, n_zones, idadd, infoadd) {
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
assert!(newpart.nodes.len() == self.replication_factor);
|
||||||
|
|
||||||
|
if !old_partitions[i]
|
||||||
|
.is_valid_transition_to(&newpart, self.replication_factor)
|
||||||
|
{
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
|
||||||
|
if option
|
||||||
|
.as_ref()
|
||||||
|
.map(|(old_gain, _, _, _, _)| gain > *old_gain)
|
||||||
|
.unwrap_or(true)
|
||||||
|
{
|
||||||
|
option = Some((gain, i, idadd, idrm, newpart));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if let Some((_gain, i, idadd, idrm, newpart)) = option {
|
||||||
|
*partitions_per_node.entry(idadd).or_insert(0) += 1;
|
||||||
|
*partitions_per_node.get_mut(idrm).unwrap() -= 1;
|
||||||
|
partitions[i] = newpart;
|
||||||
|
} else {
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Check we completed the assignation correctly
|
||||||
|
// (this is a set of checks for the algorithm's consistency)
|
||||||
|
assert!(partitions.len() == (1 << PARTITION_BITS));
|
||||||
|
assert!(partitions
|
||||||
|
.iter()
|
||||||
|
.all(|p| p.nodes.len() == self.replication_factor));
|
||||||
|
|
||||||
|
let new_partitions_per_node = self.partitions_per_node(&partitions[..]);
|
||||||
|
assert!(new_partitions_per_node == partitions_per_node);
|
||||||
|
|
||||||
|
// Show statistics
|
||||||
|
println!("New number of partitions per node:");
|
||||||
|
for (node, npart) in partitions_per_node.iter() {
|
||||||
|
println!("{:?}\t{}", node, npart);
|
||||||
|
}
|
||||||
quentin
commented
When I initialize a cluster, I have:
I do not understant what these values mean :s I think this line misses at least the id of the affected node as the first value, something like:
And I think that the
or:
When I initialize a cluster, I have:
```
Number of partitions that move:
-0 +1 256
```
I do not understant what these values mean :s
I think this line misses at least the id of the affected node as the first value, something like:
```
Number of partitions that move:
65edae1c553f7935… -0 +1 256
```
And I think that the `+1` is a strange edge case of the initial algorithm, I think it should be either:
```
Number of partitions that move:
65edae1c553f7935… -0 +256 256
```
or:
```
Number of partitions that move:
65edae1c553f7935… -0 +0 256
```
lx
commented
Ok I will make this part more readable and contain more information. Ok I will make this part more readable and contain more information.
|
|||||||
|
println!();
|
||||||
|
|
||||||
|
let mut diffcount = HashMap::new();
|
||||||
|
for (oldpart, newpart) in old_partitions.iter().zip(partitions.iter()) {
|
||||||
|
let nminus = oldpart.txtplus(newpart);
|
||||||
|
let nplus = newpart.txtplus(oldpart);
|
||||||
|
if nminus != "[...]" || nplus != "[...]" {
|
||||||
|
let tup = (nminus, nplus);
|
||||||
|
*diffcount.entry(tup).or_insert(0) += 1;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if diffcount.is_empty() {
|
||||||
|
println!("No data will be moved between nodes.");
|
||||||
|
} else {
|
||||||
quentin
commented
And this is one is our initial partition algorithm, so when the cluster is initialized for the first time? And this is one is our initial partition algorithm, so when the cluster is initialized for the first time?
lx
commented
Yes, it is used:
Basically, we use this function to ensure that we have an initial assignation of three (or n) nodes to each partition. Once we have it, we just do an iterative optimization algorihtm (the other function, above), that tries to balance better the number of partitions between nodes by doing elementary operations that consist only in replacing one node by another somewhere in the assignation. Yes, it is used:
- when the cluster is initialized for the first time
- when a node is removed, to re-assign new nodes to the partitions that the old node stored, even if this assignation is not well balanced
Basically, we use this function to ensure that we have an initial assignation of three (or n) nodes to each partition. Once we have it, we just do an iterative optimization algorihtm (the other function, above), that tries to balance better the number of partitions between nodes by doing elementary operations that consist only in replacing one node by another somewhere in the assignation.
|
|||||||
|
let mut diffcount = diffcount.into_iter().collect::<Vec<_>>();
|
||||||
|
diffcount.sort();
|
||||||
|
println!("Number of partitions that move:");
|
||||||
|
for ((nminus, nplus), npart) in diffcount {
|
||||||
|
println!("\t{}\t{} -> {}", npart, nminus, nplus);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
println!();
|
||||||
|
|
||||||
|
// Calculate and save new assignation data
|
||||||
|
let (nodes, assignation_data) =
|
||||||
|
self.compute_assignation_data(&configured_nodes[..], &partitions[..]);
|
||||||
|
|
||||||
|
self.node_id_vec = nodes;
|
||||||
|
self.ring_assignation_data = assignation_data;
|
||||||
|
|
||||||
|
true
|
||||||
|
}
|
||||||
|
|
||||||
|
fn initial_partition_assignation(&self) -> Option<Vec<PartitionAss<'_>>> {
|
||||||
|
let (configured_nodes, zones) = self.configured_nodes_and_zones();
|
||||||
|
let n_zones = zones.len();
|
||||||
|
|
||||||
|
// Create a vector of partition indices (0 to 2**PARTITION_BITS-1)
|
||||||
|
let partitions_idx = (0usize..(1usize << PARTITION_BITS)).collect::<Vec<_>>();
|
||||||
|
|
||||||
|
// Prepare ring
|
||||||
|
let mut partitions: Vec<PartitionAss> = partitions_idx
|
||||||
|
.iter()
|
||||||
|
.map(|_i| PartitionAss::new())
|
||||||
|
.collect::<Vec<_>>();
|
||||||
|
|
||||||
|
// Create MagLev priority queues for each node
|
||||||
|
let mut queues = configured_nodes
|
||||||
|
.iter()
|
||||||
|
.filter(|(_id, info)| info.capacity.is_some())
|
||||||
|
.map(|(node_id, node_info)| {
|
||||||
|
let mut parts = partitions_idx
|
||||||
|
.iter()
|
||||||
|
.map(|i| {
|
||||||
|
let part_data =
|
||||||
|
[&u16::to_be_bytes(*i as u16)[..], node_id.as_slice()].concat();
|
||||||
|
(*i, fasthash(&part_data[..]))
|
||||||
|
})
|
||||||
|
.collect::<Vec<_>>();
|
||||||
|
parts.sort_by_key(|(_i, h)| *h);
|
||||||
|
let parts_i = parts.iter().map(|(i, _h)| *i).collect::<Vec<_>>();
|
||||||
|
(node_id, node_info, parts_i, 0)
|
||||||
|
})
|
||||||
|
.collect::<Vec<_>>();
|
||||||
|
|
||||||
|
let max_capacity = configured_nodes
|
||||||
|
.iter()
|
||||||
|
.filter_map(|(_, node_info)| node_info.capacity)
|
||||||
|
.fold(0, std::cmp::max);
|
||||||
|
|
||||||
|
// Fill up ring
|
||||||
|
for rep in 0..self.replication_factor {
|
||||||
|
queues.sort_by_key(|(ni, _np, _q, _p)| {
|
||||||
|
let queue_data = [&u16::to_be_bytes(rep as u16)[..], ni.as_slice()].concat();
|
||||||
|
fasthash(&queue_data[..])
|
||||||
|
});
|
||||||
|
|
||||||
|
for (_, _, _, pos) in queues.iter_mut() {
|
||||||
|
*pos = 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
let mut remaining = partitions_idx.len();
|
||||||
|
while remaining > 0 {
|
||||||
|
let remaining0 = remaining;
|
||||||
|
for i_round in 0..max_capacity {
|
||||||
|
for (node_id, node_info, q, pos) in queues.iter_mut() {
|
||||||
|
if i_round >= node_info.capacity.unwrap() {
|
||||||
|
continue;
|
||||||
quentin
commented
We might want to put the pseudo code of these 2 partition computation algorithms in the design page. We might want to put the pseudo code of these 2 partition computation algorithms in the design page.
Ideally it would be someone else than you, LX, that write it: it would allow to proof-read this part more in-depth and spread the knowledge a bit more :)
lx
commented
True True
|
|||||||
|
}
|
||||||
|
for (pos2, &qv) in q.iter().enumerate().skip(*pos) {
|
||||||
|
if partitions[qv].add(rep + 1, n_zones, node_id, node_info) {
|
||||||
|
remaining -= 1;
|
||||||
|
*pos = pos2 + 1;
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if remaining == remaining0 {
|
||||||
|
// No progress made, exit
|
||||||
|
return None;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
Some(partitions)
|
||||||
|
}
|
||||||
|
|
||||||
|
fn configured_nodes_and_zones(&self) -> (Vec<(&Uuid, &NodeRole)>, HashSet<&str>) {
|
||||||
|
let configured_nodes = self
|
||||||
|
.roles
|
||||||
|
.items()
|
||||||
|
.iter()
|
||||||
|
.filter(|(_id, _, info)| info.0.is_some())
|
||||||
|
.map(|(id, _, info)| (id, info.0.as_ref().unwrap()))
|
||||||
|
.collect::<Vec<(&Uuid, &NodeRole)>>();
|
||||||
|
|
||||||
|
let zones = configured_nodes
|
||||||
|
.iter()
|
||||||
|
.filter(|(_id, info)| info.capacity.is_some())
|
||||||
|
.map(|(_id, info)| info.zone.as_str())
|
||||||
|
.collect::<HashSet<&str>>();
|
||||||
|
|
||||||
|
(configured_nodes, zones)
|
||||||
|
}
|
||||||
|
|
||||||
|
fn compute_assignation_data<'a>(
|
||||||
|
&self,
|
||||||
|
configured_nodes: &[(&'a Uuid, &'a NodeRole)],
|
||||||
|
partitions: &[PartitionAss<'a>],
|
||||||
|
) -> (Vec<Uuid>, Vec<CompactNodeType>) {
|
||||||
|
assert!(partitions.len() == (1 << PARTITION_BITS));
|
||||||
|
|
||||||
|
// Make a canonical order for nodes
|
||||||
|
let mut nodes = configured_nodes
|
||||||
|
.iter()
|
||||||
|
.filter(|(_id, info)| info.capacity.is_some())
|
||||||
|
.map(|(id, _)| **id)
|
||||||
|
.collect::<Vec<_>>();
|
||||||
|
let nodes_rev = nodes
|
||||||
|
.iter()
|
||||||
|
.enumerate()
|
||||||
|
.map(|(i, id)| (*id, i as CompactNodeType))
|
||||||
|
.collect::<HashMap<Uuid, CompactNodeType>>();
|
||||||
|
|
||||||
|
let mut assignation_data = vec![];
|
||||||
|
for partition in partitions.iter() {
|
||||||
|
assert!(partition.nodes.len() == self.replication_factor);
|
||||||
|
for (id, _) in partition.nodes.iter() {
|
||||||
|
assignation_data.push(*nodes_rev.get(id).unwrap());
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
nodes.extend(
|
||||||
|
configured_nodes
|
||||||
|
.iter()
|
||||||
|
.filter(|(_id, info)| info.capacity.is_none())
|
||||||
|
.map(|(id, _)| **id),
|
||||||
|
);
|
||||||
|
|
||||||
|
(nodes, assignation_data)
|
||||||
|
}
|
||||||
|
|
||||||
|
fn parse_assignation_data(&self) -> Vec<PartitionAss<'_>> {
|
||||||
|
if self.ring_assignation_data.len() == self.replication_factor * (1 << PARTITION_BITS) {
|
||||||
|
// If the previous assignation data is correct, use that
|
||||||
|
let mut partitions = vec![];
|
||||||
|
for i in 0..(1 << PARTITION_BITS) {
|
||||||
|
let mut part = PartitionAss::new();
|
||||||
|
for node_i in self.ring_assignation_data
|
||||||
|
[i * self.replication_factor..(i + 1) * self.replication_factor]
|
||||||
|
.iter()
|
||||||
|
{
|
||||||
|
let node_id = &self.node_id_vec[*node_i as usize];
|
||||||
|
|
||||||
|
if let Some(NodeRoleV(Some(info))) = self.roles.get(node_id) {
|
||||||
|
part.nodes.push((node_id, Some(info)));
|
||||||
|
} else {
|
||||||
|
part.nodes.push((node_id, None));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
partitions.push(part);
|
||||||
|
}
|
||||||
|
partitions
|
||||||
|
} else {
|
||||||
|
// Otherwise start fresh
|
||||||
|
(0..(1 << PARTITION_BITS))
|
||||||
|
.map(|_| PartitionAss::new())
|
||||||
|
.collect()
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
fn partitions_per_node<'a>(&self, partitions: &[PartitionAss<'a>]) -> HashMap<&'a Uuid, usize> {
|
||||||
|
let mut partitions_per_node = HashMap::<&Uuid, usize>::new();
|
||||||
|
for p in partitions.iter() {
|
||||||
|
for (id, _) in p.nodes.iter() {
|
||||||
|
*partitions_per_node.entry(*id).or_insert(0) += 1;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
partitions_per_node
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// ---- Internal structs for partition assignation in layout ----
|
||||||
|
|
||||||
|
#[derive(Clone)]
|
||||||
|
struct PartitionAss<'a> {
|
||||||
|
nodes: Vec<(&'a Uuid, Option<&'a NodeRole>)>,
|
||||||
|
}
|
||||||
|
|
||||||
|
impl<'a> PartitionAss<'a> {
|
||||||
|
fn new() -> Self {
|
||||||
|
Self { nodes: Vec::new() }
|
||||||
|
}
|
||||||
|
|
||||||
|
fn nplus(&self, other: &PartitionAss<'a>) -> usize {
|
||||||
|
self.nodes
|
||||||
|
.iter()
|
||||||
|
.filter(|x| !other.nodes.contains(x))
|
||||||
|
.count()
|
||||||
|
}
|
||||||
|
|
||||||
|
fn txtplus(&self, other: &PartitionAss<'a>) -> String {
|
||||||
|
let mut nodes = self
|
||||||
|
.nodes
|
||||||
|
.iter()
|
||||||
|
.filter(|x| !other.nodes.contains(x))
|
||||||
|
.map(|x| format!("{:?}", x.0))
|
||||||
|
.collect::<Vec<_>>();
|
||||||
|
nodes.sort();
|
||||||
|
if self.nodes.iter().any(|x| other.nodes.contains(x)) {
|
||||||
|
nodes.push("...".into());
|
||||||
|
}
|
||||||
|
format!("[{}]", nodes.join(" "))
|
||||||
|
}
|
||||||
|
|
||||||
|
fn is_valid_transition_to(&self, other: &PartitionAss<'a>, replication_factor: usize) -> bool {
|
||||||
|
let min_keep_nodes_per_part = (replication_factor + 1) / 2;
|
||||||
|
let n_removed = self.nplus(other);
|
||||||
|
|
||||||
|
if self.nodes.len() <= min_keep_nodes_per_part {
|
||||||
|
n_removed == 0
|
||||||
|
} else {
|
||||||
|
n_removed <= self.nodes.len() - min_keep_nodes_per_part
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
fn add(
|
||||||
|
&mut self,
|
||||||
|
target_len: usize,
|
||||||
|
n_zones: usize,
|
||||||
|
node: &'a Uuid,
|
||||||
|
role: &'a NodeRole,
|
||||||
|
) -> bool {
|
||||||
|
if self.nodes.len() != target_len - 1 {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
let p_zns = self
|
||||||
|
.nodes
|
||||||
|
.iter()
|
||||||
|
.map(|(_id, info)| info.unwrap().zone.as_str())
|
||||||
|
.collect::<HashSet<&str>>();
|
||||||
|
if (p_zns.len() < n_zones && !p_zns.contains(&role.zone.as_str()))
|
||||||
|
|| (p_zns.len() == n_zones && !self.nodes.iter().any(|(id, _)| *id == node))
|
||||||
|
{
|
||||||
|
self.nodes.push((node, Some(role)));
|
||||||
|
true
|
||||||
|
} else {
|
||||||
|
false
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
|
@ -5,6 +5,7 @@ extern crate log;
|
||||||
|
|
||||||
mod consul;
|
mod consul;
|
||||||
|
|
||||||
|
pub mod layout;
|
||||||
pub mod ring;
|
pub mod ring;
|
||||||
pub mod system;
|
pub mod system;
|
||||||
|
|
||||||
|
|
197
src/rpc/ring.rs
|
@ -1,12 +1,11 @@
|
||||||
//! Module containing types related to computing nodes which should receive a copy of data blocks
|
//! Module containing types related to computing nodes which should receive a copy of data blocks
|
||||||
//! and metadata
|
//! and metadata
|
||||||
use std::collections::{HashMap, HashSet};
|
|
||||||
use std::convert::TryInto;
|
use std::convert::TryInto;
|
||||||
|
|
||||||
use serde::{Deserialize, Serialize};
|
|
||||||
|
|
||||||
use garage_util::data::*;
|
use garage_util::data::*;
|
||||||
|
|
||||||
|
use crate::layout::ClusterLayout;
|
||||||
|
|
||||||
/// A partition id, which is stored on 16 bits
|
/// A partition id, which is stored on 16 bits
|
||||||
/// i.e. we have up to 2**16 partitions.
|
/// i.e. we have up to 2**16 partitions.
|
||||||
/// (in practice we have exactly 2**PARTITION_BITS partitions)
|
/// (in practice we have exactly 2**PARTITION_BITS partitions)
|
||||||
|
@ -22,47 +21,6 @@ pub const PARTITION_BITS: usize = 8;
|
||||||
|
|
||||||
const PARTITION_MASK_U16: u16 = ((1 << PARTITION_BITS) - 1) << (16 - PARTITION_BITS);
|
const PARTITION_MASK_U16: u16 = ((1 << PARTITION_BITS) - 1) << (16 - PARTITION_BITS);
|
||||||
|
|
||||||
/// The user-defined configuration of the cluster's nodes
|
|
||||||
#[derive(Clone, Debug, Serialize, Deserialize)]
|
|
||||||
pub struct NetworkConfig {
|
|
||||||
/// Map of each node's id to it's configuration
|
|
||||||
pub members: HashMap<Uuid, NetworkConfigEntry>,
|
|
||||||
/// Version of this config
|
|
||||||
pub version: u64,
|
|
||||||
}
|
|
||||||
|
|
||||||
impl NetworkConfig {
|
|
||||||
pub(crate) fn new() -> Self {
|
|
||||||
Self {
|
|
||||||
members: HashMap::new(),
|
|
||||||
version: 0,
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
/// The overall configuration of one (possibly remote) node
|
|
||||||
#[derive(Clone, Debug, Serialize, Deserialize)]
|
|
||||||
pub struct NetworkConfigEntry {
|
|
||||||
/// Datacenter at which this entry belong. This infromation might be used to perform a better
|
|
||||||
/// geodistribution
|
|
||||||
pub zone: String,
|
|
||||||
/// The (relative) capacity of the node
|
|
||||||
/// If this is set to None, the node does not participate in storing data for the system
|
|
||||||
/// and is only active as an API gateway to other nodes
|
|
||||||
pub capacity: Option<u32>,
|
|
||||||
/// A tag to recognize the entry, not used for other things than display
|
|
||||||
pub tag: String,
|
|
||||||
}
|
|
||||||
|
|
||||||
impl NetworkConfigEntry {
|
|
||||||
pub fn capacity_string(&self) -> String {
|
|
||||||
match self.capacity {
|
|
||||||
Some(c) => format!("{}", c),
|
|
||||||
None => "gateway".to_string(),
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
/// A ring distributing fairly objects to nodes
|
/// A ring distributing fairly objects to nodes
|
||||||
#[derive(Clone)]
|
#[derive(Clone)]
|
||||||
pub struct Ring {
|
pub struct Ring {
|
||||||
|
@ -70,7 +28,7 @@ pub struct Ring {
|
||||||
pub replication_factor: usize,
|
pub replication_factor: usize,
|
||||||
|
|
||||||
/// The network configuration used to generate this ring
|
/// The network configuration used to generate this ring
|
||||||
pub config: NetworkConfig,
|
pub layout: ClusterLayout,
|
||||||
|
|
||||||
// Internal order of nodes used to make a more compact representation of the ring
|
// Internal order of nodes used to make a more compact representation of the ring
|
||||||
nodes: Vec<Uuid>,
|
nodes: Vec<Uuid>,
|
||||||
|
@ -81,7 +39,7 @@ pub struct Ring {
|
||||||
|
|
||||||
// Type to store compactly the id of a node in the system
|
// Type to store compactly the id of a node in the system
|
||||||
// Change this to u16 the day we want to have more than 256 nodes in a cluster
|
// Change this to u16 the day we want to have more than 256 nodes in a cluster
|
||||||
type CompactNodeType = u8;
|
pub type CompactNodeType = u8;
|
||||||
|
|
||||||
// The maximum number of times an object might get replicated
|
// The maximum number of times an object might get replicated
|
||||||
// This must be at least 3 because Garage supports 3-way replication
|
// This must be at least 3 because Garage supports 3-way replication
|
||||||
|
@ -102,132 +60,26 @@ struct RingEntry {
|
||||||
}
|
}
|
||||||
|
|
||||||
impl Ring {
|
impl Ring {
|
||||||
// TODO this function MUST be refactored, it's 100 lines long, with a 50 lines loop, going up to 6
|
pub(crate) fn new(layout: ClusterLayout, replication_factor: usize) -> Self {
|
||||||
// levels of imbrication. It is basically impossible to test, maintain, or understand for an
|
if replication_factor != layout.replication_factor {
|
||||||
// outsider.
|
warn!("Could not build ring: replication factor does not match between local configuration and network role assignation.");
|
||||||
pub(crate) fn new(config: NetworkConfig, replication_factor: usize) -> Self {
|
return Self::empty(layout, replication_factor);
|
||||||
// Create a vector of partition indices (0 to 2**PARTITION_BITS-1)
|
|
||||||
let partitions_idx = (0usize..(1usize << PARTITION_BITS)).collect::<Vec<_>>();
|
|
||||||
|
|
||||||
let zones = config
|
|
||||||
.members
|
|
||||||
.iter()
|
|
||||||
.filter(|(_id, info)| info.capacity.is_some())
|
|
||||||
.map(|(_id, info)| info.zone.as_str())
|
|
||||||
.collect::<HashSet<&str>>();
|
|
||||||
let n_zones = zones.len();
|
|
||||||
|
|
||||||
// Prepare ring
|
|
||||||
let mut partitions: Vec<Vec<(&Uuid, &NetworkConfigEntry)>> = partitions_idx
|
|
||||||
.iter()
|
|
||||||
.map(|_i| Vec::new())
|
|
||||||
.collect::<Vec<_>>();
|
|
||||||
|
|
||||||
// Create MagLev priority queues for each node
|
|
||||||
let mut queues = config
|
|
||||||
.members
|
|
||||||
.iter()
|
|
||||||
.filter(|(_id, info)| info.capacity.is_some())
|
|
||||||
.map(|(node_id, node_info)| {
|
|
||||||
let mut parts = partitions_idx
|
|
||||||
.iter()
|
|
||||||
.map(|i| {
|
|
||||||
let part_data =
|
|
||||||
[&u16::to_be_bytes(*i as u16)[..], node_id.as_slice()].concat();
|
|
||||||
(*i, fasthash(&part_data[..]))
|
|
||||||
})
|
|
||||||
.collect::<Vec<_>>();
|
|
||||||
parts.sort_by_key(|(_i, h)| *h);
|
|
||||||
let parts_i = parts.iter().map(|(i, _h)| *i).collect::<Vec<_>>();
|
|
||||||
(node_id, node_info, parts_i, 0)
|
|
||||||
})
|
|
||||||
.collect::<Vec<_>>();
|
|
||||||
|
|
||||||
let max_capacity = config
|
|
||||||
.members
|
|
||||||
.iter()
|
|
||||||
.filter_map(|(_, node_info)| node_info.capacity)
|
|
||||||
.fold(0, std::cmp::max);
|
|
||||||
|
|
||||||
assert!(replication_factor <= MAX_REPLICATION);
|
|
||||||
|
|
||||||
// Fill up ring
|
|
||||||
for rep in 0..replication_factor {
|
|
||||||
queues.sort_by_key(|(ni, _np, _q, _p)| {
|
|
||||||
let queue_data = [&u16::to_be_bytes(rep as u16)[..], ni.as_slice()].concat();
|
|
||||||
fasthash(&queue_data[..])
|
|
||||||
});
|
|
||||||
|
|
||||||
for (_, _, _, pos) in queues.iter_mut() {
|
|
||||||
*pos = 0;
|
|
||||||
}
|
|
||||||
|
|
||||||
let mut remaining = partitions_idx.len();
|
|
||||||
while remaining > 0 {
|
|
||||||
let remaining0 = remaining;
|
|
||||||
for i_round in 0..max_capacity {
|
|
||||||
for (node_id, node_info, q, pos) in queues.iter_mut() {
|
|
||||||
if i_round >= node_info.capacity.unwrap() {
|
|
||||||
continue;
|
|
||||||
}
|
|
||||||
for (pos2, &qv) in q.iter().enumerate().skip(*pos) {
|
|
||||||
if partitions[qv].len() != rep {
|
|
||||||
continue;
|
|
||||||
}
|
|
||||||
let p_zns = partitions[qv]
|
|
||||||
.iter()
|
|
||||||
.map(|(_id, info)| info.zone.as_str())
|
|
||||||
.collect::<HashSet<&str>>();
|
|
||||||
if (p_zns.len() < n_zones && !p_zns.contains(&node_info.zone.as_str()))
|
|
||||||
|| (p_zns.len() == n_zones
|
|
||||||
&& !partitions[qv].iter().any(|(id, _i)| id == node_id))
|
|
||||||
{
|
|
||||||
partitions[qv].push((node_id, node_info));
|
|
||||||
remaining -= 1;
|
|
||||||
*pos = pos2 + 1;
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
if remaining == remaining0 {
|
|
||||||
// No progress made, exit
|
|
||||||
warn!("Could not build ring, not enough nodes configured.");
|
|
||||||
return Self {
|
|
||||||
replication_factor,
|
|
||||||
config,
|
|
||||||
nodes: vec![],
|
|
||||||
ring: vec![],
|
|
||||||
};
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
// Make a canonical order for nodes
|
if layout.ring_assignation_data.len() != replication_factor * (1 << PARTITION_BITS) {
|
||||||
let nodes = config
|
warn!("Could not build ring: network role assignation data has invalid length");
|
||||||
.members
|
return Self::empty(layout, replication_factor);
|
||||||
.iter()
|
}
|
||||||
.filter(|(_id, info)| info.capacity.is_some())
|
|
||||||
.map(|(id, _)| *id)
|
|
||||||
.collect::<Vec<_>>();
|
|
||||||
let nodes_rev = nodes
|
|
||||||
.iter()
|
|
||||||
.enumerate()
|
|
||||||
.map(|(i, id)| (*id, i as CompactNodeType))
|
|
||||||
.collect::<HashMap<Uuid, CompactNodeType>>();
|
|
||||||
|
|
||||||
let ring = partitions
|
let nodes = layout.node_id_vec.clone();
|
||||||
.iter()
|
let ring = (0..(1 << PARTITION_BITS))
|
||||||
.enumerate()
|
.map(|i| {
|
||||||
.map(|(i, nodes)| {
|
|
||||||
let top = (i as u16) << (16 - PARTITION_BITS);
|
let top = (i as u16) << (16 - PARTITION_BITS);
|
||||||
let nodes = nodes
|
|
||||||
.iter()
|
|
||||||
.map(|(id, _info)| *nodes_rev.get(id).unwrap())
|
|
||||||
.collect::<Vec<CompactNodeType>>();
|
|
||||||
assert!(nodes.len() == replication_factor);
|
|
||||||
let mut nodes_buf = [0u8; MAX_REPLICATION];
|
let mut nodes_buf = [0u8; MAX_REPLICATION];
|
||||||
nodes_buf[..replication_factor].copy_from_slice(&nodes[..]);
|
nodes_buf[..replication_factor].copy_from_slice(
|
||||||
|
&layout.ring_assignation_data
|
||||||
|
[replication_factor * i..replication_factor * (i + 1)],
|
||||||
|
);
|
||||||
RingEntry {
|
RingEntry {
|
||||||
hash_prefix: top,
|
hash_prefix: top,
|
||||||
nodes_buf,
|
nodes_buf,
|
||||||
|
@ -237,12 +89,21 @@ impl Ring {
|
||||||
|
|
||||||
Self {
|
Self {
|
||||||
replication_factor,
|
replication_factor,
|
||||||
config,
|
layout,
|
||||||
nodes,
|
nodes,
|
||||||
ring,
|
ring,
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
fn empty(layout: ClusterLayout, replication_factor: usize) -> Self {
|
||||||
|
Self {
|
||||||
|
replication_factor,
|
||||||
|
layout,
|
||||||
|
nodes: vec![],
|
||||||
|
ring: vec![],
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
/// Get the partition in which data would fall on
|
/// Get the partition in which data would fall on
|
||||||
pub fn partition_of(&self, position: &Hash) -> Partition {
|
pub fn partition_of(&self, position: &Hash) -> Partition {
|
||||||
let top = u16::from_be_bytes(position.as_slice()[0..2].try_into().unwrap());
|
let top = u16::from_be_bytes(position.as_slice()[0..2].try_into().unwrap());
|
||||||
|
|
|
@ -225,7 +225,7 @@ impl RpcHelper {
|
||||||
// Retrieve some status variables that we will use to sort requests
|
// Retrieve some status variables that we will use to sort requests
|
||||||
let peer_list = self.0.fullmesh.get_peer_list();
|
let peer_list = self.0.fullmesh.get_peer_list();
|
||||||
let ring: Arc<Ring> = self.0.ring.borrow().clone();
|
let ring: Arc<Ring> = self.0.ring.borrow().clone();
|
||||||
let our_zone = match ring.config.members.get(&self.0.our_node_id) {
|
let our_zone = match ring.layout.node_role(&self.0.our_node_id) {
|
||||||
Some(pc) => &pc.zone,
|
Some(pc) => &pc.zone,
|
||||||
None => "",
|
None => "",
|
||||||
};
|
};
|
||||||
|
@ -238,7 +238,7 @@ impl RpcHelper {
|
||||||
// and within a same zone we priorize nodes with the lowest latency.
|
// and within a same zone we priorize nodes with the lowest latency.
|
||||||
let mut requests = requests
|
let mut requests = requests
|
||||||
.map(|(to, fut)| {
|
.map(|(to, fut)| {
|
||||||
let peer_zone = match ring.config.members.get(&to) {
|
let peer_zone = match ring.layout.node_role(&to) {
|
||||||
Some(pc) => &pc.zone,
|
Some(pc) => &pc.zone,
|
||||||
None => "",
|
None => "",
|
||||||
};
|
};
|
||||||
|
|
|
@ -23,12 +23,13 @@ use netapp::{NetApp, NetworkKey, NodeID, NodeKey};
|
||||||
|
|
||||||
use garage_util::background::BackgroundRunner;
|
use garage_util::background::BackgroundRunner;
|
||||||
use garage_util::config::Config;
|
use garage_util::config::Config;
|
||||||
use garage_util::data::Uuid;
|
use garage_util::data::*;
|
||||||
use garage_util::error::*;
|
use garage_util::error::*;
|
||||||
use garage_util::persister::Persister;
|
use garage_util::persister::Persister;
|
||||||
use garage_util::time::*;
|
use garage_util::time::*;
|
||||||
|
|
||||||
use crate::consul::*;
|
use crate::consul::*;
|
||||||
|
use crate::layout::*;
|
||||||
use crate::ring::*;
|
use crate::ring::*;
|
||||||
use crate::rpc_helper::*;
|
use crate::rpc_helper::*;
|
||||||
|
|
||||||
|
@ -48,13 +49,13 @@ pub enum SystemRpc {
|
||||||
Ok,
|
Ok,
|
||||||
/// Request to connect to a specific node (in <pubkey>@<host>:<port> format)
|
/// Request to connect to a specific node (in <pubkey>@<host>:<port> format)
|
||||||
Connect(String),
|
Connect(String),
|
||||||
/// Ask other node its config. Answered with AdvertiseConfig
|
/// Ask other node its cluster layout. Answered with AdvertiseClusterLayout
|
||||||
PullConfig,
|
PullClusterLayout,
|
||||||
/// Advertise Garage status. Answered with another AdvertiseStatus.
|
/// Advertise Garage status. Answered with another AdvertiseStatus.
|
||||||
/// Exchanged with every node on a regular basis.
|
/// Exchanged with every node on a regular basis.
|
||||||
AdvertiseStatus(NodeStatus),
|
AdvertiseStatus(NodeStatus),
|
||||||
/// Advertisement of nodes config. Sent spontanously or in response to PullConfig
|
/// Advertisement of cluster layout. Sent spontanously or in response to PullClusterLayout
|
||||||
AdvertiseConfig(NetworkConfig),
|
AdvertiseClusterLayout(ClusterLayout),
|
||||||
/// Get known nodes states
|
/// Get known nodes states
|
||||||
GetKnownNodes,
|
GetKnownNodes,
|
||||||
/// Return known nodes
|
/// Return known nodes
|
||||||
|
@ -70,7 +71,7 @@ pub struct System {
|
||||||
/// The id of this node
|
/// The id of this node
|
||||||
pub id: Uuid,
|
pub id: Uuid,
|
||||||
|
|
||||||
persist_config: Persister<NetworkConfig>,
|
persist_cluster_layout: Persister<ClusterLayout>,
|
||||||
persist_peer_list: Persister<Vec<(Uuid, SocketAddr)>>,
|
persist_peer_list: Persister<Vec<(Uuid, SocketAddr)>>,
|
||||||
|
|
||||||
local_status: ArcSwap<NodeStatus>,
|
local_status: ArcSwap<NodeStatus>,
|
||||||
|
@ -103,8 +104,10 @@ pub struct NodeStatus {
|
||||||
pub hostname: String,
|
pub hostname: String,
|
||||||
/// Replication factor configured on the node
|
/// Replication factor configured on the node
|
||||||
pub replication_factor: usize,
|
pub replication_factor: usize,
|
||||||
/// Configuration version
|
/// Cluster layout version
|
||||||
pub config_version: u64,
|
pub cluster_layout_version: u64,
|
||||||
|
/// Hash of cluster layout staging data
|
||||||
|
pub cluster_layout_staging_hash: Hash,
|
||||||
}
|
}
|
||||||
|
|
||||||
#[derive(Debug, Clone, Serialize, Deserialize)]
|
#[derive(Debug, Clone, Serialize, Deserialize)]
|
||||||
|
@ -187,17 +190,17 @@ impl System {
|
||||||
gen_node_key(&config.metadata_dir).expect("Unable to read or generate node ID");
|
gen_node_key(&config.metadata_dir).expect("Unable to read or generate node ID");
|
||||||
info!("Node public key: {}", hex::encode(&node_key.public_key()));
|
info!("Node public key: {}", hex::encode(&node_key.public_key()));
|
||||||
|
|
||||||
let persist_config = Persister::new(&config.metadata_dir, "network_config");
|
let persist_cluster_layout = Persister::new(&config.metadata_dir, "cluster_layout");
|
||||||
let persist_peer_list = Persister::new(&config.metadata_dir, "peer_list");
|
let persist_peer_list = Persister::new(&config.metadata_dir, "peer_list");
|
||||||
|
|
||||||
let net_config = match persist_config.load() {
|
let cluster_layout = match persist_cluster_layout.load() {
|
||||||
Ok(x) => x,
|
Ok(x) => x,
|
||||||
Err(e) => {
|
Err(e) => {
|
||||||
info!(
|
info!(
|
||||||
"No valid previous network configuration stored ({}), starting fresh.",
|
"No valid previous cluster layout stored ({}), starting fresh.",
|
||||||
e
|
e
|
||||||
);
|
);
|
||||||
NetworkConfig::new()
|
ClusterLayout::new(replication_factor)
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
|
@ -206,10 +209,11 @@ impl System {
|
||||||
.into_string()
|
.into_string()
|
||||||
.unwrap_or_else(|_| "<invalid utf-8>".to_string()),
|
.unwrap_or_else(|_| "<invalid utf-8>".to_string()),
|
||||||
replication_factor,
|
replication_factor,
|
||||||
config_version: net_config.version,
|
cluster_layout_version: cluster_layout.version,
|
||||||
|
cluster_layout_staging_hash: cluster_layout.staging_hash,
|
||||||
};
|
};
|
||||||
|
|
||||||
let ring = Ring::new(net_config, replication_factor);
|
let ring = Ring::new(cluster_layout, replication_factor);
|
||||||
let (update_ring, ring) = watch::channel(Arc::new(ring));
|
let (update_ring, ring) = watch::channel(Arc::new(ring));
|
||||||
|
|
||||||
if let Some(addr) = config.rpc_public_addr {
|
if let Some(addr) = config.rpc_public_addr {
|
||||||
|
@ -229,7 +233,7 @@ impl System {
|
||||||
|
|
||||||
let sys = Arc::new(System {
|
let sys = Arc::new(System {
|
||||||
id: netapp.id.into(),
|
id: netapp.id.into(),
|
||||||
persist_config,
|
persist_cluster_layout,
|
||||||
persist_peer_list,
|
persist_peer_list,
|
||||||
local_status: ArcSwap::new(Arc::new(local_status)),
|
local_status: ArcSwap::new(Arc::new(local_status)),
|
||||||
node_status: RwLock::new(HashMap::new()),
|
node_status: RwLock::new(HashMap::new()),
|
||||||
|
@ -292,12 +296,12 @@ impl System {
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Save network configuration to disc
|
/// Save network configuration to disc
|
||||||
async fn save_network_config(self: Arc<Self>) -> Result<(), Error> {
|
async fn save_cluster_layout(self: Arc<Self>) -> Result<(), Error> {
|
||||||
let ring: Arc<Ring> = self.ring.borrow().clone();
|
let ring: Arc<Ring> = self.ring.borrow().clone();
|
||||||
self.persist_config
|
self.persist_cluster_layout
|
||||||
.save_async(&ring.config)
|
.save_async(&ring.layout)
|
||||||
.await
|
.await
|
||||||
.expect("Cannot save current cluster configuration");
|
.expect("Cannot save current cluster layout");
|
||||||
Ok(())
|
Ok(())
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -305,7 +309,8 @@ impl System {
|
||||||
let mut new_si: NodeStatus = self.local_status.load().as_ref().clone();
|
let mut new_si: NodeStatus = self.local_status.load().as_ref().clone();
|
||||||
|
|
||||||
let ring = self.ring.borrow();
|
let ring = self.ring.borrow();
|
||||||
new_si.config_version = ring.config.version;
|
new_si.cluster_layout_version = ring.layout.version;
|
||||||
|
new_si.cluster_layout_staging_hash = ring.layout.staging_hash;
|
||||||
self.local_status.swap(Arc::new(new_si));
|
self.local_status.swap(Arc::new(new_si));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -337,9 +342,9 @@ impl System {
|
||||||
)));
|
)));
|
||||||
}
|
}
|
||||||
|
|
||||||
fn handle_pull_config(&self) -> SystemRpc {
|
fn handle_pull_cluster_layout(&self) -> SystemRpc {
|
||||||
let ring = self.ring.borrow().clone();
|
let ring = self.ring.borrow().clone();
|
||||||
SystemRpc::AdvertiseConfig(ring.config.clone())
|
SystemRpc::AdvertiseClusterLayout(ring.layout.clone())
|
||||||
}
|
}
|
||||||
|
|
||||||
fn handle_get_known_nodes(&self) -> SystemRpc {
|
fn handle_get_known_nodes(&self) -> SystemRpc {
|
||||||
|
@ -360,7 +365,8 @@ impl System {
|
||||||
.unwrap_or(NodeStatus {
|
.unwrap_or(NodeStatus {
|
||||||
hostname: "?".to_string(),
|
hostname: "?".to_string(),
|
||||||
replication_factor: 0,
|
replication_factor: 0,
|
||||||
config_version: 0,
|
cluster_layout_version: 0,
|
||||||
|
cluster_layout_staging_hash: Hash::from([0u8; 32]),
|
||||||
}),
|
}),
|
||||||
})
|
})
|
||||||
.collect::<Vec<_>>();
|
.collect::<Vec<_>>();
|
||||||
|
@ -381,10 +387,12 @@ impl System {
|
||||||
std::process::exit(1);
|
std::process::exit(1);
|
||||||
}
|
}
|
||||||
|
|
||||||
if info.config_version > local_info.config_version {
|
if info.cluster_layout_version > local_info.cluster_layout_version
|
||||||
|
|| info.cluster_layout_staging_hash != local_info.cluster_layout_staging_hash
|
||||||
|
{
|
||||||
let self2 = self.clone();
|
let self2 = self.clone();
|
||||||
self.background.spawn_cancellable(async move {
|
self.background.spawn_cancellable(async move {
|
||||||
self2.pull_config(from).await;
|
self2.pull_cluster_layout(from).await;
|
||||||
Ok(())
|
Ok(())
|
||||||
});
|
});
|
||||||
}
|
}
|
||||||
|
@ -397,32 +405,39 @@ impl System {
|
||||||
Ok(SystemRpc::Ok)
|
Ok(SystemRpc::Ok)
|
||||||
}
|
}
|
||||||
|
|
||||||
async fn handle_advertise_config(
|
async fn handle_advertise_cluster_layout(
|
||||||
self: Arc<Self>,
|
self: Arc<Self>,
|
||||||
adv: &NetworkConfig,
|
adv: &ClusterLayout,
|
||||||
) -> Result<SystemRpc, Error> {
|
) -> Result<SystemRpc, Error> {
|
||||||
let update_ring = self.update_ring.lock().await;
|
let update_ring = self.update_ring.lock().await;
|
||||||
let ring: Arc<Ring> = self.ring.borrow().clone();
|
let mut layout: ClusterLayout = self.ring.borrow().layout.clone();
|
||||||
|
|
||||||
if adv.version > ring.config.version {
|
let prev_layout_check = layout.check();
|
||||||
let ring = Ring::new(adv.clone(), self.replication_factor);
|
if layout.merge(adv) {
|
||||||
|
if prev_layout_check && !layout.check() {
|
||||||
|
error!("New cluster layout is invalid, discarding.");
|
||||||
|
return Err(Error::Message(
|
||||||
|
"New cluster layout is invalid, discarding.".into(),
|
||||||
|
));
|
||||||
|
}
|
||||||
|
|
||||||
|
let ring = Ring::new(layout.clone(), self.replication_factor);
|
||||||
update_ring.send(Arc::new(ring))?;
|
update_ring.send(Arc::new(ring))?;
|
||||||
drop(update_ring);
|
drop(update_ring);
|
||||||
|
|
||||||
let self2 = self.clone();
|
let self2 = self.clone();
|
||||||
let adv = adv.clone();
|
|
||||||
self.background.spawn_cancellable(async move {
|
self.background.spawn_cancellable(async move {
|
||||||
self2
|
self2
|
||||||
.rpc
|
.rpc
|
||||||
.broadcast(
|
.broadcast(
|
||||||
&self2.system_endpoint,
|
&self2.system_endpoint,
|
||||||
SystemRpc::AdvertiseConfig(adv),
|
SystemRpc::AdvertiseClusterLayout(layout),
|
||||||
RequestStrategy::with_priority(PRIO_HIGH),
|
RequestStrategy::with_priority(PRIO_HIGH),
|
||||||
)
|
)
|
||||||
.await;
|
.await;
|
||||||
Ok(())
|
Ok(())
|
||||||
});
|
});
|
||||||
self.background.spawn(self.clone().save_network_config());
|
self.background.spawn(self.clone().save_cluster_layout());
|
||||||
}
|
}
|
||||||
|
|
||||||
Ok(SystemRpc::Ok)
|
Ok(SystemRpc::Ok)
|
||||||
|
@ -456,14 +471,15 @@ impl System {
|
||||||
};
|
};
|
||||||
|
|
||||||
while !*stop_signal.borrow() {
|
while !*stop_signal.borrow() {
|
||||||
let not_configured = self.ring.borrow().config.members.is_empty();
|
let not_configured = !self.ring.borrow().layout.check();
|
||||||
let no_peers = self.fullmesh.get_peer_list().len() < self.replication_factor;
|
let no_peers = self.fullmesh.get_peer_list().len() < self.replication_factor;
|
||||||
|
let expected_n_nodes = self.ring.borrow().layout.num_nodes();
|
||||||
let bad_peers = self
|
let bad_peers = self
|
||||||
.fullmesh
|
.fullmesh
|
||||||
.get_peer_list()
|
.get_peer_list()
|
||||||
.iter()
|
.iter()
|
||||||
.filter(|p| p.is_up())
|
.filter(|p| p.is_up())
|
||||||
.count() != self.ring.borrow().config.members.len();
|
.count() != expected_n_nodes;
|
||||||
|
|
||||||
if not_configured || no_peers || bad_peers {
|
if not_configured || no_peers || bad_peers {
|
||||||
info!("Doing a bootstrap/discovery step (not_configured: {}, no_peers: {}, bad_peers: {})", not_configured, no_peers, bad_peers);
|
info!("Doing a bootstrap/discovery step (not_configured: {}, no_peers: {}, bad_peers: {})", not_configured, no_peers, bad_peers);
|
||||||
|
@ -533,18 +549,18 @@ impl System {
|
||||||
self.persist_peer_list.save_async(&peer_list).await
|
self.persist_peer_list.save_async(&peer_list).await
|
||||||
}
|
}
|
||||||
|
|
||||||
async fn pull_config(self: Arc<Self>, peer: Uuid) {
|
async fn pull_cluster_layout(self: Arc<Self>, peer: Uuid) {
|
||||||
let resp = self
|
let resp = self
|
||||||
.rpc
|
.rpc
|
||||||
.call(
|
.call(
|
||||||
&self.system_endpoint,
|
&self.system_endpoint,
|
||||||
peer,
|
peer,
|
||||||
SystemRpc::PullConfig,
|
SystemRpc::PullClusterLayout,
|
||||||
RequestStrategy::with_priority(PRIO_HIGH).with_timeout(PING_TIMEOUT),
|
RequestStrategy::with_priority(PRIO_HIGH).with_timeout(PING_TIMEOUT),
|
||||||
)
|
)
|
||||||
.await;
|
.await;
|
||||||
if let Ok(SystemRpc::AdvertiseConfig(config)) = resp {
|
if let Ok(SystemRpc::AdvertiseClusterLayout(layout)) = resp {
|
||||||
let _: Result<_, _> = self.handle_advertise_config(&config).await;
|
let _: Result<_, _> = self.handle_advertise_cluster_layout(&layout).await;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -554,9 +570,11 @@ impl EndpointHandler<SystemRpc> for System {
|
||||||
async fn handle(self: &Arc<Self>, msg: &SystemRpc, from: NodeID) -> Result<SystemRpc, Error> {
|
async fn handle(self: &Arc<Self>, msg: &SystemRpc, from: NodeID) -> Result<SystemRpc, Error> {
|
||||||
match msg {
|
match msg {
|
||||||
SystemRpc::Connect(node) => self.handle_connect(node).await,
|
SystemRpc::Connect(node) => self.handle_connect(node).await,
|
||||||
SystemRpc::PullConfig => Ok(self.handle_pull_config()),
|
SystemRpc::PullClusterLayout => Ok(self.handle_pull_cluster_layout()),
|
||||||
SystemRpc::AdvertiseStatus(adv) => self.handle_advertise_status(from.into(), adv).await,
|
SystemRpc::AdvertiseStatus(adv) => self.handle_advertise_status(from.into(), adv).await,
|
||||||
SystemRpc::AdvertiseConfig(adv) => self.clone().handle_advertise_config(adv).await,
|
SystemRpc::AdvertiseClusterLayout(adv) => {
|
||||||
|
self.clone().handle_advertise_cluster_layout(adv).await
|
||||||
|
}
|
||||||
SystemRpc::GetKnownNodes => Ok(self.handle_get_known_nodes()),
|
SystemRpc::GetKnownNodes => Ok(self.handle_get_known_nodes()),
|
||||||
_ => Err(Error::BadRpc("Unexpected RPC message".to_string())),
|
_ => Err(Error::BadRpc("Unexpected RPC message".to_string())),
|
||||||
}
|
}
|
||||||
|
|
|
@ -1,11 +1,12 @@
|
||||||
[package]
|
[package]
|
||||||
name = "garage_table"
|
name = "garage_table"
|
||||||
version = "0.4.0"
|
version = "0.5.0"
|
||||||
authors = ["Alex Auvolat <alex@adnab.me>"]
|
authors = ["Alex Auvolat <alex@adnab.me>"]
|
||||||
edition = "2018"
|
edition = "2018"
|
||||||
license = "AGPL-3.0"
|
license = "AGPL-3.0"
|
||||||
description = "Table sharding and replication engine (DynamoDB-like) for the Garage object store"
|
description = "Table sharding and replication engine (DynamoDB-like) for the Garage object store"
|
||||||
repository = "https://git.deuxfleurs.fr/Deuxfleurs/garage"
|
repository = "https://git.deuxfleurs.fr/Deuxfleurs/garage"
|
||||||
|
readme = "../../README.md"
|
||||||
|
|
||||||
[lib]
|
[lib]
|
||||||
path = "lib.rs"
|
path = "lib.rs"
|
||||||
|
@ -13,8 +14,8 @@ path = "lib.rs"
|
||||||
# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
|
# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
|
||||||
|
|
||||||
[dependencies]
|
[dependencies]
|
||||||
garage_rpc = { version = "0.4.0", path = "../rpc" }
|
garage_rpc = { version = "0.5.0", path = "../rpc" }
|
||||||
garage_util = { version = "0.4.0", path = "../util" }
|
garage_util = { version = "0.5.0", path = "../util" }
|
||||||
|
|
||||||
async-trait = "0.1.7"
|
async-trait = "0.1.7"
|
||||||
bytes = "1.0"
|
bytes = "1.0"
|
||||||
|
|
|
@ -4,7 +4,6 @@
|
||||||
#[macro_use]
|
#[macro_use]
|
||||||
extern crate log;
|
extern crate log;
|
||||||
|
|
||||||
pub mod crdt;
|
|
||||||
pub mod schema;
|
pub mod schema;
|
||||||
pub mod util;
|
pub mod util;
|
||||||
|
|
||||||
|
@ -18,3 +17,7 @@ pub mod table;
|
||||||
pub use schema::*;
|
pub use schema::*;
|
||||||
pub use table::*;
|
pub use table::*;
|
||||||
pub use util::*;
|
pub use util::*;
|
||||||
|
|
||||||
|
pub mod crdt {
|
||||||
|
pub use garage_util::crdt::*;
|
||||||
|
}
|
||||||
|
|
|
@ -28,10 +28,10 @@ impl TableReplication for TableFullReplication {
|
||||||
|
|
||||||
fn write_nodes(&self, _hash: &Hash) -> Vec<Uuid> {
|
fn write_nodes(&self, _hash: &Hash) -> Vec<Uuid> {
|
||||||
let ring = self.system.ring.borrow();
|
let ring = self.system.ring.borrow();
|
||||||
ring.config.members.keys().cloned().collect::<Vec<_>>()
|
ring.layout.node_ids().to_vec()
|
||||||
}
|
}
|
||||||
fn write_quorum(&self) -> usize {
|
fn write_quorum(&self) -> usize {
|
||||||
let nmembers = self.system.ring.borrow().config.members.len();
|
let nmembers = self.system.ring.borrow().layout.node_ids().len();
|
||||||
if nmembers > self.max_faults {
|
if nmembers > self.max_faults {
|
||||||
nmembers - self.max_faults
|
nmembers - self.max_faults
|
||||||
} else {
|
} else {
|
||||||
|
|
|
@ -1,11 +1,12 @@
|
||||||
[package]
|
[package]
|
||||||
name = "garage_util"
|
name = "garage_util"
|
||||||
version = "0.4.0"
|
version = "0.5.0"
|
||||||
authors = ["Alex Auvolat <alex@adnab.me>"]
|
authors = ["Alex Auvolat <alex@adnab.me>"]
|
||||||
edition = "2018"
|
edition = "2018"
|
||||||
license = "AGPL-3.0"
|
license = "AGPL-3.0"
|
||||||
description = "Utility crate for the Garage object store"
|
description = "Utility crate for the Garage object store"
|
||||||
repository = "https://git.deuxfleurs.fr/Deuxfleurs/garage"
|
repository = "https://git.deuxfleurs.fr/Deuxfleurs/garage"
|
||||||
|
readme = "../../README.md"
|
||||||
quentin
commented
:P :P
|
|||||||
|
|
||||||
[lib]
|
[lib]
|
||||||
path = "lib.rs"
|
path = "lib.rs"
|
||||||
|
|
|
@ -1,4 +1,4 @@
|
||||||
use garage_util::data::*;
|
use crate::data::*;
|
||||||
|
|
||||||
/// Definition of a CRDT - all CRDT Rust types implement this.
|
/// Definition of a CRDT - all CRDT Rust types implement this.
|
||||||
///
|
///
|
|
@ -1,6 +1,8 @@
|
||||||
|
use std::cmp::Ordering;
|
||||||
|
|
||||||
use serde::{Deserialize, Serialize};
|
use serde::{Deserialize, Serialize};
|
||||||
|
|
||||||
use garage_util::time::now_msec;
|
use crate::time::now_msec;
|
||||||
|
|
||||||
use crate::crdt::crdt::*;
|
use crate::crdt::crdt::*;
|
||||||
|
|
||||||
|
@ -104,11 +106,15 @@ where
|
||||||
T: Clone + Crdt,
|
T: Clone + Crdt,
|
||||||
{
|
{
|
||||||
fn merge(&mut self, other: &Self) {
|
fn merge(&mut self, other: &Self) {
|
||||||
if other.ts > self.ts {
|
match other.ts.cmp(&self.ts) {
|
||||||
self.ts = other.ts;
|
Ordering::Greater => {
|
||||||
self.v = other.v.clone();
|
self.ts = other.ts;
|
||||||
} else if other.ts == self.ts {
|
self.v = other.v.clone();
|
||||||
self.v.merge(&other.v);
|
}
|
||||||
|
Ordering::Equal => {
|
||||||
|
self.v.merge(&other.v);
|
||||||
|
}
|
||||||
|
Ordering::Less => (),
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
|
@ -1,6 +1,8 @@
|
||||||
|
use std::cmp::Ordering;
|
||||||
|
|
||||||
use serde::{Deserialize, Serialize};
|
use serde::{Deserialize, Serialize};
|
||||||
|
|
||||||
use garage_util::time::now_msec;
|
use crate::time::now_msec;
|
||||||
|
|
||||||
use crate::crdt::crdt::*;
|
use crate::crdt::crdt::*;
|
||||||
|
|
||||||
|
@ -135,11 +137,15 @@ where
|
||||||
match self.vals.binary_search_by(|(k2, _, _)| k2.cmp(k)) {
|
match self.vals.binary_search_by(|(k2, _, _)| k2.cmp(k)) {
|
||||||
Ok(i) => {
|
Ok(i) => {
|
||||||
let (_, ts1, _v1) = &self.vals[i];
|
let (_, ts1, _v1) = &self.vals[i];
|
||||||
if ts2 > ts1 {
|
match ts2.cmp(ts1) {
|
||||||
self.vals[i].1 = *ts2;
|
Ordering::Greater => {
|
||||||
self.vals[i].2 = v2.clone();
|
self.vals[i].1 = *ts2;
|
||||||
} else if ts1 == ts2 {
|
self.vals[i].2 = v2.clone();
|
||||||
self.vals[i].2.merge(v2);
|
}
|
||||||
|
Ordering::Equal => {
|
||||||
|
self.vals[i].2.merge(v2);
|
||||||
|
}
|
||||||
|
Ordering::Less => (),
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
Err(i) => {
|
Err(i) => {
|
|
@ -5,6 +5,7 @@ extern crate log;
|
||||||
|
|
||||||
pub mod background;
|
pub mod background;
|
||||||
pub mod config;
|
pub mod config;
|
||||||
|
pub mod crdt;
|
||||||
pub mod data;
|
pub mod data;
|
||||||
pub mod error;
|
pub mod error;
|
||||||
pub mod persister;
|
pub mod persister;
|
||||||
|
|
|
@ -1,11 +1,12 @@
|
||||||
[package]
|
[package]
|
||||||
name = "garage_web"
|
name = "garage_web"
|
||||||
version = "0.4.0"
|
version = "0.5.0"
|
||||||
authors = ["Alex Auvolat <alex@adnab.me>", "Quentin Dufour <quentin@dufour.io>"]
|
authors = ["Alex Auvolat <alex@adnab.me>", "Quentin Dufour <quentin@dufour.io>"]
|
||||||
edition = "2018"
|
edition = "2018"
|
||||||
license = "AGPL-3.0"
|
license = "AGPL-3.0"
|
||||||
description = "S3-like website endpoint crate for the Garage object store"
|
description = "S3-like website endpoint crate for the Garage object store"
|
||||||
repository = "https://git.deuxfleurs.fr/Deuxfleurs/garage"
|
repository = "https://git.deuxfleurs.fr/Deuxfleurs/garage"
|
||||||
|
readme = "../../README.md"
|
||||||
|
|
||||||
[lib]
|
[lib]
|
||||||
path = "lib.rs"
|
path = "lib.rs"
|
||||||
|
@ -13,10 +14,10 @@ path = "lib.rs"
|
||||||
# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
|
# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
|
||||||
|
|
||||||
[dependencies]
|
[dependencies]
|
||||||
garage_api = { version = "0.4.0", path = "../api" }
|
garage_api = { version = "0.5.0", path = "../api" }
|
||||||
garage_model = { version = "0.4.0", path = "../model" }
|
garage_model = { version = "0.5.0", path = "../model" }
|
||||||
garage_util = { version = "0.4.0", path = "../util" }
|
garage_util = { version = "0.5.0", path = "../util" }
|
||||||
garage_table = { version = "0.4.0", path = "../table" }
|
garage_table = { version = "0.5.0", path = "../table" }
|
||||||
|
|
||||||
err-derive = "0.3"
|
err-derive = "0.3"
|
||||||
log = "0.4"
|
log = "0.4"
|
||||||
|
|
Interesting that you changed the order here.
I put this guide way below because I thought that it was important to master the different concepts of Garage and its deployment before considering a production deployment ;)
The reasoning to me is that this is one of the most important pages in the documentation because it's not just about "production deployments" but more generally how to run Garage in a multi-node setup, a core feature of Garage. So it makes sense that it's on top just to be more visible. Maybe we should rename to "multi-node deployment" if it makes more sense.