replace RPC stack with netapp #123

Merged
lx merged 5 commits from netapp into main 2021-10-25 13:49:35 +00:00
26 changed files with 1415 additions and 1523 deletions
Showing only changes of commit 1b450c4b49 - Show all commits

225
Cargo.lock generated
View file

@ -83,12 +83,6 @@ dependencies = [
"generic-array", "generic-array",
] ]
[[package]]
name = "bumpalo"
version = "3.7.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d9df67f7bf9ef8498769f994239c45613ef0c5899415fb58e9add412d2c1a538"
[[package]] [[package]]
name = "byteorder" name = "byteorder"
version = "1.4.3" version = "1.4.3"
@ -393,9 +387,9 @@ dependencies = [
"futures-util", "futures-util",
"garage_api", "garage_api",
"garage_model", "garage_model",
"garage_rpc 0.3.0", "garage_rpc",
"garage_table", "garage_table",
"garage_util 0.3.0", "garage_util",
"garage_web", "garage_web",
"git-version", "git-version",
"hex", "hex",
@ -425,7 +419,7 @@ dependencies = [
"futures-util", "futures-util",
"garage_model", "garage_model",
"garage_table", "garage_table",
"garage_util 0.3.0", "garage_util",
"hex", "hex",
"hmac", "hmac",
"http", "http",
@ -451,9 +445,9 @@ dependencies = [
"async-trait", "async-trait",
"futures", "futures",
"futures-util", "futures-util",
"garage_rpc 0.3.0", "garage_rpc",
"garage_table", "garage_table",
"garage_util 0.3.0", "garage_util",
"hex", "hex",
"log", "log",
"netapp", "netapp",
@ -474,8 +468,7 @@ dependencies = [
"bytes 1.1.0", "bytes 1.1.0",
"futures", "futures",
"futures-util", "futures-util",
"garage_rpc 0.2.1", "garage_util",
"garage_util 0.3.0",
"gethostname", "gethostname",
"hex", "hex",
"hyper", "hyper",
@ -498,8 +491,8 @@ dependencies = [
"bytes 1.1.0", "bytes 1.1.0",
"futures", "futures",
"futures-util", "futures-util",
"garage_rpc 0.3.0", "garage_rpc",
"garage_util 0.3.0", "garage_util",
"hexdump", "hexdump",
"log", "log",
"rand", "rand",
@ -543,7 +536,7 @@ dependencies = [
"garage_api", "garage_api",
"garage_model", "garage_model",
"garage_table", "garage_table",
"garage_util 0.3.0", "garage_util",
"http", "http",
"hyper", "hyper",
"idna", "idna",
@ -604,31 +597,6 @@ dependencies = [
"syn", "syn",
] ]
[[package]]
name = "h2"
version = "0.3.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "6c06815895acec637cd6ed6e9662c935b866d20a106f8361892893a7d9234964"
dependencies = [
"bytes 1.1.0",
"fnv",
"futures-core",
"futures-sink",
"futures-util",
"http",
"indexmap",
"slab",
"tokio",
"tokio-util",
"tracing",
]
[[package]]
name = "hashbrown"
version = "0.11.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "ab5ef0d4909ef3724cc8cce6ccc8572c5c817592e9285f5464f8e86f8bd3726e"
[[package]] [[package]]
name = "heck" name = "heck"
version = "0.3.3" version = "0.3.3"
@ -738,7 +706,6 @@ dependencies = [
"futures-channel", "futures-channel",
"futures-core", "futures-core",
"futures-util", "futures-util",
"h2",
"http", "http",
"http-body", "http-body",
"httparse", "httparse",
@ -752,21 +719,6 @@ dependencies = [
"want", "want",
] ]
[[package]]
name = "hyper-rustls"
version = "0.22.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "5f9f7a97316d44c0af9b0301e65010573a853a9fc97046d7331d7f6bc0fd5a64"
dependencies = [
"futures-util",
"hyper",
"log",
"rustls",
"tokio",
"tokio-rustls",
"webpki",
]
[[package]] [[package]]
name = "idna" name = "idna"
version = "0.2.3" version = "0.2.3"
@ -778,16 +730,6 @@ dependencies = [
"unicode-normalization", "unicode-normalization",
] ]
[[package]]
name = "indexmap"
version = "1.7.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "bc633605454125dec4b66843673f01c7df2b89479b32e0ed634e43a91cff62a5"
dependencies = [
"autocfg",
"hashbrown",
]
[[package]] [[package]]
name = "instant" name = "instant"
version = "0.1.11" version = "0.1.11"
@ -809,19 +751,11 @@ version = "0.4.8"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "b71991ff56294aa922b450139ee08b3bfc70982c6b2c7562771375cf73542dd4" checksum = "b71991ff56294aa922b450139ee08b3bfc70982c6b2c7562771375cf73542dd4"
[[package]]
name = "js-sys"
version = "0.3.55"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "7cc9ffccd38c451a86bf13657df244e9c3f37493cce8e5e21e940963777acc84"
dependencies = [
"wasm-bindgen",
]
[[package]] [[package]]
name = "kuska-handshake" name = "kuska-handshake"
version = "0.2.0" version = "0.2.0"
source = "git+https://github.com/Alexis211/handshake?branch=tokio1.0#61bf144643b177797b2d16b9b2ffcfb648face00" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "e33da4b69f23c2ece0b3e729d079cebdc2c0206e493e42f510f500ad81c631d5"
dependencies = [ dependencies = [
"futures", "futures",
"hex", "hex",
@ -940,7 +874,7 @@ dependencies = [
[[package]] [[package]]
name = "netapp" name = "netapp"
version = "0.3.0" version = "0.3.0"
source = "git+https://git.deuxfleurs.fr/lx/netapp#cfa64bc745969cfc3684a70b45d71128f8335460" source = "git+https://git.deuxfleurs.fr/lx/netapp#9b64c27da68f7ac9049e02e26da918e871a63f07"
dependencies = [ dependencies = [
"arc-swap", "arc-swap",
"async-trait", "async-trait",
@ -1209,21 +1143,6 @@ version = "0.6.25"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "f497285884f3fcff424ffc933e56d7cbca511def0c9831a7f9b5f6153e3cc89b" checksum = "f497285884f3fcff424ffc933e56d7cbca511def0c9831a7f9b5f6153e3cc89b"
[[package]]
name = "ring"
version = "0.16.20"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "3053cf52e236a3ed746dfc745aa9cacf1b791d846bdaf412f60a8d7d6e17c8fc"
dependencies = [
"cc",
"libc",
"once_cell",
"spin",
"untrusted",
"web-sys",
"winapi",
]
[[package]] [[package]]
name = "rmp" name = "rmp"
version = "0.8.10" version = "0.8.10"
@ -1265,19 +1184,6 @@ dependencies = [
"xmlparser", "xmlparser",
] ]
[[package]]
name = "rustls"
version = "0.19.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "35edb675feee39aec9c99fa5ff985081995a06d594114ae14cbe797ad7b7a6d7"
dependencies = [
"base64",
"log",
"ring",
"sct",
"webpki",
]
[[package]] [[package]]
name = "rustversion" name = "rustversion"
version = "1.0.5" version = "1.0.5"
@ -1305,16 +1211,6 @@ version = "1.1.0"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d29ab0c6d3fc0ee92fe66e2d99f700eab17a8d57d1c1d3b748380fb20baa78cd" checksum = "d29ab0c6d3fc0ee92fe66e2d99f700eab17a8d57d1c1d3b748380fb20baa78cd"
[[package]]
name = "sct"
version = "0.6.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "b362b83898e0e69f38515b82ee15aa80636befe47c3b6d3d89a911e78fc228ce"
dependencies = [
"ring",
"untrusted",
]
[[package]] [[package]]
name = "serde" name = "serde"
version = "1.0.130" version = "1.0.130"
@ -1415,12 +1311,6 @@ dependencies = [
"winapi", "winapi",
] ]
[[package]]
name = "spin"
version = "0.5.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "6e63cff320ae2c57904679ba7cb63280a3dc4613885beafb148ee7bf9aa9042d"
[[package]] [[package]]
name = "structopt" name = "structopt"
version = "0.3.23" version = "0.3.23"
@ -1567,17 +1457,6 @@ dependencies = [
"syn", "syn",
] ]
[[package]]
name = "tokio-rustls"
version = "0.22.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "bc6844de72e57df1980054b38be3a9f4702aba4858be64dd700181a8a6d0e1b6"
dependencies = [
"rustls",
"tokio",
"webpki",
]
[[package]] [[package]]
name = "tokio-stream" name = "tokio-stream"
version = "0.1.7" version = "0.1.7"
@ -1684,12 +1563,6 @@ version = "0.2.2"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "8ccb82d61f80a663efe1f787a51b16b5a51e3314d6ac365b08639f52387b33f3" checksum = "8ccb82d61f80a663efe1f787a51b16b5a51e3314d6ac365b08639f52387b33f3"
[[package]]
name = "untrusted"
version = "0.7.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "a156c684c91ea7d62626509bce3cb4e1d9ed5c4d978f7b4352658f96a4c26b4a"
[[package]] [[package]]
name = "url" name = "url"
version = "2.2.2" version = "2.2.2"
@ -1735,80 +1608,6 @@ version = "0.10.2+wasi-snapshot-preview1"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "fd6fbd9a79829dd1ad0cc20627bf1ed606756a7f77edff7b66b7064f9cb327c6" checksum = "fd6fbd9a79829dd1ad0cc20627bf1ed606756a7f77edff7b66b7064f9cb327c6"
[[package]]
name = "wasm-bindgen"
version = "0.2.78"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "632f73e236b219150ea279196e54e610f5dbafa5d61786303d4da54f84e47fce"
dependencies = [
"cfg-if",
"wasm-bindgen-macro",
]
[[package]]
name = "wasm-bindgen-backend"
version = "0.2.78"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "a317bf8f9fba2476b4b2c85ef4c4af8ff39c3c7f0cdfeed4f82c34a880aa837b"
dependencies = [
"bumpalo",
"lazy_static",
"log",
"proc-macro2",
"quote",
"syn",
"wasm-bindgen-shared",
]
[[package]]
name = "wasm-bindgen-macro"
version = "0.2.78"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d56146e7c495528bf6587663bea13a8eb588d39b36b679d83972e1a2dbbdacf9"
dependencies = [
"quote",
"wasm-bindgen-macro-support",
]
[[package]]
name = "wasm-bindgen-macro-support"
version = "0.2.78"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "7803e0eea25835f8abdc585cd3021b3deb11543c6fe226dcd30b228857c5c5ab"
dependencies = [
"proc-macro2",
"quote",
"syn",
"wasm-bindgen-backend",
"wasm-bindgen-shared",
]
[[package]]
name = "wasm-bindgen-shared"
version = "0.2.78"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "0237232789cf037d5480773fe568aac745bfe2afbc11a863e97901780a6b47cc"
[[package]]
name = "web-sys"
version = "0.3.55"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "38eb105f1c59d9eaa6b5cdc92b859d85b926e82cb2e0945cd0c9259faa6fe9fb"
dependencies = [
"js-sys",
"wasm-bindgen",
]
[[package]]
name = "webpki"
version = "0.21.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "b8e38c0608262c46d4a56202ebabdeb094cef7e560ca7a226c6bf055188aa4ea"
dependencies = [
"ring",
"untrusted",
]
[[package]] [[package]]
name = "winapi" name = "winapi"
version = "0.3.9" version = "0.3.9"

1635
Cargo.nix

File diff suppressed because it is too large Load diff

View file

@ -35,7 +35,7 @@ tokio = { version = "1.0", default-features = false, features = ["rt", "rt-multi
http = "0.2" http = "0.2"
httpdate = "0.3" httpdate = "0.3"
http-range = "0.1" http-range = "0.1"
hyper = "0.14" hyper = { version = "0.14", features = ["server", "http1", "runtime", "tcp", "stream"] }
percent-encoding = "2.1.0" percent-encoding = "2.1.0"
roxmltree = "0.14" roxmltree = "0.14"
serde = { version = "1.0", features = ["derive"] } serde = { version = "1.0", features = ["derive"] }

View file

@ -82,7 +82,9 @@ impl Error {
match self { match self {
Error::NotFound => StatusCode::NOT_FOUND, Error::NotFound => StatusCode::NOT_FOUND,
Error::Forbidden(_) => StatusCode::FORBIDDEN, Error::Forbidden(_) => StatusCode::FORBIDDEN,
Error::InternalError(GarageError::Rpc(_)) => StatusCode::SERVICE_UNAVAILABLE, Error::InternalError(
GarageError::Timeout | GarageError::RemoteError(_) | GarageError::TooManyErrors(_),
) => StatusCode::SERVICE_UNAVAILABLE,
Error::InternalError(_) | Error::Hyper(_) | Error::Http(_) => { Error::InternalError(_) | Error::Hyper(_) | Error::Http(_) => {
StatusCode::INTERNAL_SERVER_ERROR StatusCode::INTERNAL_SERVER_ERROR
} }
@ -95,7 +97,9 @@ impl Error {
Error::NotFound => "NoSuchKey", Error::NotFound => "NoSuchKey",
Error::Forbidden(_) => "AccessDenied", Error::Forbidden(_) => "AccessDenied",
Error::AuthorizationHeaderMalformed(_) => "AuthorizationHeaderMalformed", Error::AuthorizationHeaderMalformed(_) => "AuthorizationHeaderMalformed",
Error::InternalError(GarageError::Rpc(_)) => "ServiceUnavailable", Error::InternalError(
GarageError::Timeout | GarageError::RemoteError(_) | GarageError::TooManyErrors(_),
) => "ServiceUnavailable",
Error::InternalError(_) | Error::Hyper(_) | Error::Http(_) => "InternalError", Error::InternalError(_) | Error::Hyper(_) | Error::Http(_) => "InternalError",
_ => "InvalidRequest", _ => "InvalidRequest",
} }

View file

@ -31,15 +31,14 @@ pub enum AdminRpc {
// Replies // Replies
Ok(String), Ok(String),
Error(String),
BucketList(Vec<String>), BucketList(Vec<String>),
BucketInfo(Bucket), BucketInfo(Bucket),
KeyList(Vec<(String, String)>), KeyList(Vec<(String, String)>),
KeyInfo(Key), KeyInfo(Key),
} }
impl Message for AdminRpc { impl Rpc for AdminRpc {
type Response = AdminRpc; type Response = Result<AdminRpc, Error>;
} }
pub struct AdminRpcHandler { pub struct AdminRpcHandler {
@ -341,17 +340,20 @@ impl AdminRpcHandler {
let mut failures = vec![]; let mut failures = vec![];
let ring = self.garage.system.ring.borrow().clone(); let ring = self.garage.system.ring.borrow().clone();
for node in ring.config.members.keys() { for node in ring.config.members.keys() {
let node = NodeID::from_slice(node.as_slice()).unwrap(); let node = (*node).into();
if self let resp = self
.endpoint .endpoint
.call( .call(
&node, &node,
&AdminRpc::LaunchRepair(opt_to_send.clone()), &AdminRpc::LaunchRepair(opt_to_send.clone()),
PRIO_NORMAL, PRIO_NORMAL,
) )
.await .await;
.is_err() let is_err = match resp {
{ Ok(Ok(_)) => false,
_ => true,
};
if is_err {
failures.push(node); failures.push(node);
} }
} }
@ -386,17 +388,17 @@ impl AdminRpcHandler {
let ring = self.garage.system.ring.borrow().clone(); let ring = self.garage.system.ring.borrow().clone();
for node in ring.config.members.keys() { for node in ring.config.members.keys() {
let node = NodeID::from_slice(node.as_slice()).unwrap();
let mut opt = opt.clone(); let mut opt = opt.clone();
opt.all_nodes = false; opt.all_nodes = false;
writeln!(&mut ret, "\n======================").unwrap(); writeln!(&mut ret, "\n======================").unwrap();
writeln!(&mut ret, "Stats for node {:?}:", node).unwrap(); writeln!(&mut ret, "Stats for node {:?}:", node).unwrap();
let node_id = (*node).into();
match self match self
.endpoint .endpoint
.call(&node, &AdminRpc::Stats(opt), PRIO_NORMAL) .call(&node_id, &AdminRpc::Stats(opt), PRIO_NORMAL)
.await .await?
{ {
Ok(AdminRpc::Ok(s)) => writeln!(&mut ret, "{}", s).unwrap(), Ok(AdminRpc::Ok(s)) => writeln!(&mut ret, "{}", s).unwrap(),
Ok(x) => writeln!(&mut ret, "Bad answer: {:?}", x).unwrap(), Ok(x) => writeln!(&mut ret, "Bad answer: {:?}", x).unwrap(),
@ -486,9 +488,16 @@ impl AdminRpcHandler {
.unwrap(); .unwrap();
writeln!(to, " GC todo queue length: {}", t.data.gc_todo_len()).unwrap(); writeln!(to, " GC todo queue length: {}", t.data.gc_todo_len()).unwrap();
} }
}
async fn handle_rpc(self: &Arc<Self>, msg: &AdminRpc) -> Result<AdminRpc, Error> { #[async_trait]
match msg { impl EndpointHandler<AdminRpc> for AdminRpcHandler {
async fn handle(
self: &Arc<Self>,
message: &AdminRpc,
_from: NodeID,
) -> Result<AdminRpc, Error> {
match message {
AdminRpc::BucketOperation(bo) => self.handle_bucket_cmd(bo).await, AdminRpc::BucketOperation(bo) => self.handle_bucket_cmd(bo).await,
AdminRpc::KeyOperation(ko) => self.handle_key_cmd(ko).await, AdminRpc::KeyOperation(ko) => self.handle_key_cmd(ko).await,
AdminRpc::LaunchRepair(opt) => self.handle_launch_repair(opt.clone()).await, AdminRpc::LaunchRepair(opt) => self.handle_launch_repair(opt.clone()).await,
@ -497,12 +506,3 @@ impl AdminRpcHandler {
} }
} }
} }
#[async_trait]
impl EndpointHandler<AdminRpc> for AdminRpcHandler {
async fn handle(self: &Arc<Self>, message: &AdminRpc, _from: NodeID) -> AdminRpc {
self.handle_rpc(message)
.await
.unwrap_or_else(|e| AdminRpc::Error(format!("{}", e)))
}
}

View file

@ -1,5 +1,4 @@
//use std::cmp::max; use std::collections::HashSet;
//use std::collections::HashSet;
use std::path::PathBuf; use std::path::PathBuf;
use serde::{Deserialize, Serialize}; use serde::{Deserialize, Serialize};
@ -7,7 +6,7 @@ use structopt::StructOpt;
use garage_util::data::Uuid; use garage_util::data::Uuid;
use garage_util::error::Error; use garage_util::error::Error;
//use garage_util::time::*; use garage_util::time::*;
use garage_rpc::ring::*; use garage_rpc::ring::*;
use garage_rpc::system::*; use garage_rpc::system::*;
@ -58,6 +57,10 @@ pub struct ServerOpt {
#[derive(StructOpt, Debug)] #[derive(StructOpt, Debug)]
pub enum NodeOperation { pub enum NodeOperation {
/// Connect to Garage node that is currently isolated from the system
#[structopt(name = "connect")]
Connect(ConnectNodeOpt),
/// Configure Garage node /// Configure Garage node
#[structopt(name = "configure")] #[structopt(name = "configure")]
Configure(ConfigureNodeOpt), Configure(ConfigureNodeOpt),
@ -67,6 +70,13 @@ pub enum NodeOperation {
Remove(RemoveNodeOpt), Remove(RemoveNodeOpt),
} }
// Arguments of the `connect` node operation (`NodeOperation::Connect`).
// NOTE: kept as plain `//` comments on purpose; structopt turns `///` doc
// comments into CLI help text, so adding one here would change the help output.
#[derive(StructOpt, Debug)]
pub struct ConnectNodeOpt {
/// Node public key and address, in the format:
/// `<public key hexadecimal>@<ip or hostname>:<port>`
// Forwarded unchanged as the payload of `SystemRpc::Connect` by `cmd_connect`.
node: String,
}
#[derive(StructOpt, Debug)] #[derive(StructOpt, Debug)]
pub struct ConfigureNodeOpt { pub struct ConfigureNodeOpt {
/// Node to configure (prefix of hexadecimal node id) /// Node to configure (prefix of hexadecimal node id)
@ -303,6 +313,9 @@ pub async fn cli_cmd(
) -> Result<(), Error> { ) -> Result<(), Error> {
match cmd { match cmd {
Command::Status => cmd_status(system_rpc_endpoint, rpc_host).await, Command::Status => cmd_status(system_rpc_endpoint, rpc_host).await,
Command::Node(NodeOperation::Connect(connect_opt)) => {
cmd_connect(system_rpc_endpoint, rpc_host, connect_opt).await
}
Command::Node(NodeOperation::Configure(configure_opt)) => { Command::Node(NodeOperation::Configure(configure_opt)) => {
cmd_configure(system_rpc_endpoint, rpc_host, configure_opt).await cmd_configure(system_rpc_endpoint, rpc_host, configure_opt).await
} }
@ -326,142 +339,96 @@ pub async fn cli_cmd(
pub async fn cmd_status(rpc_cli: &Endpoint<SystemRpc, ()>, rpc_host: NodeID) -> Result<(), Error> { pub async fn cmd_status(rpc_cli: &Endpoint<SystemRpc, ()>, rpc_host: NodeID) -> Result<(), Error> {
let status = match rpc_cli let status = match rpc_cli
.call(&rpc_host, &SystemRpc::GetKnownNodes, PRIO_NORMAL) .call(&rpc_host, &SystemRpc::GetKnownNodes, PRIO_NORMAL)
.await? .await??
{ {
SystemRpc::ReturnKnownNodes(nodes) => nodes, SystemRpc::ReturnKnownNodes(nodes) => nodes,
resp => return Err(Error::Message(format!("Invalid RPC response: {:?}", resp))), resp => return Err(Error::Message(format!("Invalid RPC response: {:?}", resp))),
}; };
let config = match rpc_cli let config = match rpc_cli
.call(&rpc_host, &SystemRpc::PullConfig, PRIO_NORMAL) .call(&rpc_host, &SystemRpc::PullConfig, PRIO_NORMAL)
.await? .await??
{ {
SystemRpc::AdvertiseConfig(cfg) => cfg, SystemRpc::AdvertiseConfig(cfg) => cfg,
resp => return Err(Error::Message(format!("Invalid RPC response: {:?}", resp))), resp => return Err(Error::Message(format!("Invalid RPC response: {:?}", resp))),
}; };
println!("STATUS:");
for node in status {
println!("{:?}", node);
}
println!("CONFIG: (v{})", config.version);
for (id, node) in config.members {
println!("{} {:?}", hex::encode(id.as_slice()), node);
}
/* TODO
let (hostname_len, addr_len, tag_len, zone_len) = status
.iter()
.map(|(id, addr, _)| (addr, config.members.get(&adv.id)))
.map(|(addr, cfg)| {
(
8,
addr.to_string().len(),
cfg.map(|c| c.tag.len()).unwrap_or(0),
cfg.map(|c| c.zone.len()).unwrap_or(0),
)
})
.fold((0, 0, 0, 0), |(h, a, t, z), (mh, ma, mt, mz)| {
(max(h, mh), max(a, ma), max(t, mt), max(z, mz))
});
println!("Healthy nodes:"); println!("Healthy nodes:");
for (id, addr, _) in status.iter().filter(|(id, addr, is_up)| is_up) { let mut healthy_nodes = vec!["ID\tHostname\tAddress\tTag\tZone\tCapacity".to_string()];
for adv in status.iter().filter(|adv| adv.is_up) {
if let Some(cfg) = config.members.get(&adv.id) { if let Some(cfg) = config.members.get(&adv.id) {
println!( healthy_nodes.push(format!(
"{id:?}\t{host}{h_pad}\t{addr}{a_pad}\t[{tag}]{t_pad}\t{zone}{z_pad}\t{capacity}", "{id:?}\t{host}\t{addr}\t[{tag}]\t{zone}\t{capacity}",
id = id, id = adv.id,
host = "", host = adv.status.hostname,
addr = addr, addr = adv.addr,
tag = cfg.tag, tag = cfg.tag,
zone = cfg.zone, zone = cfg.zone,
capacity = cfg.capacity_string(), capacity = cfg.capacity_string(),
h_pad = " ".repeat(hostname_len - adv.state_info.hostname.len()), ));
a_pad = " ".repeat(addr_len - adv.addr.to_string().len()),
t_pad = " ".repeat(tag_len - cfg.tag.len()),
z_pad = " ".repeat(zone_len - cfg.zone.len()),
);
} else { } else {
println!( healthy_nodes.push(format!(
"{id:?}\t{h}{h_pad}\t{addr}{a_pad}\tUNCONFIGURED/REMOVED", "{id:?}\t{h}\t{addr}\tUNCONFIGURED/REMOVED",
id = id, id = adv.id,
h = "", h = adv.status.hostname,
addr = addr, addr = adv.addr,
h_pad = " ".repeat(hostname_len - "".len()), ));
a_pad = " ".repeat(addr_len - addr.to_string().len()),
);
} }
} }
format_table(healthy_nodes);
let status_keys = status.iter().map(|(id, _, _)| id).collect::<HashSet<_>>(); let status_keys = status.iter().map(|adv| adv.id).collect::<HashSet<_>>();
let failure_case_1 = status.iter().any(|(_, _, is_up)| !is_up); let failure_case_1 = status.iter().any(|adv| !adv.is_up);
let failure_case_2 = config let failure_case_2 = config
.members .members
.iter() .iter()
.any(|(id, _)| !status_keys.contains(id)); .any(|(id, _)| !status_keys.contains(id));
if failure_case_1 || failure_case_2 { if failure_case_1 || failure_case_2 {
println!("\nFailed nodes:"); println!("\nFailed nodes:");
for (id, addr) in status.iter().filter(|(_, _, is_up)| !is_up) { let mut failed_nodes = vec!["ID\tHostname\tAddress\tTag\tZone\tCapacity\tLast seen".to_string()];
if let Some(cfg) = config.members.get(&id) { for adv in status.iter().filter(|adv| !adv.is_up) {
println!( if let Some(cfg) = config.members.get(&adv.id) {

If we are in the `failure_case_2` situation, the failed nodes section is displayed but it is empty.

It can be reproduced by spawning 2 Garage instances and then connecting the first instance to the second one.

e.g.:

$ ./result/bin/garage -c /tmp/garage.toml status
==== HEALTHY NODES ====
ID                 Hostname     Address                  Tag               Zone  Capacity
2f75a8a915f9cceb…  lheureduthe  127.0.0.1:3911           NO ROLE ASSIGNED
c393f0ab78d8ddcd…  lheureduthe  [::ffff:127.0.0.1]:3901  NO ROLE ASSIGNED

$ ./result/bin/garage status
==== HEALTHY NODES ====
ID                 Hostname     Address         Tag               Zone  Capacity
c393f0ab78d8ddcd…  lheureduthe  127.0.0.1:3901  NO ROLE ASSIGNED

==== FAILED NODES ====
ID  Hostname  Address  Tag  Zone  Capacity  Last seen
If we are in the `failure_case_2` situation, the failed nodes section is displayed but it is empty. It can be reproduced by spawning 2 Garage instances and then connecting the first instance to the second one. e.g.: ``` $ ./result/bin/garage -c /tmp/garage.toml status ==== HEALTHY NODES ==== ID Hostname Address Tag Zone Capacity 2f75a8a915f9cceb… lheureduthe 127.0.0.1:3911 NO ROLE ASSIGNED c393f0ab78d8ddcd… lheureduthe [::ffff:127.0.0.1]:3901 NO ROLE ASSIGNED $ ./result/bin/garage status ==== HEALTHY NODES ==== ID Hostname Address Tag Zone Capacity c393f0ab78d8ddcd… lheureduthe 127.0.0.1:3901 NO ROLE ASSIGNED ==== FAILED NODES ==== ID Hostname Address Tag Zone Capacity Last seen ```
"{id:?}\t{host}{h_pad}\t{addr}{a_pad}\t[{tag}]{t_pad}\t{zone}{z_pad}\t{capacity}\tlast seen: {last_seen}s ago", failed_nodes.push(format!(
id=id, "{id:?}\t{host}\t{addr}\t[{tag}]\t{zone}\t{capacity}\t{last_seen}s ago",
host="", id = adv.id,
addr=addr, host = adv.status.hostname,
tag=cfg.tag, addr = adv.addr,
zone=cfg.zone, tag = cfg.tag,
capacity=cfg.capacity_string(), zone = cfg.zone,
last_seen=(now_msec() - 0) / 1000, capacity = cfg.capacity_string(),
h_pad=" ".repeat(hostname_len - "".len()), last_seen = (now_msec() - 0) / 1000,
a_pad=" ".repeat(addr_len - addr.to_string().len()), ));
t_pad=" ".repeat(tag_len - cfg.tag.len()),
z_pad=" ".repeat(zone_len - cfg.zone.len()),
);
} }
} }
let (tag_len, zone_len) = config
.members
.iter()
.filter(|(&id, _)| !status.iter().any(|(xid, _, _)| xid == id))
.map(|(_, cfg)| (cfg.tag.len(), cfg.zone.len()))
.fold((0, 0), |(t, z), (mt, mz)| (max(t, mt), max(z, mz)));
for (id, cfg) in config.members.iter() { for (id, cfg) in config.members.iter() {
if !status.iter().any(|(xid, _, _)| xid == *id) { if !status.iter().any(|adv| adv.id == *id) {
println!( failed_nodes.push(format!(
"{id:?}\t{tag}{t_pad}\t{zone}{z_pad}\t{capacity}\tnever seen", "{id:?}\t??\t??\t[{tag}]\t{zone}\t{capacity}\tnever seen",
id = id, id = id,
tag = cfg.tag, tag = cfg.tag,
zone = cfg.zone, zone = cfg.zone,
capacity = cfg.capacity_string(), capacity = cfg.capacity_string(),
t_pad = " ".repeat(tag_len - cfg.tag.len()), ));
z_pad = " ".repeat(zone_len - cfg.zone.len()),
);
} }
} }
format_table(failed_nodes);
} }
*/
Ok(()) Ok(())
} }
pub fn find_matching_node( pub async fn cmd_connect(
cand: impl std::iter::Iterator<Item = Uuid>, rpc_cli: &Endpoint<SystemRpc, ()>,
pattern: &str, rpc_host: NodeID,
) -> Result<Uuid, Error> { args: ConnectNodeOpt,
let mut candidates = vec![]; ) -> Result<(), Error> {
for c in cand { match rpc_cli.call(&rpc_host, &SystemRpc::Connect(args.node), PRIO_NORMAL).await?? {
if hex::encode(&c).starts_with(&pattern) { SystemRpc::Ok => {
candidates.push(c); println!("Success.");
Ok(())
}
r => {
Err(Error::BadRpc(format!("Unexpected response: {:?}", r)))
} }
}
if candidates.len() != 1 {
Err(Error::Message(format!(
"{} nodes match '{}'",
candidates.len(),
pattern,
)))
} else {
Ok(candidates[0])
} }
} }
@ -472,22 +439,17 @@ pub async fn cmd_configure(
) -> Result<(), Error> { ) -> Result<(), Error> {
let status = match rpc_cli let status = match rpc_cli
.call(&rpc_host, &SystemRpc::GetKnownNodes, PRIO_NORMAL) .call(&rpc_host, &SystemRpc::GetKnownNodes, PRIO_NORMAL)
.await? .await??
{ {
SystemRpc::ReturnKnownNodes(nodes) => nodes, SystemRpc::ReturnKnownNodes(nodes) => nodes,
resp => return Err(Error::Message(format!("Invalid RPC response: {:?}", resp))), resp => return Err(Error::Message(format!("Invalid RPC response: {:?}", resp))),
}; };
let added_node = find_matching_node( let added_node = find_matching_node(status.iter().map(|adv| adv.id), &args.node_id)?;
status
.iter()
.map(|(id, _, _)| Uuid::try_from(id.as_ref()).unwrap()),
&args.node_id,
)?;
let mut config = match rpc_cli let mut config = match rpc_cli
.call(&rpc_host, &SystemRpc::PullConfig, PRIO_NORMAL) .call(&rpc_host, &SystemRpc::PullConfig, PRIO_NORMAL)
.await? .await??
{ {
SystemRpc::AdvertiseConfig(cfg) => cfg, SystemRpc::AdvertiseConfig(cfg) => cfg,
resp => return Err(Error::Message(format!("Invalid RPC response: {:?}", resp))), resp => return Err(Error::Message(format!("Invalid RPC response: {:?}", resp))),
@ -544,7 +506,7 @@ pub async fn cmd_configure(
rpc_cli rpc_cli
.call(&rpc_host, &SystemRpc::AdvertiseConfig(config), PRIO_NORMAL) .call(&rpc_host, &SystemRpc::AdvertiseConfig(config), PRIO_NORMAL)
.await?; .await??;
Ok(()) Ok(())
} }
@ -555,7 +517,7 @@ pub async fn cmd_remove(
) -> Result<(), Error> { ) -> Result<(), Error> {
let mut config = match rpc_cli let mut config = match rpc_cli
.call(&rpc_host, &SystemRpc::PullConfig, PRIO_NORMAL) .call(&rpc_host, &SystemRpc::PullConfig, PRIO_NORMAL)
.await? .await??
{ {
SystemRpc::AdvertiseConfig(cfg) => cfg, SystemRpc::AdvertiseConfig(cfg) => cfg,
resp => return Err(Error::Message(format!("Invalid RPC response: {:?}", resp))), resp => return Err(Error::Message(format!("Invalid RPC response: {:?}", resp))),
@ -575,7 +537,7 @@ pub async fn cmd_remove(
rpc_cli rpc_cli
.call(&rpc_host, &SystemRpc::AdvertiseConfig(config), PRIO_NORMAL) .call(&rpc_host, &SystemRpc::AdvertiseConfig(config), PRIO_NORMAL)
.await?; .await??;
Ok(()) Ok(())
} }
@ -584,7 +546,7 @@ pub async fn cmd_admin(
rpc_host: NodeID, rpc_host: NodeID,
args: AdminRpc, args: AdminRpc,
) -> Result<(), Error> { ) -> Result<(), Error> {
match rpc_cli.call(&rpc_host, &args, PRIO_NORMAL).await? { match rpc_cli.call(&rpc_host, &args, PRIO_NORMAL).await?? {
AdminRpc::Ok(msg) => { AdminRpc::Ok(msg) => {
println!("{}", msg); println!("{}", msg);
} }
@ -613,6 +575,8 @@ pub async fn cmd_admin(
Ok(()) Ok(())
} }
// --- Utility functions ----
fn print_key_info(key: &Key) { fn print_key_info(key: &Key) {
println!("Key name: {}", key.name.get()); println!("Key name: {}", key.name.get());
println!("Key ID: {}", key.key_id); println!("Key ID: {}", key.key_id);
@ -640,3 +604,54 @@ fn print_bucket_info(bucket: &Bucket) {
} }
}; };
} }
/// Prints tab-separated rows to stdout as a table with aligned columns.
///
/// Each entry of `data` is one row whose cells are separated by `'\t'`.
/// Every cell except the last one of a row is padded with spaces up to the
/// width of the widest cell of its column (measured in `char`s), followed by
/// two more spaces. The last cell of a row is written unpadded, then a newline.
fn format_table(data: Vec<String>) {
    let rows: Vec<Vec<&str>> = data
        .iter()
        .map(|line| line.split('\t').collect())
        .collect();

    // Widest cell (in chars) of each column, across all rows.
    let n_cols = rows.iter().map(Vec::len).max().unwrap_or(0);
    let mut widths = vec![0usize; n_cols];
    for row in &rows {
        for (width, cell) in widths.iter_mut().zip(row) {
            *width = (*width).max(cell.chars().count());
        }
    }

    let mut rendered = String::new();
    for row in &rows {
        // `split` always yields at least one item, so each row has a last cell.
        if let Some((last, padded)) = row.split_last() {
            for (cell, width) in padded.iter().zip(&widths) {
                rendered.push_str(cell);
                let fill = width - cell.chars().count() + 2;
                rendered.extend(std::iter::repeat(' ').take(fill));
            }
            rendered.push_str(last);
        }
        rendered.push('\n');
    }

    print!("{}", rendered);
}
pub fn find_matching_node(
cand: impl std::iter::Iterator<Item = Uuid>,
pattern: &str,
) -> Result<Uuid, Error> {
let mut candidates = vec![];
for c in cand {
if hex::encode(&c).starts_with(&pattern) {
candidates.push(c);
}
}
if candidates.len() != 1 {
Err(Error::Message(format!(
"{} nodes match '{}'",
candidates.len(),
pattern,
)))
} else {
Ok(candidates[0])
}
}

View file

@ -9,8 +9,6 @@ mod cli;
mod repair; mod repair;
mod server; mod server;
use std::net::SocketAddr;
use structopt::StructOpt; use structopt::StructOpt;
use netapp::util::parse_peer_addr; use netapp::util::parse_peer_addr;
@ -43,6 +41,7 @@ struct Opt {
#[tokio::main] #[tokio::main]
async fn main() { async fn main() {
pretty_env_logger::init(); pretty_env_logger::init();
sodiumoxide::init().expect("Unable to init sodiumoxide");
let opt = Opt::from_args(); let opt = Opt::from_args();

View file

@ -71,8 +71,14 @@ pub async fn run_server(config_file: PathBuf) -> Result<(), Error> {
// Remove RPC handlers for system to break reference cycles // Remove RPC handlers for system to break reference cycles
garage.system.netapp.drop_all_handlers(); garage.system.netapp.drop_all_handlers();
// Await for last parts to end // Await for netapp RPC system to end
run_system.await?; run_system.await?;
// Break last reference cycles so that stuff can terminate properly
garage.break_reference_cycles();
drop(garage);
// Await for all background tasks to end
await_background_done.await?; await_background_done.await?;
info!("Cleaning up..."); info!("Cleaning up...");

View file

@ -38,7 +38,6 @@ const RESYNC_RETRY_TIMEOUT: Duration = Duration::from_secs(10);
#[derive(Debug, Serialize, Deserialize)] #[derive(Debug, Serialize, Deserialize)]
pub enum BlockRpc { pub enum BlockRpc {
Ok, Ok,
Error(String),
/// Message to ask for a block of data, by hash /// Message to ask for a block of data, by hash
GetBlock(Hash), GetBlock(Hash),
/// Message to send a block of data, either because requested, or for first delivery of new /// Message to send a block of data, either because requested, or for first delivery of new
@ -61,8 +60,8 @@ pub struct PutBlockMessage {
pub data: Vec<u8>, pub data: Vec<u8>,
} }
impl Message for BlockRpc { impl Rpc for BlockRpc {
type Response = BlockRpc; type Response = Result<BlockRpc, Error>;
} }
/// The block manager, handling block exchange between nodes, and block storage on local node /// The block manager, handling block exchange between nodes, and block storage on local node
@ -117,15 +116,6 @@ impl BlockManager {
block_manager block_manager
} }
async fn handle_rpc(self: Arc<Self>, msg: &BlockRpc) -> Result<BlockRpc, Error> {
match msg {
BlockRpc::PutBlock(m) => self.write_block(&m.hash, &m.data).await,
BlockRpc::GetBlock(h) => self.read_block(h).await,
BlockRpc::NeedBlockQuery(h) => self.need_block(h).await.map(BlockRpc::NeedBlockReply),
_ => Err(Error::BadRpc("Unexpected RPC message".to_string())),
}
}
pub fn spawn_background_worker(self: Arc<Self>) { pub fn spawn_background_worker(self: Arc<Self>) {
// Launch 2 simultaneous workers for background resync loop preprocessing <= TODO actually this // Launch 2 simultaneous workers for background resync loop preprocessing <= TODO actually this
// launches only one worker with current value of BACKGROUND_WORKERS // launches only one worker with current value of BACKGROUND_WORKERS
@ -532,11 +522,17 @@ impl BlockManager {
#[async_trait] #[async_trait]
impl EndpointHandler<BlockRpc> for BlockManager { impl EndpointHandler<BlockRpc> for BlockManager {
async fn handle(self: &Arc<Self>, message: &BlockRpc, _from: NodeID) -> BlockRpc { async fn handle(
self.clone() self: &Arc<Self>,
.handle_rpc(message) message: &BlockRpc,
.await _from: NodeID,
.unwrap_or_else(|e| BlockRpc::Error(format!("{}", e))) ) -> Result<BlockRpc, Error> {
match message {
BlockRpc::PutBlock(m) => self.write_block(&m.hash, &m.data).await,
BlockRpc::GetBlock(h) => self.read_block(h).await,
BlockRpc::NeedBlockQuery(h) => self.need_block(h).await.map(BlockRpc::NeedBlockReply),
_ => Err(Error::BadRpc("Unexpected RPC message".to_string())),
}
} }
} }

View file

@ -61,6 +61,7 @@ impl Garage {
background.clone(), background.clone(),
replication_mode.replication_factor(), replication_mode.replication_factor(),
config.rpc_bind_addr, config.rpc_bind_addr,
config.rpc_public_addr,
config.bootstrap_peers.clone(), config.bootstrap_peers.clone(),
config.consul_host.clone(), config.consul_host.clone(),
config.consul_service_name.clone(), config.consul_service_name.clone(),
@ -162,4 +163,9 @@ impl Garage {
garage garage
} }
/// Use this for shutdown
pub fn break_reference_cycles(&self) {
self.block_manager.garage.swap(None);
}
} }

View file

@ -15,8 +15,6 @@ path = "lib.rs"
[dependencies] [dependencies]
garage_util = { version = "0.4.0", path = "../util" } garage_util = { version = "0.4.0", path = "../util" }
garage_rpc_021 = { package = "garage_rpc", version = "0.2.1" }
arc-swap = "1.0" arc-swap = "1.0"
bytes = "1.0" bytes = "1.0"
gethostname = "0.2" gethostname = "0.2"
@ -36,5 +34,5 @@ tokio = { version = "1.0", default-features = false, features = ["rt", "rt-multi
tokio-stream = { version = "0.1", features = ["net"] } tokio-stream = { version = "0.1", features = ["net"] }
netapp = { version = "0.3.0", git = "https://git.deuxfleurs.fr/lx/netapp" } netapp = { version = "0.3.0", git = "https://git.deuxfleurs.fr/lx/netapp" }
hyper = "0.14" hyper = { version = "0.14", features = ["client", "http1", "runtime", "tcp"] }

View file

@ -1,24 +1,31 @@
use std::collections::HashMap;
use std::net::{IpAddr, SocketAddr}; use std::net::{IpAddr, SocketAddr};
use hyper::client::Client; use hyper::client::Client;
use hyper::StatusCode; use hyper::StatusCode;
use hyper::{Body, Method, Request}; use hyper::{Body, Method, Request};
use serde::Deserialize; use serde::{Deserialize, Serialize};
use netapp::NodeID;
use garage_util::error::Error; use garage_util::error::Error;
#[derive(Deserialize, Clone)] // ---- READING FROM CONSUL CATALOG ----
struct ConsulEntry {
#[serde(alias = "Address")] #[derive(Deserialize, Clone, Debug)]
struct ConsulQueryEntry {
#[serde(rename = "Address")]
address: String, address: String,
#[serde(alias = "ServicePort")] #[serde(rename = "ServicePort")]
service_port: u16, service_port: u16,
#[serde(rename = "NodeMeta")]
node_meta: HashMap<String, String>,
} }
pub async fn get_consul_nodes( pub async fn get_consul_nodes(
consul_host: &str, consul_host: &str,
consul_service_name: &str, consul_service_name: &str,
) -> Result<Vec<SocketAddr>, Error> { ) -> Result<Vec<(NodeID, SocketAddr)>, Error> {
let url = format!( let url = format!(
"http://{}/v1/catalog/service/{}", "http://{}/v1/catalog/service/{}",
consul_host, consul_service_name consul_host, consul_service_name
@ -36,17 +43,111 @@ pub async fn get_consul_nodes(
} }
let body = hyper::body::to_bytes(resp.into_body()).await?; let body = hyper::body::to_bytes(resp.into_body()).await?;
let entries = serde_json::from_slice::<Vec<ConsulEntry>>(body.as_ref())?; let entries = serde_json::from_slice::<Vec<ConsulQueryEntry>>(body.as_ref())?;
let mut ret = vec![]; let mut ret = vec![];
for ent in entries { for ent in entries {
let ip = ent let ip = ent.address.parse::<IpAddr>().ok();
.address let pubkey = ent
.parse::<IpAddr>() .node_meta
.map_err(|e| Error::Message(format!("Could not parse IP address: {}", e)))?; .get("pubkey")
ret.push(SocketAddr::new(ip, ent.service_port)); .map(|k| hex::decode(&k).ok())
.flatten()
.map(|k| NodeID::from_slice(&k[..]))
.flatten();
if let (Some(ip), Some(pubkey)) = (ip, pubkey) {
ret.push((pubkey, SocketAddr::new(ip, ent.service_port)));
} else {
warn!(
"Could not process node spec from Consul: {:?} (invalid IP or public key)",
ent
);
}
} }
debug!("Got nodes from Consul: {:?}", ret); debug!("Got nodes from Consul: {:?}", ret);
Ok(ret) Ok(ret)
} }
// ---- PUBLISHING TO CONSUL CATALOG ----
#[derive(Serialize, Clone, Debug)]
struct ConsulPublishEntry {
#[serde(rename = "Node")]
node: String,
#[serde(rename = "Address")]
address: IpAddr,
#[serde(rename = "NodeMeta")]
node_meta: HashMap<String, String>,
#[serde(rename = "Service")]
service: ConsulPublishService,
}
#[derive(Serialize, Clone, Debug)]
struct ConsulPublishService {
#[serde(rename = "ID")]
service_id: String,
#[serde(rename = "Service")]
service_name: String,
#[serde(rename = "Tags")]
tags: Vec<String>,
#[serde(rename = "Address")]
address: IpAddr,
#[serde(rename = "Port")]
port: u16,
}
pub async fn publish_consul_service(
consul_host: &str,
consul_service_name: &str,
node_id: NodeID,
hostname: &str,
rpc_public_addr: SocketAddr,
) -> Result<(), Error> {
let node = format!("garage:{}", hex::encode(&node_id[..8]));
let advertisment = ConsulPublishEntry {
node: node.clone(),
address: rpc_public_addr.ip(),
node_meta: [
("pubkey".to_string(), hex::encode(node_id)),
("hostname".to_string(), hostname.to_string()),
]
.iter()
.cloned()
.collect(),
service: ConsulPublishService {
service_id: node.clone(),
service_name: consul_service_name.to_string(),
tags: vec!["advertised-by-garage".into(), hostname.into()],
address: rpc_public_addr.ip(),
port: rpc_public_addr.port(),
},
};
let url = format!("http://{}/v1/catalog/register", consul_host);
let req_body = serde_json::to_string(&advertisment)?;
debug!("Request body for consul adv: {}", req_body);
let req = Request::builder()
.uri(url)
.method(Method::PUT)
.body(Body::from(req_body))?;
let client = Client::new();
let resp = client.request(req).await?;
debug!("Response of advertising to Consul: {:?}", resp);
let resp_code = resp.status();
debug!(
"{}",
std::str::from_utf8(&hyper::body::to_bytes(resp.into_body()).await?)
.unwrap_or("<invalid utf8>")
);
if resp_code != StatusCode::OK {
return Err(Error::Message(format!("HTTP error {}", resp_code)));
}
Ok(())
}

View file

@ -3,8 +3,6 @@
use std::collections::{HashMap, HashSet}; use std::collections::{HashMap, HashSet};
use std::convert::TryInto; use std::convert::TryInto;
use netapp::NodeID;
use serde::{Deserialize, Serialize}; use serde::{Deserialize, Serialize};
use garage_util::data::*; use garage_util::data::*;
@ -40,31 +38,6 @@ impl NetworkConfig {
version: 0, version: 0,
} }
} }
pub(crate) fn migrate_from_021(old: garage_rpc_021::ring::NetworkConfig) -> Self {
let members = old
.members
.into_iter()
.map(|(id, conf)| {
(
Hash::try_from(id.as_slice()).unwrap(),
NetworkConfigEntry {
zone: conf.datacenter,
capacity: if conf.capacity == 0 {
None
} else {
Some(conf.capacity)
},
tag: conf.tag,
},
)
})
.collect();
Self {
members,
version: old.version,
}
}
} }
/// The overall configuration of one (possibly remote) node /// The overall configuration of one (possibly remote) node
@ -100,7 +73,7 @@ pub struct Ring {
pub config: NetworkConfig, pub config: NetworkConfig,
// Internal order of nodes used to make a more compact representation of the ring // Internal order of nodes used to make a more compact representation of the ring
nodes: Vec<NodeID>, nodes: Vec<Uuid>,
// The list of entries in the ring // The list of entries in the ring
ring: Vec<RingEntry>, ring: Vec<RingEntry>,
@ -262,11 +235,6 @@ impl Ring {
}) })
.collect::<Vec<_>>(); .collect::<Vec<_>>();
let nodes = nodes
.iter()
.map(|id| NodeID::from_slice(id.as_slice()).unwrap())
.collect::<Vec<_>>();
Self { Self {
replication_factor, replication_factor,
config, config,
@ -298,7 +266,7 @@ impl Ring {
} }
/// Walk the ring to find the n servers in which data should be replicated /// Walk the ring to find the n servers in which data should be replicated
pub fn get_nodes(&self, position: &Hash, n: usize) -> Vec<NodeID> { pub fn get_nodes(&self, position: &Hash, n: usize) -> Vec<Uuid> {
if self.ring.len() != 1 << PARTITION_BITS { if self.ring.len() != 1 << PARTITION_BITS {
warn!("Ring not yet ready, read/writes will be lost!"); warn!("Ring not yet ready, read/writes will be lost!");
return vec![]; return vec![];

View file

@ -8,13 +8,14 @@ use futures::stream::StreamExt;
use futures_util::future::FutureExt; use futures_util::future::FutureExt;
use tokio::select; use tokio::select;
pub use netapp::endpoint::{Endpoint, EndpointHandler, Message}; pub use netapp::endpoint::{Endpoint, EndpointHandler, Message as Rpc};
use netapp::peering::fullmesh::FullMeshPeeringStrategy; use netapp::peering::fullmesh::FullMeshPeeringStrategy;
pub use netapp::proto::*; pub use netapp::proto::*;
pub use netapp::{NetApp, NodeID}; pub use netapp::{NetApp, NodeID};
use garage_util::background::BackgroundRunner; use garage_util::background::BackgroundRunner;
use garage_util::error::{Error, RpcError}; use garage_util::error::Error;
use garage_util::data::Uuid;
const DEFAULT_TIMEOUT: Duration = Duration::from_secs(10); const DEFAULT_TIMEOUT: Duration = Duration::from_secs(10);
@ -66,46 +67,47 @@ pub struct RpcHelper {
} }
impl RpcHelper { impl RpcHelper {
pub async fn call<M, H>( pub async fn call<M, H, S>(
&self, &self,
endpoint: &Endpoint<M, H>, endpoint: &Endpoint<M, H>,
to: NodeID, to: Uuid,
msg: M, msg: M,
strat: RequestStrategy, strat: RequestStrategy,
) -> Result<M::Response, Error> ) -> Result<S, Error>
where where
M: Message, M: Rpc<Response = Result<S, Error>>,
H: EndpointHandler<M>, H: EndpointHandler<M>,
{ {
self.call_arc(endpoint, to, Arc::new(msg), strat).await self.call_arc(endpoint, to, Arc::new(msg), strat).await
} }
pub async fn call_arc<M, H>( pub async fn call_arc<M, H, S>(
&self, &self,
endpoint: &Endpoint<M, H>, endpoint: &Endpoint<M, H>,
to: NodeID, to: Uuid,
msg: Arc<M>, msg: Arc<M>,
strat: RequestStrategy, strat: RequestStrategy,
) -> Result<M::Response, Error> ) -> Result<S, Error>
where where
M: Message, M: Rpc<Response = Result<S, Error>>,
H: EndpointHandler<M>, H: EndpointHandler<M>,
{ {
let node_id = to.into();
select! { select! {
res = endpoint.call(&to, &msg, strat.rs_priority) => Ok(res?), res = endpoint.call(&node_id, &msg, strat.rs_priority) => Ok(res??),
_ = tokio::time::sleep(strat.rs_timeout) => Err(Error::Rpc(RpcError::Timeout)), _ = tokio::time::sleep(strat.rs_timeout) => Err(Error::Timeout),
} }
} }
pub async fn call_many<M, H>( pub async fn call_many<M, H, S>(
&self, &self,
endpoint: &Endpoint<M, H>, endpoint: &Endpoint<M, H>,
to: &[NodeID], to: &[Uuid],
msg: M, msg: M,
strat: RequestStrategy, strat: RequestStrategy,
) -> Vec<(NodeID, Result<M::Response, Error>)> ) -> Vec<(Uuid, Result<S, Error>)>
where where
M: Message, M: Rpc<Response = Result<S, Error>>,
H: EndpointHandler<M>, H: EndpointHandler<M>,
{ {
let msg = Arc::new(msg); let msg = Arc::new(msg);
@ -120,37 +122,38 @@ impl RpcHelper {
.collect::<Vec<_>>() .collect::<Vec<_>>()
} }
pub async fn broadcast<M, H>( pub async fn broadcast<M, H, S>(
&self, &self,
endpoint: &Endpoint<M, H>, endpoint: &Endpoint<M, H>,
msg: M, msg: M,
strat: RequestStrategy, strat: RequestStrategy,
) -> Vec<(NodeID, Result<M::Response, Error>)> ) -> Vec<(Uuid, Result<S, Error>)>
where where
M: Message, M: Rpc<Response = Result<S, Error>>,
H: EndpointHandler<M>, H: EndpointHandler<M>,
{ {
let to = self let to = self
.fullmesh .fullmesh
.get_peer_list() .get_peer_list()
.iter() .iter()
.map(|p| p.id) .map(|p| p.id.into())
.collect::<Vec<_>>(); .collect::<Vec<_>>();
self.call_many(endpoint, &to[..], msg, strat).await self.call_many(endpoint, &to[..], msg, strat).await
} }
/// Make a RPC call to multiple servers, returning either a Vec of responses, or an error if /// Make a RPC call to multiple servers, returning either a Vec of responses, or an error if
/// strategy could not be respected due to too many errors /// strategy could not be respected due to too many errors
pub async fn try_call_many<M, H>( pub async fn try_call_many<M, H, S>(
&self, &self,
endpoint: &Arc<Endpoint<M, H>>, endpoint: &Arc<Endpoint<M, H>>,
to: &[NodeID], to: &[Uuid],
msg: M, msg: M,
strategy: RequestStrategy, strategy: RequestStrategy,
) -> Result<Vec<M::Response>, Error> ) -> Result<Vec<S>, Error>
where where
M: Message + 'static, M: Rpc<Response = Result<S, Error>> + 'static,
H: EndpointHandler<M> + 'static, H: EndpointHandler<M> + 'static,
S: Send,
{ {
let msg = Arc::new(msg); let msg = Arc::new(msg);
let mut resp_stream = to let mut resp_stream = to
@ -200,7 +203,7 @@ impl RpcHelper {
Ok(results) Ok(results)
} else { } else {
let errors = errors.iter().map(|e| format!("{}", e)).collect::<Vec<_>>(); let errors = errors.iter().map(|e| format!("{}", e)).collect::<Vec<_>>();
Err(Error::from(RpcError::TooManyErrors(errors))) Err(Error::TooManyErrors(errors))
} }
} }
} }

View file

@ -1,8 +1,9 @@
//! Module containing structs related to membership management //! Module containing structs related to membership management
use std::collections::HashMap;
use std::io::{Read, Write}; use std::io::{Read, Write};
use std::net::SocketAddr; use std::net::SocketAddr;
use std::path::{Path, PathBuf}; use std::path::{Path, PathBuf};
use std::sync::Arc; use std::sync::{Arc, RwLock};
use std::time::Duration; use std::time::Duration;
use arc_swap::ArcSwap; use arc_swap::ArcSwap;
@ -14,21 +15,24 @@ use sodiumoxide::crypto::sign::ed25519;
use tokio::sync::watch; use tokio::sync::watch;
use tokio::sync::Mutex; use tokio::sync::Mutex;
use netapp::endpoint::{Endpoint, EndpointHandler, Message}; use netapp::endpoint::{Endpoint, EndpointHandler};
use netapp::peering::fullmesh::FullMeshPeeringStrategy; use netapp::peering::fullmesh::FullMeshPeeringStrategy;
use netapp::proto::*; use netapp::proto::*;
use netapp::{NetApp, NetworkKey, NodeID, NodeKey}; use netapp::{NetApp, NetworkKey, NodeID, NodeKey};
use netapp::util::parse_and_resolve_peer_addr;
use garage_util::background::BackgroundRunner; use garage_util::background::BackgroundRunner;
use garage_util::data::Uuid;
use garage_util::error::Error; use garage_util::error::Error;
use garage_util::persister::Persister; use garage_util::persister::Persister;
//use garage_util::time::*; use garage_util::time::*;
//use crate::consul::get_consul_nodes; use crate::consul::*;
use crate::ring::*; use crate::ring::*;
use crate::rpc_helper::{RequestStrategy, RpcHelper}; use crate::rpc_helper::*;
const DISCOVERY_INTERVAL: Duration = Duration::from_secs(60); const DISCOVERY_INTERVAL: Duration = Duration::from_secs(60);
const STATUS_EXCHANGE_INTERVAL: Duration = Duration::from_secs(10);
const PING_TIMEOUT: Duration = Duration::from_secs(2); const PING_TIMEOUT: Duration = Duration::from_secs(2);
/// RPC endpoint used for calls related to membership /// RPC endpoint used for calls related to membership
@ -39,33 +43,35 @@ pub const SYSTEM_RPC_PATH: &str = "garage_rpc/membership.rs/SystemRpc";
pub enum SystemRpc { pub enum SystemRpc {
/// Response to successfull advertisements /// Response to successfull advertisements
Ok, Ok,
/// Error response /// Request to connect to a specific node (in <pubkey>@<host>:<port> format)
Error(String), Connect(String),
/// Ask other node its config. Answered with AdvertiseConfig /// Ask other node its config. Answered with AdvertiseConfig
PullConfig, PullConfig,
/// Advertise Garage status. Answered with another AdvertiseStatus. /// Advertise Garage status. Answered with another AdvertiseStatus.
/// Exchanged with every node on a regular basis. /// Exchanged with every node on a regular basis.
AdvertiseStatus(StateInfo), AdvertiseStatus(NodeStatus),
/// Advertisement of nodes config. Sent spontanously or in response to PullConfig /// Advertisement of nodes config. Sent spontanously or in response to PullConfig
AdvertiseConfig(NetworkConfig), AdvertiseConfig(NetworkConfig),
/// Get known nodes states /// Get known nodes states
GetKnownNodes, GetKnownNodes,
/// Return known nodes /// Return known nodes
ReturnKnownNodes(Vec<(NodeID, SocketAddr, bool)>), ReturnKnownNodes(Vec<KnownNodeInfo>),
} }
impl Message for SystemRpc { impl Rpc for SystemRpc {
type Response = SystemRpc; type Response = Result<SystemRpc, Error>;
} }
/// This node's membership manager /// This node's membership manager
pub struct System { pub struct System {
/// The id of this node /// The id of this node
pub id: NodeID, pub id: Uuid,
persist_config: Persister<NetworkConfig>, persist_config: Persister<NetworkConfig>,
persist_peer_list: Persister<Vec<(Uuid, SocketAddr)>>,
state_info: ArcSwap<StateInfo>, local_status: ArcSwap<NodeStatus>,
node_status: RwLock<HashMap<Uuid, (u64, NodeStatus)>>,
pub netapp: Arc<NetApp>, pub netapp: Arc<NetApp>,
fullmesh: Arc<FullMeshPeeringStrategy>, fullmesh: Arc<FullMeshPeeringStrategy>,
@ -74,6 +80,7 @@ pub struct System {
system_endpoint: Arc<Endpoint<SystemRpc, System>>, system_endpoint: Arc<Endpoint<SystemRpc, System>>,
rpc_listen_addr: SocketAddr, rpc_listen_addr: SocketAddr,
rpc_public_addr: Option<SocketAddr>,
bootstrap_peers: Vec<(NodeID, SocketAddr)>, bootstrap_peers: Vec<(NodeID, SocketAddr)>,
consul_host: Option<String>, consul_host: Option<String>,
consul_service_name: Option<String>, consul_service_name: Option<String>,
@ -88,7 +95,7 @@ pub struct System {
} }
#[derive(Debug, Clone, Serialize, Deserialize)] #[derive(Debug, Clone, Serialize, Deserialize)]
pub struct StateInfo { pub struct NodeStatus {
/// Hostname of the node /// Hostname of the node
pub hostname: String, pub hostname: String,
/// Replication factor configured on the node /// Replication factor configured on the node
@ -97,26 +104,34 @@ pub struct StateInfo {
pub config_version: u64, pub config_version: u64,
} }
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct KnownNodeInfo {
pub id: Uuid,
pub addr: SocketAddr,
pub is_up: bool,
pub status: NodeStatus,
}
fn gen_node_key(metadata_dir: &Path) -> Result<NodeKey, Error> { fn gen_node_key(metadata_dir: &Path) -> Result<NodeKey, Error> {
let mut id_file = metadata_dir.to_path_buf(); let mut key_file = metadata_dir.to_path_buf();
id_file.push("node_id"); key_file.push("node_key");
if id_file.as_path().exists() { if key_file.as_path().exists() {
let mut f = std::fs::File::open(id_file.as_path())?; let mut f = std::fs::File::open(key_file.as_path())?;
let mut d = vec![]; let mut d = vec![];
f.read_to_end(&mut d)?; f.read_to_end(&mut d)?;
if d.len() != 64 { if d.len() != 64 {
return Err(Error::Message("Corrupt node_id file".to_string())); return Err(Error::Message("Corrupt node_key file".to_string()));
} }
let mut key = [0u8; 64]; let mut key = [0u8; 64];
key.copy_from_slice(&d[..]); key.copy_from_slice(&d[..]);
Ok(NodeKey::from_slice(&key[..]).unwrap()) Ok(NodeKey::from_slice(&key[..]).unwrap())
} else { } else {
let (key, _) = ed25519::gen_keypair(); let (_, key) = ed25519::gen_keypair();
  1. Create a file /etc/garage.toml with the content given in the Quickstart
  2. Never start the daemon (check that no meta or data folder have been created)
  3. Run garage node-id (this is similar to the steps advertised in "Cookbook > Deploying Garage")
  4. Get an error:
strace -e open ./result/bin/garage node-id
open("/proc/self/cgroup", O_RDONLY|O_CLOEXEC) = 3
open("/proc/self/mountinfo", O_RDONLY|O_CLOEXEC) = 3
open("/sys/fs/cgroup/cpu,cpuacct/user.slice/user-1000.slice/user@1000.service/cpu.cfs_quota_us", O_RDONLY|O_CLOEXEC) = 3
open("/etc/garage.toml", O_RDONLY|O_CLOEXEC) = 9
open("/tmp/meta/node_key", O_WRONLY|O_CREAT|O_TRUNC|O_CLOEXEC, 0666) = -1 ENOENT (Aucun fichier ou dossier de ce type)
 ERROR garage > Unable to read or generate node key: IO error: No such file or directory (os error 2)
+++ exited with 1 +++

2 points:

  1. We need to recursively create folders before trying to write the key
  2. We might want to improve our error reporting by specifying the failed path.
1. Create a file `/etc/garage.toml` with the content given in the Quickstart 2. Never start the daemon (check that no meta or data folder have been created) 3. Run `garage node-id` (this is similar to the steps advertised in "Cookbook > Deploying Garage" 4. Get an error: ``` strace -e open ./result/bin/garage node-id open("/proc/self/cgroup", O_RDONLY|O_CLOEXEC) = 3 open("/proc/self/mountinfo", O_RDONLY|O_CLOEXEC) = 3 open("/sys/fs/cgroup/cpu,cpuacct/user.slice/user-1000.slice/user@1000.service/cpu.cfs_quota_us", O_RDONLY|O_CLOEXEC) = 3 open("/etc/garage.toml", O_RDONLY|O_CLOEXEC) = 9 open("/tmp/meta/node_key", O_WRONLY|O_CREAT|O_TRUNC|O_CLOEXEC, 0666) = -1 ENOENT (Aucun fichier ou dossier de ce type) ERROR garage > Unable to read or generate node key: IO error: No such file or directory (os error 2) +++ exited with 1 +++ ``` 2 points: 1. We need to recursively create folders before trying to write the key 2. We might want to improve our error reporting by specifying the failed path.
let mut f = std::fs::File::create(id_file.as_path())?; let mut f = std::fs::File::create(key_file.as_path())?;
f.write_all(&key[..])?; f.write_all(&key[..])?;
$ RUST_LOG=garage=debug,netapp=debug strace -e open ./result/bin/garage node-id
open("/proc/self/cgroup", O_RDONLY|O_CLOEXEC) = 3
open("/proc/self/mountinfo", O_RDONLY|O_CLOEXEC) = 3
open("/sys/fs/cgroup/cpu,cpuacct/user.slice/user-1000.slice/user@1000.service/cpu.cfs_quota_us", O_RDONLY|O_CLOEXEC) = 3
open("/etc/garage.toml", O_RDONLY|O_CLOEXEC) = 9
open("/tmp/meta/node_key", O_WRONLY|O_CREAT|O_TRUNC|O_CLOEXEC, 0666) = -1 ENOENT (Aucun fichier ou dossier de ce type)
 ERROR garage > Unable to read or generate node key: IO error: No such file or directory (os error 2)
+++ exited with 1 +++

There is a good chance that the garage node-id command will fail, because the key is stored in the meta folder, which very likely has not been created yet; it then throws the cryptic error above, which I diagnosed through strace.

``` $ RUST_LOG=garage=debug,netapp=debug strace -e open ./result/bin/garage node-id open("/proc/self/cgroup", O_RDONLY|O_CLOEXEC) = 3 open("/proc/self/mountinfo", O_RDONLY|O_CLOEXEC) = 3 open("/sys/fs/cgroup/cpu,cpuacct/user.slice/user-1000.slice/user@1000.service/cpu.cfs_quota_us", O_RDONLY|O_CLOEXEC) = 3 open("/etc/garage.toml", O_RDONLY|O_CLOEXEC) = 9 open("/tmp/meta/node_key", O_WRONLY|O_CREAT|O_TRUNC|O_CLOEXEC, 0666) = -1 ENOENT (Aucun fichier ou dossier de ce type) ERROR garage > Unable to read or generate node key: IO error: No such file or directory (os error 2) +++ exited with 1 +++ ``` There is a good chance that the `garage node-id` command will fail, because the key is stored in the `meta` folder, which very likely has not been created yet; it then throws the cryptic error above, which I diagnosed through strace.
Ok(NodeKey::from_slice(&key[..]).unwrap()) Ok(key)
} }
} }
@ -128,6 +143,7 @@ impl System {
background: Arc<BackgroundRunner>, background: Arc<BackgroundRunner>,
replication_factor: usize, replication_factor: usize,
rpc_listen_addr: SocketAddr, rpc_listen_addr: SocketAddr,
rpc_public_address: Option<SocketAddr>,
bootstrap_peers: Vec<(NodeID, SocketAddr)>, bootstrap_peers: Vec<(NodeID, SocketAddr)>,
consul_host: Option<String>, consul_host: Option<String>,
consul_service_name: Option<String>, consul_service_name: Option<String>,
@ -136,29 +152,20 @@ impl System {
info!("Node public key: {}", hex::encode(&node_key.public_key())); info!("Node public key: {}", hex::encode(&node_key.public_key()));
let persist_config = Persister::new(&metadata_dir, "network_config"); let persist_config = Persister::new(&metadata_dir, "network_config");
let persist_peer_list = Persister::new(&metadata_dir, "peer_list");
let net_config = match persist_config.load() { let net_config = match persist_config.load() {
Ok(x) => x, Ok(x) => x,
Err(e) => { Err(e) => {
match Persister::<garage_rpc_021::ring::NetworkConfig>::new( info!(
&metadata_dir, "No valid previous network configuration stored ({}), starting fresh.",
"network_config", e
) );
.load() NetworkConfig::new()
{
Ok(old_config) => NetworkConfig::migrate_from_021(old_config),
Err(e2) => {
info!(
"No valid previous network configuration stored ({}, {}), starting fresh.",
e, e2
);
NetworkConfig::new()
}
}
} }
}; };
let state_info = StateInfo { let local_status = NodeStatus {
hostname: gethostname::gethostname() hostname: gethostname::gethostname()
.into_string() .into_string()
.unwrap_or_else(|_| "<invalid utf-8>".to_string()), .unwrap_or_else(|_| "<invalid utf-8>".to_string()),
@ -169,15 +176,27 @@ impl System {
let ring = Ring::new(net_config, replication_factor); let ring = Ring::new(net_config, replication_factor);
let (update_ring, ring) = watch::channel(Arc::new(ring)); let (update_ring, ring) = watch::channel(Arc::new(ring));
if let Some(addr) = rpc_public_address {
println!("{}@{}", hex::encode(&node_key.public_key()), addr);
} else {
println!("{}", hex::encode(&node_key.public_key()));
}
let netapp = NetApp::new(network_key, node_key); let netapp = NetApp::new(network_key, node_key);
let fullmesh = FullMeshPeeringStrategy::new(netapp.clone(), bootstrap_peers.clone()); let fullmesh = FullMeshPeeringStrategy::new(
netapp.clone(),
bootstrap_peers.clone(),
rpc_public_address,
);
let system_endpoint = netapp.endpoint(SYSTEM_RPC_PATH.into()); let system_endpoint = netapp.endpoint(SYSTEM_RPC_PATH.into());
let sys = Arc::new(System { let sys = Arc::new(System {
id: netapp.id.clone(), id: netapp.id.into(),
persist_config, persist_config,
state_info: ArcSwap::new(Arc::new(state_info)), persist_peer_list,
local_status: ArcSwap::new(Arc::new(local_status)),
node_status: RwLock::new(HashMap::new()),
netapp: netapp.clone(), netapp: netapp.clone(),
fullmesh: fullmesh.clone(), fullmesh: fullmesh.clone(),
rpc: RpcHelper { rpc: RpcHelper {
@ -187,6 +206,7 @@ impl System {
system_endpoint, system_endpoint,
replication_factor, replication_factor,
rpc_listen_addr, rpc_listen_addr,
rpc_public_addr: rpc_public_address,
bootstrap_peers, bootstrap_peers,
consul_host, consul_host,
consul_service_name, consul_service_name,
@ -206,11 +226,38 @@ impl System {
.listen(self.rpc_listen_addr, None, must_exit.clone()), .listen(self.rpc_listen_addr, None, must_exit.clone()),
self.fullmesh.clone().run(must_exit.clone()), self.fullmesh.clone().run(must_exit.clone()),
self.discovery_loop(must_exit.clone()), self.discovery_loop(must_exit.clone()),
self.status_exchange_loop(must_exit.clone()),
); );
} }
// ---- INTERNALS ---- // ---- INTERNALS ----
async fn advertise_to_consul(self: Arc<Self>) -> Result<(), Error> {
let (consul_host, consul_service_name) =
match (&self.consul_host, &self.consul_service_name) {
(Some(ch), Some(csn)) => (ch, csn),
_ => return Ok(()),
};
let rpc_public_addr = match self.rpc_public_addr {
Some(addr) => addr,
None => {
warn!("Not advertising to Consul because rpc_public_addr is not defined in config file.");
return Ok(());
}
};
publish_consul_service(
consul_host,
consul_service_name,
self.netapp.id,
&self.local_status.load_full().hostname,
rpc_public_addr,
)
.await
.map_err(|e| Error::Message(format!("Error while publishing Consul service: {}", e)))
}
/// Save network configuration to disc /// Save network configuration to disc
async fn save_network_config(self: Arc<Self>) -> Result<(), Error> { async fn save_network_config(self: Arc<Self>) -> Result<(), Error> {
let ring: Arc<Ring> = self.ring.borrow().clone(); let ring: Arc<Ring> = self.ring.borrow().clone();
@ -221,12 +268,27 @@ impl System {
Ok(()) Ok(())
} }
fn update_state_info(&self) { fn update_local_status(&self) {
let mut new_si: StateInfo = self.state_info.load().as_ref().clone(); let mut new_si: NodeStatus = self.local_status.load().as_ref().clone();
let ring = self.ring.borrow(); let ring = self.ring.borrow();
new_si.config_version = ring.config.version; new_si.config_version = ring.config.version;
self.state_info.swap(Arc::new(new_si)); self.local_status.swap(Arc::new(new_si));
}
async fn handle_connect(&self, node: &str) -> Result<SystemRpc, Error> {
let (pubkey, addrs) = parse_and_resolve_peer_addr(node)
.ok_or_else(|| Error::Message(format!("Unable to parse or resolve node specification: {}", node)))?;
let mut errors = vec![];
for ip in addrs.iter() {
match self.netapp.clone().try_connect(*ip, pubkey).await {
Ok(()) => return Ok(SystemRpc::Ok),
Err(e) => {
errors.push((*ip, e));
}
}
}
return Err(Error::Message(format!("Could not connect to specified peers. Errors: {:?}", errors)));
} }
fn handle_pull_config(&self) -> SystemRpc { fn handle_pull_config(&self) -> SystemRpc {
@ -234,6 +296,58 @@ impl System {
SystemRpc::AdvertiseConfig(ring.config.clone()) SystemRpc::AdvertiseConfig(ring.config.clone())
} }
fn handle_get_known_nodes(&self) -> SystemRpc {
let node_status = self.node_status.read().unwrap();
let known_nodes =
self.fullmesh
.get_peer_list()
.iter()
.map(|n| KnownNodeInfo {
id: n.id.into(),
addr: n.addr,
is_up: n.is_up(),
status: node_status.get(&n.id.into()).cloned().map(|(_, st)| st).unwrap_or(
NodeStatus {
hostname: "?".to_string(),
replication_factor: 0,
config_version: 0,
},
),
})
.collect::<Vec<_>>();
SystemRpc::ReturnKnownNodes(known_nodes)
}
async fn handle_advertise_status(
self: &Arc<Self>,
from: Uuid,
info: &NodeStatus,
) -> Result<SystemRpc, Error> {
let local_info = self.local_status.load();
if local_info.replication_factor < info.replication_factor {
error!("Some node have a higher replication factor ({}) than this one ({}). This is not supported and might lead to bugs",
info.replication_factor,
local_info.replication_factor);
std::process::exit(1);
}
if info.config_version > local_info.config_version {
let self2 = self.clone();
self.background.spawn_cancellable(async move {
self2.pull_config(from).await;
Ok(())
});
}
self.node_status
.write()
.unwrap()
.insert(from, (now_msec(), info.clone()));
Ok(SystemRpc::Ok)
}
async fn handle_advertise_config( async fn handle_advertise_config(
self: Arc<Self>, self: Arc<Self>,
adv: &NetworkConfig, adv: &NetworkConfig,
@ -265,13 +379,32 @@ impl System {
Ok(SystemRpc::Ok) Ok(SystemRpc::Ok)
} }
async fn discovery_loop(&self, mut stop_signal: watch::Receiver<bool>) { async fn status_exchange_loop(&self, mut stop_signal: watch::Receiver<bool>) {
/* TODO while !*stop_signal.borrow() {
let restart_at = tokio::time::sleep(STATUS_EXCHANGE_INTERVAL);
self.update_local_status();
let local_status: NodeStatus = self.local_status.load().as_ref().clone();
self.rpc
.broadcast(
&self.system_endpoint,
SystemRpc::AdvertiseStatus(local_status),
RequestStrategy::with_priority(PRIO_HIGH).with_timeout(PING_TIMEOUT),
)
.await;
select! {
_ = restart_at.fuse() => {},
_ = stop_signal.changed().fuse() => {},
}
}
}
async fn discovery_loop(self: &Arc<Self>, mut stop_signal: watch::Receiver<bool>) {
let consul_config = match (&self.consul_host, &self.consul_service_name) { let consul_config = match (&self.consul_host, &self.consul_service_name) {
(Some(ch), Some(csn)) => Some((ch.clone(), csn.clone())), (Some(ch), Some(csn)) => Some((ch.clone(), csn.clone())),
_ => None, _ => None,
}; };
*/
while !*stop_signal.borrow() { while !*stop_signal.borrow() {
let not_configured = self.ring.borrow().config.members.is_empty(); let not_configured = self.ring.borrow().config.members.is_empty();
@ -286,34 +419,42 @@ impl System {
if not_configured || no_peers || bad_peers { if not_configured || no_peers || bad_peers {
info!("Doing a bootstrap/discovery step (not_configured: {}, no_peers: {}, bad_peers: {})", not_configured, no_peers, bad_peers); info!("Doing a bootstrap/discovery step (not_configured: {}, no_peers: {}, bad_peers: {})", not_configured, no_peers, bad_peers);
let ping_list = self.bootstrap_peers.clone(); let mut ping_list = self.bootstrap_peers.clone();
/* // Add peer list from list stored on disk
*TODO bring this back: persisted list of peers if let Ok(peers) = self.persist_peer_list.load_async().await {
if let Ok(peers) = self.persist_status.load_async().await { ping_list.extend(peers.iter().map(|(id, addr)| ((*id).into(), *addr)))
ping_list.extend(peers.iter().map(|x| (x.addr, Some(x.id))));
} }
*/
/* // Fetch peer list from Consul
* TODO bring this back: get peers from consul
if let Some((consul_host, consul_service_name)) = &consul_config { if let Some((consul_host, consul_service_name)) = &consul_config {
match get_consul_nodes(consul_host, consul_service_name).await { match get_consul_nodes(consul_host, consul_service_name).await {
Ok(node_list) => { Ok(node_list) => {
ping_list.extend(node_list.iter().map(|a| (*a, None))); ping_list.extend(node_list);
} }
Err(e) => { Err(e) => {
warn!("Could not retrieve node list from Consul: {}", e); warn!("Could not retrieve node list from Consul: {}", e);
} }
} }
} }
*/
for (node_id, node_addr) in ping_list { for (node_id, node_addr) in ping_list {
tokio::spawn(self.netapp.clone().try_connect(node_addr, node_id)); tokio::spawn(self.netapp.clone().try_connect(node_addr, node_id));
} }
} }
let peer_list = self
.fullmesh
.get_peer_list()
.iter()
.map(|n| (n.id.into(), n.addr))
.collect::<Vec<_>>();
if let Err(e) = self.persist_peer_list.save_async(&peer_list).await {
warn!("Could not save peer list to file: {}", e);
}
self.background.spawn(self.clone().advertise_to_consul());
let restart_at = tokio::time::sleep(DISCOVERY_INTERVAL); let restart_at = tokio::time::sleep(DISCOVERY_INTERVAL);
select! { select! {
_ = restart_at.fuse() => {}, _ = restart_at.fuse() => {},
@ -322,7 +463,7 @@ impl System {
} }
} }
async fn pull_config(self: Arc<Self>, peer: NodeID) { async fn pull_config(self: Arc<Self>, peer: Uuid) {
let resp = self let resp = self
.rpc .rpc
.call( .call(
@ -340,24 +481,14 @@ impl System {
#[async_trait] #[async_trait]
impl EndpointHandler<SystemRpc> for System { impl EndpointHandler<SystemRpc> for System {
async fn handle(self: &Arc<Self>, msg: &SystemRpc, _from: NodeID) -> SystemRpc { async fn handle(self: &Arc<Self>, msg: &SystemRpc, from: NodeID) -> Result<SystemRpc, Error> {
let resp = match msg { match msg {
SystemRpc::Connect(node) => self.handle_connect(node).await,
SystemRpc::PullConfig => Ok(self.handle_pull_config()), SystemRpc::PullConfig => Ok(self.handle_pull_config()),
SystemRpc::AdvertiseStatus(adv) => self.handle_advertise_status(from.into(), adv).await,
SystemRpc::AdvertiseConfig(adv) => self.clone().handle_advertise_config(&adv).await, SystemRpc::AdvertiseConfig(adv) => self.clone().handle_advertise_config(&adv).await,
SystemRpc::GetKnownNodes => { SystemRpc::GetKnownNodes => Ok(self.handle_get_known_nodes()),
let known_nodes = self
.fullmesh
.get_peer_list()
.iter()
.map(|n| (n.id, n.addr, n.is_up()))
.collect::<Vec<_>>();
Ok(SystemRpc::ReturnKnownNodes(known_nodes))
}
_ => Err(Error::BadRpc("Unexpected RPC message".to_string())), _ => Err(Error::BadRpc("Unexpected RPC message".to_string())),
};
match resp {
Ok(r) => r,
Err(e) => SystemRpc::Error(format!("{}", e)),
} }
} }
} }

View file

@ -36,11 +36,10 @@ enum GcRpc {
Update(Vec<ByteBuf>), Update(Vec<ByteBuf>),
DeleteIfEqualHash(Vec<(ByteBuf, Hash)>), DeleteIfEqualHash(Vec<(ByteBuf, Hash)>),
Ok, Ok,
Error(String),
} }
impl Message for GcRpc { impl Rpc for GcRpc {
type Response = GcRpc; type Response = Result<GcRpc, Error>;
} }
impl<F, R> TableGc<F, R> impl<F, R> TableGc<F, R>
@ -168,7 +167,7 @@ where
async fn try_send_and_delete( async fn try_send_and_delete(
&self, &self,
nodes: Vec<NodeID>, nodes: Vec<Uuid>,
items: Vec<(ByteBuf, Hash, ByteBuf)>, items: Vec<(ByteBuf, Hash, ByteBuf)>,
) -> Result<(), Error> { ) -> Result<(), Error> {
let n_items = items.len(); let n_items = items.len();
@ -224,8 +223,15 @@ where
.compare_and_swap::<_, _, Vec<u8>>(key, Some(vhash), None)?; .compare_and_swap::<_, _, Vec<u8>>(key, Some(vhash), None)?;
Ok(()) Ok(())
} }
}
async fn handle_rpc(&self, message: &GcRpc) -> Result<GcRpc, Error> { #[async_trait]
impl<F, R> EndpointHandler<GcRpc> for TableGc<F, R>
where
F: TableSchema + 'static,
R: TableReplication + 'static,
{
async fn handle(self: &Arc<Self>, message: &GcRpc, _from: NodeID) -> Result<GcRpc, Error> {
match message { match message {
GcRpc::Update(items) => { GcRpc::Update(items) => {
self.data.update_many(items)?; self.data.update_many(items)?;
@ -242,16 +248,3 @@ where
} }
} }
} }
#[async_trait]
impl<F, R> EndpointHandler<GcRpc> for TableGc<F, R>
where
F: TableSchema + 'static,
R: TableReplication + 'static,
{
async fn handle(self: &Arc<Self>, message: &GcRpc, _from: NodeID) -> GcRpc {
self.handle_rpc(message)
.await
.unwrap_or_else(|e| GcRpc::Error(format!("{}", e)))
}
}

View file

@ -2,7 +2,6 @@ use std::sync::Arc;
use garage_rpc::ring::*; use garage_rpc::ring::*;
use garage_rpc::system::System; use garage_rpc::system::System;
use garage_rpc::NodeID;
use garage_util::data::*; use garage_util::data::*;
use crate::replication::*; use crate::replication::*;
@ -20,19 +19,19 @@ pub struct TableFullReplication {
} }
impl TableReplication for TableFullReplication { impl TableReplication for TableFullReplication {
fn read_nodes(&self, _hash: &Hash) -> Vec<NodeID> { fn read_nodes(&self, _hash: &Hash) -> Vec<Uuid> {
vec![self.system.id] vec![self.system.id]
} }
fn read_quorum(&self) -> usize { fn read_quorum(&self) -> usize {
1 1
} }
fn write_nodes(&self, _hash: &Hash) -> Vec<NodeID> { fn write_nodes(&self, _hash: &Hash) -> Vec<Uuid> {
let ring = self.system.ring.borrow(); let ring = self.system.ring.borrow();
ring.config ring.config
.members .members
.keys() .keys()
.map(|id| NodeID::from_slice(id.as_slice()).unwrap()) .cloned()
.collect::<Vec<_>>() .collect::<Vec<_>>()
} }
fn write_quorum(&self) -> usize { fn write_quorum(&self) -> usize {

View file

@ -1,5 +1,4 @@
use garage_rpc::ring::*; use garage_rpc::ring::*;
use garage_rpc::NodeID;
use garage_util::data::*; use garage_util::data::*;
/// Trait to describe how a table shall be replicated /// Trait to describe how a table shall be replicated
@ -8,12 +7,12 @@ pub trait TableReplication: Send + Sync {
// To understand various replication methods // To understand various replication methods
/// Which nodes to send read requests to /// Which nodes to send read requests to
fn read_nodes(&self, hash: &Hash) -> Vec<NodeID>; fn read_nodes(&self, hash: &Hash) -> Vec<Uuid>;
/// Responses needed to consider a read successful /// Responses needed to consider a read successful
fn read_quorum(&self) -> usize; fn read_quorum(&self) -> usize;
/// Which nodes to send writes to /// Which nodes to send writes to
fn write_nodes(&self, hash: &Hash) -> Vec<NodeID>; fn write_nodes(&self, hash: &Hash) -> Vec<Uuid>;
/// Responses needed to consider a write successful /// Responses needed to consider a write successful
fn write_quorum(&self) -> usize; fn write_quorum(&self) -> usize;
fn max_write_errors(&self) -> usize; fn max_write_errors(&self) -> usize;

View file

@ -2,7 +2,6 @@ use std::sync::Arc;
use garage_rpc::ring::*; use garage_rpc::ring::*;
use garage_rpc::system::System; use garage_rpc::system::System;
use garage_rpc::NodeID;
use garage_util::data::*; use garage_util::data::*;
use crate::replication::*; use crate::replication::*;
@ -26,7 +25,7 @@ pub struct TableShardedReplication {
} }
impl TableReplication for TableShardedReplication { impl TableReplication for TableShardedReplication {
fn read_nodes(&self, hash: &Hash) -> Vec<NodeID> { fn read_nodes(&self, hash: &Hash) -> Vec<Uuid> {
let ring = self.system.ring.borrow(); let ring = self.system.ring.borrow();
ring.get_nodes(&hash, self.replication_factor) ring.get_nodes(&hash, self.replication_factor)
} }
@ -34,7 +33,7 @@ impl TableReplication for TableShardedReplication {
self.read_quorum self.read_quorum
} }
fn write_nodes(&self, hash: &Hash) -> Vec<NodeID> { fn write_nodes(&self, hash: &Hash) -> Vec<Uuid> {
let ring = self.system.ring.borrow(); let ring = self.system.ring.borrow();
ring.get_nodes(&hash, self.replication_factor) ring.get_nodes(&hash, self.replication_factor)
} }

View file

@ -45,11 +45,10 @@ pub(crate) enum SyncRpc {
Node(MerkleNodeKey, MerkleNode), Node(MerkleNodeKey, MerkleNode),
Items(Vec<Arc<ByteBuf>>), Items(Vec<Arc<ByteBuf>>),
Ok, Ok,
Error(String),
} }
impl Message for SyncRpc { impl Rpc for SyncRpc {
type Response = SyncRpc; type Response = Result<SyncRpc, Error>;
} }
struct SyncTodo { struct SyncTodo {
@ -305,7 +304,7 @@ where
async fn offload_items( async fn offload_items(
self: &Arc<Self>, self: &Arc<Self>,
items: &[(Vec<u8>, Arc<ByteBuf>)], items: &[(Vec<u8>, Arc<ByteBuf>)],
nodes: &[NodeID], nodes: &[Uuid],
) -> Result<(), Error> { ) -> Result<(), Error> {
let values = items.iter().map(|(_k, v)| v.clone()).collect::<Vec<_>>(); let values = items.iter().map(|(_k, v)| v.clone()).collect::<Vec<_>>();
@ -354,7 +353,7 @@ where
async fn do_sync_with( async fn do_sync_with(
self: Arc<Self>, self: Arc<Self>,
partition: TodoPartition, partition: TodoPartition,
who: NodeID, who: Uuid,
must_exit: watch::Receiver<bool>, must_exit: watch::Receiver<bool>,
) -> Result<(), Error> { ) -> Result<(), Error> {
let (root_ck_key, root_ck) = self.get_root_ck(partition.partition)?; let (root_ck_key, root_ck) = self.get_root_ck(partition.partition)?;
@ -480,7 +479,7 @@ where
Ok(()) Ok(())
} }
async fn send_items(&self, who: NodeID, item_value_list: Vec<Vec<u8>>) -> Result<(), Error> { async fn send_items(&self, who: Uuid, item_value_list: Vec<Vec<u8>>) -> Result<(), Error> {
info!( info!(
"({}) Sending {} items to {:?}", "({}) Sending {} items to {:?}",
self.data.name, self.data.name,
@ -513,9 +512,17 @@ where
))) )))
} }
} }
}
// ======= SYNCHRONIZATION PROCEDURE -- RECEIVER SIDE ====== // ======= SYNCHRONIZATION PROCEDURE -- RECEIVER SIDE ======
async fn handle_rpc(self: &Arc<Self>, message: &SyncRpc) -> Result<SyncRpc, Error> {
#[async_trait]
impl<F, R> EndpointHandler<SyncRpc> for TableSyncer<F, R>
where
F: TableSchema + 'static,
R: TableReplication + 'static,
{
async fn handle(self: &Arc<Self>, message: &SyncRpc, _from: NodeID) -> Result<SyncRpc, Error> {
match message { match message {
SyncRpc::RootCkHash(range, h) => { SyncRpc::RootCkHash(range, h) => {
let (_root_ck_key, root_ck) = self.get_root_ck(*range)?; let (_root_ck_key, root_ck) = self.get_root_ck(*range)?;
@ -535,19 +542,6 @@ where
} }
} }
#[async_trait]
impl<F, R> EndpointHandler<SyncRpc> for TableSyncer<F, R>
where
F: TableSchema + 'static,
R: TableReplication + 'static,
{
async fn handle(self: &Arc<Self>, message: &SyncRpc, _from: NodeID) -> SyncRpc {
self.handle_rpc(message)
.await
.unwrap_or_else(|e| SyncRpc::Error(format!("{}", e)))
}
}
impl SyncTodo { impl SyncTodo {
fn add_full_sync<F: TableSchema, R: TableReplication>( fn add_full_sync<F: TableSchema, R: TableReplication>(
&mut self, &mut self,

View file

@ -34,7 +34,6 @@ pub struct Table<F: TableSchema + 'static, R: TableReplication + 'static> {
#[derive(Serialize, Deserialize)] #[derive(Serialize, Deserialize)]
pub(crate) enum TableRpc<F: TableSchema> { pub(crate) enum TableRpc<F: TableSchema> {
Ok, Ok,
Error(String),
ReadEntry(F::P, F::S), ReadEntry(F::P, F::S),
ReadEntryResponse(Option<ByteBuf>), ReadEntryResponse(Option<ByteBuf>),
@ -45,8 +44,8 @@ pub(crate) enum TableRpc<F: TableSchema> {
Update(Vec<Arc<ByteBuf>>), Update(Vec<Arc<ByteBuf>>),
} }
impl<F: TableSchema> Message for TableRpc<F> { impl<F: TableSchema> Rpc for TableRpc<F> {
type Response = TableRpc<F>; type Response = Result<TableRpc<F>, Error>;
} }
impl<F, R> Table<F, R> impl<F, R> Table<F, R>
@ -277,7 +276,7 @@ where
// =============== UTILITY FUNCTION FOR CLIENT OPERATIONS =============== // =============== UTILITY FUNCTION FOR CLIENT OPERATIONS ===============
async fn repair_on_read(&self, who: &[NodeID], what: F::E) -> Result<(), Error> { async fn repair_on_read(&self, who: &[Uuid], what: F::E) -> Result<(), Error> {
let what_enc = Arc::new(ByteBuf::from(rmp_to_vec_all_named(&what)?)); let what_enc = Arc::new(ByteBuf::from(rmp_to_vec_all_named(&what)?));
self.system self.system
.rpc .rpc
@ -292,10 +291,19 @@ where
.await?; .await?;
Ok(()) Ok(())
} }
}
// ====== RPC HANDLER ===== #[async_trait]
// impl<F, R> EndpointHandler<TableRpc<F>> for Table<F, R>
async fn handle_rpc(self: &Arc<Self>, msg: &TableRpc<F>) -> Result<TableRpc<F>, Error> { where
F: TableSchema + 'static,
R: TableReplication + 'static,
{
async fn handle(
self: &Arc<Self>,
msg: &TableRpc<F>,
_from: NodeID,
) -> Result<TableRpc<F>, Error> {
match msg { match msg {
TableRpc::ReadEntry(key, sort_key) => { TableRpc::ReadEntry(key, sort_key) => {
let value = self.data.read_entry(key, sort_key)?; let value = self.data.read_entry(key, sort_key)?;
@ -313,16 +321,3 @@ where
} }
} }
} }
#[async_trait]
impl<F, R> EndpointHandler<TableRpc<F>> for Table<F, R>
where
F: TableSchema + 'static,
R: TableReplication + 'static,
{
async fn handle(self: &Arc<Self>, msg: &TableRpc<F>, _from: NodeID) -> TableRpc<F> {
self.handle_rpc(msg)
.await
.unwrap_or_else(|e| TableRpc::<F>::Error(format!("{}", e)))
}
}

View file

@ -7,6 +7,7 @@ use serde::de::Error as SerdeError;
use serde::{de, Deserialize}; use serde::{de, Deserialize};
use netapp::NodeID; use netapp::NodeID;
use netapp::util::parse_and_resolve_peer_addr;
use crate::error::Error; use crate::error::Error;
@ -34,11 +35,13 @@ pub struct Config {
/// Address to bind for RPC /// Address to bind for RPC
pub rpc_bind_addr: SocketAddr, pub rpc_bind_addr: SocketAddr,
/// Public IP address of this node
pub rpc_public_addr: Option<SocketAddr>,
/// Bootstrap peers RPC address /// Bootstrap peers RPC address
#[serde(deserialize_with = "deserialize_vec_addr")] #[serde(deserialize_with = "deserialize_vec_addr")]
pub bootstrap_peers: Vec<(NodeID, SocketAddr)>, pub bootstrap_peers: Vec<(NodeID, SocketAddr)>,
/// Consule host to connect to to discover more peers /// Consul host to connect to to discover more peers
pub consul_host: Option<String>, pub consul_host: Option<String>,
/// Consul service name to use /// Consul service name to use
pub consul_service_name: Option<String>, pub consul_service_name: Option<String>,
@ -111,26 +114,13 @@ fn deserialize_vec_addr<'de, D>(deserializer: D) -> Result<Vec<(NodeID, SocketAd
where where
D: de::Deserializer<'de>, D: de::Deserializer<'de>,
{ {
use std::net::ToSocketAddrs;
let mut ret = vec![]; let mut ret = vec![];
for peer in <Vec<&str>>::deserialize(deserializer)? { for peer in <Vec<&str>>::deserialize(deserializer)? {
let delim = peer let (pubkey, addrs) = parse_and_resolve_peer_addr(peer)
.find('@') .ok_or_else(|| D::Error::custom(format!("Unable to parse or resolve peer: {}", peer)))?;
.ok_or_else(|| D::Error::custom("Invalid bootstrap peer: public key not specified"))?; for ip in addrs {
let (key, host) = peer.split_at(delim); ret.push((pubkey.clone(), ip));
let pubkey = NodeID::from_slice(&hex::decode(&key).map_err(D::Error::custom)?)
.ok_or_else(|| D::Error::custom("Invalid bootstrap peer public key"))?;
let hosts = host[1..]
.to_socket_addrs()
.map_err(D::Error::custom)?
.collect::<Vec<_>>();
if hosts.is_empty() {
return Err(D::Error::custom(format!("Error resolving {}", &host[1..])));
}
for host in hosts {
ret.push((pubkey.clone(), host));
} }
} }

View file

@ -87,6 +87,18 @@ impl FixedBytes32 {
} }
} }
/// Builds a `FixedBytes32` from a netapp node identifier, using the
/// identifier's byte representation (`as_ref()`).
///
/// # Panics
/// Panics if `FixedBytes32::try_from` does not produce a value for those
/// bytes (the result is unwrapped).
impl From<netapp::NodeID> for FixedBytes32 {
    fn from(node_id: netapp::NodeID) -> Self {
        Self::try_from(node_id.as_ref()).unwrap()
    }
}
/// Builds a netapp node identifier from a `FixedBytes32`.
///
/// This is written as `From<FixedBytes32> for netapp::NodeID` rather than a
/// hand-written `Into`: the standard library's blanket implementation still
/// provides `Into<netapp::NodeID>` for `FixedBytes32`, so existing `.into()`
/// call sites keep working, and `netapp::NodeID::from(..)` becomes available
/// as well.
///
/// # Panics
/// Panics if `NodeID::from_slice` does not accept the bytes (the result is
/// unwrapped).
impl From<FixedBytes32> for netapp::NodeID {
    fn from(bytes: FixedBytes32) -> netapp::NodeID {
        netapp::NodeID::from_slice(bytes.as_slice()).unwrap()
    }
}
/// A 32 bytes UUID /// A 32 bytes UUID
pub type Uuid = FixedBytes32; pub type Uuid = FixedBytes32;
/// A 256 bit cryptographic hash, can be sha256 or blake2 depending on provenance /// A 256 bit cryptographic hash, can be sha256 or blake2 depending on provenance

View file

@ -1,35 +1,13 @@
//! Module containing error types used in Garage //! Module containing error types used in Garage
use err_derive::Error; use std::fmt;
use hyper::StatusCode;
use std::io; use std::io;
use err_derive::Error;
use serde::{de::Visitor, Deserialize, Deserializer, Serialize, Serializer};
use crate::data::*; use crate::data::*;
/// RPC related errors
#[derive(Debug, Error)]
pub enum RpcError {
#[error(display = "Node is down: {:?}.", _0)]
NodeDown(Uuid),
#[error(display = "Timeout")]
Timeout,
#[error(display = "HTTP error: {}", _0)]
Http(#[error(source)] http::Error),
#[error(display = "Hyper error: {}", _0)]
Hyper(#[error(source)] hyper::Error),
#[error(display = "Messagepack encode error: {}", _0)]
RmpEncode(#[error(source)] rmp_serde::encode::Error),
#[error(display = "Messagepack decode error: {}", _0)]
RmpDecode(#[error(source)] rmp_serde::decode::Error),
#[error(display = "Too many errors: {:?}", _0)]
TooManyErrors(Vec<String>),
}
/// Regroup all Garage errors /// Regroup all Garage errors
#[derive(Debug, Error)] #[derive(Debug, Error)]
pub enum Error { pub enum Error {
@ -63,11 +41,14 @@ pub enum Error {
#[error(display = "Tokio join error: {}", _0)] #[error(display = "Tokio join error: {}", _0)]
TokioJoin(#[error(source)] tokio::task::JoinError), TokioJoin(#[error(source)] tokio::task::JoinError),
#[error(display = "RPC call error: {}", _0)] #[error(display = "Remote error: {}", _0)]
Rpc(#[error(source)] RpcError), RemoteError(String),
#[error(display = "Remote error: {} (status code {})", _0, _1)] #[error(display = "Timeout")]
RemoteError(String, StatusCode), Timeout,
#[error(display = "Too many errors: {:?}", _0)]

We discussed renaming this error to "FailedQuorumError" or something similar, as this is the only case in which it is raised.

We discussed renaming this error to "FailedQuorumError" or something similar, as this is the only case in which it is raised.
TooManyErrors(Vec<String>),
#[error(display = "Bad RPC: {}", _0)] #[error(display = "Bad RPC: {}", _0)]
BadRpc(String), BadRpc(String),
@ -99,3 +80,44 @@ impl<T> From<tokio::sync::mpsc::error::SendError<T>> for Error {
Error::Message("MPSC send error".to_string()) Error::Message("MPSC send error".to_string())
} }
} }
// Custom serialization for our error type, for use in RPC.
// Errors are serialized as a string of their Display representation.
// Upon deserialization, they all become a RemoteError with the
// given representation.
impl Serialize for Error {
fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
where
S: Serializer,
{
serializer.serialize_str(&format!("{}", self))
}
}
impl<'de> Deserialize<'de> for Error {
fn deserialize<D>(deserializer: D) -> Result<Self, D::Error>
where
D: Deserializer<'de>,
{
deserializer.deserialize_string(ErrorVisitor)
}
}
struct ErrorVisitor;
impl<'de> Visitor<'de> for ErrorVisitor {
type Value = Error;
fn expecting(&self, formatter: &mut fmt::Formatter) -> fmt::Result {
write!(formatter, "a string that represents an error value")
}
fn visit_str<E>(self, error_msg: &str) -> Result<Self::Value, E> {
Ok(Error::RemoteError(error_msg.to_string()))
}
fn visit_string<E>(self, error_msg: String) -> Result<Self::Value, E> {
Ok(Error::RemoteError(error_msg))
}
}

View file

@ -26,4 +26,4 @@ percent-encoding = "2.1.0"
futures = "0.3" futures = "0.3"
http = "0.2" http = "0.2"
hyper = "0.14" hyper = { version = "0.14", features = ["server", "http1", "runtime", "tcp", "stream"] }

View file

@ -38,7 +38,9 @@ impl Error {
match self { match self {
Error::NotFound => StatusCode::NOT_FOUND, Error::NotFound => StatusCode::NOT_FOUND,
Error::ApiError(e) => e.http_status_code(), Error::ApiError(e) => e.http_status_code(),
Error::InternalError(GarageError::Rpc(_)) => StatusCode::SERVICE_UNAVAILABLE, Error::InternalError(
GarageError::Timeout | GarageError::RemoteError(_) | GarageError::TooManyErrors(_),
) => StatusCode::SERVICE_UNAVAILABLE,
Error::InternalError(_) => StatusCode::INTERNAL_SERVER_ERROR, Error::InternalError(_) => StatusCode::INTERNAL_SERVER_ERROR,
_ => StatusCode::BAD_REQUEST, _ => StatusCode::BAD_REQUEST,
} }