replace RPC stack with netapp #123

Merged
lx merged 5 commits from netapp into main 2021-10-25 13:49:35 +00:00
31 changed files with 1497 additions and 2289 deletions
Showing only changes of commit 4067797d01
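In short, the diff below drops the old `RpcServer`/`RpcClient` pair in favour of netapp-style per-message endpoints: each RPC enum implements `Message` with an associated `Response` type, and handler objects register themselves on an `Endpoint` instead of adding closures to a central server. A minimal synchronous sketch of that shape (plain std Rust with simplified stand-in traits and toy values, not the actual netapp API):

```rust
use std::sync::Arc;

// Simplified stand-ins for the netapp-style traits used throughout this diff
// (`Message` with an associated `Response`, `EndpointHandler`). This sketches
// the calling shape only; it is not the real netapp API.
trait Message {
    type Response;
}

trait EndpointHandler<M: Message> {
    fn handle(&self, message: &M) -> M::Response;
}

#[derive(Debug)]
enum AdminRpc {
    // Request (the real enum also carries bucket/key/repair operations).
    Stats,
    // Replies: as in the diff, the reply to an AdminRpc is itself an AdminRpc.
    Ok(String),
    Error(String),
}

impl Message for AdminRpc {
    type Response = AdminRpc;
}

struct AdminRpcHandler;

impl EndpointHandler<AdminRpc> for AdminRpcHandler {
    fn handle(&self, message: &AdminRpc) -> AdminRpc {
        // The real handler dispatches to handle_bucket_cmd, handle_key_cmd, ...
        // and maps internal errors to AdminRpc::Error instead of a transport error.
        match message {
            AdminRpc::Stats => AdminRpc::Ok("toy stats output".to_string()),
            other => AdminRpc::Error(format!("unexpected message: {:?}", other)),
        }
    }
}

fn main() {
    // Instead of rpc_server.add_handler(path, closure), a handler object is
    // registered on an endpoint and invoked with a reference to the message.
    let handler = Arc::new(AdminRpcHandler);
    let reply = handler.handle(&AdminRpc::Stats);
    println!("{:?}", reply);
}
```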

Cargo.lock (generated, 705 changed lines): file diff suppressed because it is too large.


@ -1,6 +1,6 @@
[package] [package]
name = "garage_api" name = "garage_api"
version = "0.3.0" version = "0.4.0"
authors = ["Alex Auvolat <alex@adnab.me>"] authors = ["Alex Auvolat <alex@adnab.me>"]
edition = "2018" edition = "2018"
license = "AGPL-3.0" license = "AGPL-3.0"
@ -13,9 +13,9 @@ path = "lib.rs"
# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html # See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
[dependencies] [dependencies]
garage_model = { version = "0.3.0", path = "../model" } garage_model = { version = "0.4.0", path = "../model" }
garage_table = { version = "0.3.0", path = "../table" } garage_table = { version = "0.4.0", path = "../table" }
garage_util = { version = "0.3.0", path = "../util" } garage_util = { version = "0.4.0", path = "../util" }
base64 = "0.13" base64 = "0.13"
bytes = "1.0" bytes = "1.0"


@ -1,6 +1,6 @@
[package] [package]
name = "garage" name = "garage"
version = "0.3.0" version = "0.4.0"
authors = ["Alex Auvolat <alex@adnab.me>"] authors = ["Alex Auvolat <alex@adnab.me>"]
edition = "2018" edition = "2018"
license = "AGPL-3.0" license = "AGPL-3.0"
@ -14,12 +14,12 @@ path = "main.rs"
# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html # See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
[dependencies] [dependencies]
garage_api = { version = "0.3.0", path = "../api" } garage_api = { version = "0.4.0", path = "../api" }
garage_model = { version = "0.3.0", path = "../model" } garage_model = { version = "0.4.0", path = "../model" }
garage_rpc = { version = "0.3.0", path = "../rpc" } garage_rpc = { version = "0.4.0", path = "../rpc" }
garage_table = { version = "0.3.0", path = "../table" } garage_table = { version = "0.4.0", path = "../table" }
garage_util = { version = "0.3.0", path = "../util" } garage_util = { version = "0.4.0", path = "../util" }
garage_web = { version = "0.3.0", path = "../web" } garage_web = { version = "0.4.0", path = "../web" }
bytes = "1.0" bytes = "1.0"
git-version = "0.3.4" git-version = "0.3.4"
@ -27,6 +27,8 @@ hex = "0.4"
log = "0.4" log = "0.4"
pretty_env_logger = "0.4" pretty_env_logger = "0.4"
rand = "0.8" rand = "0.8"
async-trait = "0.1.7"
sodiumoxide = { version = "0.2.5-0", package = "kuska-sodiumoxide" }
sled = "0.34" sled = "0.34"
@ -38,3 +40,5 @@ toml = "0.5"
futures = "0.3" futures = "0.3"
futures-util = "0.3" futures-util = "0.3"
tokio = { version = "1.0", default-features = false, features = ["rt", "rt-multi-thread", "io-util", "net", "time", "macros", "sync", "signal", "fs"] } tokio = { version = "1.0", default-features = false, features = ["rt", "rt-multi-thread", "io-util", "net", "time", "macros", "sync", "signal", "fs"] }
netapp = { version = "0.3.0", git = "https://git.deuxfleurs.fr/lx/netapp" }


@ -2,6 +2,7 @@ use std::collections::HashMap;
use std::fmt::Write; use std::fmt::Write;
use std::sync::Arc; use std::sync::Arc;
use async_trait::async_trait;
use serde::{Deserialize, Serialize}; use serde::{Deserialize, Serialize};
use garage_util::error::Error; use garage_util::error::Error;
@ -10,8 +11,7 @@ use garage_table::crdt::Crdt;
use garage_table::replication::*; use garage_table::replication::*;
use garage_table::*; use garage_table::*;
use garage_rpc::rpc_client::*; use garage_rpc::*;
use garage_rpc::rpc_server::*;
use garage_model::bucket_table::*; use garage_model::bucket_table::*;
use garage_model::garage::Garage; use garage_model::garage::Garage;
@ -19,10 +19,8 @@ use garage_model::key_table::*;
use crate::cli::*; use crate::cli::*;
use crate::repair::Repair; use crate::repair::Repair;
use crate::*;
pub const ADMIN_RPC_TIMEOUT: Duration = Duration::from_secs(30); pub const ADMIN_RPC_PATH: &str = "garage/admin_rpc.rs/Rpc";
pub const ADMIN_RPC_PATH: &str = "_admin";
#[derive(Debug, Serialize, Deserialize)] #[derive(Debug, Serialize, Deserialize)]
pub enum AdminRpc { pub enum AdminRpc {
@ -33,41 +31,31 @@ pub enum AdminRpc {
// Replies // Replies
Ok(String), Ok(String),
Error(String),
BucketList(Vec<String>), BucketList(Vec<String>),
BucketInfo(Bucket), BucketInfo(Bucket),
KeyList(Vec<(String, String)>), KeyList(Vec<(String, String)>),
KeyInfo(Key), KeyInfo(Key),
} }
impl RpcMessage for AdminRpc {} impl Message for AdminRpc {
type Response = AdminRpc;
}
pub struct AdminRpcHandler { pub struct AdminRpcHandler {
garage: Arc<Garage>, garage: Arc<Garage>,
rpc_client: Arc<RpcClient<AdminRpc>>, endpoint: Arc<Endpoint<AdminRpc, Self>>,
} }
impl AdminRpcHandler { impl AdminRpcHandler {
pub fn new(garage: Arc<Garage>) -> Arc<Self> { pub fn new(garage: Arc<Garage>) -> Arc<Self> {
let rpc_client = garage.system.clone().rpc_client::<AdminRpc>(ADMIN_RPC_PATH); let endpoint = garage.system.netapp.endpoint(ADMIN_RPC_PATH.into());
Arc::new(Self { garage, rpc_client }) let admin = Arc::new(Self { garage, endpoint });
admin.endpoint.set_handler(admin.clone());
admin
} }
pub fn register_handler(self: Arc<Self>, rpc_server: &mut RpcServer) { async fn handle_bucket_cmd(&self, cmd: &BucketOperation) -> Result<AdminRpc, Error> {
rpc_server.add_handler::<AdminRpc, _, _>(ADMIN_RPC_PATH.to_string(), move |msg, _addr| {
let self2 = self.clone();
async move {
match msg {
AdminRpc::BucketOperation(bo) => self2.handle_bucket_cmd(bo).await,
AdminRpc::KeyOperation(ko) => self2.handle_key_cmd(ko).await,
AdminRpc::LaunchRepair(opt) => self2.handle_launch_repair(opt).await,
AdminRpc::Stats(opt) => self2.handle_stats(opt).await,
_ => Err(Error::BadRpc("Invalid RPC".to_string())),
}
}
});
}
async fn handle_bucket_cmd(&self, cmd: BucketOperation) -> Result<AdminRpc, Error> {
match cmd { match cmd {
BucketOperation::List => { BucketOperation::List => {
let bucket_names = self let bucket_names = self
@ -187,7 +175,7 @@ impl AdminRpcHandler {
} }
} }
async fn handle_key_cmd(&self, cmd: KeyOperation) -> Result<AdminRpc, Error> { async fn handle_key_cmd(&self, cmd: &KeyOperation) -> Result<AdminRpc, Error> {
match cmd { match cmd {
KeyOperation::List => { KeyOperation::List => {
let key_ids = self let key_ids = self
@ -210,13 +198,13 @@ impl AdminRpcHandler {
Ok(AdminRpc::KeyInfo(key)) Ok(AdminRpc::KeyInfo(key))
} }
KeyOperation::New(query) => { KeyOperation::New(query) => {
let key = Key::new(query.name); let key = Key::new(query.name.clone());
self.garage.key_table.insert(&key).await?; self.garage.key_table.insert(&key).await?;
Ok(AdminRpc::KeyInfo(key)) Ok(AdminRpc::KeyInfo(key))
} }
KeyOperation::Rename(query) => { KeyOperation::Rename(query) => {
let mut key = self.get_existing_key(&query.key_pattern).await?; let mut key = self.get_existing_key(&query.key_pattern).await?;
key.name.update(query.new_name); key.name.update(query.new_name.clone());
self.garage.key_table.insert(&key).await?; self.garage.key_table.insert(&key).await?;
Ok(AdminRpc::KeyInfo(key)) Ok(AdminRpc::KeyInfo(key))
} }
@ -353,17 +341,18 @@ impl AdminRpcHandler {
let mut failures = vec![]; let mut failures = vec![];
let ring = self.garage.system.ring.borrow().clone(); let ring = self.garage.system.ring.borrow().clone();
for node in ring.config.members.keys() { for node in ring.config.members.keys() {
let node = NodeID::from_slice(node.as_slice()).unwrap();
if self if self
.rpc_client .endpoint
.call( .call(
*node, &node,
AdminRpc::LaunchRepair(opt_to_send.clone()), &AdminRpc::LaunchRepair(opt_to_send.clone()),
ADMIN_RPC_TIMEOUT, PRIO_NORMAL,
) )
.await .await
.is_err() .is_err()
{ {
failures.push(*node); failures.push(node);
} }
} }
if failures.is_empty() { if failures.is_empty() {
@ -397,14 +386,16 @@ impl AdminRpcHandler {
let ring = self.garage.system.ring.borrow().clone(); let ring = self.garage.system.ring.borrow().clone();
for node in ring.config.members.keys() { for node in ring.config.members.keys() {
let node = NodeID::from_slice(node.as_slice()).unwrap();
let mut opt = opt.clone(); let mut opt = opt.clone();
opt.all_nodes = false; opt.all_nodes = false;
writeln!(&mut ret, "\n======================").unwrap(); writeln!(&mut ret, "\n======================").unwrap();
writeln!(&mut ret, "Stats for node {:?}:", node).unwrap(); writeln!(&mut ret, "Stats for node {:?}:", node).unwrap();
match self match self
.rpc_client .endpoint
.call(*node, AdminRpc::Stats(opt), ADMIN_RPC_TIMEOUT) .call(&node, &AdminRpc::Stats(opt), PRIO_NORMAL)
.await .await
{ {
Ok(AdminRpc::Ok(s)) => writeln!(&mut ret, "{}", s).unwrap(), Ok(AdminRpc::Ok(s)) => writeln!(&mut ret, "{}", s).unwrap(),
@ -495,4 +486,23 @@ impl AdminRpcHandler {
.unwrap(); .unwrap();
writeln!(to, " GC todo queue length: {}", t.data.gc_todo_len()).unwrap(); writeln!(to, " GC todo queue length: {}", t.data.gc_todo_len()).unwrap();
} }
async fn handle_rpc(self: &Arc<Self>, msg: &AdminRpc) -> Result<AdminRpc, Error> {
match msg {
AdminRpc::BucketOperation(bo) => self.handle_bucket_cmd(bo).await,
AdminRpc::KeyOperation(ko) => self.handle_key_cmd(ko).await,
AdminRpc::LaunchRepair(opt) => self.handle_launch_repair(opt.clone()).await,
AdminRpc::Stats(opt) => self.handle_stats(opt.clone()).await,
_ => Err(Error::BadRpc("Invalid RPC".to_string())),
}
}
}
#[async_trait]
impl EndpointHandler<AdminRpc> for AdminRpcHandler {
async fn handle(self: &Arc<Self>, message: &AdminRpc, _from: NodeID) -> AdminRpc {
self.handle_rpc(message)
.await
.unwrap_or_else(|e| AdminRpc::Error(format!("{}", e)))
}
} }


@ -1,6 +1,5 @@
use std::cmp::max; //use std::cmp::max;
use std::collections::HashSet; //use std::collections::HashSet;
use std::net::SocketAddr;
use std::path::PathBuf; use std::path::PathBuf;
use serde::{Deserialize, Serialize}; use serde::{Deserialize, Serialize};
@ -8,11 +7,11 @@ use structopt::StructOpt;
use garage_util::data::Uuid; use garage_util::data::Uuid;
use garage_util::error::Error; use garage_util::error::Error;
use garage_util::time::*; //use garage_util::time::*;
use garage_rpc::membership::*;
use garage_rpc::ring::*; use garage_rpc::ring::*;
use garage_rpc::rpc_client::*; use garage_rpc::system::*;
use garage_rpc::*;
use garage_model::bucket_table::*; use garage_model::bucket_table::*;
use garage_model::key_table::*; use garage_model::key_table::*;
@ -298,54 +297,65 @@ pub struct StatsOpt {
pub async fn cli_cmd( pub async fn cli_cmd(
cmd: Command, cmd: Command,
membership_rpc_cli: RpcAddrClient<Message>, system_rpc_endpoint: &Endpoint<SystemRpc, ()>,
admin_rpc_cli: RpcAddrClient<AdminRpc>, admin_rpc_endpoint: &Endpoint<AdminRpc, ()>,
rpc_host: SocketAddr, rpc_host: NodeID,
) -> Result<(), Error> { ) -> Result<(), Error> {
match cmd { match cmd {
Command::Status => cmd_status(membership_rpc_cli, rpc_host).await, Command::Status => cmd_status(system_rpc_endpoint, rpc_host).await,
Command::Node(NodeOperation::Configure(configure_opt)) => { Command::Node(NodeOperation::Configure(configure_opt)) => {
cmd_configure(membership_rpc_cli, rpc_host, configure_opt).await cmd_configure(system_rpc_endpoint, rpc_host, configure_opt).await
} }
Command::Node(NodeOperation::Remove(remove_opt)) => { Command::Node(NodeOperation::Remove(remove_opt)) => {
cmd_remove(membership_rpc_cli, rpc_host, remove_opt).await cmd_remove(system_rpc_endpoint, rpc_host, remove_opt).await
} }
Command::Bucket(bo) => { Command::Bucket(bo) => {
cmd_admin(admin_rpc_cli, rpc_host, AdminRpc::BucketOperation(bo)).await cmd_admin(admin_rpc_endpoint, rpc_host, AdminRpc::BucketOperation(bo)).await
} }
Command::Key(ko) => cmd_admin(admin_rpc_cli, rpc_host, AdminRpc::KeyOperation(ko)).await, Command::Key(ko) => {
Command::Repair(ro) => cmd_admin(admin_rpc_cli, rpc_host, AdminRpc::LaunchRepair(ro)).await, cmd_admin(admin_rpc_endpoint, rpc_host, AdminRpc::KeyOperation(ko)).await
Command::Stats(so) => cmd_admin(admin_rpc_cli, rpc_host, AdminRpc::Stats(so)).await, }
Command::Repair(ro) => {
cmd_admin(admin_rpc_endpoint, rpc_host, AdminRpc::LaunchRepair(ro)).await
}
Command::Stats(so) => cmd_admin(admin_rpc_endpoint, rpc_host, AdminRpc::Stats(so)).await,
_ => unreachable!(), _ => unreachable!(),
} }
} }
pub async fn cmd_status( pub async fn cmd_status(rpc_cli: &Endpoint<SystemRpc, ()>, rpc_host: NodeID) -> Result<(), Error> {
rpc_cli: RpcAddrClient<Message>,
rpc_host: SocketAddr,
) -> Result<(), Error> {
let status = match rpc_cli let status = match rpc_cli
.call(&rpc_host, &Message::PullStatus, ADMIN_RPC_TIMEOUT) .call(&rpc_host, &SystemRpc::GetKnownNodes, PRIO_NORMAL)
.await?? .await?
{ {
Message::AdvertiseNodesUp(nodes) => nodes, SystemRpc::ReturnKnownNodes(nodes) => nodes,
resp => return Err(Error::Message(format!("Invalid RPC response: {:?}", resp))), resp => return Err(Error::Message(format!("Invalid RPC response: {:?}", resp))),
}; };
let config = match rpc_cli let config = match rpc_cli
.call(&rpc_host, &Message::PullConfig, ADMIN_RPC_TIMEOUT) .call(&rpc_host, &SystemRpc::PullConfig, PRIO_NORMAL)
.await?? .await?
{ {
Message::AdvertiseConfig(cfg) => cfg, SystemRpc::AdvertiseConfig(cfg) => cfg,
resp => return Err(Error::Message(format!("Invalid RPC response: {:?}", resp))), resp => return Err(Error::Message(format!("Invalid RPC response: {:?}", resp))),
}; };
println!("STATUS:");
for node in status {
println!("{:?}", node);
}
println!("CONFIG: (v{})", config.version);
for (id, node) in config.members {
println!("{} {:?}", hex::encode(id.as_slice()), node);
}
/* TODO
let (hostname_len, addr_len, tag_len, zone_len) = status let (hostname_len, addr_len, tag_len, zone_len) = status
.iter() .iter()
.map(|adv| (adv, config.members.get(&adv.id))) .map(|(id, addr, _)| (addr, config.members.get(&adv.id)))
.map(|(adv, cfg)| { .map(|(addr, cfg)| {
( (
adv.state_info.hostname.len(), 8,
adv.addr.to_string().len(), addr.to_string().len(),
cfg.map(|c| c.tag.len()).unwrap_or(0), cfg.map(|c| c.tag.len()).unwrap_or(0),
cfg.map(|c| c.zone.len()).unwrap_or(0), cfg.map(|c| c.zone.len()).unwrap_or(0),
) )
@ -355,13 +365,13 @@ pub async fn cmd_status(
}); });
println!("Healthy nodes:"); println!("Healthy nodes:");
for adv in status.iter().filter(|x| x.is_up) { for (id, addr, _) in status.iter().filter(|(id, addr, is_up)| is_up) {
if let Some(cfg) = config.members.get(&adv.id) { if let Some(cfg) = config.members.get(&adv.id) {
println!( println!(
"{id:?}\t{host}{h_pad}\t{addr}{a_pad}\t[{tag}]{t_pad}\t{zone}{z_pad}\t{capacity}", "{id:?}\t{host}{h_pad}\t{addr}{a_pad}\t[{tag}]{t_pad}\t{zone}{z_pad}\t{capacity}",
id = adv.id, id = id,
host = adv.state_info.hostname, host = "",
addr = adv.addr, addr = addr,
tag = cfg.tag, tag = cfg.tag,
zone = cfg.zone, zone = cfg.zone,
capacity = cfg.capacity_string(), capacity = cfg.capacity_string(),
@ -373,36 +383,36 @@ pub async fn cmd_status(
} else { } else {
println!( println!(
"{id:?}\t{h}{h_pad}\t{addr}{a_pad}\tUNCONFIGURED/REMOVED", "{id:?}\t{h}{h_pad}\t{addr}{a_pad}\tUNCONFIGURED/REMOVED",
id = adv.id, id = id,
h = adv.state_info.hostname, h = "",
addr = adv.addr, addr = addr,
h_pad = " ".repeat(hostname_len - adv.state_info.hostname.len()), h_pad = " ".repeat(hostname_len - "".len()),

If we are in `failure_case_2` we display the failed nodes section, but it is empty.

It can be reproduced by spawning 2 garage instances, then connecting the first instance to the second one.

e.g.:

$ ./result/bin/garage -c /tmp/garage.toml status
==== HEALTHY NODES ====
ID                 Hostname     Address                  Tag               Zone  Capacity
2f75a8a915f9cceb…  lheureduthe  127.0.0.1:3911           NO ROLE ASSIGNED
c393f0ab78d8ddcd…  lheureduthe  [::ffff:127.0.0.1]:3901  NO ROLE ASSIGNED

$ ./result/bin/garage status
==== HEALTHY NODES ====
ID                 Hostname     Address         Tag               Zone  Capacity
c393f0ab78d8ddcd…  lheureduthe  127.0.0.1:3901  NO ROLE ASSIGNED

==== FAILED NODES ====
ID  Hostname  Address  Tag  Zone  Capacity  Last seen
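One plausible reading of why the section can come out empty (a hedged sketch with simplified types, not the actual garage code, and omitting the "never seen" rows that the commented-out block also prints): the header is gated on a broader condition than the filter that produces the rows, so a configured-but-unknown node can trigger the header without contributing a row.

```rust
// Sketch only: `known` plays the role of the GetKnownNodes reply
// (node id, is_up); `configured` is the ids present in the network config.
fn print_failed_nodes(known: &[(u64, bool)], configured: &[u64]) {
    // failure_case_1: some known node is down.
    let failure_case_1 = known.iter().any(|(_, is_up)| !is_up);
    // failure_case_2: some configured node is missing from the known list.
    let failure_case_2 = configured
        .iter()
        .any(|id| !known.iter().any(|(kid, _)| kid == id));

    if failure_case_1 || failure_case_2 {
        println!("==== FAILED NODES ====");
        // Rows only cover nodes that are known *and* down, so when only
        // failure_case_2 holds, the header prints with nothing under it.
        for (id, _) in known.iter().filter(|(_, is_up)| !is_up) {
            println!("{:016x}…", id);
        }
    }
}

fn main() {
    // One healthy known node plus one configured-but-never-seen node:
    // the header prints, but no row follows.
    print_failed_nodes(&[(0xc393_f0ab, true)], &[0xc393_f0ab, 0x2f75_a8a9]);
}
```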
a_pad = " ".repeat(addr_len - adv.addr.to_string().len()), a_pad = " ".repeat(addr_len - addr.to_string().len()),
); );
} }
} }
let status_keys = status.iter().map(|x| x.id).collect::<HashSet<_>>(); let status_keys = status.iter().map(|(id, _, _)| id).collect::<HashSet<_>>();
let failure_case_1 = status.iter().any(|x| !x.is_up); let failure_case_1 = status.iter().any(|(_, _, is_up)| !is_up);
let failure_case_2 = config let failure_case_2 = config
.members .members
.iter() .iter()
.any(|(id, _)| !status_keys.contains(id)); .any(|(id, _)| !status_keys.contains(id));
if failure_case_1 || failure_case_2 { if failure_case_1 || failure_case_2 {
println!("\nFailed nodes:"); println!("\nFailed nodes:");
for adv in status.iter().filter(|x| !x.is_up) { for (id, addr) in status.iter().filter(|(_, _, is_up)| !is_up) {
if let Some(cfg) = config.members.get(&adv.id) { if let Some(cfg) = config.members.get(&id) {
println!( println!(
"{id:?}\t{host}{h_pad}\t{addr}{a_pad}\t[{tag}]{t_pad}\t{zone}{z_pad}\t{capacity}\tlast seen: {last_seen}s ago", "{id:?}\t{host}{h_pad}\t{addr}{a_pad}\t[{tag}]{t_pad}\t{zone}{z_pad}\t{capacity}\tlast seen: {last_seen}s ago",
id=adv.id, id=id,
host=adv.state_info.hostname, host="",
addr=adv.addr, addr=addr,
tag=cfg.tag, tag=cfg.tag,
zone=cfg.zone, zone=cfg.zone,
capacity=cfg.capacity_string(), capacity=cfg.capacity_string(),
last_seen=(now_msec() - adv.last_seen) / 1000, last_seen=(now_msec() - 0) / 1000,
h_pad=" ".repeat(hostname_len - adv.state_info.hostname.len()), h_pad=" ".repeat(hostname_len - "".len()),
a_pad=" ".repeat(addr_len - adv.addr.to_string().len()), a_pad=" ".repeat(addr_len - addr.to_string().len()),
t_pad=" ".repeat(tag_len - cfg.tag.len()), t_pad=" ".repeat(tag_len - cfg.tag.len()),
z_pad=" ".repeat(zone_len - cfg.zone.len()), z_pad=" ".repeat(zone_len - cfg.zone.len()),
); );
@ -411,12 +421,12 @@ pub async fn cmd_status(
let (tag_len, zone_len) = config let (tag_len, zone_len) = config
.members .members
.iter() .iter()
.filter(|(&id, _)| !status.iter().any(|x| x.id == id)) .filter(|(&id, _)| !status.iter().any(|(xid, _, _)| xid == id))
.map(|(_, cfg)| (cfg.tag.len(), cfg.zone.len())) .map(|(_, cfg)| (cfg.tag.len(), cfg.zone.len()))
.fold((0, 0), |(t, z), (mt, mz)| (max(t, mt), max(z, mz))); .fold((0, 0), |(t, z), (mt, mz)| (max(t, mt), max(z, mz)));
for (id, cfg) in config.members.iter() { for (id, cfg) in config.members.iter() {
if !status.iter().any(|x| x.id == *id) { if !status.iter().any(|(xid, _, _)| xid == *id) {
println!( println!(
"{id:?}\t{tag}{t_pad}\t{zone}{z_pad}\t{capacity}\tnever seen", "{id:?}\t{tag}{t_pad}\t{zone}{z_pad}\t{capacity}\tnever seen",
id = id, id = id,
@ -429,6 +439,7 @@ pub async fn cmd_status(
} }
} }
} }
*/
Ok(()) Ok(())
} }
@ -455,25 +466,30 @@ pub fn find_matching_node(
} }
pub async fn cmd_configure( pub async fn cmd_configure(
rpc_cli: RpcAddrClient<Message>, rpc_cli: &Endpoint<SystemRpc, ()>,
rpc_host: SocketAddr, rpc_host: NodeID,
args: ConfigureNodeOpt, args: ConfigureNodeOpt,
) -> Result<(), Error> { ) -> Result<(), Error> {
let status = match rpc_cli let status = match rpc_cli
.call(&rpc_host, &Message::PullStatus, ADMIN_RPC_TIMEOUT) .call(&rpc_host, &SystemRpc::GetKnownNodes, PRIO_NORMAL)
.await?? .await?
{ {
Message::AdvertiseNodesUp(nodes) => nodes, SystemRpc::ReturnKnownNodes(nodes) => nodes,
resp => return Err(Error::Message(format!("Invalid RPC response: {:?}", resp))), resp => return Err(Error::Message(format!("Invalid RPC response: {:?}", resp))),
}; };
let added_node = find_matching_node(status.iter().map(|x| x.id), &args.node_id)?; let added_node = find_matching_node(
status
.iter()
.map(|(id, _, _)| Uuid::try_from(id.as_ref()).unwrap()),
&args.node_id,
)?;
let mut config = match rpc_cli let mut config = match rpc_cli
.call(&rpc_host, &Message::PullConfig, ADMIN_RPC_TIMEOUT) .call(&rpc_host, &SystemRpc::PullConfig, PRIO_NORMAL)
.await?? .await?
{ {
Message::AdvertiseConfig(cfg) => cfg, SystemRpc::AdvertiseConfig(cfg) => cfg,
resp => return Err(Error::Message(format!("Invalid RPC response: {:?}", resp))), resp => return Err(Error::Message(format!("Invalid RPC response: {:?}", resp))),
}; };
@ -527,25 +543,21 @@ pub async fn cmd_configure(
config.version += 1; config.version += 1;
rpc_cli rpc_cli
.call( .call(&rpc_host, &SystemRpc::AdvertiseConfig(config), PRIO_NORMAL)
&rpc_host, .await?;
&Message::AdvertiseConfig(config),
ADMIN_RPC_TIMEOUT,
)
.await??;
Ok(()) Ok(())
} }
pub async fn cmd_remove( pub async fn cmd_remove(
rpc_cli: RpcAddrClient<Message>, rpc_cli: &Endpoint<SystemRpc, ()>,
rpc_host: SocketAddr, rpc_host: NodeID,
args: RemoveNodeOpt, args: RemoveNodeOpt,
) -> Result<(), Error> { ) -> Result<(), Error> {
let mut config = match rpc_cli let mut config = match rpc_cli
.call(&rpc_host, &Message::PullConfig, ADMIN_RPC_TIMEOUT) .call(&rpc_host, &SystemRpc::PullConfig, PRIO_NORMAL)
.await?? .await?
{ {
Message::AdvertiseConfig(cfg) => cfg, SystemRpc::AdvertiseConfig(cfg) => cfg,
resp => return Err(Error::Message(format!("Invalid RPC response: {:?}", resp))), resp => return Err(Error::Message(format!("Invalid RPC response: {:?}", resp))),
}; };
@ -562,21 +574,17 @@ pub async fn cmd_remove(
config.version += 1; config.version += 1;
rpc_cli rpc_cli
.call( .call(&rpc_host, &SystemRpc::AdvertiseConfig(config), PRIO_NORMAL)
&rpc_host, .await?;
&Message::AdvertiseConfig(config),
ADMIN_RPC_TIMEOUT,
)
.await??;
Ok(()) Ok(())
} }
pub async fn cmd_admin( pub async fn cmd_admin(
rpc_cli: RpcAddrClient<AdminRpc>, rpc_cli: &Endpoint<AdminRpc, ()>,
rpc_host: SocketAddr, rpc_host: NodeID,
args: AdminRpc, args: AdminRpc,
) -> Result<(), Error> { ) -> Result<(), Error> {
match rpc_cli.call(&rpc_host, args, ADMIN_RPC_TIMEOUT).await?? { match rpc_cli.call(&rpc_host, &args, PRIO_NORMAL).await? {
AdminRpc::Ok(msg) => { AdminRpc::Ok(msg) => {
println!("{}", msg); println!("{}", msg);
} }


@ -10,16 +10,16 @@ mod repair;
mod server; mod server;
use std::net::SocketAddr; use std::net::SocketAddr;
use std::sync::Arc;
use std::time::Duration;
use structopt::StructOpt; use structopt::StructOpt;
use garage_util::config::TlsConfig; use netapp::util::parse_peer_addr;
use netapp::NetworkKey;
use garage_util::error::Error; use garage_util::error::Error;
use garage_rpc::membership::*; use garage_rpc::system::*;
use garage_rpc::rpc_client::*; use garage_rpc::*;
use admin_rpc::*; use admin_rpc::*;
use cli::*; use cli::*;
@ -27,16 +27,14 @@ use cli::*;
#[derive(StructOpt, Debug)] #[derive(StructOpt, Debug)]
#[structopt(name = "garage")] #[structopt(name = "garage")]
struct Opt { struct Opt {
/// RPC connect to this host to execute client operations /// Host to connect to for admin operations, in the format:
#[structopt(short = "h", long = "rpc-host", default_value = "127.0.0.1:3901", parse(try_from_str = parse_address))] /// <public-key>@<ip>:<port>
pub rpc_host: SocketAddr, #[structopt(short = "h", long = "rpc-host")]
pub rpc_host: Option<String>,
#[structopt(long = "ca-cert")] /// RPC secret network key for admin operations
pub ca_cert: Option<String>, #[structopt(short = "s", long = "rpc-secret")]
#[structopt(long = "client-cert")] pub rpc_secret: Option<String>,
pub client_cert: Option<String>,
#[structopt(long = "client-key")]
pub client_key: Option<String>,
#[structopt(subcommand)] #[structopt(subcommand)]
cmd: Command, cmd: Command,
@ -66,33 +64,20 @@ async fn main() {
} }
async fn cli_command(opt: Opt) -> Result<(), Error> { async fn cli_command(opt: Opt) -> Result<(), Error> {
let tls_config = match (opt.ca_cert, opt.client_cert, opt.client_key) { let net_key_hex_str = &opt.rpc_secret.expect("No RPC secret provided");
(Some(ca_cert), Some(client_cert), Some(client_key)) => Some(TlsConfig { let network_key = NetworkKey::from_slice(
ca_cert, &hex::decode(net_key_hex_str).expect("Invalid RPC secret key (bad hex)")[..],
node_cert: client_cert, )
node_key: client_key, .expect("Invalid RPC secret provided (wrong length)");
}), let (_pk, sk) = sodiumoxide::crypto::sign::ed25519::gen_keypair();
(None, None, None) => None,
_ => {
warn!("Missing one of: --ca-cert, --node-cert, --node-key. Not using TLS.");
None
}
};
let rpc_http_cli = let netapp = NetApp::new(network_key, sk);
Arc::new(RpcHttpClient::new(8, &tls_config).expect("Could not create RPC client")); let (id, addr) =
let membership_rpc_cli = parse_peer_addr(&opt.rpc_host.expect("No RPC host provided")).expect("Invalid RPC host");
RpcAddrClient::new(rpc_http_cli.clone(), MEMBERSHIP_RPC_PATH.to_string()); netapp.clone().try_connect(addr, id).await?;
let admin_rpc_cli = RpcAddrClient::new(rpc_http_cli.clone(), ADMIN_RPC_PATH.to_string());
cli_cmd(opt.cmd, membership_rpc_cli, admin_rpc_cli, opt.rpc_host).await let system_rpc_endpoint = netapp.endpoint::<SystemRpc, ()>(SYSTEM_RPC_PATH.into());
} let admin_rpc_endpoint = netapp.endpoint::<AdminRpc, ()>(ADMIN_RPC_PATH.into());
fn parse_address(address: &str) -> Result<SocketAddr, String> { cli_cmd(opt.cmd, &system_rpc_endpoint, &admin_rpc_endpoint, id).await
use std::net::ToSocketAddrs;
address
.to_socket_addrs()
.map_err(|_| format!("Could not resolve {}", address))?
.next()
.ok_or_else(|| format!("Could not resolve {}", address))
} }


@ -1,7 +1,5 @@
use std::path::PathBuf; use std::path::PathBuf;
use std::sync::Arc;
use futures_util::future::*;
use tokio::sync::watch; use tokio::sync::watch;
use garage_util::background::*; use garage_util::background::*;
@ -10,21 +8,10 @@ use garage_util::error::Error;
use garage_api::run_api_server; use garage_api::run_api_server;
use garage_model::garage::Garage; use garage_model::garage::Garage;
use garage_rpc::rpc_server::RpcServer;
use garage_web::run_web_server; use garage_web::run_web_server;
use crate::admin_rpc::*; use crate::admin_rpc::*;
async fn shutdown_signal(send_cancel: watch::Sender<bool>) -> Result<(), Error> {
// Wait for the CTRL+C signal
tokio::signal::ctrl_c()
.await
.expect("failed to install CTRL+C signal handler");
info!("Received CTRL+C, shutting down.");
send_cancel.send(true)?;
Ok(())
}
async fn wait_from(mut chan: watch::Receiver<bool>) { async fn wait_from(mut chan: watch::Receiver<bool>) {
while !*chan.borrow() { while !*chan.borrow() {
if chan.changed().await.is_err() { if chan.changed().await.is_err() {
@ -47,52 +34,46 @@ pub async fn run_server(config_file: PathBuf) -> Result<(), Error> {
.open() .open()
.expect("Unable to open sled DB"); .expect("Unable to open sled DB");
info!("Initialize RPC server...");
let mut rpc_server = RpcServer::new(config.rpc_bind_addr, config.rpc_tls.clone());
info!("Initializing background runner..."); info!("Initializing background runner...");
let (send_cancel, watch_cancel) = watch::channel(false); let watch_cancel = netapp::util::watch_ctrl_c();
let (background, await_background_done) = BackgroundRunner::new(16, watch_cancel.clone()); let (background, await_background_done) = BackgroundRunner::new(16, watch_cancel.clone());
info!("Initializing Garage main data store..."); info!("Initializing Garage main data store...");
let garage = Garage::new(config.clone(), db, background, &mut rpc_server); let garage = Garage::new(config.clone(), db, background);
let bootstrap = garage.system.clone().bootstrap(
config.bootstrap_peers, let run_system = tokio::spawn(garage.system.clone().run(watch_cancel.clone()));
config.consul_host,
config.consul_service_name,
);
info!("Crate admin RPC handler..."); info!("Crate admin RPC handler...");
AdminRpcHandler::new(garage.clone()).register_handler(&mut rpc_server); AdminRpcHandler::new(garage.clone());
info!("Initializing RPC and API servers..."); info!("Initializing API server...");
let run_rpc_server = Arc::new(rpc_server).run(wait_from(watch_cancel.clone())); let api_server = tokio::spawn(run_api_server(
let api_server = run_api_server(garage.clone(), wait_from(watch_cancel.clone())); garage.clone(),
let web_server = run_web_server(garage, wait_from(watch_cancel.clone())); wait_from(watch_cancel.clone()),
));
futures::try_join!( info!("Initializing web server...");
bootstrap.map(|()| { let web_server = tokio::spawn(run_web_server(
info!("Bootstrap done"); garage.clone(),
Ok(()) wait_from(watch_cancel.clone()),
}), ));
run_rpc_server.map(|rv| {
info!("RPC server exited"); // Stuff runs
rv
}), // When a cancel signal is sent, stuff stops
api_server.map(|rv| { if let Err(e) = api_server.await? {
info!("API server exited"); warn!("API server exited with error: {}", e);
rv }
}), if let Err(e) = web_server.await? {
web_server.map(|rv| { warn!("Web server exited with error: {}", e);
info!("Web server exited"); }
rv
}), // Remove RPC handlers for system to break reference cycles
await_background_done.map(|rv| { garage.system.netapp.drop_all_handlers();
info!("Background runner exited: {:?}", rv);
Ok(()) // Await for last parts to end
}), run_system.await?;
shutdown_signal(send_cancel), await_background_done.await?;
)?;
info!("Cleaning up..."); info!("Cleaning up...");


@ -1,6 +1,6 @@
[package] [package]
name = "garage_model" name = "garage_model"
version = "0.3.0" version = "0.4.0"
authors = ["Alex Auvolat <alex@adnab.me>"] authors = ["Alex Auvolat <alex@adnab.me>"]
edition = "2018" edition = "2018"
license = "AGPL-3.0" license = "AGPL-3.0"
@ -13,10 +13,11 @@ path = "lib.rs"
# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html # See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
[dependencies] [dependencies]
garage_rpc = { version = "0.3.0", path = "../rpc" } garage_rpc = { version = "0.4.0", path = "../rpc" }
garage_table = { version = "0.3.0", path = "../table" } garage_table = { version = "0.4.0", path = "../table" }
garage_util = { version = "0.3.0", path = "../util" } garage_util = { version = "0.4.0", path = "../util" }
async-trait = "0.1.7"
arc-swap = "1.0" arc-swap = "1.0"
hex = "0.4" hex = "0.4"
log = "0.4" log = "0.4"
@ -31,3 +32,5 @@ serde_bytes = "0.11"
futures = "0.3" futures = "0.3"
futures-util = "0.3" futures-util = "0.3"
tokio = { version = "1.0", default-features = false, features = ["rt", "rt-multi-thread", "io-util", "net", "time", "macros", "sync", "signal", "fs"] } tokio = { version = "1.0", default-features = false, features = ["rt", "rt-multi-thread", "io-util", "net", "time", "macros", "sync", "signal", "fs"] }
netapp = { version = "0.3.0", git = "https://git.deuxfleurs.fr/lx/netapp" }


@ -3,6 +3,7 @@ use std::sync::Arc;
use std::time::Duration; use std::time::Duration;
use arc_swap::ArcSwapOption; use arc_swap::ArcSwapOption;
use async_trait::async_trait;
use futures::future::*; use futures::future::*;
use futures::select; use futures::select;
use serde::{Deserialize, Serialize}; use serde::{Deserialize, Serialize};
@ -14,9 +15,8 @@ use garage_util::data::*;
use garage_util::error::Error; use garage_util::error::Error;
use garage_util::time::*; use garage_util::time::*;
use garage_rpc::membership::System; use garage_rpc::system::System;
use garage_rpc::rpc_client::*; use garage_rpc::*;
use garage_rpc::rpc_server::*;
use garage_table::replication::{TableReplication, TableShardedReplication}; use garage_table::replication::{TableReplication, TableShardedReplication};
@ -36,8 +36,9 @@ const RESYNC_RETRY_TIMEOUT: Duration = Duration::from_secs(10);
/// RPC messages used to share blocks of data between nodes /// RPC messages used to share blocks of data between nodes
#[derive(Debug, Serialize, Deserialize)] #[derive(Debug, Serialize, Deserialize)]
pub enum Message { pub enum BlockRpc {
Ok, Ok,
Error(String),
/// Message to ask for a block of data, by hash /// Message to ask for a block of data, by hash
GetBlock(Hash), GetBlock(Hash),
/// Message to send a block of data, either because requested, of for first delivery of new /// Message to send a block of data, either because requested, of for first delivery of new
@ -60,7 +61,9 @@ pub struct PutBlockMessage {
pub data: Vec<u8>, pub data: Vec<u8>,
} }
impl RpcMessage for Message {} impl Message for BlockRpc {
type Response = BlockRpc;
}
/// The block manager, handling block exchange between nodes, and block storage on local node /// The block manager, handling block exchange between nodes, and block storage on local node
pub struct BlockManager { pub struct BlockManager {
@ -77,7 +80,7 @@ pub struct BlockManager {
resync_notify: Notify, resync_notify: Notify,
system: Arc<System>, system: Arc<System>,
rpc_client: Arc<RpcClient<Message>>, endpoint: Arc<Endpoint<BlockRpc, Self>>,
pub(crate) garage: ArcSwapOption<Garage>, pub(crate) garage: ArcSwapOption<Garage>,
} }
@ -87,7 +90,6 @@ impl BlockManager {
data_dir: PathBuf, data_dir: PathBuf,
replication: TableShardedReplication, replication: TableShardedReplication,
system: Arc<System>, system: Arc<System>,
rpc_server: &mut RpcServer,
) -> Arc<Self> { ) -> Arc<Self> {
let rc = db let rc = db
.open_tree("block_local_rc") .open_tree("block_local_rc")
@ -97,8 +99,7 @@ impl BlockManager {
.open_tree("block_local_resync_queue") .open_tree("block_local_resync_queue")
.expect("Unable to open block_local_resync_queue tree"); .expect("Unable to open block_local_resync_queue tree");
let rpc_path = "block_manager"; let endpoint = system.netapp.endpoint(format!("garage_model/block.rs/Rpc"));
let rpc_client = system.rpc_client::<Message>(rpc_path);
let block_manager = Arc::new(Self { let block_manager = Arc::new(Self {
replication, replication,
@ -108,35 +109,19 @@ impl BlockManager {
resync_queue, resync_queue,
resync_notify: Notify::new(), resync_notify: Notify::new(),
system, system,
rpc_client, endpoint,
garage: ArcSwapOption::from(None), garage: ArcSwapOption::from(None),
}); });
block_manager block_manager.endpoint.set_handler(block_manager.clone());
.clone()
.register_handler(rpc_server, rpc_path.into());
block_manager block_manager
} }
fn register_handler(self: Arc<Self>, rpc_server: &mut RpcServer, path: String) { async fn handle_rpc(self: Arc<Self>, msg: &BlockRpc) -> Result<BlockRpc, Error> {
let self2 = self.clone();
rpc_server.add_handler::<Message, _, _>(path, move |msg, _addr| {
let self2 = self2.clone();
async move { self2.handle(&msg).await }
});
let self2 = self.clone();
self.rpc_client
.set_local_handler(self.system.id, move |msg| {
let self2 = self2.clone();
async move { self2.handle(&msg).await }
});
}
async fn handle(self: Arc<Self>, msg: &Message) -> Result<Message, Error> {
match msg { match msg {
Message::PutBlock(m) => self.write_block(&m.hash, &m.data).await, BlockRpc::PutBlock(m) => self.write_block(&m.hash, &m.data).await,
Message::GetBlock(h) => self.read_block(h).await, BlockRpc::GetBlock(h) => self.read_block(h).await,
Message::NeedBlockQuery(h) => self.need_block(h).await.map(Message::NeedBlockReply), BlockRpc::NeedBlockQuery(h) => self.need_block(h).await.map(BlockRpc::NeedBlockReply),
_ => Err(Error::BadRpc("Unexpected RPC message".to_string())), _ => Err(Error::BadRpc("Unexpected RPC message".to_string())),
} }
} }
@ -157,7 +142,7 @@ impl BlockManager {
} }
/// Write a block to disk /// Write a block to disk
async fn write_block(&self, hash: &Hash, data: &[u8]) -> Result<Message, Error> { async fn write_block(&self, hash: &Hash, data: &[u8]) -> Result<BlockRpc, Error> {
let _lock = self.data_dir_lock.lock().await; let _lock = self.data_dir_lock.lock().await;
let mut path = self.block_dir(hash); let mut path = self.block_dir(hash);
@ -165,18 +150,18 @@ impl BlockManager {
path.push(hex::encode(hash)); path.push(hex::encode(hash));
if fs::metadata(&path).await.is_ok() { if fs::metadata(&path).await.is_ok() {
return Ok(Message::Ok); return Ok(BlockRpc::Ok);
} }
let mut f = fs::File::create(path).await?; let mut f = fs::File::create(path).await?;
f.write_all(data).await?; f.write_all(data).await?;
drop(f); drop(f);
Ok(Message::Ok) Ok(BlockRpc::Ok)
} }
/// Read block from disk, verifying it's integrity /// Read block from disk, verifying it's integrity
async fn read_block(&self, hash: &Hash) -> Result<Message, Error> { async fn read_block(&self, hash: &Hash) -> Result<BlockRpc, Error> {
let path = self.block_path(hash); let path = self.block_path(hash);
let mut f = match fs::File::open(&path).await { let mut f = match fs::File::open(&path).await {
@ -204,7 +189,7 @@ impl BlockManager {
return Err(Error::CorruptData(*hash)); return Err(Error::CorruptData(*hash));
} }
Ok(Message::PutBlock(PutBlockMessage { hash: *hash, data })) Ok(BlockRpc::PutBlock(PutBlockMessage { hash: *hash, data }))
} }
/// Check if this node should have a block, but don't actually have it /// Check if this node should have a block, but don't actually have it
@ -346,17 +331,22 @@ impl BlockManager {
} }
who.retain(|id| *id != self.system.id); who.retain(|id| *id != self.system.id);
let msg = Arc::new(Message::NeedBlockQuery(*hash)); let msg = Arc::new(BlockRpc::NeedBlockQuery(*hash));
let who_needs_fut = who.iter().map(|to| { let who_needs_fut = who.iter().map(|to| {
self.rpc_client self.system.rpc.call_arc(
.call_arc(*to, msg.clone(), NEED_BLOCK_QUERY_TIMEOUT) &self.endpoint,
*to,
msg.clone(),
RequestStrategy::with_priority(PRIO_NORMAL)
.with_timeout(NEED_BLOCK_QUERY_TIMEOUT),
)
}); });
let who_needs_resps = join_all(who_needs_fut).await; let who_needs_resps = join_all(who_needs_fut).await;
let mut need_nodes = vec![]; let mut need_nodes = vec![];
for (node, needed) in who.iter().zip(who_needs_resps.into_iter()) { for (node, needed) in who.iter().zip(who_needs_resps.into_iter()) {
match needed? { match needed? {
Message::NeedBlockReply(needed) => { BlockRpc::NeedBlockReply(needed) => {
if needed { if needed {
need_nodes.push(*node); need_nodes.push(*node);
} }
@ -377,11 +367,14 @@ impl BlockManager {
); );
let put_block_message = self.read_block(hash).await?; let put_block_message = self.read_block(hash).await?;
self.rpc_client self.system
.rpc
.try_call_many( .try_call_many(
&self.endpoint,
&need_nodes[..], &need_nodes[..],
put_block_message, put_block_message,
RequestStrategy::with_quorum(need_nodes.len()) RequestStrategy::with_priority(PRIO_NORMAL)
.with_quorum(need_nodes.len())
.with_timeout(BLOCK_RW_TIMEOUT), .with_timeout(BLOCK_RW_TIMEOUT),
) )
.await?; .await?;
@ -413,18 +406,21 @@ impl BlockManager {
pub async fn rpc_get_block(&self, hash: &Hash) -> Result<Vec<u8>, Error> { pub async fn rpc_get_block(&self, hash: &Hash) -> Result<Vec<u8>, Error> {
let who = self.replication.read_nodes(&hash); let who = self.replication.read_nodes(&hash);
let resps = self let resps = self
.rpc_client .system
.rpc
.try_call_many( .try_call_many(
&self.endpoint,
&who[..], &who[..],
Message::GetBlock(*hash), BlockRpc::GetBlock(*hash),
RequestStrategy::with_quorum(1) RequestStrategy::with_priority(PRIO_NORMAL)
.with_quorum(1)
.with_timeout(BLOCK_RW_TIMEOUT) .with_timeout(BLOCK_RW_TIMEOUT)
.interrupt_after_quorum(true), .interrupt_after_quorum(true),
) )
.await?; .await?;
for resp in resps { for resp in resps {
if let Message::PutBlock(msg) = resp { if let BlockRpc::PutBlock(msg) = resp {
return Ok(msg.data); return Ok(msg.data);
} }
} }
@ -437,11 +433,14 @@ impl BlockManager {
/// Send block to nodes that should have it /// Send block to nodes that should have it
pub async fn rpc_put_block(&self, hash: Hash, data: Vec<u8>) -> Result<(), Error> { pub async fn rpc_put_block(&self, hash: Hash, data: Vec<u8>) -> Result<(), Error> {
let who = self.replication.write_nodes(&hash); let who = self.replication.write_nodes(&hash);
self.rpc_client self.system
.rpc
.try_call_many( .try_call_many(
&self.endpoint,
&who[..], &who[..],
Message::PutBlock(PutBlockMessage { hash, data }), BlockRpc::PutBlock(PutBlockMessage { hash, data }),
RequestStrategy::with_quorum(self.replication.write_quorum()) RequestStrategy::with_priority(PRIO_NORMAL)
.with_quorum(self.replication.write_quorum())
.with_timeout(BLOCK_RW_TIMEOUT), .with_timeout(BLOCK_RW_TIMEOUT),
) )
.await?; .await?;
@ -531,6 +530,16 @@ impl BlockManager {
} }
} }
#[async_trait]
impl EndpointHandler<BlockRpc> for BlockManager {
async fn handle(self: &Arc<Self>, message: &BlockRpc, _from: NodeID) -> BlockRpc {
self.clone()
.handle_rpc(message)
.await
.unwrap_or_else(|e| BlockRpc::Error(format!("{}", e)))
}
}
fn u64_from_be_bytes<T: AsRef<[u8]>>(bytes: T) -> u64 { fn u64_from_be_bytes<T: AsRef<[u8]>>(bytes: T) -> u64 {
assert!(bytes.as_ref().len() == 8); assert!(bytes.as_ref().len() == 8);
let mut x8 = [0u8; 8]; let mut x8 = [0u8; 8];

View file

@ -1,11 +1,11 @@
use std::sync::Arc; use std::sync::Arc;
use netapp::NetworkKey;
use garage_util::background::*; use garage_util::background::*;
use garage_util::config::*; use garage_util::config::*;
use garage_rpc::membership::System; use garage_rpc::system::System;
use garage_rpc::rpc_client::RpcHttpClient;
use garage_rpc::rpc_server::RpcServer;
use garage_table::replication::ReplicationMode; use garage_table::replication::ReplicationMode;
use garage_table::replication::TableFullReplication; use garage_table::replication::TableFullReplication;
@ -45,26 +45,25 @@ pub struct Garage {
impl Garage { impl Garage {
/// Create and run garage /// Create and run garage
pub fn new( pub fn new(config: Config, db: sled::Db, background: Arc<BackgroundRunner>) -> Arc<Self> {
config: Config, let network_key = NetworkKey::from_slice(
db: sled::Db, &hex::decode(&config.rpc_secret).expect("Invalid RPC secret key")[..],
background: Arc<BackgroundRunner>, )
rpc_server: &mut RpcServer, .expect("Invalid RPC secret key");
) -> Arc<Self> {
let replication_mode = ReplicationMode::parse(&config.replication_mode) let replication_mode = ReplicationMode::parse(&config.replication_mode)
.expect("Invalid replication_mode in config file."); .expect("Invalid replication_mode in config file.");
info!("Initialize membership management system..."); info!("Initialize membership management system...");
let rpc_http_client = Arc::new(
RpcHttpClient::new(config.max_concurrent_rpc_requests, &config.rpc_tls)
.expect("Could not create RPC client"),
);
let system = System::new( let system = System::new(
network_key,
config.metadata_dir.clone(), config.metadata_dir.clone(),
rpc_http_client,
background.clone(), background.clone(),
rpc_server,
replication_mode.replication_factor(), replication_mode.replication_factor(),
config.rpc_bind_addr,
config.bootstrap_peers.clone(),
config.consul_host.clone(),
config.consul_service_name.clone(),
); );
let data_rep_param = TableShardedReplication { let data_rep_param = TableShardedReplication {
@ -87,13 +86,8 @@ impl Garage {
}; };
info!("Initialize block manager..."); info!("Initialize block manager...");
let block_manager = BlockManager::new( let block_manager =
&db, BlockManager::new(&db, config.data_dir.clone(), data_rep_param, system.clone());
config.data_dir.clone(),
data_rep_param,
system.clone(),
rpc_server,
);
info!("Initialize block_ref_table..."); info!("Initialize block_ref_table...");
let block_ref_table = Table::new( let block_ref_table = Table::new(
@ -104,7 +98,6 @@ impl Garage {
system.clone(), system.clone(),
&db, &db,
"block_ref".to_string(), "block_ref".to_string(),
rpc_server,
); );
info!("Initialize version_table..."); info!("Initialize version_table...");
@ -117,7 +110,6 @@ impl Garage {
system.clone(), system.clone(),
&db, &db,
"version".to_string(), "version".to_string(),
rpc_server,
); );
info!("Initialize object_table..."); info!("Initialize object_table...");
@ -130,7 +122,6 @@ impl Garage {
system.clone(), system.clone(),
&db, &db,
"object".to_string(), "object".to_string(),
rpc_server,
); );
info!("Initialize bucket_table..."); info!("Initialize bucket_table...");
@ -140,7 +131,6 @@ impl Garage {
system.clone(), system.clone(),
&db, &db,
"bucket".to_string(), "bucket".to_string(),
rpc_server,
); );
info!("Initialize key_table_table..."); info!("Initialize key_table_table...");
@ -150,7 +140,6 @@ impl Garage {
system.clone(), system.clone(),
&db, &db,
"key".to_string(), "key".to_string(),
rpc_server,
); );
info!("Initialize Garage..."); info!("Initialize Garage...");


@ -1,6 +1,6 @@
[package] [package]
name = "garage_rpc" name = "garage_rpc"
version = "0.3.0" version = "0.4.0"
authors = ["Alex Auvolat <alex@adnab.me>"] authors = ["Alex Auvolat <alex@adnab.me>"]
edition = "2018" edition = "2018"
license = "AGPL-3.0" license = "AGPL-3.0"
@ -13,7 +13,7 @@ path = "lib.rs"
# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html # See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
[dependencies] [dependencies]
garage_util = { version = "0.3.0", path = "../util" } garage_util = { version = "0.4.0", path = "../util" }
garage_rpc_021 = { package = "garage_rpc", version = "0.2.1" } garage_rpc_021 = { package = "garage_rpc", version = "0.2.1" }
@ -22,7 +22,10 @@ bytes = "1.0"
gethostname = "0.2" gethostname = "0.2"
hex = "0.4" hex = "0.4"
log = "0.4" log = "0.4"
rand = "0.8"
sodiumoxide = { version = "0.2.5-0", package = "kuska-sodiumoxide" }
async-trait = "0.1.7"
rmp-serde = "0.15" rmp-serde = "0.15"
serde = { version = "1.0", default-features = false, features = ["derive", "rc"] } serde = { version = "1.0", default-features = false, features = ["derive", "rc"] }
serde_json = "1.0" serde_json = "1.0"
@ -32,11 +35,6 @@ futures-util = "0.3"
tokio = { version = "1.0", default-features = false, features = ["rt", "rt-multi-thread", "io-util", "net", "time", "macros", "sync", "signal", "fs"] } tokio = { version = "1.0", default-features = false, features = ["rt", "rt-multi-thread", "io-util", "net", "time", "macros", "sync", "signal", "fs"] }
tokio-stream = { version = "0.1", features = ["net"] } tokio-stream = { version = "0.1", features = ["net"] }
http = "0.2" netapp = { version = "0.3.0", git = "https://git.deuxfleurs.fr/lx/netapp" }
hyper = { version = "0.14", features = ["full"] } hyper = "0.14"
hyper-rustls = { version = "0.22", default-features = false }
rustls = "0.19"
tokio-rustls = "0.22"
webpki = "0.21"


@ -4,10 +4,10 @@
extern crate log; extern crate log;
mod consul; mod consul;
pub(crate) mod tls_util;
pub mod membership;
pub mod ring; pub mod ring;
pub mod system;
pub mod rpc_client; pub mod rpc_helper;
pub mod rpc_server;
pub use rpc_helper::*;


@ -1,722 +0,0 @@
//! Module containing structs related to membership management
use std::collections::HashMap;
use std::fmt::Write as FmtWrite;
use std::io::{Read, Write};
use std::net::{IpAddr, SocketAddr};
use std::path::{Path, PathBuf};
use std::sync::atomic::{AtomicUsize, Ordering};
use std::sync::Arc;
use std::time::Duration;
use futures::future::join_all;
use futures::select;
use futures_util::future::*;
use serde::{Deserialize, Serialize};
use tokio::sync::watch;
use tokio::sync::Mutex;
use garage_util::background::BackgroundRunner;
use garage_util::data::*;
use garage_util::error::Error;
use garage_util::persister::Persister;
use garage_util::time::*;
use crate::consul::get_consul_nodes;
use crate::ring::*;
use crate::rpc_client::*;
use crate::rpc_server::*;
const PING_INTERVAL: Duration = Duration::from_secs(10);
const DISCOVERY_INTERVAL: Duration = Duration::from_secs(60);
const PING_TIMEOUT: Duration = Duration::from_secs(2);
const MAX_FAILURES_BEFORE_CONSIDERED_DOWN: usize = 5;
/// RPC endpoint used for calls related to membership
pub const MEMBERSHIP_RPC_PATH: &str = "_membership";
/// RPC messages related to membership
#[derive(Debug, Serialize, Deserialize)]
pub enum Message {
/// Response to successfull advertisements
Ok,
/// Message sent to detect other nodes status
Ping(PingMessage),
/// Ask other node for the nodes it knows. Answered with AdvertiseNodesUp
PullStatus,
/// Ask other node its config. Answered with AdvertiseConfig
PullConfig,
/// Advertisement of nodes the host knows up. Sent spontanously or in response to PullStatus
AdvertiseNodesUp(Vec<AdvertisedNode>),
/// Advertisement of nodes config. Sent spontanously or in response to PullConfig
AdvertiseConfig(NetworkConfig),
}
impl RpcMessage for Message {}
/// A ping, containing informations about status and config
#[derive(Debug, Serialize, Deserialize)]
pub struct PingMessage {
id: Uuid,
rpc_port: u16,
status_hash: Hash,
config_version: u64,
state_info: StateInfo,
}
/// A node advertisement
#[derive(Clone, Debug, Serialize, Deserialize)]
pub struct AdvertisedNode {
/// Id of the node this advertisement relates to
pub id: Uuid,
/// IP and port of the node
pub addr: SocketAddr,
/// Is the node considered up
pub is_up: bool,
/// When was the node last seen up, in milliseconds since UNIX epoch
pub last_seen: u64,
pub state_info: StateInfo,
}
/// This node's membership manager
pub struct System {
/// The id of this node
pub id: Uuid,
persist_config: Persister<NetworkConfig>,
persist_status: Persister<Vec<AdvertisedNode>>,
rpc_local_port: u16,
state_info: StateInfo,
rpc_http_client: Arc<RpcHttpClient>,
rpc_client: Arc<RpcClient<Message>>,
replication_factor: usize,
pub(crate) status: watch::Receiver<Arc<Status>>,
/// The ring
pub ring: watch::Receiver<Arc<Ring>>,
update_lock: Mutex<Updaters>,
/// The job runner of this node
pub background: Arc<BackgroundRunner>,
}
struct Updaters {
update_status: watch::Sender<Arc<Status>>,
update_ring: watch::Sender<Arc<Ring>>,
}
/// The status of each nodes, viewed by this node
#[derive(Debug, Clone)]
pub struct Status {
/// Mapping of each node id to its known status
pub nodes: HashMap<Uuid, Arc<StatusEntry>>,
/// Hash of `nodes`, used to detect when nodes have different views of the cluster
pub hash: Hash,
}
/// The status of a single node
#[derive(Debug)]
pub struct StatusEntry {
/// The IP and port used to connect to this node
pub addr: SocketAddr,
/// Last time this node was seen
pub last_seen: u64,
/// Number of consecutive pings sent without reply to this node
pub num_failures: AtomicUsize,
pub state_info: StateInfo,
}
impl StatusEntry {
/// is the node associated to this entry considered up
pub fn is_up(&self) -> bool {
self.num_failures.load(Ordering::SeqCst) < MAX_FAILURES_BEFORE_CONSIDERED_DOWN
}
}
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct StateInfo {
/// Hostname of the node
pub hostname: String,
/// Replication factor configured on the node
pub replication_factor: Option<usize>, // TODO Option is just for retrocompatibility. It should become a simple usize at some point
}
impl Status {
fn handle_ping(&mut self, ip: IpAddr, info: &PingMessage) -> bool {
let addr = SocketAddr::new(ip, info.rpc_port);
let old_status = self.nodes.insert(
info.id,
Arc::new(StatusEntry {
addr,
last_seen: now_msec(),
num_failures: AtomicUsize::from(0),
state_info: info.state_info.clone(),
}),
);
match old_status {
None => {
info!("Newly pingable node: {}", hex::encode(&info.id));
true
}
Some(x) => x.addr != addr,
}
}
fn recalculate_hash(&mut self) {
let mut nodes = self.nodes.iter().collect::<Vec<_>>();
nodes.sort_unstable_by_key(|(id, _status)| *id);
let mut nodes_txt = String::new();
debug!("Current set of pingable nodes: --");
for (id, status) in nodes {
debug!("{} {}", hex::encode(&id), status.addr);
writeln!(&mut nodes_txt, "{} {}", hex::encode(&id), status.addr).unwrap();
}
debug!("END --");
self.hash = blake2sum(nodes_txt.as_bytes());
}
fn to_serializable_membership(&self, system: &System) -> Vec<AdvertisedNode> {
let mut mem = vec![];
for (node, status) in self.nodes.iter() {
let state_info = if *node == system.id {
system.state_info.clone()
} else {
status.state_info.clone()
};
mem.push(AdvertisedNode {
id: *node,
addr: status.addr,
is_up: status.is_up(),
last_seen: status.last_seen,
state_info,
});
}
mem
}
}
fn gen_node_id(metadata_dir: &Path) -> Result<Uuid, Error> {
let mut id_file = metadata_dir.to_path_buf();
id_file.push("node_id");
if id_file.as_path().exists() {
let mut f = std::fs::File::open(id_file.as_path())?;
let mut d = vec![];
f.read_to_end(&mut d)?;
if d.len() != 32 {
return Err(Error::Message("Corrupt node_id file".to_string()));
}
let mut id = [0u8; 32];
id.copy_from_slice(&d[..]);
Ok(id.into())
} else {
let id = gen_uuid();
let mut f = std::fs::File::create(id_file.as_path())?;
f.write_all(id.as_slice())?;
Ok(id)
}
}
impl System {
/// Create this node's membership manager
pub fn new(
metadata_dir: PathBuf,
rpc_http_client: Arc<RpcHttpClient>,
background: Arc<BackgroundRunner>,
rpc_server: &mut RpcServer,
replication_factor: usize,
) -> Arc<Self> {
let id = gen_node_id(&metadata_dir).expect("Unable to read or generate node ID");
info!("Node ID: {}", hex::encode(&id));
let persist_config = Persister::new(&metadata_dir, "network_config");
let persist_status = Persister::new(&metadata_dir, "peer_info");
let net_config = match persist_config.load() {
Ok(x) => x,
Err(e) => {
match Persister::<garage_rpc_021::ring::NetworkConfig>::new(
&metadata_dir,
"network_config",
)
.load()
{
Ok(old_config) => NetworkConfig::migrate_from_021(old_config),
Err(e2) => {
info!(
"No valid previous network configuration stored ({}, {}), starting fresh.",
e, e2
);
NetworkConfig::new()
}
}
}
};
let mut status = Status {
nodes: HashMap::new(),
hash: Hash::default(),
};
status.recalculate_hash();
let (update_status, status) = watch::channel(Arc::new(status));
let state_info = StateInfo {
hostname: gethostname::gethostname()
.into_string()
.unwrap_or_else(|_| "<invalid utf-8>".to_string()),
replication_factor: Some(replication_factor),
};
let ring = Ring::new(net_config, replication_factor);
let (update_ring, ring) = watch::channel(Arc::new(ring));
let rpc_path = MEMBERSHIP_RPC_PATH.to_string();
let rpc_client = RpcClient::new(
RpcAddrClient::<Message>::new(rpc_http_client.clone(), rpc_path.clone()),
background.clone(),
status.clone(),
);
let sys = Arc::new(System {
id,
persist_config,
persist_status,
rpc_local_port: rpc_server.bind_addr.port(),
state_info,
rpc_http_client,
rpc_client,
replication_factor,
status,
ring,
update_lock: Mutex::new(Updaters {
update_status,
update_ring,
}),
background,
});
sys.clone().register_handler(rpc_server, rpc_path);
sys
}
fn register_handler(self: Arc<Self>, rpc_server: &mut RpcServer, path: String) {
rpc_server.add_handler::<Message, _, _>(path, move |msg, addr| {
let self2 = self.clone();
async move {
match msg {
Message::Ping(ping) => self2.handle_ping(&addr, &ping).await,
Message::PullStatus => Ok(self2.handle_pull_status()),
Message::PullConfig => Ok(self2.handle_pull_config()),
Message::AdvertiseNodesUp(adv) => self2.handle_advertise_nodes_up(&adv).await,
Message::AdvertiseConfig(adv) => self2.handle_advertise_config(&adv).await,
_ => Err(Error::BadRpc("Unexpected RPC message".to_string())),
}
}
});
}
/// Get an RPC client
pub fn rpc_client<M: RpcMessage + 'static>(self: &Arc<Self>, path: &str) -> Arc<RpcClient<M>> {
RpcClient::new(
RpcAddrClient::new(self.rpc_http_client.clone(), path.to_string()),
self.background.clone(),
self.status.clone(),
)
}
/// Save network configuration to disc
async fn save_network_config(self: Arc<Self>) -> Result<(), Error> {
let ring = self.ring.borrow().clone();
self.persist_config
.save_async(&ring.config)
.await
.expect("Cannot save current cluster configuration");
Ok(())
}
fn make_ping(&self) -> Message {
let status = self.status.borrow().clone();
let ring = self.ring.borrow().clone();
Message::Ping(PingMessage {
id: self.id,
rpc_port: self.rpc_local_port,
status_hash: status.hash,
config_version: ring.config.version,
state_info: self.state_info.clone(),
})
}
async fn broadcast(self: Arc<Self>, msg: Message, timeout: Duration) {
let status = self.status.borrow().clone();
let to = status
.nodes
.keys()
.filter(|x| **x != self.id)
.cloned()
.collect::<Vec<_>>();
self.rpc_client.call_many(&to[..], msg, timeout).await;
}
/// Perform bootstraping, starting the ping loop
pub async fn bootstrap(
self: Arc<Self>,
peers: Vec<SocketAddr>,
consul_host: Option<String>,
consul_service_name: Option<String>,
) {
let self2 = self.clone();
self.background
.spawn_worker("discovery loop".to_string(), |stop_signal| {
self2.discovery_loop(peers, consul_host, consul_service_name, stop_signal)
});
let self2 = self.clone();
self.background
.spawn_worker("ping loop".to_string(), |stop_signal| {
self2.ping_loop(stop_signal)
});
}
async fn ping_nodes(self: Arc<Self>, peers: Vec<(SocketAddr, Option<Uuid>)>) {
let ping_msg = self.make_ping();
let ping_resps = join_all(peers.iter().map(|(addr, id_option)| {
let sys = self.clone();
let ping_msg_ref = &ping_msg;
async move {
(
id_option,
addr,
sys.rpc_client
.by_addr()
.call(&addr, ping_msg_ref, PING_TIMEOUT)
.await,
)
}
}))
.await;
let update_locked = self.update_lock.lock().await;
let mut status: Status = self.status.borrow().as_ref().clone();
let ring = self.ring.borrow().clone();
let mut has_changes = false;
let mut to_advertise = vec![];
for (id_option, addr, ping_resp) in ping_resps {
if let Ok(Ok(Message::Ping(info))) = ping_resp {
let is_new = status.handle_ping(addr.ip(), &info);
if is_new {
has_changes = true;
to_advertise.push(AdvertisedNode {
id: info.id,
addr: *addr,
is_up: true,
last_seen: now_msec(),
state_info: info.state_info.clone(),
});
}
if is_new || status.hash != info.status_hash {
self.background
.spawn_cancellable(self.clone().pull_status(info.id).map(Ok));
}
if is_new || ring.config.version < info.config_version {
self.background
.spawn_cancellable(self.clone().pull_config(info.id).map(Ok));
}
} else if let Some(id) = id_option {
if let Some(st) = status.nodes.get_mut(id) {
// we need to increment the failure counter as the call was made using by_addr,
// so the counter was not auto-incremented
st.num_failures.fetch_add(1, Ordering::SeqCst);
if !st.is_up() {
warn!("Node {:?} seems to be down.", id);
if !ring.config.members.contains_key(id) {
info!("Removing node {:?} from status (not in config and not responding to pings anymore)", id);
status.nodes.remove(&id);
has_changes = true;
}
}
}
}
}
if has_changes {
status.recalculate_hash();
}
self.update_status(&update_locked, status).await;
drop(update_locked);
if !to_advertise.is_empty() {
self.broadcast(Message::AdvertiseNodesUp(to_advertise), PING_TIMEOUT)
.await;
}
}
async fn handle_ping(
self: Arc<Self>,
from: &SocketAddr,
ping: &PingMessage,
) -> Result<Message, Error> {
let update_locked = self.update_lock.lock().await;
let mut status: Status = self.status.borrow().as_ref().clone();
let is_new = status.handle_ping(from.ip(), ping);
if is_new {
status.recalculate_hash();
}
let status_hash = status.hash;
let config_version = self.ring.borrow().config.version;
self.update_status(&update_locked, status).await;
drop(update_locked);
if is_new || status_hash != ping.status_hash {
self.background
.spawn_cancellable(self.clone().pull_status(ping.id).map(Ok));
}
if is_new || config_version < ping.config_version {
self.background
.spawn_cancellable(self.clone().pull_config(ping.id).map(Ok));
}
Ok(self.make_ping())
}
fn handle_pull_status(&self) -> Message {
Message::AdvertiseNodesUp(self.status.borrow().to_serializable_membership(self))
}
fn handle_pull_config(&self) -> Message {
let ring = self.ring.borrow().clone();
Message::AdvertiseConfig(ring.config.clone())
}
async fn handle_advertise_nodes_up(
self: Arc<Self>,
adv: &[AdvertisedNode],
) -> Result<Message, Error> {
let mut to_ping = vec![];
let update_lock = self.update_lock.lock().await;
let mut status: Status = self.status.borrow().as_ref().clone();
let mut has_changed = false;
let mut max_replication_factor = 0;
for node in adv.iter() {
if node.id == self.id {
// learn our own ip address
let self_addr = SocketAddr::new(node.addr.ip(), self.rpc_local_port);
let old_self = status.nodes.insert(
node.id,
Arc::new(StatusEntry {
addr: self_addr,
last_seen: now_msec(),
num_failures: AtomicUsize::from(0),
state_info: self.state_info.clone(),
}),
);
has_changed = match old_self {
None => true,
Some(x) => x.addr != self_addr,
};
} else {
let ping_them = match status.nodes.get(&node.id) {
// Case 1: new node
None => true,
// Case 2: the node might have changed address
Some(our_node) => node.is_up && !our_node.is_up() && our_node.addr != node.addr,
};
max_replication_factor = std::cmp::max(
max_replication_factor,
node.state_info.replication_factor.unwrap_or_default(),
);
if ping_them {
to_ping.push((node.addr, Some(node.id)));
}
}
}
if self.replication_factor < max_replication_factor {
error!("Some node have a higher replication factor ({}) than this one ({}). This is not supported and might lead to bugs",
max_replication_factor,
self.replication_factor);
std::process::exit(1);
}
if has_changed {
status.recalculate_hash();
}
self.update_status(&update_lock, status).await;
drop(update_lock);
if !to_ping.is_empty() {
self.background
.spawn_cancellable(self.clone().ping_nodes(to_ping).map(Ok));
}
Ok(Message::Ok)
}
async fn handle_advertise_config(
self: Arc<Self>,
adv: &NetworkConfig,
) -> Result<Message, Error> {
let update_lock = self.update_lock.lock().await;
let ring: Arc<Ring> = self.ring.borrow().clone();
if adv.version > ring.config.version {
let ring = Ring::new(adv.clone(), self.replication_factor);
update_lock.update_ring.send(Arc::new(ring))?;
drop(update_lock);
self.background.spawn_cancellable(
self.clone()
.broadcast(Message::AdvertiseConfig(adv.clone()), PING_TIMEOUT)
.map(Ok),
);
self.background.spawn(self.clone().save_network_config());
}
Ok(Message::Ok)
}
async fn ping_loop(self: Arc<Self>, mut stop_signal: watch::Receiver<bool>) {
while !*stop_signal.borrow() {
let restart_at = tokio::time::sleep(PING_INTERVAL);
let status = self.status.borrow().clone();
let ping_addrs = status
.nodes
.iter()
.filter(|(id, _)| **id != self.id)
.map(|(id, status)| (status.addr, Some(*id)))
.collect::<Vec<_>>();
self.clone().ping_nodes(ping_addrs).await;
select! {
_ = restart_at.fuse() => {},
_ = stop_signal.changed().fuse() => {},
}
}
}
async fn discovery_loop(
self: Arc<Self>,
bootstrap_peers: Vec<SocketAddr>,
consul_host: Option<String>,
consul_service_name: Option<String>,
mut stop_signal: watch::Receiver<bool>,
) {
let consul_config = match (consul_host, consul_service_name) {
(Some(ch), Some(csn)) => Some((ch, csn)),
_ => None,
};
while !*stop_signal.borrow() {
let not_configured = self.ring.borrow().config.members.is_empty();
let no_peers = self.status.borrow().nodes.len() < 3;
let bad_peers = self
.status
.borrow()
.nodes
.iter()
.filter(|(_, v)| v.is_up())
.count() != self.ring.borrow().config.members.len();
if not_configured || no_peers || bad_peers {
info!("Doing a bootstrap/discovery step (not_configured: {}, no_peers: {}, bad_peers: {})", not_configured, no_peers, bad_peers);
let mut ping_list = bootstrap_peers
.iter()
.map(|ip| (*ip, None))
.collect::<Vec<_>>();
if let Ok(peers) = self.persist_status.load_async().await {
ping_list.extend(peers.iter().map(|x| (x.addr, Some(x.id))));
}
if let Some((consul_host, consul_service_name)) = &consul_config {
match get_consul_nodes(consul_host, consul_service_name).await {
Ok(node_list) => {
ping_list.extend(node_list.iter().map(|a| (*a, None)));
}
Err(e) => {
warn!("Could not retrieve node list from Consul: {}", e);
}
}
}
self.clone().ping_nodes(ping_list).await;
}
let restart_at = tokio::time::sleep(DISCOVERY_INTERVAL);
select! {
_ = restart_at.fuse() => {},
_ = stop_signal.changed().fuse() => {},
}
}
}
// for some reason fixing this causes a compilation error, see https://github.com/rust-lang/rust-clippy/issues/7052
#[allow(clippy::manual_async_fn)]
fn pull_status(
self: Arc<Self>,
peer: Uuid,
) -> impl futures::future::Future<Output = ()> + Send + 'static {
async move {
let resp = self
.rpc_client
.call(peer, Message::PullStatus, PING_TIMEOUT)
.await;
if let Ok(Message::AdvertiseNodesUp(nodes)) = resp {
let _: Result<_, _> = self.handle_advertise_nodes_up(&nodes).await;
}
}
}
async fn pull_config(self: Arc<Self>, peer: Uuid) {
let resp = self
.rpc_client
.call(peer, Message::PullConfig, PING_TIMEOUT)
.await;
if let Ok(Message::AdvertiseConfig(config)) = resp {
let _: Result<_, _> = self.handle_advertise_config(&config).await;
}
}
async fn update_status(self: &Arc<Self>, updaters: &Updaters, status: Status) {
if status.hash != self.status.borrow().hash {
let mut list = status.to_serializable_membership(&self);
// Combine with old peer list to make sure no peer is lost
if let Ok(old_list) = self.persist_status.load_async().await {
for pp in old_list {
if !list.iter().any(|np| pp.id == np.id) {
list.push(pp);
}
}
}
if !list.is_empty() {
info!("Persisting new peer list ({} peers)", list.len());
self.persist_status
.save_async(&list)
.await
.expect("Unable to persist peer list");
}
}
updaters
.update_status
.send(Arc::new(status))
.expect("Could not update internal membership status");
}
}

View file

@ -3,6 +3,8 @@
use std::collections::{HashMap, HashSet}; use std::collections::{HashMap, HashSet};
use std::convert::TryInto; use std::convert::TryInto;
use netapp::NodeID;
use serde::{Deserialize, Serialize}; use serde::{Deserialize, Serialize};
use garage_util::data::*; use garage_util::data::*;
@ -98,7 +100,7 @@ pub struct Ring {
pub config: NetworkConfig, pub config: NetworkConfig,
// Internal order of nodes used to make a more compact representation of the ring // Internal order of nodes used to make a more compact representation of the ring
nodes: Vec<Uuid>, nodes: Vec<NodeID>,
// The list of entries in the ring // The list of entries in the ring
ring: Vec<RingEntry>, ring: Vec<RingEntry>,
@ -260,6 +262,11 @@ impl Ring {
}) })
.collect::<Vec<_>>(); .collect::<Vec<_>>();
let nodes = nodes
.iter()
.map(|id| NodeID::from_slice(id.as_slice()).unwrap())
.collect::<Vec<_>>();
Self { Self {
replication_factor, replication_factor,
config, config,
@ -291,7 +298,7 @@ impl Ring {
} }
/// Walk the ring to find the n servers in which data should be replicated /// Walk the ring to find the n servers in which data should be replicated
pub fn get_nodes(&self, position: &Hash, n: usize) -> Vec<Uuid> { pub fn get_nodes(&self, position: &Hash, n: usize) -> Vec<NodeID> {
if self.ring.len() != 1 << PARTITION_BITS { if self.ring.len() != 1 << PARTITION_BITS {
warn!("Ring not yet ready, read/writes will be lost!"); warn!("Ring not yet ready, read/writes will be lost!");
return vec![]; return vec![];

View file

@ -1,369 +0,0 @@
//! Contain structs related to making RPCs
use std::borrow::Borrow;
use std::marker::PhantomData;
use std::net::SocketAddr;
use std::pin::Pin;
use std::sync::atomic::Ordering;
use std::sync::Arc;
use std::time::Duration;
use arc_swap::ArcSwapOption;
use futures::future::Future;
use futures::stream::futures_unordered::FuturesUnordered;
use futures::stream::StreamExt;
use futures_util::future::FutureExt;
use hyper::client::{Client, HttpConnector};
use hyper::{Body, Method, Request};
use tokio::sync::{watch, Semaphore};
use garage_util::background::BackgroundRunner;
use garage_util::config::TlsConfig;
use garage_util::data::*;
use garage_util::error::{Error, RpcError};
use crate::membership::Status;
use crate::rpc_server::RpcMessage;
use crate::tls_util;
const DEFAULT_TIMEOUT: Duration = Duration::from_secs(10);
/// Strategy to apply when making RPC
#[derive(Copy, Clone)]
pub struct RequestStrategy {
/// Max time to wait for a response
pub rs_timeout: Duration,
/// Min number of responses to consider the request successful
pub rs_quorum: usize,
/// Should requests be dropped after enough responses are received
pub rs_interrupt_after_quorum: bool,
}
impl RequestStrategy {
/// Create a RequestStrategy with default timeout and not interrupting when quorum reached
pub fn with_quorum(quorum: usize) -> Self {
RequestStrategy {
rs_timeout: DEFAULT_TIMEOUT,
rs_quorum: quorum,
rs_interrupt_after_quorum: false,
}
}
/// Set timeout of the strategy
pub fn with_timeout(mut self, timeout: Duration) -> Self {
self.rs_timeout = timeout;
self
}
/// Set if requests can be dropped after quorum has been reached
/// In general true for read requests, and false for write
pub fn interrupt_after_quorum(mut self, interrupt: bool) -> Self {
self.rs_interrupt_after_quorum = interrupt;
self
}
}
/// Shortcut for a boxed async function taking a message, and resolving to another message or an
/// error
pub type LocalHandlerFn<M> =
Box<dyn Fn(Arc<M>) -> Pin<Box<dyn Future<Output = Result<M, Error>> + Send>> + Send + Sync>;
/// Client used to send RPC
pub struct RpcClient<M: RpcMessage> {
status: watch::Receiver<Arc<Status>>,
background: Arc<BackgroundRunner>,
local_handler: ArcSwapOption<(Uuid, LocalHandlerFn<M>)>,
rpc_addr_client: RpcAddrClient<M>,
}
impl<M: RpcMessage + 'static> RpcClient<M> {
/// Create a new RpcClient from an address, a job runner, and the status of all RPC servers
pub fn new(
rac: RpcAddrClient<M>,
background: Arc<BackgroundRunner>,
status: watch::Receiver<Arc<Status>>,
) -> Arc<Self> {
Arc::new(Self {
rpc_addr_client: rac,
background,
status,
local_handler: ArcSwapOption::new(None),
})
}
/// Set the local handler, to process RPC to this node without network usage
pub fn set_local_handler<F, Fut>(&self, my_id: Uuid, handler: F)
where
F: Fn(Arc<M>) -> Fut + Send + Sync + 'static,
Fut: Future<Output = Result<M, Error>> + Send + 'static,
{
let handler_arc = Arc::new(handler);
let handler: LocalHandlerFn<M> = Box::new(move |msg| {
let handler_arc2 = handler_arc.clone();
Box::pin(async move { handler_arc2(msg).await })
});
self.local_handler.swap(Some(Arc::new((my_id, handler))));
}
/// Get a RPC client to make calls using node's SocketAddr instead of its ID
pub fn by_addr(&self) -> &RpcAddrClient<M> {
&self.rpc_addr_client
}
/// Make a RPC call
pub async fn call(&self, to: Uuid, msg: M, timeout: Duration) -> Result<M, Error> {
self.call_arc(to, Arc::new(msg), timeout).await
}
/// Make a RPC call from a message stored in an Arc
pub async fn call_arc(&self, to: Uuid, msg: Arc<M>, timeout: Duration) -> Result<M, Error> {
if let Some(lh) = self.local_handler.load_full() {
let (my_id, local_handler) = lh.as_ref();
if to.borrow() == my_id {
return local_handler(msg).await;
}
}
let status = self.status.borrow().clone();
let node_status = match status.nodes.get(&to) {
Some(node_status) => {
if node_status.is_up() {
node_status
} else {
return Err(Error::from(RpcError::NodeDown(to)));
}
}
None => {
return Err(Error::Message(format!(
"Peer ID not found: {:?}",
to.borrow()
)))
}
};
match self
.rpc_addr_client
.call(&node_status.addr, msg, timeout)
.await
{
Err(rpc_error) => {
node_status.num_failures.fetch_add(1, Ordering::SeqCst);
Err(Error::from(rpc_error))
}
Ok(x) => x,
}
}
/// Make a RPC call to multiple servers, returning a Vec containing each result
pub async fn call_many(&self, to: &[Uuid], msg: M, timeout: Duration) -> Vec<Result<M, Error>> {
let msg = Arc::new(msg);
let mut resp_stream = to
.iter()
.map(|to| self.call_arc(*to, msg.clone(), timeout))
.collect::<FuturesUnordered<_>>();
let mut results = vec![];
while let Some(resp) = resp_stream.next().await {
results.push(resp);
}
results
}
/// Make a RPC call to multiple servers, returning either a Vec of responses, or an error if
/// strategy could not be respected due to too many errors
pub async fn try_call_many(
self: &Arc<Self>,
to: &[Uuid],
msg: M,
strategy: RequestStrategy,
) -> Result<Vec<M>, Error> {
let timeout = strategy.rs_timeout;
let msg = Arc::new(msg);
let mut resp_stream = to
.to_vec()
.into_iter()
.map(|to| {
let self2 = self.clone();
let msg = msg.clone();
async move { self2.call_arc(to, msg, timeout).await }
})
.collect::<FuturesUnordered<_>>();
let mut results = vec![];
let mut errors = vec![];
while let Some(resp) = resp_stream.next().await {
match resp {
Ok(msg) => {
results.push(msg);
if results.len() >= strategy.rs_quorum {
break;
}
}
Err(e) => {
errors.push(e);
}
}
}
if results.len() >= strategy.rs_quorum {
// Continue requests in background.
// Continue the remaining requests immediately using tokio::spawn
// but enqueue a task in the background runner
// to ensure that the process won't exit until the requests are done
// (if we had just enqueued the resp_stream.collect directly in the background runner,
// the requests might have been put on hold in the background runner's queue,
// in which case they might timeout or otherwise fail)
if !strategy.rs_interrupt_after_quorum {
let wait_finished_fut = tokio::spawn(async move {
resp_stream.collect::<Vec<_>>().await;
});
self.background.spawn(wait_finished_fut.map(|_| Ok(())));
}
Ok(results)
} else {
let errors = errors.iter().map(|e| format!("{}", e)).collect::<Vec<_>>();
Err(Error::from(RpcError::TooManyErrors(errors)))
}
}
}
/// Thin wrapper around an `RpcHttpClient` specifying the path of the request
pub struct RpcAddrClient<M: RpcMessage> {
phantom: PhantomData<M>,
http_client: Arc<RpcHttpClient>,
path: String,
}
impl<M: RpcMessage> RpcAddrClient<M> {
/// Create an RpcAddrClient from an HTTP client and the endpoint to reach for RPCs
pub fn new(http_client: Arc<RpcHttpClient>, path: String) -> Self {
Self {
phantom: PhantomData::default(),
http_client,
path,
}
}
/// Make a RPC
pub async fn call<MB>(
&self,
to_addr: &SocketAddr,
msg: MB,
timeout: Duration,
) -> Result<Result<M, Error>, RpcError>
where
MB: Borrow<M>,
{
self.http_client
.call(&self.path, to_addr, msg, timeout)
.await
}
}
/// HTTP client used to make RPCs
pub struct RpcHttpClient {
request_limiter: Semaphore,
method: ClientMethod,
}
enum ClientMethod {
Http(Client<HttpConnector, hyper::Body>),
Https(Client<tls_util::HttpsConnectorFixedDnsname<HttpConnector>, hyper::Body>),
}
impl RpcHttpClient {
/// Create a new RpcHttpClient
pub fn new(
max_concurrent_requests: usize,
tls_config: &Option<TlsConfig>,
) -> Result<Self, Error> {
let method = if let Some(cf) = tls_config {
let ca_certs = tls_util::load_certs(&cf.ca_cert).map_err(|e| {
Error::Message(format!("Failed to open CA certificate file: {:?}", e))
})?;
let node_certs = tls_util::load_certs(&cf.node_cert)
.map_err(|e| Error::Message(format!("Failed to open certificate file: {:?}", e)))?;
let node_key = tls_util::load_private_key(&cf.node_key)
.map_err(|e| Error::Message(format!("Failed to open private key file: {:?}", e)))?;
let mut config = rustls::ClientConfig::new();
for crt in ca_certs.iter() {
config.root_store.add(crt)?;
}
config.set_single_client_cert([&node_certs[..], &ca_certs[..]].concat(), node_key)?;
let connector =
tls_util::HttpsConnectorFixedDnsname::<HttpConnector>::new(config, "garage");
ClientMethod::Https(Client::builder().build(connector))
} else {
ClientMethod::Http(Client::new())
};
Ok(RpcHttpClient {
method,
request_limiter: Semaphore::new(max_concurrent_requests),
})
}
/// Make a RPC
async fn call<M, MB>(
&self,
path: &str,
to_addr: &SocketAddr,
msg: MB,
timeout: Duration,
) -> Result<Result<M, Error>, RpcError>
where
MB: Borrow<M>,
M: RpcMessage,
{
let uri = match self.method {
ClientMethod::Http(_) => format!("http://{}/{}", to_addr, path),
ClientMethod::Https(_) => format!("https://{}/{}", to_addr, path),
};
let req = Request::builder()
.method(Method::POST)
.uri(uri)
.body(Body::from(rmp_to_vec_all_named(msg.borrow())?))?;
let resp_fut = match &self.method {
ClientMethod::Http(client) => client.request(req).fuse(),
ClientMethod::Https(client) => client.request(req).fuse(),
};
trace!("({}) Acquiring request_limiter slot...", path);
let slot = self.request_limiter.acquire().await;
trace!("({}) Got slot, doing request to {}...", path, to_addr);
let resp = tokio::time::timeout(timeout, resp_fut)
.await
.map_err(|e| {
debug!(
"RPC timeout to {}: {}",
to_addr,
debug_serialize(msg.borrow())
);
e
})?
.map_err(|e| {
warn!(
"RPC HTTP client error when connecting to {}: {}",
to_addr, e
);
e
})?;
let status = resp.status();
trace!("({}) Request returned, got status {}", path, status);
let body = hyper::body::to_bytes(resp.into_body()).await?;
drop(slot);
match rmp_serde::decode::from_read::<_, Result<M, String>>(&body[..])? {
Err(e) => Ok(Err(Error::RemoteError(e, status))),
Ok(x) => Ok(Ok(x)),
}
}
}

206
src/rpc/rpc_helper.rs Normal file
View file

@ -0,0 +1,206 @@
//! Contain structs related to making RPCs
use std::sync::Arc;
use std::time::Duration;
use futures::future::join_all;
use futures::stream::futures_unordered::FuturesUnordered;
use futures::stream::StreamExt;
use futures_util::future::FutureExt;
use tokio::select;
pub use netapp::endpoint::{Endpoint, EndpointHandler, Message};
use netapp::peering::fullmesh::FullMeshPeeringStrategy;
pub use netapp::proto::*;
pub use netapp::{NetApp, NodeID};
use garage_util::background::BackgroundRunner;
use garage_util::error::{Error, RpcError};
const DEFAULT_TIMEOUT: Duration = Duration::from_secs(10);
/// Strategy to apply when making RPC
#[derive(Copy, Clone)]
pub struct RequestStrategy {
/// Max time to wait for a response
pub rs_timeout: Duration,
/// Min number of responses to consider the request successful
pub rs_quorum: Option<usize>,
/// Should requests be dropped after enough responses are received
pub rs_interrupt_after_quorum: bool,
/// Request priority
pub rs_priority: RequestPriority,
}
impl RequestStrategy {
/// Create a RequestStrategy with default timeout and not interrupting when quorum reached
pub fn with_priority(prio: RequestPriority) -> Self {
RequestStrategy {
rs_timeout: DEFAULT_TIMEOUT,
rs_quorum: None,
rs_interrupt_after_quorum: false,
rs_priority: prio,
}
}
/// Set quorum to be reached for request
pub fn with_quorum(mut self, quorum: usize) -> Self {
self.rs_quorum = Some(quorum);
self
}
/// Set timeout of the strategy
pub fn with_timeout(mut self, timeout: Duration) -> Self {
self.rs_timeout = timeout;
self
}
/// Set if requests can be dropped after quorum has been reached
/// In general true for read requests, and false for write
pub fn interrupt_after_quorum(mut self, interrupt: bool) -> Self {
self.rs_interrupt_after_quorum = interrupt;
self
}
}
#[derive(Clone)]
pub struct RpcHelper {
pub(crate) fullmesh: Arc<FullMeshPeeringStrategy>,
pub(crate) background: Arc<BackgroundRunner>,
}
impl RpcHelper {
pub async fn call<M, H>(
&self,
endpoint: &Endpoint<M, H>,
to: NodeID,
msg: M,
strat: RequestStrategy,
) -> Result<M::Response, Error>
where
M: Message,
H: EndpointHandler<M>,
{
self.call_arc(endpoint, to, Arc::new(msg), strat).await
}
pub async fn call_arc<M, H>(
&self,
endpoint: &Endpoint<M, H>,
to: NodeID,
msg: Arc<M>,
strat: RequestStrategy,
) -> Result<M::Response, Error>
where
M: Message,
H: EndpointHandler<M>,
{
select! {
res = endpoint.call(&to, &msg, strat.rs_priority) => Ok(res?),
_ = tokio::time::sleep(strat.rs_timeout) => Err(Error::Rpc(RpcError::Timeout)),
}
}
pub async fn call_many<M, H>(
&self,
endpoint: &Endpoint<M, H>,
to: &[NodeID],
msg: M,
strat: RequestStrategy,
) -> Vec<(NodeID, Result<M::Response, Error>)>
where
M: Message,
H: EndpointHandler<M>,
{
let msg = Arc::new(msg);
let resps = join_all(
to.iter()
.map(|to| self.call_arc(endpoint, *to, msg.clone(), strat)),
)
.await;
to.iter()
.cloned()
.zip(resps.into_iter())
.collect::<Vec<_>>()
}
pub async fn broadcast<M, H>(
&self,
endpoint: &Endpoint<M, H>,
msg: M,
strat: RequestStrategy,
) -> Vec<(NodeID, Result<M::Response, Error>)>
where
M: Message,
H: EndpointHandler<M>,
{
let to = self
.fullmesh
.get_peer_list()
.iter()
.map(|p| p.id)
.collect::<Vec<_>>();
self.call_many(endpoint, &to[..], msg, strat).await
}
/// Make an RPC call to multiple servers, returning either a Vec of responses, or an error if
/// the strategy could not be respected due to too many errors
pub async fn try_call_many<M, H>(
&self,
endpoint: &Arc<Endpoint<M, H>>,
to: &[NodeID],
msg: M,
strategy: RequestStrategy,
) -> Result<Vec<M::Response>, Error>
where
M: Message + 'static,
H: EndpointHandler<M> + 'static,
{
let msg = Arc::new(msg);
let mut resp_stream = to
.to_vec()
.into_iter()
.map(|to| {
let self2 = self.clone();
let msg = msg.clone();
let endpoint2 = endpoint.clone();
async move { self2.call_arc(&endpoint2, to, msg, strategy).await }
})
.collect::<FuturesUnordered<_>>();
let mut results = vec![];
let mut errors = vec![];
let quorum = strategy.rs_quorum.unwrap_or(to.len());
while let Some(resp) = resp_stream.next().await {
match resp {
Ok(msg) => {
results.push(msg);
if results.len() >= quorum {
break;
}
}
Err(e) => {
errors.push(e);
}
}
}
if results.len() >= quorum {
// Continue requests in background.
// Continue the remaining requests immediately using tokio::spawn
// but enqueue a task in the background runner
// to ensure that the process won't exit until the requests are done
// (if we had just enqueued the resp_stream.collect directly in the background runner,
// the requests might have been put on hold in the background runner's queue,
// in which case they might timeout or otherwise fail)
if !strategy.rs_interrupt_after_quorum {
let wait_finished_fut = tokio::spawn(async move {
resp_stream.collect::<Vec<_>>().await;
});
self.background.spawn(wait_finished_fut.map(|_| Ok(())));
}
Ok(results)
} else {
let errors = errors.iter().map(|e| format!("{}", e)).collect::<Vec<_>>();
Err(Error::from(RpcError::TooManyErrors(errors)))
}
}
}
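
To make the new helper concrete for reviewers, here is a minimal sketch of a caller, in the style of the table code further down. Everything named Example* is invented for illustration; the strategy builders, priorities and try_call_many are the ones defined above.

```
use std::sync::Arc;
use std::time::Duration;

use serde::{Deserialize, Serialize};

use garage_rpc::system::System;
use garage_rpc::*;
use garage_util::error::Error;

// Hypothetical RPC message, for illustration only.
#[derive(Serialize, Deserialize)]
enum ExampleRpc {
    Get(u64),
    Value(u64),
}

impl Message for ExampleRpc {
    type Response = ExampleRpc;
}

// Ask `nodes` for a value: succeed as soon as 2 of them answer, time out each
// request after 10 seconds, and drop the remaining requests once the quorum is
// reached (recommended above for reads).
async fn example_read<H: EndpointHandler<ExampleRpc> + 'static>(
    system: &System,
    endpoint: &Arc<Endpoint<ExampleRpc, H>>,
    nodes: &[NodeID],
) -> Result<Vec<ExampleRpc>, Error> {
    system
        .rpc
        .try_call_many(
            endpoint,
            nodes,
            ExampleRpc::Get(42),
            RequestStrategy::with_priority(PRIO_NORMAL)
                .with_quorum(2)
                .with_timeout(Duration::from_secs(10))
                .interrupt_after_quorum(true),
        )
        .await
}
```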

View file

@ -1,247 +0,0 @@
//! Contains structs related to receiving RPCs
use std::collections::HashMap;
use std::net::SocketAddr;
use std::pin::Pin;
use std::sync::Arc;
use std::time::Instant;
use futures::future::Future;
use futures_util::future::*;
use futures_util::stream::*;
use hyper::server::conn::AddrStream;
use hyper::service::{make_service_fn, service_fn};
use hyper::{Body, Method, Request, Response, Server, StatusCode};
use serde::{Deserialize, Serialize};
use tokio::net::{TcpListener, TcpStream};
use tokio_rustls::server::TlsStream;
use tokio_rustls::TlsAcceptor;
use tokio_stream::wrappers::TcpListenerStream;
use garage_util::config::TlsConfig;
use garage_util::data::*;
use garage_util::error::Error;
use crate::tls_util;
/// Trait for messages that can be sent as RPC
pub trait RpcMessage: Serialize + for<'de> Deserialize<'de> + Send + Sync {}
type ResponseFuture = Pin<Box<dyn Future<Output = Result<Response<Body>, Error>> + Send>>;
type Handler = Box<dyn Fn(Request<Body>, SocketAddr) -> ResponseFuture + Send + Sync>;
/// Structure handling RPCs
pub struct RpcServer {
/// The address the RpcServer will bind
pub bind_addr: SocketAddr,
/// The tls configuration used for RPC
pub tls_config: Option<TlsConfig>,
handlers: HashMap<String, Handler>,
}
async fn handle_func<M, F, Fut>(
handler: Arc<F>,
req: Request<Body>,
sockaddr: SocketAddr,
name: Arc<String>,
) -> Result<Response<Body>, Error>
where
M: RpcMessage + 'static,
F: Fn(M, SocketAddr) -> Fut + Send + Sync + 'static,
Fut: Future<Output = Result<M, Error>> + Send + 'static,
{
let begin_time = Instant::now();
let whole_body = hyper::body::to_bytes(req.into_body()).await?;
let msg = rmp_serde::decode::from_read::<_, M>(&whole_body[..])?;
trace!(
"Request message: {}",
serde_json::to_string(&msg)
.unwrap_or_else(|_| "<json error>".into())
.chars()
.take(100)
.collect::<String>()
);
match handler(msg, sockaddr).await {
Ok(resp) => {
let resp_bytes = rmp_to_vec_all_named::<Result<M, String>>(&Ok(resp))?;
let rpc_duration = (Instant::now() - begin_time).as_millis();
if rpc_duration > 100 {
debug!("RPC {} ok, took long: {} ms", name, rpc_duration,);
}
Ok(Response::new(Body::from(resp_bytes)))
}
Err(e) => {
let err_str = format!("{}", e);
let rep_bytes = rmp_to_vec_all_named::<Result<M, String>>(&Err(err_str))?;
let mut err_response = Response::new(Body::from(rep_bytes));
*err_response.status_mut() = match e {
Error::BadRpc(_) => StatusCode::BAD_REQUEST,
_ => StatusCode::INTERNAL_SERVER_ERROR,
};
warn!(
"RPC error ({}): {} ({} ms)",
name,
e,
(Instant::now() - begin_time).as_millis(),
);
Ok(err_response)
}
}
}
impl RpcServer {
/// Create a new RpcServer
pub fn new(bind_addr: SocketAddr, tls_config: Option<TlsConfig>) -> Self {
Self {
bind_addr,
tls_config,
handlers: HashMap::new(),
}
}
/// Add handler handling request made to `name`
pub fn add_handler<M, F, Fut>(&mut self, name: String, handler: F)
where
M: RpcMessage + 'static,
F: Fn(M, SocketAddr) -> Fut + Send + Sync + 'static,
Fut: Future<Output = Result<M, Error>> + Send + 'static,
{
let name2 = Arc::new(name.clone());
let handler_arc = Arc::new(handler);
let handler = Box::new(move |req: Request<Body>, sockaddr: SocketAddr| {
let handler2 = handler_arc.clone();
let b: ResponseFuture = Box::pin(handle_func(handler2, req, sockaddr, name2.clone()));
b
});
self.handlers.insert(name, handler);
}
async fn handler(
self: Arc<Self>,
req: Request<Body>,
addr: SocketAddr,
) -> Result<Response<Body>, Error> {
if req.method() != Method::POST {
let mut bad_request = Response::default();
*bad_request.status_mut() = StatusCode::BAD_REQUEST;
return Ok(bad_request);
}
let path = &req.uri().path()[1..].to_string();
let handler = match self.handlers.get(path) {
Some(h) => h,
None => {
let mut not_found = Response::default();
*not_found.status_mut() = StatusCode::NOT_FOUND;
return Ok(not_found);
}
};
trace!("({}) Handling request", path);
let resp_waiter = tokio::spawn(handler(req, addr));
match resp_waiter.await {
Err(err) => {
warn!("Handler await error: {}", err);
let mut ise = Response::default();
*ise.status_mut() = StatusCode::INTERNAL_SERVER_ERROR;
Ok(ise)
}
Ok(Err(err)) => {
trace!("({}) Request handler failed: {}", path, err);
let mut bad_request = Response::new(Body::from(format!("{}", err)));
*bad_request.status_mut() = StatusCode::BAD_REQUEST;
Ok(bad_request)
}
Ok(Ok(resp)) => {
trace!("({}) Request handler succeeded", path);
Ok(resp)
}
}
}
/// Run the RpcServer
pub async fn run(
self: Arc<Self>,
shutdown_signal: impl Future<Output = ()>,
) -> Result<(), Error> {
if let Some(tls_config) = self.tls_config.as_ref() {
let ca_certs = tls_util::load_certs(&tls_config.ca_cert)?;
let node_certs = tls_util::load_certs(&tls_config.node_cert)?;
let node_key = tls_util::load_private_key(&tls_config.node_key)?;
let mut ca_store = rustls::RootCertStore::empty();
for crt in ca_certs.iter() {
ca_store.add(crt)?;
}
let mut config =
rustls::ServerConfig::new(rustls::AllowAnyAuthenticatedClient::new(ca_store));
config.set_single_cert([&node_certs[..], &ca_certs[..]].concat(), node_key)?;
let tls_acceptor = Arc::new(TlsAcceptor::from(Arc::new(config)));
let listener = TcpListener::bind(&self.bind_addr).await?;
let incoming = TcpListenerStream::new(listener).filter_map(|socket| async {
match socket {
Ok(stream) => match tls_acceptor.clone().accept(stream).await {
Ok(x) => Some(Ok::<_, hyper::Error>(x)),
Err(_e) => None,
},
Err(_) => None,
}
});
let incoming = hyper::server::accept::from_stream(incoming);
let self_arc = self.clone();
let service = make_service_fn(|conn: &TlsStream<TcpStream>| {
let client_addr = conn
.get_ref()
.0
.peer_addr()
.unwrap_or_else(|_| ([0, 0, 0, 0], 0).into());
let self_arc = self_arc.clone();
async move {
Ok::<_, Error>(service_fn(move |req: Request<Body>| {
self_arc.clone().handler(req, client_addr).map_err(|e| {
warn!("RPC handler error: {}", e);
e
})
}))
}
});
let server = Server::builder(incoming).serve(service);
let graceful = server.with_graceful_shutdown(shutdown_signal);
info!("RPC server listening on http://{}", self.bind_addr);
graceful.await?;
} else {
let self_arc = self.clone();
let service = make_service_fn(move |conn: &AddrStream| {
let client_addr = conn.remote_addr();
let self_arc = self_arc.clone();
async move {
Ok::<_, Error>(service_fn(move |req: Request<Body>| {
self_arc.clone().handler(req, client_addr).map_err(|e| {
warn!("RPC handler error: {}", e);
e
})
}))
}
});
let server = Server::bind(&self.bind_addr).serve(service);
let graceful = server.with_graceful_shutdown(shutdown_signal);
info!("RPC server listening on http://{}", self.bind_addr);
graceful.await?;
}
Ok(())
}
}

363
src/rpc/system.rs Normal file
View file

@ -0,0 +1,363 @@
//! Module containing structs related to membership management
use std::io::{Read, Write};
use std::net::SocketAddr;
use std::path::{Path, PathBuf};
use std::sync::Arc;
use std::time::Duration;
use arc_swap::ArcSwap;
use async_trait::async_trait;
use futures::{join, select};
use futures_util::future::*;
use serde::{Deserialize, Serialize};
use sodiumoxide::crypto::sign::ed25519;
use tokio::sync::watch;
use tokio::sync::Mutex;
use netapp::endpoint::{Endpoint, EndpointHandler, Message};
use netapp::peering::fullmesh::FullMeshPeeringStrategy;
use netapp::proto::*;
use netapp::{NetApp, NetworkKey, NodeID, NodeKey};
use garage_util::background::BackgroundRunner;
use garage_util::error::Error;
use garage_util::persister::Persister;
//use garage_util::time::*;
//use crate::consul::get_consul_nodes;
use crate::ring::*;
use crate::rpc_helper::{RequestStrategy, RpcHelper};
const DISCOVERY_INTERVAL: Duration = Duration::from_secs(60);
const PING_TIMEOUT: Duration = Duration::from_secs(2);
/// RPC endpoint used for calls related to membership
pub const SYSTEM_RPC_PATH: &str = "garage_rpc/membership.rs/SystemRpc";
/// RPC messages related to membership
#[derive(Debug, Serialize, Deserialize, Clone)]
pub enum SystemRpc {
/// Response to successful advertisements
Ok,
/// Error response
Error(String),
/// Ask another node for its config. Answered with AdvertiseConfig
PullConfig,
/// Advertise Garage status. Answered with another AdvertiseStatus.
/// Exchanged with every node on a regular basis.
AdvertiseStatus(StateInfo),
/// Advertisement of the nodes' config. Sent spontaneously or in response to PullConfig
AdvertiseConfig(NetworkConfig),
/// Get known nodes states
GetKnownNodes,
/// Return known nodes
ReturnKnownNodes(Vec<(NodeID, SocketAddr, bool)>),
}
impl Message for SystemRpc {
type Response = SystemRpc;
}
/// This node's membership manager
pub struct System {
/// The id of this node
pub id: NodeID,
persist_config: Persister<NetworkConfig>,
state_info: ArcSwap<StateInfo>,
pub netapp: Arc<NetApp>,
fullmesh: Arc<FullMeshPeeringStrategy>,
pub rpc: RpcHelper,
system_endpoint: Arc<Endpoint<SystemRpc, System>>,
rpc_listen_addr: SocketAddr,
bootstrap_peers: Vec<(NodeID, SocketAddr)>,
consul_host: Option<String>,
consul_service_name: Option<String>,
replication_factor: usize,
/// The ring
pub ring: watch::Receiver<Arc<Ring>>,
update_ring: Mutex<watch::Sender<Arc<Ring>>>,
/// The job runner of this node
pub background: Arc<BackgroundRunner>,
}
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct StateInfo {
/// Hostname of the node
pub hostname: String,
/// Replication factor configured on the node
pub replication_factor: usize,
/// Configuration version
pub config_version: u64,
}
fn gen_node_key(metadata_dir: &Path) -> Result<NodeKey, Error> {
let mut id_file = metadata_dir.to_path_buf();
id_file.push("node_id");
if id_file.as_path().exists() {
let mut f = std::fs::File::open(id_file.as_path())?;
let mut d = vec![];
f.read_to_end(&mut d)?;
if d.len() != 64 {
return Err(Error::Message("Corrupt node_id file".to_string()));
}
let mut key = [0u8; 64];
key.copy_from_slice(&d[..]);
Ok(NodeKey::from_slice(&key[..]).unwrap())
} else {
let (key, _) = ed25519::gen_keypair();
let mut f = std::fs::File::create(id_file.as_path())?;
f.write_all(&key[..])?;
Ok(NodeKey::from_slice(&key[..]).unwrap())
}
}
impl System {
/// Create this node's membership manager
pub fn new(
network_key: NetworkKey,
metadata_dir: PathBuf,
background: Arc<BackgroundRunner>,
replication_factor: usize,
rpc_listen_addr: SocketAddr,
bootstrap_peers: Vec<(NodeID, SocketAddr)>,
  1. Create a file /etc/garage.toml with the content given in the Quickstart
  2. Never start the daemon (check that no meta or data folders have been created)
  3. Run `garage node-id` (this is similar to the steps advertised in "Cookbook > Deploying Garage")
  4. Get an error:
strace -e open ./result/bin/garage node-id
open("/proc/self/cgroup", O_RDONLY|O_CLOEXEC) = 3
open("/proc/self/mountinfo", O_RDONLY|O_CLOEXEC) = 3
open("/sys/fs/cgroup/cpu,cpuacct/user.slice/user-1000.slice/user@1000.service/cpu.cfs_quota_us", O_RDONLY|O_CLOEXEC) = 3
open("/etc/garage.toml", O_RDONLY|O_CLOEXEC) = 9
open("/tmp/meta/node_key", O_WRONLY|O_CREAT|O_TRUNC|O_CLOEXEC, 0666) = -1 ENOENT (Aucun fichier ou dossier de ce type)
 ERROR garage > Unable to read or generate node key: IO error: No such file or directory (os error 2)
+++ exited with 1 +++

2 points:

  1. We need to recursively create folders before trying to write the key (see the sketch below)
  2. We might want to improve our error reporting by specifying the failed path.
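
A possible shape for point 1, assuming the Error::Message variant from garage_util that gen_node_key already uses (sketch only, not part of this PR):

```
use std::path::Path;

use garage_util::error::Error;

// Create the metadata directory (and its parents) before node_key is written,
// and name the failing path in the error so the report is less cryptic.
fn ensure_metadata_dir(metadata_dir: &Path) -> Result<(), Error> {
    std::fs::create_dir_all(metadata_dir).map_err(|e| {
        Error::Message(format!(
            "Unable to create metadata directory {}: {}",
            metadata_dir.display(),
            e
        ))
    })
}
```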
consul_host: Option<String>,
consul_service_name: Option<String>,
$ RUST_LOG=garage=debug,netapp=debug strace -e open ./result/bin/garage node-id
open("/proc/self/cgroup", O_RDONLY|O_CLOEXEC) = 3
open("/proc/self/mountinfo", O_RDONLY|O_CLOEXEC) = 3
open("/sys/fs/cgroup/cpu,cpuacct/user.slice/user-1000.slice/user@1000.service/cpu.cfs_quota_us", O_RDONLY|O_CLOEXEC) = 3
open("/etc/garage.toml", O_RDONLY|O_CLOEXEC) = 9
open("/tmp/meta/node_key", O_WRONLY|O_CREAT|O_TRUNC|O_CLOEXEC, 0666) = -1 ENOENT (Aucun fichier ou dossier de ce type)
 ERROR garage > Unable to read or generate node key: IO error: No such file or directory (os error 2)
+++ exited with 1 +++

There is a good chance that the `garage node-id` command will fail, as the key will be stored in the `meta` folder, which will very likely not have been created yet; it will then throw the cryptic error above, which I diagnosed through strace.

) -> Arc<Self> {
let node_key = gen_node_key(&metadata_dir).expect("Unable to read or generate node ID");
info!("Node public key: {}", hex::encode(&node_key.public_key()));
let persist_config = Persister::new(&metadata_dir, "network_config");
let net_config = match persist_config.load() {
Ok(x) => x,
Err(e) => {
match Persister::<garage_rpc_021::ring::NetworkConfig>::new(
&metadata_dir,
"network_config",
)
.load()
{
Ok(old_config) => NetworkConfig::migrate_from_021(old_config),
Err(e2) => {
info!(
"No valid previous network configuration stored ({}, {}), starting fresh.",
e, e2
);
NetworkConfig::new()
}
}
}
};
let state_info = StateInfo {
hostname: gethostname::gethostname()
.into_string()
.unwrap_or_else(|_| "<invalid utf-8>".to_string()),
replication_factor: replication_factor,
config_version: net_config.version,
};
let ring = Ring::new(net_config, replication_factor);
let (update_ring, ring) = watch::channel(Arc::new(ring));
let netapp = NetApp::new(network_key, node_key);
let fullmesh = FullMeshPeeringStrategy::new(netapp.clone(), bootstrap_peers.clone());
let system_endpoint = netapp.endpoint(SYSTEM_RPC_PATH.into());
let sys = Arc::new(System {
id: netapp.id.clone(),
persist_config,
state_info: ArcSwap::new(Arc::new(state_info)),
netapp: netapp.clone(),
fullmesh: fullmesh.clone(),
rpc: RpcHelper {
fullmesh: fullmesh.clone(),
background: background.clone(),
},
system_endpoint,
replication_factor,
rpc_listen_addr,
bootstrap_peers,
consul_host,
consul_service_name,
ring,
update_ring: Mutex::new(update_ring),
background: background.clone(),
});
sys.system_endpoint.set_handler(sys.clone());
sys
}
/// Perform bootstrapping, starting the ping loop
pub async fn run(self: Arc<Self>, must_exit: watch::Receiver<bool>) {
join!(
self.netapp
.clone()
.listen(self.rpc_listen_addr, None, must_exit.clone()),
self.fullmesh.clone().run(must_exit.clone()),
self.discovery_loop(must_exit.clone()),
);
}
// ---- INTERNALS ----
/// Save network configuration to disc
async fn save_network_config(self: Arc<Self>) -> Result<(), Error> {
let ring: Arc<Ring> = self.ring.borrow().clone();
self.persist_config
.save_async(&ring.config)
.await
.expect("Cannot save current cluster configuration");
Ok(())
}
fn update_state_info(&self) {
let mut new_si: StateInfo = self.state_info.load().as_ref().clone();
let ring = self.ring.borrow();
new_si.config_version = ring.config.version;
self.state_info.swap(Arc::new(new_si));
}
fn handle_pull_config(&self) -> SystemRpc {
let ring = self.ring.borrow().clone();
SystemRpc::AdvertiseConfig(ring.config.clone())
}
async fn handle_advertise_config(
self: Arc<Self>,
adv: &NetworkConfig,
) -> Result<SystemRpc, Error> {
let update_ring = self.update_ring.lock().await;
let ring: Arc<Ring> = self.ring.borrow().clone();
if adv.version > ring.config.version {
let ring = Ring::new(adv.clone(), self.replication_factor);
update_ring.send(Arc::new(ring))?;
drop(update_ring);
let self2 = self.clone();
let adv2 = adv.clone();
self.background.spawn_cancellable(async move {
self2
.rpc
.broadcast(
&self2.system_endpoint,
SystemRpc::AdvertiseConfig(adv2),
RequestStrategy::with_priority(PRIO_NORMAL),
)
.await;
Ok(())
});
self.background.spawn(self.clone().save_network_config());
}
Ok(SystemRpc::Ok)
}
async fn discovery_loop(&self, mut stop_signal: watch::Receiver<bool>) {
/* TODO
let consul_config = match (&self.consul_host, &self.consul_service_name) {
(Some(ch), Some(csn)) => Some((ch.clone(), csn.clone())),
_ => None,
};
*/
while !*stop_signal.borrow() {
let not_configured = self.ring.borrow().config.members.is_empty();
let no_peers = self.fullmesh.get_peer_list().len() < self.replication_factor;
let bad_peers = self
.fullmesh
.get_peer_list()
.iter()
.filter(|p| p.is_up())
.count() != self.ring.borrow().config.members.len();
if not_configured || no_peers || bad_peers {
info!("Doing a bootstrap/discovery step (not_configured: {}, no_peers: {}, bad_peers: {})", not_configured, no_peers, bad_peers);
let ping_list = self.bootstrap_peers.clone();
/*
*TODO bring this back: persisted list of peers
if let Ok(peers) = self.persist_status.load_async().await {
ping_list.extend(peers.iter().map(|x| (x.addr, Some(x.id))));
}
*/
/*
* TODO bring this back: get peers from consul
if let Some((consul_host, consul_service_name)) = &consul_config {
match get_consul_nodes(consul_host, consul_service_name).await {
Ok(node_list) => {
ping_list.extend(node_list.iter().map(|a| (*a, None)));
}
Err(e) => {
warn!("Could not retrieve node list from Consul: {}", e);
}
}
}
*/
for (node_id, node_addr) in ping_list {
tokio::spawn(self.netapp.clone().try_connect(node_addr, node_id));
}
}
let restart_at = tokio::time::sleep(DISCOVERY_INTERVAL);
select! {
_ = restart_at.fuse() => {},
_ = stop_signal.changed().fuse() => {},
}
}
}
async fn pull_config(self: Arc<Self>, peer: NodeID) {
let resp = self
.rpc
.call(
&self.system_endpoint,
peer,
SystemRpc::PullConfig,
RequestStrategy::with_priority(PRIO_HIGH).with_timeout(PING_TIMEOUT),
)
.await;
if let Ok(SystemRpc::AdvertiseConfig(config)) = resp {
let _: Result<_, _> = self.handle_advertise_config(&config).await;
}
}
}
#[async_trait]
impl EndpointHandler<SystemRpc> for System {
async fn handle(self: &Arc<Self>, msg: &SystemRpc, _from: NodeID) -> SystemRpc {
let resp = match msg {
SystemRpc::PullConfig => Ok(self.handle_pull_config()),
SystemRpc::AdvertiseConfig(adv) => self.clone().handle_advertise_config(&adv).await,
SystemRpc::GetKnownNodes => {
let known_nodes = self
.fullmesh
.get_peer_list()
.iter()
.map(|n| (n.id, n.addr, n.is_up()))
.collect::<Vec<_>>();
Ok(SystemRpc::ReturnKnownNodes(known_nodes))
}
_ => Err(Error::BadRpc("Unexpected RPC message".to_string())),
};
match resp {
Ok(r) => r,
Err(e) => SystemRpc::Error(format!("{}", e)),
}
}
}
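
For reviewers, a sketch of how this request/response flow is used from the caller side, written as a helper that would live in system.rs next to pull_config. The method name pull_known_nodes is invented; everything it calls is defined above.

```
impl System {
    // Ask a peer for the list of nodes it currently knows about.
    // Sketch only: mirrors pull_config, but for the GetKnownNodes message.
    async fn pull_known_nodes(
        self: &Arc<Self>,
        peer: NodeID,
    ) -> Result<Vec<(NodeID, SocketAddr, bool)>, Error> {
        let resp = self
            .rpc
            .call(
                &self.system_endpoint,
                peer,
                SystemRpc::GetKnownNodes,
                RequestStrategy::with_priority(PRIO_HIGH).with_timeout(PING_TIMEOUT),
            )
            .await?;
        match resp {
            SystemRpc::ReturnKnownNodes(nodes) => Ok(nodes),
            resp => Err(Error::Message(format!("Unexpected response: {:?}", resp))),
        }
    }
}
```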

View file

@ -1,140 +0,0 @@
use core::future::Future;
use core::task::{Context, Poll};
use std::pin::Pin;
use std::sync::Arc;
use std::{fs, io};
use futures_util::future::*;
use hyper::client::connect::Connection;
use hyper::client::HttpConnector;
use hyper::service::Service;
use hyper::Uri;
use hyper_rustls::MaybeHttpsStream;
use rustls::internal::pemfile;
use tokio::io::{AsyncRead, AsyncWrite};
use tokio_rustls::TlsConnector;
use webpki::DNSNameRef;
use garage_util::error::Error;
pub fn load_certs(filename: &str) -> Result<Vec<rustls::Certificate>, Error> {
let certfile = fs::File::open(&filename)?;
let mut reader = io::BufReader::new(certfile);
let certs = pemfile::certs(&mut reader).map_err(|_| {
Error::Message(format!(
"Could not deecode certificates from file: {}",
filename
))
})?;
if certs.is_empty() {
return Err(Error::Message(format!(
"Invalid certificate file: {}",
filename
)));
}
Ok(certs)
}
pub fn load_private_key(filename: &str) -> Result<rustls::PrivateKey, Error> {
let keydata = fs::read_to_string(filename)?;
let mut buf1 = keydata.as_bytes();
let rsa_keys = pemfile::rsa_private_keys(&mut buf1).unwrap_or_default();
let mut buf2 = keydata.as_bytes();
let pkcs8_keys = pemfile::pkcs8_private_keys(&mut buf2).unwrap_or_default();
let mut keys = rsa_keys;
keys.extend(pkcs8_keys.into_iter());
if keys.len() != 1 {
return Err(Error::Message(format!(
"Invalid private key file: {} ({} private keys)",
filename,
keys.len()
)));
}
Ok(keys[0].clone())
}
// ---- AWFUL COPYPASTA FROM HYPER-RUSTLS connector.rs
// ---- ALWAYS USE `garage` AS HOSTNAME FOR TLS VERIFICATION
#[derive(Clone)]
pub struct HttpsConnectorFixedDnsname<T> {
http: T,
tls_config: Arc<rustls::ClientConfig>,
fixed_dnsname: &'static str,
}
type BoxError = Box<dyn std::error::Error + Send + Sync>;
impl HttpsConnectorFixedDnsname<HttpConnector> {
pub fn new(mut tls_config: rustls::ClientConfig, fixed_dnsname: &'static str) -> Self {
let mut http = HttpConnector::new();
http.enforce_http(false);
tls_config.alpn_protocols = vec![b"h2".to_vec(), b"http/1.1".to_vec()];
Self {
http,
tls_config: Arc::new(tls_config),
fixed_dnsname,
}
}
}
impl<T> Service<Uri> for HttpsConnectorFixedDnsname<T>
where
T: Service<Uri>,
T::Response: Connection + AsyncRead + AsyncWrite + Send + Unpin + 'static,
T::Future: Send + 'static,
T::Error: Into<BoxError>,
{
type Response = MaybeHttpsStream<T::Response>;
type Error = BoxError;
#[allow(clippy::type_complexity)]
type Future =
Pin<Box<dyn Future<Output = Result<MaybeHttpsStream<T::Response>, BoxError>> + Send>>;
fn poll_ready(&mut self, cx: &mut Context<'_>) -> Poll<Result<(), Self::Error>> {
match self.http.poll_ready(cx) {
Poll::Ready(Ok(())) => Poll::Ready(Ok(())),
Poll::Ready(Err(e)) => Poll::Ready(Err(e.into())),
Poll::Pending => Poll::Pending,
}
}
fn call(&mut self, dst: Uri) -> Self::Future {
let is_https = dst.scheme_str() == Some("https");
if !is_https {
let connecting_future = self.http.call(dst);
let f = async move {
let tcp = connecting_future.await.map_err(Into::into)?;
Ok(MaybeHttpsStream::Http(tcp))
};
f.boxed()
} else {
let cfg = self.tls_config.clone();
let connecting_future = self.http.call(dst);
let dnsname =
DNSNameRef::try_from_ascii_str(self.fixed_dnsname).expect("Invalid fixed dnsname");
let f = async move {
let tcp = connecting_future.await.map_err(Into::into)?;
let connector = TlsConnector::from(cfg);
let tls = connector
.connect(dnsname, tcp)
.await
.map_err(|e| io::Error::new(io::ErrorKind::Other, e))?;
Ok(MaybeHttpsStream::Https(tls))
};
f.boxed()
}
}
}

View file

@ -1,6 +1,6 @@
[package] [package]
name = "garage_table" name = "garage_table"
version = "0.3.0" version = "0.4.0"
authors = ["Alex Auvolat <alex@adnab.me>"] authors = ["Alex Auvolat <alex@adnab.me>"]
edition = "2018" edition = "2018"
license = "AGPL-3.0" license = "AGPL-3.0"
@ -13,9 +13,10 @@ path = "lib.rs"
# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html # See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
[dependencies] [dependencies]
garage_rpc = { version = "0.3.0", path = "../rpc" } garage_rpc = { version = "0.4.0", path = "../rpc" }
garage_util = { version = "0.3.0", path = "../util" } garage_util = { version = "0.4.0", path = "../util" }
async-trait = "0.1.7"
bytes = "1.0" bytes = "1.0"
hexdump = "0.1" hexdump = "0.1"
log = "0.4" log = "0.4"
@ -30,4 +31,3 @@ serde_bytes = "0.11"
futures = "0.3" futures = "0.3"
futures-util = "0.3" futures-util = "0.3"
tokio = { version = "1.0", default-features = false, features = ["rt", "rt-multi-thread", "io-util", "net", "time", "macros", "sync", "signal", "fs"] } tokio = { version = "1.0", default-features = false, features = ["rt", "rt-multi-thread", "io-util", "net", "time", "macros", "sync", "signal", "fs"] }

View file

@ -9,7 +9,7 @@ use tokio::sync::Notify;
use garage_util::data::*; use garage_util::data::*;
use garage_util::error::*; use garage_util::error::*;
use garage_rpc::membership::System; use garage_rpc::system::System;
use crate::crdt::Crdt; use crate::crdt::Crdt;
use crate::replication::*; use crate::replication::*;

View file

@ -2,6 +2,7 @@ use std::collections::HashMap;
use std::sync::Arc; use std::sync::Arc;
use std::time::Duration; use std::time::Duration;
use async_trait::async_trait;
use serde::{Deserialize, Serialize}; use serde::{Deserialize, Serialize};
use serde_bytes::ByteBuf; use serde_bytes::ByteBuf;
@ -13,9 +14,8 @@ use tokio::sync::watch;
use garage_util::data::*; use garage_util::data::*;
use garage_util::error::Error; use garage_util::error::Error;
use garage_rpc::membership::System; use garage_rpc::system::System;
use garage_rpc::rpc_client::*; use garage_rpc::*;
use garage_rpc::rpc_server::*;
use crate::data::*; use crate::data::*;
use crate::replication::*; use crate::replication::*;
@ -24,11 +24,11 @@ use crate::schema::*;
const TABLE_GC_BATCH_SIZE: usize = 1024; const TABLE_GC_BATCH_SIZE: usize = 1024;
const TABLE_GC_RPC_TIMEOUT: Duration = Duration::from_secs(30); const TABLE_GC_RPC_TIMEOUT: Duration = Duration::from_secs(30);
pub struct TableGc<F: TableSchema, R: TableReplication> { pub struct TableGc<F: TableSchema + 'static, R: TableReplication + 'static> {
system: Arc<System>, system: Arc<System>,
data: Arc<TableData<F, R>>, data: Arc<TableData<F, R>>,
rpc_client: Arc<RpcClient<GcRpc>>, endpoint: Arc<Endpoint<GcRpc, Self>>,
} }
#[derive(Serialize, Deserialize)] #[derive(Serialize, Deserialize)]
@ -36,30 +36,30 @@ enum GcRpc {
Update(Vec<ByteBuf>), Update(Vec<ByteBuf>),
DeleteIfEqualHash(Vec<(ByteBuf, Hash)>), DeleteIfEqualHash(Vec<(ByteBuf, Hash)>),
Ok, Ok,
Error(String),
} }
impl RpcMessage for GcRpc {} impl Message for GcRpc {
type Response = GcRpc;
}
impl<F, R> TableGc<F, R> impl<F, R> TableGc<F, R>
where where
F: TableSchema + 'static, F: TableSchema + 'static,
R: TableReplication + 'static, R: TableReplication + 'static,
{ {
pub(crate) fn launch( pub(crate) fn launch(system: Arc<System>, data: Arc<TableData<F, R>>) -> Arc<Self> {
system: Arc<System>, let endpoint = system
data: Arc<TableData<F, R>>, .netapp
rpc_server: &mut RpcServer, .endpoint(format!("garage_table/gc.rs/Rpc:{}", data.name));
) -> Arc<Self> {
let rpc_path = format!("table_{}/gc", data.name);
let rpc_client = system.rpc_client::<GcRpc>(&rpc_path);
let gc = Arc::new(Self { let gc = Arc::new(Self {
system: system.clone(), system: system.clone(),
data: data.clone(), data: data.clone(),
rpc_client, endpoint,
}); });
gc.register_handler(rpc_server, rpc_path); gc.endpoint.set_handler(gc.clone());
let gc1 = gc.clone(); let gc1 = gc.clone();
system.background.spawn_worker( system.background.spawn_worker(
@ -168,7 +168,7 @@ where
async fn try_send_and_delete( async fn try_send_and_delete(
&self, &self,
nodes: Vec<Uuid>, nodes: Vec<NodeID>,
items: Vec<(ByteBuf, Hash, ByteBuf)>, items: Vec<(ByteBuf, Hash, ByteBuf)>,
) -> Result<(), Error> { ) -> Result<(), Error> {
let n_items = items.len(); let n_items = items.len();
@ -180,11 +180,15 @@ where
deletes.push((k, vhash)); deletes.push((k, vhash));
} }
self.rpc_client self.system
.rpc
.try_call_many( .try_call_many(
&self.endpoint,
&nodes[..], &nodes[..],
GcRpc::Update(updates), GcRpc::Update(updates),
RequestStrategy::with_quorum(nodes.len()).with_timeout(TABLE_GC_RPC_TIMEOUT), RequestStrategy::with_priority(PRIO_BACKGROUND)
.with_quorum(nodes.len())
.with_timeout(TABLE_GC_RPC_TIMEOUT),
) )
.await?; .await?;
@ -193,11 +197,15 @@ where
self.data.name, n_items self.data.name, n_items
); );
self.rpc_client self.system
.rpc
.try_call_many( .try_call_many(
&self.endpoint,
&nodes[..], &nodes[..],
GcRpc::DeleteIfEqualHash(deletes.clone()), GcRpc::DeleteIfEqualHash(deletes.clone()),
RequestStrategy::with_quorum(nodes.len()).with_timeout(TABLE_GC_RPC_TIMEOUT), RequestStrategy::with_priority(PRIO_BACKGROUND)
.with_quorum(nodes.len())
.with_timeout(TABLE_GC_RPC_TIMEOUT),
) )
.await?; .await?;
@ -217,24 +225,7 @@ where
Ok(()) Ok(())
} }
// ---- RPC HANDLER ---- async fn handle_rpc(&self, message: &GcRpc) -> Result<GcRpc, Error> {
fn register_handler(self: &Arc<Self>, rpc_server: &mut RpcServer, path: String) {
let self2 = self.clone();
rpc_server.add_handler::<GcRpc, _, _>(path, move |msg, _addr| {
let self2 = self2.clone();
async move { self2.handle_rpc(&msg).await }
});
let self2 = self.clone();
self.rpc_client
.set_local_handler(self.system.id, move |msg| {
let self2 = self2.clone();
async move { self2.handle_rpc(&msg).await }
});
}
async fn handle_rpc(self: &Arc<Self>, message: &GcRpc) -> Result<GcRpc, Error> {
match message { match message {
GcRpc::Update(items) => { GcRpc::Update(items) => {
self.data.update_many(items)?; self.data.update_many(items)?;
@ -251,3 +242,16 @@ where
} }
} }
} }
#[async_trait]
impl<F, R> EndpointHandler<GcRpc> for TableGc<F, R>
where
F: TableSchema + 'static,
R: TableReplication + 'static,
{
async fn handle(self: &Arc<Self>, message: &GcRpc, _from: NodeID) -> GcRpc {
self.handle_rpc(message)
.await
.unwrap_or_else(|e| GcRpc::Error(format!("{}", e)))
}
}
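
Since the side-by-side diff above is hard to read, here is the same registration pattern in clean form, with a hypothetical worker and message type standing in for TableGc and GcRpc (sketch only, not part of this PR):

```
use std::sync::Arc;

use async_trait::async_trait;
use serde::{Deserialize, Serialize};

use garage_rpc::system::System;
use garage_rpc::*;
use garage_util::error::Error;

// Hypothetical message type, for illustration only.
#[derive(Serialize, Deserialize)]
enum ExampleWorkerRpc {
    Ping,
    Ok,
    Error(String),
}

impl Message for ExampleWorkerRpc {
    type Response = ExampleWorkerRpc;
}

struct ExampleWorker {
    endpoint: Arc<Endpoint<ExampleWorkerRpc, Self>>,
}

impl ExampleWorker {
    // Same shape as TableGc::launch: create a named endpoint on the shared
    // netapp instance and register the worker as its handler.
    fn launch(system: Arc<System>) -> Arc<Self> {
        let endpoint = system
            .netapp
            .endpoint("garage_table/example.rs/Rpc".to_string());
        let worker = Arc::new(Self { endpoint });
        worker.endpoint.set_handler(worker.clone());
        worker
    }

    async fn handle_rpc(&self, message: &ExampleWorkerRpc) -> Result<ExampleWorkerRpc, Error> {
        match message {
            ExampleWorkerRpc::Ping => Ok(ExampleWorkerRpc::Ok),
            _ => Err(Error::BadRpc("Unexpected RPC message".to_string())),
        }
    }
}

// Errors are turned into an Error variant of the message, as in gc.rs above.
#[async_trait]
impl EndpointHandler<ExampleWorkerRpc> for ExampleWorker {
    async fn handle(self: &Arc<Self>, message: &ExampleWorkerRpc, _from: NodeID) -> ExampleWorkerRpc {
        self.handle_rpc(message)
            .await
            .unwrap_or_else(|e| ExampleWorkerRpc::Error(format!("{}", e)))
    }
}
```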

View file

@ -1,7 +1,8 @@
use std::sync::Arc; use std::sync::Arc;
use garage_rpc::membership::System;
use garage_rpc::ring::*; use garage_rpc::ring::*;
use garage_rpc::system::System;
use garage_rpc::NodeID;
use garage_util::data::*; use garage_util::data::*;
use crate::replication::*; use crate::replication::*;
@ -19,16 +20,20 @@ pub struct TableFullReplication {
} }
impl TableReplication for TableFullReplication { impl TableReplication for TableFullReplication {
fn read_nodes(&self, _hash: &Hash) -> Vec<Uuid> { fn read_nodes(&self, _hash: &Hash) -> Vec<NodeID> {
vec![self.system.id] vec![self.system.id]
} }
fn read_quorum(&self) -> usize { fn read_quorum(&self) -> usize {
1 1
} }
fn write_nodes(&self, _hash: &Hash) -> Vec<Uuid> { fn write_nodes(&self, _hash: &Hash) -> Vec<NodeID> {
let ring = self.system.ring.borrow(); let ring = self.system.ring.borrow();
ring.config.members.keys().cloned().collect::<Vec<_>>() ring.config
.members
.keys()
.map(|id| NodeID::from_slice(id.as_slice()).unwrap())
.collect::<Vec<_>>()
} }
fn write_quorum(&self) -> usize { fn write_quorum(&self) -> usize {
let nmembers = self.system.ring.borrow().config.members.len(); let nmembers = self.system.ring.borrow().config.members.len();

View file

@ -1,5 +1,5 @@
use garage_rpc::ring::*; use garage_rpc::ring::*;
use garage_rpc::NodeID;
use garage_util::data::*; use garage_util::data::*;
/// Trait to describe how a table shall be replicated /// Trait to describe how a table shall be replicated
@ -8,12 +8,12 @@ pub trait TableReplication: Send + Sync {
// To understand various replication methods // To understand various replication methods
/// Which nodes to send read requests to /// Which nodes to send read requests to
fn read_nodes(&self, hash: &Hash) -> Vec<Uuid>; fn read_nodes(&self, hash: &Hash) -> Vec<NodeID>;
/// Responses needed to consider a read succesfull /// Responses needed to consider a read succesfull
fn read_quorum(&self) -> usize; fn read_quorum(&self) -> usize;
/// Which nodes to send writes to /// Which nodes to send writes to
fn write_nodes(&self, hash: &Hash) -> Vec<Uuid>; fn write_nodes(&self, hash: &Hash) -> Vec<NodeID>;
/// Responses needed to consider a write succesfull /// Responses needed to consider a write succesfull
fn write_quorum(&self) -> usize; fn write_quorum(&self) -> usize;
fn max_write_errors(&self) -> usize; fn max_write_errors(&self) -> usize;

View file

@ -1,7 +1,8 @@
use std::sync::Arc; use std::sync::Arc;
use garage_rpc::membership::System;
use garage_rpc::ring::*; use garage_rpc::ring::*;
use garage_rpc::system::System;
use garage_rpc::NodeID;
use garage_util::data::*; use garage_util::data::*;
use crate::replication::*; use crate::replication::*;
@ -25,7 +26,7 @@ pub struct TableShardedReplication {
} }
impl TableReplication for TableShardedReplication { impl TableReplication for TableShardedReplication {
fn read_nodes(&self, hash: &Hash) -> Vec<Uuid> { fn read_nodes(&self, hash: &Hash) -> Vec<NodeID> {
let ring = self.system.ring.borrow(); let ring = self.system.ring.borrow();
ring.get_nodes(&hash, self.replication_factor) ring.get_nodes(&hash, self.replication_factor)
} }
@ -33,7 +34,7 @@ impl TableReplication for TableShardedReplication {
self.read_quorum self.read_quorum
} }
fn write_nodes(&self, hash: &Hash) -> Vec<Uuid> { fn write_nodes(&self, hash: &Hash) -> Vec<NodeID> {
let ring = self.system.ring.borrow(); let ring = self.system.ring.borrow();
ring.get_nodes(&hash, self.replication_factor) ring.get_nodes(&hash, self.replication_factor)
} }

View file

@ -2,6 +2,7 @@ use std::collections::VecDeque;
use std::sync::{Arc, Mutex}; use std::sync::{Arc, Mutex};
use std::time::{Duration, Instant}; use std::time::{Duration, Instant};
use async_trait::async_trait;
use futures::select; use futures::select;
use futures_util::future::*; use futures_util::future::*;
use futures_util::stream::*; use futures_util::stream::*;
@ -13,10 +14,9 @@ use tokio::sync::{mpsc, watch};
use garage_util::data::*; use garage_util::data::*;
use garage_util::error::Error; use garage_util::error::Error;
use garage_rpc::membership::System;
use garage_rpc::ring::*; use garage_rpc::ring::*;
use garage_rpc::rpc_client::*; use garage_rpc::system::System;
use garage_rpc::rpc_server::*; use garage_rpc::*;
use crate::data::*; use crate::data::*;
use crate::merkle::*; use crate::merkle::*;
@ -28,13 +28,13 @@ const TABLE_SYNC_RPC_TIMEOUT: Duration = Duration::from_secs(30);
// Do anti-entropy every 10 minutes // Do anti-entropy every 10 minutes
const ANTI_ENTROPY_INTERVAL: Duration = Duration::from_secs(10 * 60); const ANTI_ENTROPY_INTERVAL: Duration = Duration::from_secs(10 * 60);
pub struct TableSyncer<F: TableSchema, R: TableReplication> { pub struct TableSyncer<F: TableSchema + 'static, R: TableReplication + 'static> {
system: Arc<System>, system: Arc<System>,
data: Arc<TableData<F, R>>, data: Arc<TableData<F, R>>,
merkle: Arc<MerkleUpdater<F, R>>, merkle: Arc<MerkleUpdater<F, R>>,
todo: Mutex<SyncTodo>, todo: Mutex<SyncTodo>,
rpc_client: Arc<RpcClient<SyncRpc>>, endpoint: Arc<Endpoint<SyncRpc, Self>>,
} }
#[derive(Serialize, Deserialize)] #[derive(Serialize, Deserialize)]
@ -45,9 +45,12 @@ pub(crate) enum SyncRpc {
Node(MerkleNodeKey, MerkleNode), Node(MerkleNodeKey, MerkleNode),
Items(Vec<Arc<ByteBuf>>), Items(Vec<Arc<ByteBuf>>),
Ok, Ok,
Error(String),
} }
impl RpcMessage for SyncRpc {} impl Message for SyncRpc {
type Response = SyncRpc;
}
struct SyncTodo { struct SyncTodo {
todo: Vec<TodoPartition>, todo: Vec<TodoPartition>,
@ -72,10 +75,10 @@ where
system: Arc<System>, system: Arc<System>,
data: Arc<TableData<F, R>>, data: Arc<TableData<F, R>>,
merkle: Arc<MerkleUpdater<F, R>>, merkle: Arc<MerkleUpdater<F, R>>,
rpc_server: &mut RpcServer,
) -> Arc<Self> { ) -> Arc<Self> {
let rpc_path = format!("table_{}/sync", data.name); let endpoint = system
let rpc_client = system.rpc_client::<SyncRpc>(&rpc_path); .netapp
.endpoint(format!("garage_table/sync.rs/Rpc:{}", data.name));
let todo = SyncTodo { todo: vec![] }; let todo = SyncTodo { todo: vec![] };
@ -84,10 +87,10 @@ where
data: data.clone(), data: data.clone(),
merkle, merkle,
todo: Mutex::new(todo), todo: Mutex::new(todo),
rpc_client, endpoint,
}); });
syncer.register_handler(rpc_server, rpc_path); syncer.endpoint.set_handler(syncer.clone());
let (busy_tx, busy_rx) = mpsc::unbounded_channel(); let (busy_tx, busy_rx) = mpsc::unbounded_channel();
@ -112,21 +115,6 @@ where
syncer syncer
} }
fn register_handler(self: &Arc<Self>, rpc_server: &mut RpcServer, path: String) {
let self2 = self.clone();
rpc_server.add_handler::<SyncRpc, _, _>(path, move |msg, _addr| {
let self2 = self2.clone();
async move { self2.handle_rpc(&msg).await }
});
let self2 = self.clone();
self.rpc_client
.set_local_handler(self.system.id, move |msg| {
let self2 = self2.clone();
async move { self2.handle_rpc(&msg).await }
});
}
async fn watcher_task( async fn watcher_task(
self: Arc<Self>, self: Arc<Self>,
mut must_exit: watch::Receiver<bool>, mut must_exit: watch::Receiver<bool>,
@ -317,15 +305,19 @@ where
async fn offload_items( async fn offload_items(
self: &Arc<Self>, self: &Arc<Self>,
items: &[(Vec<u8>, Arc<ByteBuf>)], items: &[(Vec<u8>, Arc<ByteBuf>)],
nodes: &[Uuid], nodes: &[NodeID],
) -> Result<(), Error> { ) -> Result<(), Error> {
let values = items.iter().map(|(_k, v)| v.clone()).collect::<Vec<_>>(); let values = items.iter().map(|(_k, v)| v.clone()).collect::<Vec<_>>();
self.rpc_client self.system
.rpc
.try_call_many( .try_call_many(
&self.endpoint,
nodes, nodes,
SyncRpc::Items(values), SyncRpc::Items(values),
RequestStrategy::with_quorum(nodes.len()).with_timeout(TABLE_SYNC_RPC_TIMEOUT), RequestStrategy::with_priority(PRIO_BACKGROUND)
.with_quorum(nodes.len())
.with_timeout(TABLE_SYNC_RPC_TIMEOUT),
) )
.await?; .await?;
@ -362,7 +354,7 @@ where
async fn do_sync_with( async fn do_sync_with(
self: Arc<Self>, self: Arc<Self>,
partition: TodoPartition, partition: TodoPartition,
who: Uuid, who: NodeID,
must_exit: watch::Receiver<bool>, must_exit: watch::Receiver<bool>,
) -> Result<(), Error> { ) -> Result<(), Error> {
let (root_ck_key, root_ck) = self.get_root_ck(partition.partition)?; let (root_ck_key, root_ck) = self.get_root_ck(partition.partition)?;
@ -378,11 +370,14 @@ where
// Check if they have the same root checksum // Check if they have the same root checksum
// If so, do nothing. // If so, do nothing.
let root_resp = self let root_resp = self
.rpc_client .system
.rpc
.call( .call(
&self.endpoint,
who, who,
SyncRpc::RootCkHash(partition.partition, root_ck_hash), SyncRpc::RootCkHash(partition.partition, root_ck_hash),
TABLE_SYNC_RPC_TIMEOUT, RequestStrategy::with_priority(PRIO_BACKGROUND)
.with_timeout(TABLE_SYNC_RPC_TIMEOUT),
) )
.await?; .await?;
@ -430,8 +425,15 @@ where
// Get Merkle node for this tree position at remote node // Get Merkle node for this tree position at remote node
// and compare it with local node // and compare it with local node
let remote_node = match self let remote_node = match self
.rpc_client .system
.call(who, SyncRpc::GetNode(key.clone()), TABLE_SYNC_RPC_TIMEOUT) .rpc
.call(
&self.endpoint,
who,
SyncRpc::GetNode(key.clone()),
RequestStrategy::with_priority(PRIO_BACKGROUND)
.with_timeout(TABLE_SYNC_RPC_TIMEOUT),
)
.await? .await?
{ {
SyncRpc::Node(_, node) => node, SyncRpc::Node(_, node) => node,
@ -478,7 +480,7 @@ where
Ok(()) Ok(())
} }
async fn send_items(&self, who: Uuid, item_value_list: Vec<Vec<u8>>) -> Result<(), Error> { async fn send_items(&self, who: NodeID, item_value_list: Vec<Vec<u8>>) -> Result<(), Error> {
info!( info!(
"({}) Sending {} items to {:?}", "({}) Sending {} items to {:?}",
self.data.name, self.data.name,
@ -492,8 +494,15 @@ where
.collect::<Vec<_>>(); .collect::<Vec<_>>();
let rpc_resp = self let rpc_resp = self
.rpc_client .system
.call(who, SyncRpc::Items(values), TABLE_SYNC_RPC_TIMEOUT) .rpc
.call(
&self.endpoint,
who,
SyncRpc::Items(values),
RequestStrategy::with_priority(PRIO_BACKGROUND)
.with_timeout(TABLE_SYNC_RPC_TIMEOUT),
)
.await?; .await?;
if let SyncRpc::Ok = rpc_resp { if let SyncRpc::Ok = rpc_resp {
Ok(()) Ok(())
@ -506,7 +515,6 @@ where
} }
// ======= SYNCHRONIZATION PROCEDURE -- RECEIVER SIDE ====== // ======= SYNCHRONIZATION PROCEDURE -- RECEIVER SIDE ======
async fn handle_rpc(self: &Arc<Self>, message: &SyncRpc) -> Result<SyncRpc, Error> { async fn handle_rpc(self: &Arc<Self>, message: &SyncRpc) -> Result<SyncRpc, Error> {
match message { match message {
SyncRpc::RootCkHash(range, h) => { SyncRpc::RootCkHash(range, h) => {
@ -527,6 +535,19 @@ where
} }
} }
#[async_trait]
impl<F, R> EndpointHandler<SyncRpc> for TableSyncer<F, R>
where
F: TableSchema + 'static,
R: TableReplication + 'static,
{
async fn handle(self: &Arc<Self>, message: &SyncRpc, _from: NodeID) -> SyncRpc {
self.handle_rpc(message)
.await
.unwrap_or_else(|e| SyncRpc::Error(format!("{}", e)))
}
}
impl SyncTodo { impl SyncTodo {
fn add_full_sync<F: TableSchema, R: TableReplication>( fn add_full_sync<F: TableSchema, R: TableReplication>(
&mut self, &mut self,

View file

@ -2,6 +2,7 @@ use std::collections::{BTreeMap, HashMap};
use std::sync::Arc; use std::sync::Arc;
use std::time::Duration; use std::time::Duration;
use async_trait::async_trait;
use futures::stream::*; use futures::stream::*;
use serde::{Deserialize, Serialize}; use serde::{Deserialize, Serialize};
use serde_bytes::ByteBuf; use serde_bytes::ByteBuf;
@ -9,9 +10,8 @@ use serde_bytes::ByteBuf;
use garage_util::data::*; use garage_util::data::*;
use garage_util::error::Error; use garage_util::error::Error;
use garage_rpc::membership::System; use garage_rpc::system::System;
use garage_rpc::rpc_client::*; use garage_rpc::*;
use garage_rpc::rpc_server::*;
use crate::crdt::Crdt; use crate::crdt::Crdt;
use crate::data::*; use crate::data::*;
@ -23,17 +23,18 @@ use crate::sync::*;
const TABLE_RPC_TIMEOUT: Duration = Duration::from_secs(10); const TABLE_RPC_TIMEOUT: Duration = Duration::from_secs(10);
pub struct Table<F: TableSchema, R: TableReplication> { pub struct Table<F: TableSchema + 'static, R: TableReplication + 'static> {
pub system: Arc<System>, pub system: Arc<System>,
pub data: Arc<TableData<F, R>>, pub data: Arc<TableData<F, R>>,
pub merkle_updater: Arc<MerkleUpdater<F, R>>, pub merkle_updater: Arc<MerkleUpdater<F, R>>,
pub syncer: Arc<TableSyncer<F, R>>, pub syncer: Arc<TableSyncer<F, R>>,
rpc_client: Arc<RpcClient<TableRpc<F>>>, endpoint: Arc<Endpoint<TableRpc<F>, Self>>,
} }
#[derive(Serialize, Deserialize)] #[derive(Serialize, Deserialize)]
pub(crate) enum TableRpc<F: TableSchema> { pub(crate) enum TableRpc<F: TableSchema> {
Ok, Ok,
Error(String),
ReadEntry(F::P, F::S), ReadEntry(F::P, F::S),
ReadEntryResponse(Option<ByteBuf>), ReadEntryResponse(Option<ByteBuf>),
@ -44,7 +45,9 @@ pub(crate) enum TableRpc<F: TableSchema> {
Update(Vec<Arc<ByteBuf>>), Update(Vec<Arc<ByteBuf>>),
} }
impl<F: TableSchema> RpcMessage for TableRpc<F> {} impl<F: TableSchema> Message for TableRpc<F> {
type Response = TableRpc<F>;
}
impl<F, R> Table<F, R> impl<F, R> Table<F, R>
where where
@ -59,32 +62,27 @@ where
system: Arc<System>, system: Arc<System>,
db: &sled::Db, db: &sled::Db,
name: String, name: String,
rpc_server: &mut RpcServer,
) -> Arc<Self> { ) -> Arc<Self> {
let rpc_path = format!("table_{}", name); let endpoint = system
let rpc_client = system.rpc_client::<TableRpc<F>>(&rpc_path); .netapp
.endpoint(format!("garage_table/table.rs/Rpc:{}", name));
let data = TableData::new(system.clone(), name, instance, replication, db); let data = TableData::new(system.clone(), name, instance, replication, db);
let merkle_updater = MerkleUpdater::launch(&system.background, data.clone()); let merkle_updater = MerkleUpdater::launch(&system.background, data.clone());
let syncer = TableSyncer::launch( let syncer = TableSyncer::launch(system.clone(), data.clone(), merkle_updater.clone());
system.clone(), TableGc::launch(system.clone(), data.clone());
data.clone(),
merkle_updater.clone(),
rpc_server,
);
TableGc::launch(system.clone(), data.clone(), rpc_server);
let table = Arc::new(Self { let table = Arc::new(Self {
system, system,
data, data,
merkle_updater, merkle_updater,
syncer, syncer,
rpc_client, endpoint,
}); });
table.clone().register_handler(rpc_server, rpc_path); table.endpoint.set_handler(table.clone());
table table
} }
@ -97,11 +95,14 @@ where
let e_enc = Arc::new(ByteBuf::from(rmp_to_vec_all_named(e)?)); let e_enc = Arc::new(ByteBuf::from(rmp_to_vec_all_named(e)?));
let rpc = TableRpc::<F>::Update(vec![e_enc]); let rpc = TableRpc::<F>::Update(vec![e_enc]);
self.rpc_client self.system
.rpc
.try_call_many( .try_call_many(
&self.endpoint,
&who[..], &who[..],
rpc, rpc,
RequestStrategy::with_quorum(self.data.replication.write_quorum()) RequestStrategy::with_priority(PRIO_NORMAL)
.with_quorum(self.data.replication.write_quorum())
.with_timeout(TABLE_RPC_TIMEOUT), .with_timeout(TABLE_RPC_TIMEOUT),
) )
.await?; .await?;
@ -123,7 +124,16 @@ where
let call_futures = call_list.drain().map(|(node, entries)| async move { let call_futures = call_list.drain().map(|(node, entries)| async move {
let rpc = TableRpc::<F>::Update(entries); let rpc = TableRpc::<F>::Update(entries);
let resp = self.rpc_client.call(node, rpc, TABLE_RPC_TIMEOUT).await?; let resp = self
.system
.rpc
.call(
&self.endpoint,
node,
rpc,
RequestStrategy::with_priority(PRIO_NORMAL).with_timeout(TABLE_RPC_TIMEOUT),
)
.await?;
Ok::<_, Error>((node, resp)) Ok::<_, Error>((node, resp))
}); });
let mut resps = call_futures.collect::<FuturesUnordered<_>>(); let mut resps = call_futures.collect::<FuturesUnordered<_>>();
@ -152,11 +162,14 @@ where
let rpc = TableRpc::<F>::ReadEntry(partition_key.clone(), sort_key.clone()); let rpc = TableRpc::<F>::ReadEntry(partition_key.clone(), sort_key.clone());
let resps = self let resps = self
.rpc_client .system
.rpc
.try_call_many( .try_call_many(
&self.endpoint,
&who[..], &who[..],
rpc, rpc,
RequestStrategy::with_quorum(self.data.replication.read_quorum()) RequestStrategy::with_priority(PRIO_NORMAL)
.with_quorum(self.data.replication.read_quorum())
.with_timeout(TABLE_RPC_TIMEOUT) .with_timeout(TABLE_RPC_TIMEOUT)
.interrupt_after_quorum(true), .interrupt_after_quorum(true),
) )
@ -208,11 +221,14 @@ where
let rpc = TableRpc::<F>::ReadRange(partition_key.clone(), begin_sort_key, filter, limit); let rpc = TableRpc::<F>::ReadRange(partition_key.clone(), begin_sort_key, filter, limit);
let resps = self let resps = self
.rpc_client .system
.rpc
.try_call_many( .try_call_many(
&self.endpoint,
&who[..], &who[..],
rpc, rpc,
RequestStrategy::with_quorum(self.data.replication.read_quorum()) RequestStrategy::with_priority(PRIO_NORMAL)
.with_quorum(self.data.replication.read_quorum())
.with_timeout(TABLE_RPC_TIMEOUT) .with_timeout(TABLE_RPC_TIMEOUT)
.interrupt_after_quorum(true), .interrupt_after_quorum(true),
) )
@ -261,36 +277,25 @@ where
// =============== UTILITY FUNCTION FOR CLIENT OPERATIONS =============== // =============== UTILITY FUNCTION FOR CLIENT OPERATIONS ===============
async fn repair_on_read(&self, who: &[Uuid], what: F::E) -> Result<(), Error> { async fn repair_on_read(&self, who: &[NodeID], what: F::E) -> Result<(), Error> {
let what_enc = Arc::new(ByteBuf::from(rmp_to_vec_all_named(&what)?)); let what_enc = Arc::new(ByteBuf::from(rmp_to_vec_all_named(&what)?));
self.rpc_client self.system
.rpc
.try_call_many( .try_call_many(
&self.endpoint,
who, who,
TableRpc::<F>::Update(vec![what_enc]), TableRpc::<F>::Update(vec![what_enc]),
RequestStrategy::with_quorum(who.len()).with_timeout(TABLE_RPC_TIMEOUT), RequestStrategy::with_priority(PRIO_NORMAL)
.with_quorum(who.len())
.with_timeout(TABLE_RPC_TIMEOUT),
) )
.await?; .await?;
Ok(()) Ok(())
} }
// =============== HANDLERS FOR RPC OPERATIONS (SERVER SIDE) ============== // ====== RPC HANDLER =====
//
fn register_handler(self: Arc<Self>, rpc_server: &mut RpcServer, path: String) { async fn handle_rpc(self: &Arc<Self>, msg: &TableRpc<F>) -> Result<TableRpc<F>, Error> {
let self2 = self.clone();
rpc_server.add_handler::<TableRpc<F>, _, _>(path, move |msg, _addr| {
let self2 = self2.clone();
async move { self2.handle(&msg).await }
});
let self2 = self.clone();
self.rpc_client
.set_local_handler(self.system.id, move |msg| {
let self2 = self2.clone();
async move { self2.handle(&msg).await }
});
}
async fn handle(self: &Arc<Self>, msg: &TableRpc<F>) -> Result<TableRpc<F>, Error> {
match msg { match msg {
TableRpc::ReadEntry(key, sort_key) => { TableRpc::ReadEntry(key, sort_key) => {
let value = self.data.read_entry(key, sort_key)?; let value = self.data.read_entry(key, sort_key)?;
@ -308,3 +313,16 @@ where
} }
} }
} }
#[async_trait]
impl<F, R> EndpointHandler<TableRpc<F>> for Table<F, R>
where
F: TableSchema + 'static,
R: TableReplication + 'static,
{
async fn handle(self: &Arc<Self>, msg: &TableRpc<F>, _from: NodeID) -> TableRpc<F> {
self.handle_rpc(msg)
.await
.unwrap_or_else(|e| TableRpc::<F>::Error(format!("{}", e)))
}
}

View file

@ -1,6 +1,6 @@
[package] [package]
name = "garage_util" name = "garage_util"
version = "0.3.0" version = "0.4.0"
authors = ["Alex Auvolat <alex@adnab.me>"] authors = ["Alex Auvolat <alex@adnab.me>"]
edition = "2018" edition = "2018"
license = "AGPL-3.0" license = "AGPL-3.0"
@ -32,7 +32,6 @@ toml = "0.5"
futures = "0.3" futures = "0.3"
tokio = { version = "1.0", default-features = false, features = ["rt", "rt-multi-thread", "io-util", "net", "time", "macros", "sync", "signal", "fs"] } tokio = { version = "1.0", default-features = false, features = ["rt", "rt-multi-thread", "io-util", "net", "time", "macros", "sync", "signal", "fs"] }
netapp = { version = "0.3.0", git = "https://git.deuxfleurs.fr/lx/netapp" }
http = "0.2" http = "0.2"
hyper = "0.14" hyper = "0.14"
rustls = "0.19"
webpki = "0.21"

View file

@ -3,8 +3,11 @@ use std::io::Read;
use std::net::SocketAddr; use std::net::SocketAddr;
use std::path::PathBuf; use std::path::PathBuf;
use serde::de::Error as SerdeError;
use serde::{de, Deserialize}; use serde::{de, Deserialize};
use netapp::NodeID;
use crate::error::Error; use crate::error::Error;
/// Represent the whole configuration /// Represent the whole configuration
@ -26,20 +29,20 @@ pub struct Config {
// (we can add more aliases for this later) // (we can add more aliases for this later)
pub replication_mode: String, pub replication_mode: String,
/// RPC secret key: 32 bytes hex encoded
pub rpc_secret: String,
/// Address to bind for RPC /// Address to bind for RPC
pub rpc_bind_addr: SocketAddr, pub rpc_bind_addr: SocketAddr,
/// Bootstrap peers RPC address /// Bootstrap peers RPC address
#[serde(deserialize_with = "deserialize_vec_addr")] #[serde(deserialize_with = "deserialize_vec_addr")]
pub bootstrap_peers: Vec<SocketAddr>, pub bootstrap_peers: Vec<(NodeID, SocketAddr)>,
/// Consul host to connect to in order to discover more peers /// Consul host to connect to in order to discover more peers
pub consul_host: Option<String>, pub consul_host: Option<String>,
/// Consul service name to use /// Consul service name to use
pub consul_service_name: Option<String>, pub consul_service_name: Option<String>,
/// Configuration for RPC TLS
pub rpc_tls: Option<TlsConfig>,
/// Max number of concurrent RPC requests /// Max number of concurrent RPC requests
#[serde(default = "default_max_concurrent_rpc_requests")] #[serde(default = "default_max_concurrent_rpc_requests")]
pub max_concurrent_rpc_requests: usize, pub max_concurrent_rpc_requests: usize,
@ -59,17 +62,6 @@ pub struct Config {
pub s3_web: WebConfig, pub s3_web: WebConfig,
} }
/// Configuration for RPC TLS
#[derive(Deserialize, Debug, Clone)]
pub struct TlsConfig {
/// Path to certificate authority used for all nodes
pub ca_cert: String,
/// Path to public certificate for this node
pub node_cert: String,
/// Path to private key for this node
pub node_key: String,
}
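
Side note for operators: the rpc_tls / TlsConfig block is removed here, and node-to-node authentication now relies on the rpc_secret field added above (32 bytes, hex-encoded, shared by every node of the cluster). Assuming a standard OpenSSL install, a suitable value can be generated with e.g. openssl rand -hex 32 — that command is a suggestion, not something prescribed by this PR.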
/// Configuration for S3 api /// Configuration for S3 api
#[derive(Deserialize, Debug, Clone)] #[derive(Deserialize, Debug, Clone)]
pub struct ApiConfig { pub struct ApiConfig {
@ -115,27 +107,32 @@ pub fn read_config(config_file: PathBuf) -> Result<Config, Error> {
Ok(toml::from_str(&config)?) Ok(toml::from_str(&config)?)
} }
fn deserialize_vec_addr<'de, D>(deserializer: D) -> Result<Vec<SocketAddr>, D::Error> fn deserialize_vec_addr<'de, D>(deserializer: D) -> Result<Vec<(NodeID, SocketAddr)>, D::Error>
where where
D: de::Deserializer<'de>, D: de::Deserializer<'de>,
{ {
use std::net::ToSocketAddrs; use std::net::ToSocketAddrs;
Ok(<Vec<&str>>::deserialize(deserializer)? let mut ret = vec![];
.iter()
.filter_map(|&name| { for peer in <Vec<&str>>::deserialize(deserializer)? {
name.to_socket_addrs() let delim = peer
.map(|iter| (name, iter)) .find('@')
.map_err(|_| warn!("Error resolving \"{}\"", name)) .ok_or_else(|| D::Error::custom("Invalid bootstrap peer: public key not specified"))?;
.ok() let (key, host) = peer.split_at(delim);
}) let pubkey = NodeID::from_slice(&hex::decode(&key).map_err(D::Error::custom)?)
.map(|(name, iter)| { .ok_or_else(|| D::Error::custom("Invalid bootstrap peer public key"))?;
let v = iter.collect::<Vec<_>>(); let hosts = host[1..]
if v.is_empty() { .to_socket_addrs()
warn!("Error resolving \"{}\"", name) .map_err(D::Error::custom)?
} .collect::<Vec<_>>();
v if hosts.is_empty() {
}) return Err(D::Error::custom(format!("Error resolving {}", &host[1..])));
.flatten() }
.collect()) for host in hosts {
ret.push((pubkey.clone(), host));
}
}
Ok(ret)
} }
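
Side note for operators: with this parser each bootstrap_peers entry must now be written as a single string of the form <node public key in hex>@<host:port> instead of a bare address, for example (the key shown is a placeholder, not a real node ID) "3e1f...9ab2@10.0.0.1:3901". A DNS name that resolves to several addresses yields one peer entry per resolved address.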

View file

@ -11,8 +11,8 @@ pub enum RpcError {
#[error(display = "Node is down: {:?}.", _0)] #[error(display = "Node is down: {:?}.", _0)]
NodeDown(Uuid), NodeDown(Uuid),
#[error(display = "Timeout: {}", _0)] #[error(display = "Timeout")]
Timeout(#[error(source)] tokio::time::error::Elapsed), Timeout,
#[error(display = "HTTP error: {}", _0)] #[error(display = "HTTP error: {}", _0)]
Http(#[error(source)] http::Error), Http(#[error(source)] http::Error),
@ -45,11 +45,8 @@ pub enum Error {
#[error(display = "Invalid HTTP header value: {}", _0)] #[error(display = "Invalid HTTP header value: {}", _0)]
HttpHeader(#[error(source)] http::header::ToStrError), HttpHeader(#[error(source)] http::header::ToStrError),
#[error(display = "TLS error: {}", _0)] #[error(display = "Netapp error: {}", _0)]
Tls(#[error(source)] rustls::TLSError), Netapp(#[error(source)] netapp::error::Error),
#[error(display = "PKI error: {}", _0)]
Pki(#[error(source)] webpki::Error),

We discussed renaming this error to "FailedQuorumError" or something similar, as this is the only case in which it is fired.

#[error(display = "Sled error: {}", _0)] #[error(display = "Sled error: {}", _0)]
Sled(#[error(source)] sled::Error), Sled(#[error(source)] sled::Error),

View file

@ -1,6 +1,6 @@
[package] [package]
name = "garage_web" name = "garage_web"
version = "0.3.0" version = "0.4.0"
authors = ["Alex Auvolat <alex@adnab.me>", "Quentin Dufour <quentin@dufour.io>"] authors = ["Alex Auvolat <alex@adnab.me>", "Quentin Dufour <quentin@dufour.io>"]
edition = "2018" edition = "2018"
license = "AGPL-3.0" license = "AGPL-3.0"
@ -13,10 +13,10 @@ path = "lib.rs"
# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html # See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
[dependencies] [dependencies]
garage_api = { version = "0.3.0", path = "../api" } garage_api = { version = "0.4.0", path = "../api" }
garage_model = { version = "0.3.0", path = "../model" } garage_model = { version = "0.4.0", path = "../model" }
garage_util = { version = "0.3.0", path = "../util" } garage_util = { version = "0.4.0", path = "../util" }
garage_table = { version = "0.3.0", path = "../table" } garage_table = { version = "0.4.0", path = "../table" }
err-derive = "0.3" err-derive = "0.3"
idna = "0.2" idna = "0.2"