replace RPC stack with netapp #123

Merged
lx merged 5 commits from netapp into main 2021-10-25 13:49:35 +00:00
31 changed files with 1497 additions and 2289 deletions
Showing only changes of commit 4067797d01 - Show all commits

705
Cargo.lock generated

File diff suppressed because it is too large Load diff

View file

@ -1,6 +1,6 @@
[package]
name = "garage_api"
version = "0.3.0"
version = "0.4.0"
authors = ["Alex Auvolat <alex@adnab.me>"]
edition = "2018"
license = "AGPL-3.0"
@ -13,9 +13,9 @@ path = "lib.rs"
# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
[dependencies]
garage_model = { version = "0.3.0", path = "../model" }
garage_table = { version = "0.3.0", path = "../table" }
garage_util = { version = "0.3.0", path = "../util" }
garage_model = { version = "0.4.0", path = "../model" }
garage_table = { version = "0.4.0", path = "../table" }
garage_util = { version = "0.4.0", path = "../util" }
base64 = "0.13"
bytes = "1.0"

View file

@ -1,6 +1,6 @@
[package]
name = "garage"
version = "0.3.0"
version = "0.4.0"
authors = ["Alex Auvolat <alex@adnab.me>"]
edition = "2018"
license = "AGPL-3.0"
@ -14,12 +14,12 @@ path = "main.rs"
# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
[dependencies]
garage_api = { version = "0.3.0", path = "../api" }
garage_model = { version = "0.3.0", path = "../model" }
garage_rpc = { version = "0.3.0", path = "../rpc" }
garage_table = { version = "0.3.0", path = "../table" }
garage_util = { version = "0.3.0", path = "../util" }
garage_web = { version = "0.3.0", path = "../web" }
garage_api = { version = "0.4.0", path = "../api" }
garage_model = { version = "0.4.0", path = "../model" }
garage_rpc = { version = "0.4.0", path = "../rpc" }
garage_table = { version = "0.4.0", path = "../table" }
garage_util = { version = "0.4.0", path = "../util" }
garage_web = { version = "0.4.0", path = "../web" }
bytes = "1.0"
git-version = "0.3.4"
@ -27,6 +27,8 @@ hex = "0.4"
log = "0.4"
pretty_env_logger = "0.4"
rand = "0.8"
async-trait = "0.1.7"
sodiumoxide = { version = "0.2.5-0", package = "kuska-sodiumoxide" }
sled = "0.34"
@ -38,3 +40,5 @@ toml = "0.5"
futures = "0.3"
futures-util = "0.3"
tokio = { version = "1.0", default-features = false, features = ["rt", "rt-multi-thread", "io-util", "net", "time", "macros", "sync", "signal", "fs"] }
netapp = { version = "0.3.0", git = "https://git.deuxfleurs.fr/lx/netapp" }

View file

@ -2,6 +2,7 @@ use std::collections::HashMap;
use std::fmt::Write;
use std::sync::Arc;
use async_trait::async_trait;
use serde::{Deserialize, Serialize};
use garage_util::error::Error;
@ -10,8 +11,7 @@ use garage_table::crdt::Crdt;
use garage_table::replication::*;
use garage_table::*;
use garage_rpc::rpc_client::*;
use garage_rpc::rpc_server::*;
use garage_rpc::*;
use garage_model::bucket_table::*;
use garage_model::garage::Garage;
@ -19,10 +19,8 @@ use garage_model::key_table::*;
use crate::cli::*;
use crate::repair::Repair;
use crate::*;
pub const ADMIN_RPC_TIMEOUT: Duration = Duration::from_secs(30);
pub const ADMIN_RPC_PATH: &str = "_admin";
pub const ADMIN_RPC_PATH: &str = "garage/admin_rpc.rs/Rpc";
#[derive(Debug, Serialize, Deserialize)]
pub enum AdminRpc {
@ -33,41 +31,31 @@ pub enum AdminRpc {
// Replies
Ok(String),
Error(String),
BucketList(Vec<String>),
BucketInfo(Bucket),
KeyList(Vec<(String, String)>),
KeyInfo(Key),
}
impl RpcMessage for AdminRpc {}
impl Message for AdminRpc {
type Response = AdminRpc;
}
pub struct AdminRpcHandler {
garage: Arc<Garage>,
rpc_client: Arc<RpcClient<AdminRpc>>,
endpoint: Arc<Endpoint<AdminRpc, Self>>,
}
impl AdminRpcHandler {
pub fn new(garage: Arc<Garage>) -> Arc<Self> {
let rpc_client = garage.system.clone().rpc_client::<AdminRpc>(ADMIN_RPC_PATH);
Arc::new(Self { garage, rpc_client })
let endpoint = garage.system.netapp.endpoint(ADMIN_RPC_PATH.into());
let admin = Arc::new(Self { garage, endpoint });
admin.endpoint.set_handler(admin.clone());
admin
}
pub fn register_handler(self: Arc<Self>, rpc_server: &mut RpcServer) {
rpc_server.add_handler::<AdminRpc, _, _>(ADMIN_RPC_PATH.to_string(), move |msg, _addr| {
let self2 = self.clone();
async move {
match msg {
AdminRpc::BucketOperation(bo) => self2.handle_bucket_cmd(bo).await,
AdminRpc::KeyOperation(ko) => self2.handle_key_cmd(ko).await,
AdminRpc::LaunchRepair(opt) => self2.handle_launch_repair(opt).await,
AdminRpc::Stats(opt) => self2.handle_stats(opt).await,
_ => Err(Error::BadRpc("Invalid RPC".to_string())),
}
}
});
}
async fn handle_bucket_cmd(&self, cmd: BucketOperation) -> Result<AdminRpc, Error> {
async fn handle_bucket_cmd(&self, cmd: &BucketOperation) -> Result<AdminRpc, Error> {
match cmd {
BucketOperation::List => {
let bucket_names = self
@ -187,7 +175,7 @@ impl AdminRpcHandler {
}
}
async fn handle_key_cmd(&self, cmd: KeyOperation) -> Result<AdminRpc, Error> {
async fn handle_key_cmd(&self, cmd: &KeyOperation) -> Result<AdminRpc, Error> {
match cmd {
KeyOperation::List => {
let key_ids = self
@ -210,13 +198,13 @@ impl AdminRpcHandler {
Ok(AdminRpc::KeyInfo(key))
}
KeyOperation::New(query) => {
let key = Key::new(query.name);
let key = Key::new(query.name.clone());
self.garage.key_table.insert(&key).await?;
Ok(AdminRpc::KeyInfo(key))
}
KeyOperation::Rename(query) => {
let mut key = self.get_existing_key(&query.key_pattern).await?;
key.name.update(query.new_name);
key.name.update(query.new_name.clone());
self.garage.key_table.insert(&key).await?;
Ok(AdminRpc::KeyInfo(key))
}
@ -353,17 +341,18 @@ impl AdminRpcHandler {
let mut failures = vec![];
let ring = self.garage.system.ring.borrow().clone();
for node in ring.config.members.keys() {
let node = NodeID::from_slice(node.as_slice()).unwrap();
if self
.rpc_client
.endpoint
.call(
*node,
AdminRpc::LaunchRepair(opt_to_send.clone()),
ADMIN_RPC_TIMEOUT,
&node,
&AdminRpc::LaunchRepair(opt_to_send.clone()),
PRIO_NORMAL,
)
.await
.is_err()
{
failures.push(*node);
failures.push(node);
}
}
if failures.is_empty() {
@ -397,14 +386,16 @@ impl AdminRpcHandler {
let ring = self.garage.system.ring.borrow().clone();
for node in ring.config.members.keys() {
let node = NodeID::from_slice(node.as_slice()).unwrap();
let mut opt = opt.clone();
opt.all_nodes = false;
writeln!(&mut ret, "\n======================").unwrap();
writeln!(&mut ret, "Stats for node {:?}:", node).unwrap();
match self
.rpc_client
.call(*node, AdminRpc::Stats(opt), ADMIN_RPC_TIMEOUT)
.endpoint
.call(&node, &AdminRpc::Stats(opt), PRIO_NORMAL)
.await
{
Ok(AdminRpc::Ok(s)) => writeln!(&mut ret, "{}", s).unwrap(),
@ -495,4 +486,23 @@ impl AdminRpcHandler {
.unwrap();
writeln!(to, " GC todo queue length: {}", t.data.gc_todo_len()).unwrap();
}
async fn handle_rpc(self: &Arc<Self>, msg: &AdminRpc) -> Result<AdminRpc, Error> {
match msg {
AdminRpc::BucketOperation(bo) => self.handle_bucket_cmd(bo).await,
AdminRpc::KeyOperation(ko) => self.handle_key_cmd(ko).await,
AdminRpc::LaunchRepair(opt) => self.handle_launch_repair(opt.clone()).await,
AdminRpc::Stats(opt) => self.handle_stats(opt.clone()).await,
_ => Err(Error::BadRpc("Invalid RPC".to_string())),
}
}
}
#[async_trait]
impl EndpointHandler<AdminRpc> for AdminRpcHandler {
async fn handle(self: &Arc<Self>, message: &AdminRpc, _from: NodeID) -> AdminRpc {
self.handle_rpc(message)
.await
.unwrap_or_else(|e| AdminRpc::Error(format!("{}", e)))
}
}

View file

@ -1,6 +1,5 @@
use std::cmp::max;
use std::collections::HashSet;
use std::net::SocketAddr;
//use std::cmp::max;
//use std::collections::HashSet;
use std::path::PathBuf;
use serde::{Deserialize, Serialize};
@ -8,11 +7,11 @@ use structopt::StructOpt;
use garage_util::data::Uuid;
use garage_util::error::Error;
use garage_util::time::*;
//use garage_util::time::*;
use garage_rpc::membership::*;
use garage_rpc::ring::*;
use garage_rpc::rpc_client::*;
use garage_rpc::system::*;
use garage_rpc::*;
use garage_model::bucket_table::*;
use garage_model::key_table::*;
@ -298,54 +297,65 @@ pub struct StatsOpt {
pub async fn cli_cmd(
cmd: Command,
membership_rpc_cli: RpcAddrClient<Message>,
admin_rpc_cli: RpcAddrClient<AdminRpc>,
rpc_host: SocketAddr,
system_rpc_endpoint: &Endpoint<SystemRpc, ()>,
admin_rpc_endpoint: &Endpoint<AdminRpc, ()>,
rpc_host: NodeID,
) -> Result<(), Error> {
match cmd {
Command::Status => cmd_status(membership_rpc_cli, rpc_host).await,
Command::Status => cmd_status(system_rpc_endpoint, rpc_host).await,
Command::Node(NodeOperation::Configure(configure_opt)) => {
cmd_configure(membership_rpc_cli, rpc_host, configure_opt).await
cmd_configure(system_rpc_endpoint, rpc_host, configure_opt).await
}
Command::Node(NodeOperation::Remove(remove_opt)) => {
cmd_remove(membership_rpc_cli, rpc_host, remove_opt).await
cmd_remove(system_rpc_endpoint, rpc_host, remove_opt).await
}
Command::Bucket(bo) => {
cmd_admin(admin_rpc_cli, rpc_host, AdminRpc::BucketOperation(bo)).await
cmd_admin(admin_rpc_endpoint, rpc_host, AdminRpc::BucketOperation(bo)).await
}
Command::Key(ko) => cmd_admin(admin_rpc_cli, rpc_host, AdminRpc::KeyOperation(ko)).await,
Command::Repair(ro) => cmd_admin(admin_rpc_cli, rpc_host, AdminRpc::LaunchRepair(ro)).await,
Command::Stats(so) => cmd_admin(admin_rpc_cli, rpc_host, AdminRpc::Stats(so)).await,
Command::Key(ko) => {
cmd_admin(admin_rpc_endpoint, rpc_host, AdminRpc::KeyOperation(ko)).await
}
Command::Repair(ro) => {
cmd_admin(admin_rpc_endpoint, rpc_host, AdminRpc::LaunchRepair(ro)).await
}
Command::Stats(so) => cmd_admin(admin_rpc_endpoint, rpc_host, AdminRpc::Stats(so)).await,
_ => unreachable!(),
}
}
pub async fn cmd_status(
rpc_cli: RpcAddrClient<Message>,
rpc_host: SocketAddr,
) -> Result<(), Error> {
pub async fn cmd_status(rpc_cli: &Endpoint<SystemRpc, ()>, rpc_host: NodeID) -> Result<(), Error> {
let status = match rpc_cli
.call(&rpc_host, &Message::PullStatus, ADMIN_RPC_TIMEOUT)
.await??
.call(&rpc_host, &SystemRpc::GetKnownNodes, PRIO_NORMAL)
.await?
{
Message::AdvertiseNodesUp(nodes) => nodes,
SystemRpc::ReturnKnownNodes(nodes) => nodes,
resp => return Err(Error::Message(format!("Invalid RPC response: {:?}", resp))),
};
let config = match rpc_cli
.call(&rpc_host, &Message::PullConfig, ADMIN_RPC_TIMEOUT)
.await??
.call(&rpc_host, &SystemRpc::PullConfig, PRIO_NORMAL)
.await?
{
Message::AdvertiseConfig(cfg) => cfg,
SystemRpc::AdvertiseConfig(cfg) => cfg,
resp => return Err(Error::Message(format!("Invalid RPC response: {:?}", resp))),
};
println!("STATUS:");
for node in status {
println!("{:?}", node);
}
println!("CONFIG: (v{})", config.version);
for (id, node) in config.members {
println!("{} {:?}", hex::encode(id.as_slice()), node);
}
/* TODO
let (hostname_len, addr_len, tag_len, zone_len) = status
.iter()
.map(|adv| (adv, config.members.get(&adv.id)))
.map(|(adv, cfg)| {
.map(|(id, addr, _)| (addr, config.members.get(&adv.id)))
.map(|(addr, cfg)| {
(
adv.state_info.hostname.len(),
adv.addr.to_string().len(),
8,
addr.to_string().len(),
cfg.map(|c| c.tag.len()).unwrap_or(0),
cfg.map(|c| c.zone.len()).unwrap_or(0),
)
@ -355,13 +365,13 @@ pub async fn cmd_status(
});
println!("Healthy nodes:");
for adv in status.iter().filter(|x| x.is_up) {
for (id, addr, _) in status.iter().filter(|(id, addr, is_up)| is_up) {
if let Some(cfg) = config.members.get(&adv.id) {
println!(
"{id:?}\t{host}{h_pad}\t{addr}{a_pad}\t[{tag}]{t_pad}\t{zone}{z_pad}\t{capacity}",
id = adv.id,
host = adv.state_info.hostname,
addr = adv.addr,
id = id,
host = "",
addr = addr,
tag = cfg.tag,
zone = cfg.zone,
capacity = cfg.capacity_string(),
@ -373,36 +383,36 @@ pub async fn cmd_status(
} else {
println!(
"{id:?}\t{h}{h_pad}\t{addr}{a_pad}\tUNCONFIGURED/REMOVED",
id = adv.id,
h = adv.state_info.hostname,
addr = adv.addr,
h_pad = " ".repeat(hostname_len - adv.state_info.hostname.len()),
a_pad = " ".repeat(addr_len - adv.addr.to_string().len()),
id = id,
h = "",
addr = addr,
h_pad = " ".repeat(hostname_len - "".len()),
a_pad = " ".repeat(addr_len - addr.to_string().len()),
);
}
}
let status_keys = status.iter().map(|x| x.id).collect::<HashSet<_>>();
let failure_case_1 = status.iter().any(|x| !x.is_up);
let status_keys = status.iter().map(|(id, _, _)| id).collect::<HashSet<_>>();
let failure_case_1 = status.iter().any(|(_, _, is_up)| !is_up);
let failure_case_2 = config
.members
.iter()
.any(|(id, _)| !status_keys.contains(id));
if failure_case_1 || failure_case_2 {
println!("\nFailed nodes:");
for adv in status.iter().filter(|x| !x.is_up) {
if let Some(cfg) = config.members.get(&adv.id) {
for (id, addr) in status.iter().filter(|(_, _, is_up)| !is_up) {
if let Some(cfg) = config.members.get(&id) {
println!(
"{id:?}\t{host}{h_pad}\t{addr}{a_pad}\t[{tag}]{t_pad}\t{zone}{z_pad}\t{capacity}\tlast seen: {last_seen}s ago",
id=adv.id,
host=adv.state_info.hostname,
addr=adv.addr,
id=id,
host="",
addr=addr,
tag=cfg.tag,
zone=cfg.zone,
capacity=cfg.capacity_string(),
last_seen=(now_msec() - adv.last_seen) / 1000,
h_pad=" ".repeat(hostname_len - adv.state_info.hostname.len()),
a_pad=" ".repeat(addr_len - adv.addr.to_string().len()),
last_seen=(now_msec() - 0) / 1000,
h_pad=" ".repeat(hostname_len - "".len()),
a_pad=" ".repeat(addr_len - addr.to_string().len()),
t_pad=" ".repeat(tag_len - cfg.tag.len()),
z_pad=" ".repeat(zone_len - cfg.zone.len()),
);
@ -411,12 +421,12 @@ pub async fn cmd_status(
let (tag_len, zone_len) = config
.members
.iter()
.filter(|(&id, _)| !status.iter().any(|x| x.id == id))
.filter(|(&id, _)| !status.iter().any(|(xid, _, _)| xid == id))
.map(|(_, cfg)| (cfg.tag.len(), cfg.zone.len()))
.fold((0, 0), |(t, z), (mt, mz)| (max(t, mt), max(z, mz)));
for (id, cfg) in config.members.iter() {
if !status.iter().any(|x| x.id == *id) {
if !status.iter().any(|(xid, _, _)| xid == *id) {
println!(
"{id:?}\t{tag}{t_pad}\t{zone}{z_pad}\t{capacity}\tnever seen",
id = id,
@ -429,6 +439,7 @@ pub async fn cmd_status(
}
}
}
*/
Ok(())
}
@ -455,25 +466,30 @@ pub fn find_matching_node(
}
pub async fn cmd_configure(
rpc_cli: RpcAddrClient<Message>,
rpc_host: SocketAddr,
rpc_cli: &Endpoint<SystemRpc, ()>,
rpc_host: NodeID,
args: ConfigureNodeOpt,
) -> Result<(), Error> {
let status = match rpc_cli
.call(&rpc_host, &Message::PullStatus, ADMIN_RPC_TIMEOUT)
.await??
.call(&rpc_host, &SystemRpc::GetKnownNodes, PRIO_NORMAL)
.await?
{
Message::AdvertiseNodesUp(nodes) => nodes,
SystemRpc::ReturnKnownNodes(nodes) => nodes,
resp => return Err(Error::Message(format!("Invalid RPC response: {:?}", resp))),
};
let added_node = find_matching_node(status.iter().map(|x| x.id), &args.node_id)?;
let added_node = find_matching_node(
status
.iter()
.map(|(id, _, _)| Uuid::try_from(id.as_ref()).unwrap()),
&args.node_id,
)?;
let mut config = match rpc_cli
.call(&rpc_host, &Message::PullConfig, ADMIN_RPC_TIMEOUT)
.await??
.call(&rpc_host, &SystemRpc::PullConfig, PRIO_NORMAL)
.await?
{
Message::AdvertiseConfig(cfg) => cfg,
SystemRpc::AdvertiseConfig(cfg) => cfg,
resp => return Err(Error::Message(format!("Invalid RPC response: {:?}", resp))),
};
@ -527,25 +543,21 @@ pub async fn cmd_configure(
config.version += 1;
rpc_cli
.call(
&rpc_host,
&Message::AdvertiseConfig(config),
ADMIN_RPC_TIMEOUT,
)
.await??;
.call(&rpc_host, &SystemRpc::AdvertiseConfig(config), PRIO_NORMAL)
.await?;
Ok(())
}
pub async fn cmd_remove(
rpc_cli: RpcAddrClient<Message>,
rpc_host: SocketAddr,
rpc_cli: &Endpoint<SystemRpc, ()>,
rpc_host: NodeID,
args: RemoveNodeOpt,
) -> Result<(), Error> {
let mut config = match rpc_cli
.call(&rpc_host, &Message::PullConfig, ADMIN_RPC_TIMEOUT)
.await??
.call(&rpc_host, &SystemRpc::PullConfig, PRIO_NORMAL)
.await?
{
Message::AdvertiseConfig(cfg) => cfg,
SystemRpc::AdvertiseConfig(cfg) => cfg,
resp => return Err(Error::Message(format!("Invalid RPC response: {:?}", resp))),
};
@ -562,21 +574,17 @@ pub async fn cmd_remove(
config.version += 1;
rpc_cli
.call(
&rpc_host,
&Message::AdvertiseConfig(config),
ADMIN_RPC_TIMEOUT,
)
.await??;
.call(&rpc_host, &SystemRpc::AdvertiseConfig(config), PRIO_NORMAL)
.await?;
Ok(())
}
pub async fn cmd_admin(
rpc_cli: RpcAddrClient<AdminRpc>,
rpc_host: SocketAddr,
rpc_cli: &Endpoint<AdminRpc, ()>,
rpc_host: NodeID,
args: AdminRpc,
) -> Result<(), Error> {
match rpc_cli.call(&rpc_host, args, ADMIN_RPC_TIMEOUT).await?? {
match rpc_cli.call(&rpc_host, &args, PRIO_NORMAL).await? {
AdminRpc::Ok(msg) => {
println!("{}", msg);
}

View file

@ -10,16 +10,16 @@ mod repair;
mod server;
use std::net::SocketAddr;
use std::sync::Arc;
use std::time::Duration;
use structopt::StructOpt;
use garage_util::config::TlsConfig;
use netapp::util::parse_peer_addr;
use netapp::NetworkKey;
use garage_util::error::Error;
use garage_rpc::membership::*;
use garage_rpc::rpc_client::*;
use garage_rpc::system::*;
use garage_rpc::*;
use admin_rpc::*;
use cli::*;
@ -27,16 +27,14 @@ use cli::*;
#[derive(StructOpt, Debug)]
#[structopt(name = "garage")]
struct Opt {
/// RPC connect to this host to execute client operations
#[structopt(short = "h", long = "rpc-host", default_value = "127.0.0.1:3901", parse(try_from_str = parse_address))]
pub rpc_host: SocketAddr,
/// Host to connect to for admin operations, in the format:
/// <public-key>@<ip>:<port>
#[structopt(short = "h", long = "rpc-host")]
pub rpc_host: Option<String>,
#[structopt(long = "ca-cert")]
pub ca_cert: Option<String>,
#[structopt(long = "client-cert")]
pub client_cert: Option<String>,
#[structopt(long = "client-key")]
pub client_key: Option<String>,
/// RPC secret network key for admin operations
#[structopt(short = "s", long = "rpc-secret")]
pub rpc_secret: Option<String>,
#[structopt(subcommand)]
cmd: Command,
@ -66,33 +64,20 @@ async fn main() {
}
async fn cli_command(opt: Opt) -> Result<(), Error> {
let tls_config = match (opt.ca_cert, opt.client_cert, opt.client_key) {
(Some(ca_cert), Some(client_cert), Some(client_key)) => Some(TlsConfig {
ca_cert,
node_cert: client_cert,
node_key: client_key,
}),
(None, None, None) => None,
_ => {
warn!("Missing one of: --ca-cert, --node-cert, --node-key. Not using TLS.");
None
}
};
let net_key_hex_str = &opt.rpc_secret.expect("No RPC secret provided");
let network_key = NetworkKey::from_slice(
&hex::decode(net_key_hex_str).expect("Invalid RPC secret key (bad hex)")[..],
)
.expect("Invalid RPC secret provided (wrong length)");
let (_pk, sk) = sodiumoxide::crypto::sign::ed25519::gen_keypair();
let rpc_http_cli =
Arc::new(RpcHttpClient::new(8, &tls_config).expect("Could not create RPC client"));
let membership_rpc_cli =
RpcAddrClient::new(rpc_http_cli.clone(), MEMBERSHIP_RPC_PATH.to_string());
let admin_rpc_cli = RpcAddrClient::new(rpc_http_cli.clone(), ADMIN_RPC_PATH.to_string());
let netapp = NetApp::new(network_key, sk);
let (id, addr) =
parse_peer_addr(&opt.rpc_host.expect("No RPC host provided")).expect("Invalid RPC host");
netapp.clone().try_connect(addr, id).await?;
cli_cmd(opt.cmd, membership_rpc_cli, admin_rpc_cli, opt.rpc_host).await
}
let system_rpc_endpoint = netapp.endpoint::<SystemRpc, ()>(SYSTEM_RPC_PATH.into());
let admin_rpc_endpoint = netapp.endpoint::<AdminRpc, ()>(ADMIN_RPC_PATH.into());
fn parse_address(address: &str) -> Result<SocketAddr, String> {
use std::net::ToSocketAddrs;
address
.to_socket_addrs()
.map_err(|_| format!("Could not resolve {}", address))?
.next()
.ok_or_else(|| format!("Could not resolve {}", address))
cli_cmd(opt.cmd, &system_rpc_endpoint, &admin_rpc_endpoint, id).await
}

View file

@ -1,7 +1,5 @@
use std::path::PathBuf;
use std::sync::Arc;
use futures_util::future::*;
use tokio::sync::watch;
use garage_util::background::*;
@ -10,21 +8,10 @@ use garage_util::error::Error;
use garage_api::run_api_server;
use garage_model::garage::Garage;
use garage_rpc::rpc_server::RpcServer;
use garage_web::run_web_server;
use crate::admin_rpc::*;
async fn shutdown_signal(send_cancel: watch::Sender<bool>) -> Result<(), Error> {
// Wait for the CTRL+C signal
tokio::signal::ctrl_c()
.await
.expect("failed to install CTRL+C signal handler");
info!("Received CTRL+C, shutting down.");
send_cancel.send(true)?;
Ok(())
}
async fn wait_from(mut chan: watch::Receiver<bool>) {
while !*chan.borrow() {
if chan.changed().await.is_err() {
@ -47,52 +34,46 @@ pub async fn run_server(config_file: PathBuf) -> Result<(), Error> {
.open()
.expect("Unable to open sled DB");
info!("Initialize RPC server...");
let mut rpc_server = RpcServer::new(config.rpc_bind_addr, config.rpc_tls.clone());
info!("Initializing background runner...");
let (send_cancel, watch_cancel) = watch::channel(false);
let watch_cancel = netapp::util::watch_ctrl_c();
let (background, await_background_done) = BackgroundRunner::new(16, watch_cancel.clone());
info!("Initializing Garage main data store...");
let garage = Garage::new(config.clone(), db, background, &mut rpc_server);
let bootstrap = garage.system.clone().bootstrap(
config.bootstrap_peers,
config.consul_host,
config.consul_service_name,
);
let garage = Garage::new(config.clone(), db, background);
let run_system = tokio::spawn(garage.system.clone().run(watch_cancel.clone()));
info!("Crate admin RPC handler...");
AdminRpcHandler::new(garage.clone()).register_handler(&mut rpc_server);
AdminRpcHandler::new(garage.clone());
info!("Initializing RPC and API servers...");
let run_rpc_server = Arc::new(rpc_server).run(wait_from(watch_cancel.clone()));
let api_server = run_api_server(garage.clone(), wait_from(watch_cancel.clone()));
let web_server = run_web_server(garage, wait_from(watch_cancel.clone()));
info!("Initializing API server...");
let api_server = tokio::spawn(run_api_server(
garage.clone(),
wait_from(watch_cancel.clone()),
));
futures::try_join!(
bootstrap.map(|()| {
info!("Bootstrap done");
Ok(())
}),
run_rpc_server.map(|rv| {
info!("RPC server exited");
rv
}),
api_server.map(|rv| {
info!("API server exited");
rv
}),
web_server.map(|rv| {
info!("Web server exited");
rv
}),
await_background_done.map(|rv| {
info!("Background runner exited: {:?}", rv);
Ok(())
}),
shutdown_signal(send_cancel),
)?;
info!("Initializing web server...");
let web_server = tokio::spawn(run_web_server(
garage.clone(),
wait_from(watch_cancel.clone()),
));
// Stuff runs
// When a cancel signal is sent, stuff stops
if let Err(e) = api_server.await? {
warn!("API server exited with error: {}", e);
}
if let Err(e) = web_server.await? {
warn!("Web server exited with error: {}", e);
}
// Remove RPC handlers for system to break reference cycles
garage.system.netapp.drop_all_handlers();
// Await for last parts to end
run_system.await?;
await_background_done.await?;
info!("Cleaning up...");

View file

@ -1,6 +1,6 @@
[package]
name = "garage_model"
version = "0.3.0"
version = "0.4.0"
authors = ["Alex Auvolat <alex@adnab.me>"]
edition = "2018"
license = "AGPL-3.0"
@ -13,10 +13,11 @@ path = "lib.rs"
# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
[dependencies]
garage_rpc = { version = "0.3.0", path = "../rpc" }
garage_table = { version = "0.3.0", path = "../table" }
garage_util = { version = "0.3.0", path = "../util" }
garage_rpc = { version = "0.4.0", path = "../rpc" }
garage_table = { version = "0.4.0", path = "../table" }
garage_util = { version = "0.4.0", path = "../util" }
async-trait = "0.1.7"
arc-swap = "1.0"
hex = "0.4"
log = "0.4"
@ -31,3 +32,5 @@ serde_bytes = "0.11"
futures = "0.3"
futures-util = "0.3"
tokio = { version = "1.0", default-features = false, features = ["rt", "rt-multi-thread", "io-util", "net", "time", "macros", "sync", "signal", "fs"] }
netapp = { version = "0.3.0", git = "https://git.deuxfleurs.fr/lx/netapp" }

View file

@ -3,6 +3,7 @@ use std::sync::Arc;
use std::time::Duration;
use arc_swap::ArcSwapOption;
use async_trait::async_trait;
use futures::future::*;
use futures::select;
use serde::{Deserialize, Serialize};
@ -14,9 +15,8 @@ use garage_util::data::*;
use garage_util::error::Error;
use garage_util::time::*;
use garage_rpc::membership::System;
use garage_rpc::rpc_client::*;
use garage_rpc::rpc_server::*;
use garage_rpc::system::System;
use garage_rpc::*;
use garage_table::replication::{TableReplication, TableShardedReplication};
@ -36,8 +36,9 @@ const RESYNC_RETRY_TIMEOUT: Duration = Duration::from_secs(10);
/// RPC messages used to share blocks of data between nodes
#[derive(Debug, Serialize, Deserialize)]
pub enum Message {
pub enum BlockRpc {
Ok,
Error(String),
/// Message to ask for a block of data, by hash
GetBlock(Hash),
/// Message to send a block of data, either because requested, of for first delivery of new
@ -60,7 +61,9 @@ pub struct PutBlockMessage {
pub data: Vec<u8>,
}
impl RpcMessage for Message {}
impl Message for BlockRpc {
type Response = BlockRpc;
}
/// The block manager, handling block exchange between nodes, and block storage on local node
pub struct BlockManager {
@ -77,7 +80,7 @@ pub struct BlockManager {
resync_notify: Notify,
system: Arc<System>,
rpc_client: Arc<RpcClient<Message>>,
endpoint: Arc<Endpoint<BlockRpc, Self>>,
pub(crate) garage: ArcSwapOption<Garage>,
}
@ -87,7 +90,6 @@ impl BlockManager {
data_dir: PathBuf,
replication: TableShardedReplication,
system: Arc<System>,
rpc_server: &mut RpcServer,
) -> Arc<Self> {
let rc = db
.open_tree("block_local_rc")
@ -97,8 +99,7 @@ impl BlockManager {
.open_tree("block_local_resync_queue")
.expect("Unable to open block_local_resync_queue tree");
let rpc_path = "block_manager";
let rpc_client = system.rpc_client::<Message>(rpc_path);
let endpoint = system.netapp.endpoint(format!("garage_model/block.rs/Rpc"));
let block_manager = Arc::new(Self {
replication,
@ -108,35 +109,19 @@ impl BlockManager {
resync_queue,
resync_notify: Notify::new(),
system,
rpc_client,
endpoint,
garage: ArcSwapOption::from(None),
});
block_manager
.clone()
.register_handler(rpc_server, rpc_path.into());
block_manager.endpoint.set_handler(block_manager.clone());
block_manager
}
fn register_handler(self: Arc<Self>, rpc_server: &mut RpcServer, path: String) {
let self2 = self.clone();
rpc_server.add_handler::<Message, _, _>(path, move |msg, _addr| {
let self2 = self2.clone();
async move { self2.handle(&msg).await }
});
let self2 = self.clone();
self.rpc_client
.set_local_handler(self.system.id, move |msg| {
let self2 = self2.clone();
async move { self2.handle(&msg).await }
});
}
async fn handle(self: Arc<Self>, msg: &Message) -> Result<Message, Error> {
async fn handle_rpc(self: Arc<Self>, msg: &BlockRpc) -> Result<BlockRpc, Error> {
match msg {
Message::PutBlock(m) => self.write_block(&m.hash, &m.data).await,
Message::GetBlock(h) => self.read_block(h).await,
Message::NeedBlockQuery(h) => self.need_block(h).await.map(Message::NeedBlockReply),
BlockRpc::PutBlock(m) => self.write_block(&m.hash, &m.data).await,
BlockRpc::GetBlock(h) => self.read_block(h).await,
BlockRpc::NeedBlockQuery(h) => self.need_block(h).await.map(BlockRpc::NeedBlockReply),
_ => Err(Error::BadRpc("Unexpected RPC message".to_string())),
}
}
@ -157,7 +142,7 @@ impl BlockManager {
}
/// Write a block to disk
async fn write_block(&self, hash: &Hash, data: &[u8]) -> Result<Message, Error> {
async fn write_block(&self, hash: &Hash, data: &[u8]) -> Result<BlockRpc, Error> {
let _lock = self.data_dir_lock.lock().await;
let mut path = self.block_dir(hash);
@ -165,18 +150,18 @@ impl BlockManager {
path.push(hex::encode(hash));
if fs::metadata(&path).await.is_ok() {
return Ok(Message::Ok);
return Ok(BlockRpc::Ok);
}
let mut f = fs::File::create(path).await?;
f.write_all(data).await?;
drop(f);
Ok(Message::Ok)
Ok(BlockRpc::Ok)
}
/// Read block from disk, verifying it's integrity
async fn read_block(&self, hash: &Hash) -> Result<Message, Error> {
async fn read_block(&self, hash: &Hash) -> Result<BlockRpc, Error> {
let path = self.block_path(hash);
let mut f = match fs::File::open(&path).await {
@ -204,7 +189,7 @@ impl BlockManager {
return Err(Error::CorruptData(*hash));
}
Ok(Message::PutBlock(PutBlockMessage { hash: *hash, data }))
Ok(BlockRpc::PutBlock(PutBlockMessage { hash: *hash, data }))
}
/// Check if this node should have a block, but don't actually have it
@ -346,17 +331,22 @@ impl BlockManager {
}
who.retain(|id| *id != self.system.id);
let msg = Arc::new(Message::NeedBlockQuery(*hash));
let msg = Arc::new(BlockRpc::NeedBlockQuery(*hash));
let who_needs_fut = who.iter().map(|to| {
self.rpc_client
.call_arc(*to, msg.clone(), NEED_BLOCK_QUERY_TIMEOUT)
self.system.rpc.call_arc(
&self.endpoint,
*to,
msg.clone(),
RequestStrategy::with_priority(PRIO_NORMAL)
.with_timeout(NEED_BLOCK_QUERY_TIMEOUT),
)
});
let who_needs_resps = join_all(who_needs_fut).await;
let mut need_nodes = vec![];
for (node, needed) in who.iter().zip(who_needs_resps.into_iter()) {
match needed? {
Message::NeedBlockReply(needed) => {
BlockRpc::NeedBlockReply(needed) => {
if needed {
need_nodes.push(*node);
}
@ -377,11 +367,14 @@ impl BlockManager {
);
let put_block_message = self.read_block(hash).await?;
self.rpc_client
self.system
.rpc
.try_call_many(
&self.endpoint,
&need_nodes[..],
put_block_message,
RequestStrategy::with_quorum(need_nodes.len())
RequestStrategy::with_priority(PRIO_NORMAL)
.with_quorum(need_nodes.len())
.with_timeout(BLOCK_RW_TIMEOUT),
)
.await?;
@ -413,18 +406,21 @@ impl BlockManager {
pub async fn rpc_get_block(&self, hash: &Hash) -> Result<Vec<u8>, Error> {
let who = self.replication.read_nodes(&hash);
let resps = self
.rpc_client
.system
.rpc
.try_call_many(
&self.endpoint,
&who[..],
Message::GetBlock(*hash),
RequestStrategy::with_quorum(1)
BlockRpc::GetBlock(*hash),
RequestStrategy::with_priority(PRIO_NORMAL)
.with_quorum(1)
.with_timeout(BLOCK_RW_TIMEOUT)
.interrupt_after_quorum(true),
)
.await?;
for resp in resps {
if let Message::PutBlock(msg) = resp {
if let BlockRpc::PutBlock(msg) = resp {
return Ok(msg.data);
}
}
@ -437,11 +433,14 @@ impl BlockManager {
/// Send block to nodes that should have it
pub async fn rpc_put_block(&self, hash: Hash, data: Vec<u8>) -> Result<(), Error> {
let who = self.replication.write_nodes(&hash);
self.rpc_client
self.system
.rpc
.try_call_many(
&self.endpoint,
&who[..],
Message::PutBlock(PutBlockMessage { hash, data }),
RequestStrategy::with_quorum(self.replication.write_quorum())
BlockRpc::PutBlock(PutBlockMessage { hash, data }),
RequestStrategy::with_priority(PRIO_NORMAL)
.with_quorum(self.replication.write_quorum())
.with_timeout(BLOCK_RW_TIMEOUT),
)
.await?;
@ -531,6 +530,16 @@ impl BlockManager {
}
}
#[async_trait]
impl EndpointHandler<BlockRpc> for BlockManager {
async fn handle(self: &Arc<Self>, message: &BlockRpc, _from: NodeID) -> BlockRpc {
self.clone()
.handle_rpc(message)
.await
.unwrap_or_else(|e| BlockRpc::Error(format!("{}", e)))
}
}
fn u64_from_be_bytes<T: AsRef<[u8]>>(bytes: T) -> u64 {
assert!(bytes.as_ref().len() == 8);
let mut x8 = [0u8; 8];

View file

@ -1,11 +1,11 @@
use std::sync::Arc;
use netapp::NetworkKey;
use garage_util::background::*;
use garage_util::config::*;
use garage_rpc::membership::System;
use garage_rpc::rpc_client::RpcHttpClient;
use garage_rpc::rpc_server::RpcServer;
use garage_rpc::system::System;
use garage_table::replication::ReplicationMode;
use garage_table::replication::TableFullReplication;
@ -45,26 +45,25 @@ pub struct Garage {
impl Garage {
/// Create and run garage
pub fn new(
config: Config,
db: sled::Db,
background: Arc<BackgroundRunner>,
rpc_server: &mut RpcServer,
) -> Arc<Self> {
pub fn new(config: Config, db: sled::Db, background: Arc<BackgroundRunner>) -> Arc<Self> {
let network_key = NetworkKey::from_slice(
&hex::decode(&config.rpc_secret).expect("Invalid RPC secret key")[..],
)
.expect("Invalid RPC secret key");
let replication_mode = ReplicationMode::parse(&config.replication_mode)
.expect("Invalid replication_mode in config file.");
info!("Initialize membership management system...");
let rpc_http_client = Arc::new(
RpcHttpClient::new(config.max_concurrent_rpc_requests, &config.rpc_tls)
.expect("Could not create RPC client"),
);
let system = System::new(
network_key,
config.metadata_dir.clone(),
rpc_http_client,
background.clone(),
rpc_server,
replication_mode.replication_factor(),
config.rpc_bind_addr,
config.bootstrap_peers.clone(),
config.consul_host.clone(),
config.consul_service_name.clone(),
);
let data_rep_param = TableShardedReplication {
@ -87,13 +86,8 @@ impl Garage {
};
info!("Initialize block manager...");
let block_manager = BlockManager::new(
&db,
config.data_dir.clone(),
data_rep_param,
system.clone(),
rpc_server,
);
let block_manager =
BlockManager::new(&db, config.data_dir.clone(), data_rep_param, system.clone());
info!("Initialize block_ref_table...");
let block_ref_table = Table::new(
@ -104,7 +98,6 @@ impl Garage {
system.clone(),
&db,
"block_ref".to_string(),
rpc_server,
);
info!("Initialize version_table...");
@ -117,7 +110,6 @@ impl Garage {
system.clone(),
&db,
"version".to_string(),
rpc_server,
);
info!("Initialize object_table...");
@ -130,7 +122,6 @@ impl Garage {
system.clone(),
&db,
"object".to_string(),
rpc_server,
);
info!("Initialize bucket_table...");
@ -140,7 +131,6 @@ impl Garage {
system.clone(),
&db,
"bucket".to_string(),
rpc_server,
);
info!("Initialize key_table_table...");
@ -150,7 +140,6 @@ impl Garage {
system.clone(),
&db,
"key".to_string(),
rpc_server,
);
info!("Initialize Garage...");

View file

@ -1,6 +1,6 @@
[package]
name = "garage_rpc"
version = "0.3.0"
version = "0.4.0"
authors = ["Alex Auvolat <alex@adnab.me>"]
edition = "2018"
license = "AGPL-3.0"
@ -13,7 +13,7 @@ path = "lib.rs"
# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
[dependencies]
garage_util = { version = "0.3.0", path = "../util" }
garage_util = { version = "0.4.0", path = "../util" }
garage_rpc_021 = { package = "garage_rpc", version = "0.2.1" }
@ -22,7 +22,10 @@ bytes = "1.0"
gethostname = "0.2"
hex = "0.4"
log = "0.4"
rand = "0.8"
sodiumoxide = { version = "0.2.5-0", package = "kuska-sodiumoxide" }
async-trait = "0.1.7"
rmp-serde = "0.15"
serde = { version = "1.0", default-features = false, features = ["derive", "rc"] }
serde_json = "1.0"
@ -32,11 +35,6 @@ futures-util = "0.3"
tokio = { version = "1.0", default-features = false, features = ["rt", "rt-multi-thread", "io-util", "net", "time", "macros", "sync", "signal", "fs"] }
tokio-stream = { version = "0.1", features = ["net"] }
http = "0.2"
hyper = { version = "0.14", features = ["full"] }
hyper-rustls = { version = "0.22", default-features = false }
rustls = "0.19"
tokio-rustls = "0.22"
webpki = "0.21"
netapp = { version = "0.3.0", git = "https://git.deuxfleurs.fr/lx/netapp" }
hyper = "0.14"

View file

@ -4,10 +4,10 @@
extern crate log;
mod consul;
pub(crate) mod tls_util;
pub mod membership;
pub mod ring;
pub mod system;
pub mod rpc_client;
pub mod rpc_server;
pub mod rpc_helper;
pub use rpc_helper::*;

View file

@ -1,722 +0,0 @@
//! Module containing structs related to membership management
use std::collections::HashMap;
use std::fmt::Write as FmtWrite;
use std::io::{Read, Write};
use std::net::{IpAddr, SocketAddr};
use std::path::{Path, PathBuf};
use std::sync::atomic::{AtomicUsize, Ordering};
use std::sync::Arc;
use std::time::Duration;
use futures::future::join_all;
use futures::select;
use futures_util::future::*;
use serde::{Deserialize, Serialize};
use tokio::sync::watch;
use tokio::sync::Mutex;
use garage_util::background::BackgroundRunner;
use garage_util::data::*;
use garage_util::error::Error;
use garage_util::persister::Persister;
use garage_util::time::*;
use crate::consul::get_consul_nodes;
use crate::ring::*;
use crate::rpc_client::*;
use crate::rpc_server::*;
const PING_INTERVAL: Duration = Duration::from_secs(10);
const DISCOVERY_INTERVAL: Duration = Duration::from_secs(60);
const PING_TIMEOUT: Duration = Duration::from_secs(2);
const MAX_FAILURES_BEFORE_CONSIDERED_DOWN: usize = 5;
/// RPC endpoint used for calls related to membership
pub const MEMBERSHIP_RPC_PATH: &str = "_membership";
/// RPC messages related to membership
#[derive(Debug, Serialize, Deserialize)]
pub enum Message {
/// Response to successfull advertisements
Ok,
/// Message sent to detect other nodes status
Ping(PingMessage),
/// Ask other node for the nodes it knows. Answered with AdvertiseNodesUp
PullStatus,
/// Ask other node its config. Answered with AdvertiseConfig
PullConfig,
/// Advertisement of nodes the host knows up. Sent spontanously or in response to PullStatus
AdvertiseNodesUp(Vec<AdvertisedNode>),
/// Advertisement of nodes config. Sent spontanously or in response to PullConfig
AdvertiseConfig(NetworkConfig),
}
impl RpcMessage for Message {}
/// A ping, containing informations about status and config
#[derive(Debug, Serialize, Deserialize)]
pub struct PingMessage {
id: Uuid,
rpc_port: u16,
status_hash: Hash,
config_version: u64,
state_info: StateInfo,
}
/// A node advertisement
#[derive(Clone, Debug, Serialize, Deserialize)]
pub struct AdvertisedNode {
/// Id of the node this advertisement relates to
pub id: Uuid,
/// IP and port of the node
pub addr: SocketAddr,
/// Is the node considered up
pub is_up: bool,
/// When was the node last seen up, in milliseconds since UNIX epoch
pub last_seen: u64,
pub state_info: StateInfo,
}
/// This node's membership manager
pub struct System {
/// The id of this node
pub id: Uuid,
persist_config: Persister<NetworkConfig>,
persist_status: Persister<Vec<AdvertisedNode>>,
rpc_local_port: u16,
state_info: StateInfo,
rpc_http_client: Arc<RpcHttpClient>,
rpc_client: Arc<RpcClient<Message>>,
replication_factor: usize,
pub(crate) status: watch::Receiver<Arc<Status>>,
/// The ring
pub ring: watch::Receiver<Arc<Ring>>,
update_lock: Mutex<Updaters>,
/// The job runner of this node
pub background: Arc<BackgroundRunner>,
}
struct Updaters {
update_status: watch::Sender<Arc<Status>>,
update_ring: watch::Sender<Arc<Ring>>,
}
/// The status of each nodes, viewed by this node
#[derive(Debug, Clone)]
pub struct Status {
/// Mapping of each node id to its known status
pub nodes: HashMap<Uuid, Arc<StatusEntry>>,
/// Hash of `nodes`, used to detect when nodes have different views of the cluster
pub hash: Hash,
}
/// The status of a single node
#[derive(Debug)]
pub struct StatusEntry {
/// The IP and port used to connect to this node
pub addr: SocketAddr,
/// Last time this node was seen
pub last_seen: u64,
/// Number of consecutive pings sent without reply to this node
pub num_failures: AtomicUsize,
pub state_info: StateInfo,
}
impl StatusEntry {
/// is the node associated to this entry considered up
pub fn is_up(&self) -> bool {
self.num_failures.load(Ordering::SeqCst) < MAX_FAILURES_BEFORE_CONSIDERED_DOWN
}
}
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct StateInfo {
/// Hostname of the node
pub hostname: String,
/// Replication factor configured on the node
pub replication_factor: Option<usize>, // TODO Option is just for retrocompatibility. It should become a simple usize at some point
}
impl Status {
fn handle_ping(&mut self, ip: IpAddr, info: &PingMessage) -> bool {
let addr = SocketAddr::new(ip, info.rpc_port);
let old_status = self.nodes.insert(
info.id,
Arc::new(StatusEntry {
addr,
last_seen: now_msec(),
num_failures: AtomicUsize::from(0),
state_info: info.state_info.clone(),
}),
);
match old_status {
None => {
info!("Newly pingable node: {}", hex::encode(&info.id));
true
}
Some(x) => x.addr != addr,
}
}
fn recalculate_hash(&mut self) {
let mut nodes = self.nodes.iter().collect::<Vec<_>>();
nodes.sort_unstable_by_key(|(id, _status)| *id);
let mut nodes_txt = String::new();
debug!("Current set of pingable nodes: --");
for (id, status) in nodes {
debug!("{} {}", hex::encode(&id), status.addr);
writeln!(&mut nodes_txt, "{} {}", hex::encode(&id), status.addr).unwrap();
}
debug!("END --");
self.hash = blake2sum(nodes_txt.as_bytes());
}
fn to_serializable_membership(&self, system: &System) -> Vec<AdvertisedNode> {
let mut mem = vec![];
for (node, status) in self.nodes.iter() {
let state_info = if *node == system.id {
system.state_info.clone()
} else {
status.state_info.clone()
};
mem.push(AdvertisedNode {
id: *node,
addr: status.addr,
is_up: status.is_up(),
last_seen: status.last_seen,
state_info,
});
}
mem
}
}
fn gen_node_id(metadata_dir: &Path) -> Result<Uuid, Error> {
let mut id_file = metadata_dir.to_path_buf();
id_file.push("node_id");
if id_file.as_path().exists() {
let mut f = std::fs::File::open(id_file.as_path())?;
let mut d = vec![];
f.read_to_end(&mut d)?;
if d.len() != 32 {
return Err(Error::Message("Corrupt node_id file".to_string()));
}
let mut id = [0u8; 32];
id.copy_from_slice(&d[..]);
Ok(id.into())
} else {
let id = gen_uuid();
let mut f = std::fs::File::create(id_file.as_path())?;
f.write_all(id.as_slice())?;
Ok(id)
}
}
impl System {
/// Create this node's membership manager
pub fn new(
metadata_dir: PathBuf,
rpc_http_client: Arc<RpcHttpClient>,
background: Arc<BackgroundRunner>,
rpc_server: &mut RpcServer,
replication_factor: usize,
) -> Arc<Self> {
let id = gen_node_id(&metadata_dir).expect("Unable to read or generate node ID");
info!("Node ID: {}", hex::encode(&id));
let persist_config = Persister::new(&metadata_dir, "network_config");
let persist_status = Persister::new(&metadata_dir, "peer_info");
let net_config = match persist_config.load() {
Ok(x) => x,
Err(e) => {
match Persister::<garage_rpc_021::ring::NetworkConfig>::new(
&metadata_dir,
"network_config",
)
.load()
{
Ok(old_config) => NetworkConfig::migrate_from_021(old_config),
Err(e2) => {
info!(
"No valid previous network configuration stored ({}, {}), starting fresh.",
e, e2
);
NetworkConfig::new()
}
}
}
};
let mut status = Status {
nodes: HashMap::new(),
hash: Hash::default(),
};
status.recalculate_hash();
let (update_status, status) = watch::channel(Arc::new(status));
let state_info = StateInfo {
hostname: gethostname::gethostname()
.into_string()
.unwrap_or_else(|_| "<invalid utf-8>".to_string()),
replication_factor: Some(replication_factor),
};
let ring = Ring::new(net_config, replication_factor);
let (update_ring, ring) = watch::channel(Arc::new(ring));
let rpc_path = MEMBERSHIP_RPC_PATH.to_string();
let rpc_client = RpcClient::new(
RpcAddrClient::<Message>::new(rpc_http_client.clone(), rpc_path.clone()),
background.clone(),
status.clone(),
);
let sys = Arc::new(System {
id,
persist_config,
persist_status,
rpc_local_port: rpc_server.bind_addr.port(),
state_info,
rpc_http_client,
rpc_client,
replication_factor,
status,
ring,
update_lock: Mutex::new(Updaters {
update_status,
update_ring,
}),
background,
});
sys.clone().register_handler(rpc_server, rpc_path);
sys
}
fn register_handler(self: Arc<Self>, rpc_server: &mut RpcServer, path: String) {
rpc_server.add_handler::<Message, _, _>(path, move |msg, addr| {
let self2 = self.clone();
async move {
match msg {
Message::Ping(ping) => self2.handle_ping(&addr, &ping).await,
Message::PullStatus => Ok(self2.handle_pull_status()),
Message::PullConfig => Ok(self2.handle_pull_config()),
Message::AdvertiseNodesUp(adv) => self2.handle_advertise_nodes_up(&adv).await,
Message::AdvertiseConfig(adv) => self2.handle_advertise_config(&adv).await,
_ => Err(Error::BadRpc("Unexpected RPC message".to_string())),
}
}
});
}
/// Get an RPC client
pub fn rpc_client<M: RpcMessage + 'static>(self: &Arc<Self>, path: &str) -> Arc<RpcClient<M>> {
RpcClient::new(
RpcAddrClient::new(self.rpc_http_client.clone(), path.to_string()),
self.background.clone(),
self.status.clone(),
)
}
/// Save network configuration to disc
async fn save_network_config(self: Arc<Self>) -> Result<(), Error> {
let ring = self.ring.borrow().clone();
self.persist_config
.save_async(&ring.config)
.await
.expect("Cannot save current cluster configuration");
Ok(())
}
fn make_ping(&self) -> Message {
let status = self.status.borrow().clone();
let ring = self.ring.borrow().clone();
Message::Ping(PingMessage {
id: self.id,
rpc_port: self.rpc_local_port,
status_hash: status.hash,
config_version: ring.config.version,
state_info: self.state_info.clone(),
})
}
async fn broadcast(self: Arc<Self>, msg: Message, timeout: Duration) {
let status = self.status.borrow().clone();
let to = status
.nodes
.keys()
.filter(|x| **x != self.id)
.cloned()
.collect::<Vec<_>>();
self.rpc_client.call_many(&to[..], msg, timeout).await;
}
/// Perform bootstraping, starting the ping loop
pub async fn bootstrap(
self: Arc<Self>,
peers: Vec<SocketAddr>,
consul_host: Option<String>,
consul_service_name: Option<String>,
) {
let self2 = self.clone();
self.background
.spawn_worker("discovery loop".to_string(), |stop_signal| {
self2.discovery_loop(peers, consul_host, consul_service_name, stop_signal)
});
let self2 = self.clone();
self.background
.spawn_worker("ping loop".to_string(), |stop_signal| {
self2.ping_loop(stop_signal)
});
}
async fn ping_nodes(self: Arc<Self>, peers: Vec<(SocketAddr, Option<Uuid>)>) {
let ping_msg = self.make_ping();
let ping_resps = join_all(peers.iter().map(|(addr, id_option)| {
let sys = self.clone();
let ping_msg_ref = &ping_msg;
async move {
(
id_option,
addr,
sys.rpc_client
.by_addr()
.call(&addr, ping_msg_ref, PING_TIMEOUT)
.await,
)
}
}))
.await;
let update_locked = self.update_lock.lock().await;
let mut status: Status = self.status.borrow().as_ref().clone();
let ring = self.ring.borrow().clone();
let mut has_changes = false;
let mut to_advertise = vec![];
for (id_option, addr, ping_resp) in ping_resps {
if let Ok(Ok(Message::Ping(info))) = ping_resp {
let is_new = status.handle_ping(addr.ip(), &info);
if is_new {
has_changes = true;
to_advertise.push(AdvertisedNode {
id: info.id,
addr: *addr,
is_up: true,
last_seen: now_msec(),
state_info: info.state_info.clone(),
});
}
if is_new || status.hash != info.status_hash {
self.background
.spawn_cancellable(self.clone().pull_status(info.id).map(Ok));
}
if is_new || ring.config.version < info.config_version {
self.background
.spawn_cancellable(self.clone().pull_config(info.id).map(Ok));
}
} else if let Some(id) = id_option {
if let Some(st) = status.nodes.get_mut(id) {
// we need to increment failure counter as call was done using by_addr so the
// counter was not auto-incremented
st.num_failures.fetch_add(1, Ordering::SeqCst);
if !st.is_up() {
warn!("Node {:?} seems to be down.", id);
if !ring.config.members.contains_key(id) {
info!("Removing node {:?} from status (not in config and not responding to pings anymore)", id);
status.nodes.remove(&id);
has_changes = true;
}
}
}
}
}
if has_changes {
status.recalculate_hash();
}
self.update_status(&update_locked, status).await;
drop(update_locked);
if !to_advertise.is_empty() {
self.broadcast(Message::AdvertiseNodesUp(to_advertise), PING_TIMEOUT)
.await;
}
}
async fn handle_ping(
self: Arc<Self>,
from: &SocketAddr,
ping: &PingMessage,
) -> Result<Message, Error> {
let update_locked = self.update_lock.lock().await;
let mut status: Status = self.status.borrow().as_ref().clone();
let is_new = status.handle_ping(from.ip(), ping);
if is_new {
status.recalculate_hash();
}
let status_hash = status.hash;
let config_version = self.ring.borrow().config.version;
self.update_status(&update_locked, status).await;
drop(update_locked);
if is_new || status_hash != ping.status_hash {
self.background
.spawn_cancellable(self.clone().pull_status(ping.id).map(Ok));
}
if is_new || config_version < ping.config_version {
self.background
.spawn_cancellable(self.clone().pull_config(ping.id).map(Ok));
}
Ok(self.make_ping())
}
fn handle_pull_status(&self) -> Message {
Message::AdvertiseNodesUp(self.status.borrow().to_serializable_membership(self))
}
fn handle_pull_config(&self) -> Message {
let ring = self.ring.borrow().clone();
Message::AdvertiseConfig(ring.config.clone())
}
async fn handle_advertise_nodes_up(
self: Arc<Self>,
adv: &[AdvertisedNode],
) -> Result<Message, Error> {
let mut to_ping = vec![];
let update_lock = self.update_lock.lock().await;
let mut status: Status = self.status.borrow().as_ref().clone();
let mut has_changed = false;
let mut max_replication_factor = 0;
for node in adv.iter() {
if node.id == self.id {
// learn our own ip address
let self_addr = SocketAddr::new(node.addr.ip(), self.rpc_local_port);
let old_self = status.nodes.insert(
node.id,
Arc::new(StatusEntry {
addr: self_addr,
last_seen: now_msec(),
num_failures: AtomicUsize::from(0),
state_info: self.state_info.clone(),
}),
);
has_changed = match old_self {
None => true,
Some(x) => x.addr != self_addr,
};
} else {
let ping_them = match status.nodes.get(&node.id) {
// Case 1: new node
None => true,
// Case 2: the node might have changed address
Some(our_node) => node.is_up && !our_node.is_up() && our_node.addr != node.addr,
};
max_replication_factor = std::cmp::max(
max_replication_factor,
node.state_info.replication_factor.unwrap_or_default(),
);
if ping_them {
to_ping.push((node.addr, Some(node.id)));
}
}
}
if self.replication_factor < max_replication_factor {
error!("Some node have a higher replication factor ({}) than this one ({}). This is not supported and might lead to bugs",
max_replication_factor,
self.replication_factor);
std::process::exit(1);
}
if has_changed {
status.recalculate_hash();
}
self.update_status(&update_lock, status).await;
drop(update_lock);
if !to_ping.is_empty() {
self.background
.spawn_cancellable(self.clone().ping_nodes(to_ping).map(Ok));
}
Ok(Message::Ok)
}
async fn handle_advertise_config(
self: Arc<Self>,
adv: &NetworkConfig,
) -> Result<Message, Error> {
let update_lock = self.update_lock.lock().await;
let ring: Arc<Ring> = self.ring.borrow().clone();
if adv.version > ring.config.version {
let ring = Ring::new(adv.clone(), self.replication_factor);
update_lock.update_ring.send(Arc::new(ring))?;
drop(update_lock);
self.background.spawn_cancellable(
self.clone()
.broadcast(Message::AdvertiseConfig(adv.clone()), PING_TIMEOUT)
.map(Ok),
);
self.background.spawn(self.clone().save_network_config());
}
Ok(Message::Ok)
}
async fn ping_loop(self: Arc<Self>, mut stop_signal: watch::Receiver<bool>) {
while !*stop_signal.borrow() {
let restart_at = tokio::time::sleep(PING_INTERVAL);
let status = self.status.borrow().clone();
let ping_addrs = status
.nodes
.iter()
.filter(|(id, _)| **id != self.id)
.map(|(id, status)| (status.addr, Some(*id)))
.collect::<Vec<_>>();
self.clone().ping_nodes(ping_addrs).await;
select! {
_ = restart_at.fuse() => {},
_ = stop_signal.changed().fuse() => {},
}
}
}
async fn discovery_loop(
self: Arc<Self>,
bootstrap_peers: Vec<SocketAddr>,
consul_host: Option<String>,
consul_service_name: Option<String>,
mut stop_signal: watch::Receiver<bool>,
) {
let consul_config = match (consul_host, consul_service_name) {
(Some(ch), Some(csn)) => Some((ch, csn)),
_ => None,
};
while !*stop_signal.borrow() {
let not_configured = self.ring.borrow().config.members.is_empty();
let no_peers = self.status.borrow().nodes.len() < 3;
let bad_peers = self
.status
.borrow()
.nodes
.iter()
.filter(|(_, v)| v.is_up())
.count() != self.ring.borrow().config.members.len();
if not_configured || no_peers || bad_peers {
info!("Doing a bootstrap/discovery step (not_configured: {}, no_peers: {}, bad_peers: {})", not_configured, no_peers, bad_peers);
let mut ping_list = bootstrap_peers
.iter()
.map(|ip| (*ip, None))
.collect::<Vec<_>>();
if let Ok(peers) = self.persist_status.load_async().await {
ping_list.extend(peers.iter().map(|x| (x.addr, Some(x.id))));
}
if let Some((consul_host, consul_service_name)) = &consul_config {
match get_consul_nodes(consul_host, consul_service_name).await {
Ok(node_list) => {
ping_list.extend(node_list.iter().map(|a| (*a, None)));
}
Err(e) => {
warn!("Could not retrieve node list from Consul: {}", e);
}
}
}
self.clone().ping_nodes(ping_list).await;
}
let restart_at = tokio::time::sleep(DISCOVERY_INTERVAL);
select! {
_ = restart_at.fuse() => {},
_ = stop_signal.changed().fuse() => {},
}
}
}
// for some reason fixing this is causing compilation error, see https://github.com/rust-lang/rust-clippy/issues/7052
#[allow(clippy::manual_async_fn)]
fn pull_status(
self: Arc<Self>,
peer: Uuid,
) -> impl futures::future::Future<Output = ()> + Send + 'static {
async move {
let resp = self
.rpc_client
.call(peer, Message::PullStatus, PING_TIMEOUT)
.await;
if let Ok(Message::AdvertiseNodesUp(nodes)) = resp {
let _: Result<_, _> = self.handle_advertise_nodes_up(&nodes).await;
}
}
}
async fn pull_config(self: Arc<Self>, peer: Uuid) {
let resp = self
.rpc_client
.call(peer, Message::PullConfig, PING_TIMEOUT)
.await;
if let Ok(Message::AdvertiseConfig(config)) = resp {
let _: Result<_, _> = self.handle_advertise_config(&config).await;
}
}
async fn update_status(self: &Arc<Self>, updaters: &Updaters, status: Status) {
if status.hash != self.status.borrow().hash {
let mut list = status.to_serializable_membership(&self);
// Combine with old peer list to make sure no peer is lost
if let Ok(old_list) = self.persist_status.load_async().await {
for pp in old_list {
if !list.iter().any(|np| pp.id == np.id) {
list.push(pp);
}
}
}
if !list.is_empty() {
info!("Persisting new peer list ({} peers)", list.len());
self.persist_status
.save_async(&list)
.await
.expect("Unable to persist peer list");
}
}
updaters
.update_status
.send(Arc::new(status))
.expect("Could not update internal membership status");
}
}

View file

@ -3,6 +3,8 @@
use std::collections::{HashMap, HashSet};
use std::convert::TryInto;
use netapp::NodeID;
use serde::{Deserialize, Serialize};
use garage_util::data::*;
@ -98,7 +100,7 @@ pub struct Ring {
pub config: NetworkConfig,
// Internal order of nodes used to make a more compact representation of the ring
nodes: Vec<Uuid>,
nodes: Vec<NodeID>,
// The list of entries in the ring
ring: Vec<RingEntry>,
@ -260,6 +262,11 @@ impl Ring {
})
.collect::<Vec<_>>();
let nodes = nodes
.iter()
.map(|id| NodeID::from_slice(id.as_slice()).unwrap())
.collect::<Vec<_>>();
Self {
replication_factor,
config,
@ -291,7 +298,7 @@ impl Ring {
}
/// Walk the ring to find the n servers in which data should be replicated
pub fn get_nodes(&self, position: &Hash, n: usize) -> Vec<Uuid> {
pub fn get_nodes(&self, position: &Hash, n: usize) -> Vec<NodeID> {
if self.ring.len() != 1 << PARTITION_BITS {
warn!("Ring not yet ready, read/writes will be lost!");
return vec![];

View file

@ -1,369 +0,0 @@
//! Contain structs related to making RPCs
use std::borrow::Borrow;
use std::marker::PhantomData;
use std::net::SocketAddr;
use std::pin::Pin;
use std::sync::atomic::Ordering;
use std::sync::Arc;
use std::time::Duration;
use arc_swap::ArcSwapOption;
use futures::future::Future;
use futures::stream::futures_unordered::FuturesUnordered;
use futures::stream::StreamExt;
use futures_util::future::FutureExt;
use hyper::client::{Client, HttpConnector};
use hyper::{Body, Method, Request};
use tokio::sync::{watch, Semaphore};
use garage_util::background::BackgroundRunner;
use garage_util::config::TlsConfig;
use garage_util::data::*;
use garage_util::error::{Error, RpcError};
use crate::membership::Status;
use crate::rpc_server::RpcMessage;
use crate::tls_util;
const DEFAULT_TIMEOUT: Duration = Duration::from_secs(10);
/// Strategy to apply when making RPC
#[derive(Copy, Clone)]
pub struct RequestStrategy {
/// Max time to wait for reponse
pub rs_timeout: Duration,
/// Min number of response to consider the request successful
pub rs_quorum: usize,
/// Should requests be dropped after enough response are received
pub rs_interrupt_after_quorum: bool,
}
impl RequestStrategy {
/// Create a RequestStrategy with default timeout and not interrupting when quorum reached
pub fn with_quorum(quorum: usize) -> Self {
RequestStrategy {
rs_timeout: DEFAULT_TIMEOUT,
rs_quorum: quorum,
rs_interrupt_after_quorum: false,
}
}
/// Set timeout of the strategy
pub fn with_timeout(mut self, timeout: Duration) -> Self {
self.rs_timeout = timeout;
self
}
/// Set if requests can be dropped after quorum has been reached
/// In general true for read requests, and false for write
pub fn interrupt_after_quorum(mut self, interrupt: bool) -> Self {
self.rs_interrupt_after_quorum = interrupt;
self
}
}
/// Shortcut for a boxed async function taking a message, and resolving to another message or an
/// error
pub type LocalHandlerFn<M> =
Box<dyn Fn(Arc<M>) -> Pin<Box<dyn Future<Output = Result<M, Error>> + Send>> + Send + Sync>;
/// Client used to send RPC
pub struct RpcClient<M: RpcMessage> {
status: watch::Receiver<Arc<Status>>,
background: Arc<BackgroundRunner>,
local_handler: ArcSwapOption<(Uuid, LocalHandlerFn<M>)>,
rpc_addr_client: RpcAddrClient<M>,
}
impl<M: RpcMessage + 'static> RpcClient<M> {
/// Create a new RpcClient from an address, a job runner, and the status of all RPC servers
pub fn new(
rac: RpcAddrClient<M>,
background: Arc<BackgroundRunner>,
status: watch::Receiver<Arc<Status>>,
) -> Arc<Self> {
Arc::new(Self {
rpc_addr_client: rac,
background,
status,
local_handler: ArcSwapOption::new(None),
})
}
/// Set the local handler, to process RPC to this node without network usage
pub fn set_local_handler<F, Fut>(&self, my_id: Uuid, handler: F)
where
F: Fn(Arc<M>) -> Fut + Send + Sync + 'static,
Fut: Future<Output = Result<M, Error>> + Send + 'static,
{
let handler_arc = Arc::new(handler);
let handler: LocalHandlerFn<M> = Box::new(move |msg| {
let handler_arc2 = handler_arc.clone();
Box::pin(async move { handler_arc2(msg).await })
});
self.local_handler.swap(Some(Arc::new((my_id, handler))));
}
/// Get a RPC client to make calls using node's SocketAddr instead of its ID
pub fn by_addr(&self) -> &RpcAddrClient<M> {
&self.rpc_addr_client
}
/// Make a RPC call
pub async fn call(&self, to: Uuid, msg: M, timeout: Duration) -> Result<M, Error> {
self.call_arc(to, Arc::new(msg), timeout).await
}
/// Make a RPC call from a message stored in an Arc
pub async fn call_arc(&self, to: Uuid, msg: Arc<M>, timeout: Duration) -> Result<M, Error> {
if let Some(lh) = self.local_handler.load_full() {
let (my_id, local_handler) = lh.as_ref();
if to.borrow() == my_id {
return local_handler(msg).await;
}
}
let status = self.status.borrow().clone();
let node_status = match status.nodes.get(&to) {
Some(node_status) => {
if node_status.is_up() {
node_status
} else {
return Err(Error::from(RpcError::NodeDown(to)));
}
}
None => {
return Err(Error::Message(format!(
"Peer ID not found: {:?}",
to.borrow()
)))
}
};
match self
.rpc_addr_client
.call(&node_status.addr, msg, timeout)
.await
{
Err(rpc_error) => {
node_status.num_failures.fetch_add(1, Ordering::SeqCst);
Err(Error::from(rpc_error))
}
Ok(x) => x,
}
}
/// Make a RPC call to multiple servers, returning a Vec containing each result
pub async fn call_many(&self, to: &[Uuid], msg: M, timeout: Duration) -> Vec<Result<M, Error>> {
let msg = Arc::new(msg);
let mut resp_stream = to
.iter()
.map(|to| self.call_arc(*to, msg.clone(), timeout))
.collect::<FuturesUnordered<_>>();
let mut results = vec![];
while let Some(resp) = resp_stream.next().await {
results.push(resp);
}
results
}
/// Make a RPC call to multiple servers, returning either a Vec of responses, or an error if
/// strategy could not be respected due to too many errors
pub async fn try_call_many(
self: &Arc<Self>,
to: &[Uuid],
msg: M,
strategy: RequestStrategy,
) -> Result<Vec<M>, Error> {
let timeout = strategy.rs_timeout;
let msg = Arc::new(msg);
let mut resp_stream = to
.to_vec()
.into_iter()
.map(|to| {
let self2 = self.clone();
let msg = msg.clone();
async move { self2.call_arc(to, msg, timeout).await }
})
.collect::<FuturesUnordered<_>>();
let mut results = vec![];
let mut errors = vec![];
while let Some(resp) = resp_stream.next().await {
match resp {
Ok(msg) => {
results.push(msg);
if results.len() >= strategy.rs_quorum {
break;
}
}
Err(e) => {
errors.push(e);
}
}
}
if results.len() >= strategy.rs_quorum {
// Continue requests in background.
// Continue the remaining requests immediately using tokio::spawn
// but enqueue a task in the background runner
// to ensure that the process won't exit until the requests are done
// (if we had just enqueued the resp_stream.collect directly in the background runner,
// the requests might have been put on hold in the background runner's queue,
// in which case they might timeout or otherwise fail)
if !strategy.rs_interrupt_after_quorum {
let wait_finished_fut = tokio::spawn(async move {
resp_stream.collect::<Vec<_>>().await;
});
self.background.spawn(wait_finished_fut.map(|_| Ok(())));
}
Ok(results)
} else {
let errors = errors.iter().map(|e| format!("{}", e)).collect::<Vec<_>>();
Err(Error::from(RpcError::TooManyErrors(errors)))
}
}
}
/// Thin wrapper arround an `RpcHttpClient` specifying the path of the request
pub struct RpcAddrClient<M: RpcMessage> {
phantom: PhantomData<M>,
http_client: Arc<RpcHttpClient>,
path: String,
}
impl<M: RpcMessage> RpcAddrClient<M> {
/// Create an RpcAddrClient from an HTTP client and the endpoint to reach for RPCs
pub fn new(http_client: Arc<RpcHttpClient>, path: String) -> Self {
Self {
phantom: PhantomData::default(),
http_client,
path,
}
}
/// Make a RPC
pub async fn call<MB>(
&self,
to_addr: &SocketAddr,
msg: MB,
timeout: Duration,
) -> Result<Result<M, Error>, RpcError>
where
MB: Borrow<M>,
{
self.http_client
.call(&self.path, to_addr, msg, timeout)
.await
}
}
/// HTTP client used to make RPCs
pub struct RpcHttpClient {
request_limiter: Semaphore,
method: ClientMethod,
}
enum ClientMethod {
Http(Client<HttpConnector, hyper::Body>),
Https(Client<tls_util::HttpsConnectorFixedDnsname<HttpConnector>, hyper::Body>),
}
impl RpcHttpClient {
/// Create a new RpcHttpClient
pub fn new(
max_concurrent_requests: usize,
tls_config: &Option<TlsConfig>,
) -> Result<Self, Error> {
let method = if let Some(cf) = tls_config {
let ca_certs = tls_util::load_certs(&cf.ca_cert).map_err(|e| {
Error::Message(format!("Failed to open CA certificate file: {:?}", e))
})?;
let node_certs = tls_util::load_certs(&cf.node_cert)
.map_err(|e| Error::Message(format!("Failed to open certificate file: {:?}", e)))?;
let node_key = tls_util::load_private_key(&cf.node_key)
.map_err(|e| Error::Message(format!("Failed to open private key file: {:?}", e)))?;
let mut config = rustls::ClientConfig::new();
for crt in ca_certs.iter() {
config.root_store.add(crt)?;
}
config.set_single_client_cert([&node_certs[..], &ca_certs[..]].concat(), node_key)?;
let connector =
tls_util::HttpsConnectorFixedDnsname::<HttpConnector>::new(config, "garage");
ClientMethod::Https(Client::builder().build(connector))
} else {
ClientMethod::Http(Client::new())
};
Ok(RpcHttpClient {
method,
request_limiter: Semaphore::new(max_concurrent_requests),
})
}
/// Make a RPC
async fn call<M, MB>(
&self,
path: &str,
to_addr: &SocketAddr,
msg: MB,
timeout: Duration,
) -> Result<Result<M, Error>, RpcError>
where
MB: Borrow<M>,
M: RpcMessage,
{
let uri = match self.method {
ClientMethod::Http(_) => format!("http://{}/{}", to_addr, path),
ClientMethod::Https(_) => format!("https://{}/{}", to_addr, path),
};
let req = Request::builder()
.method(Method::POST)
.uri(uri)
.body(Body::from(rmp_to_vec_all_named(msg.borrow())?))?;
let resp_fut = match &self.method {
ClientMethod::Http(client) => client.request(req).fuse(),
ClientMethod::Https(client) => client.request(req).fuse(),
};
trace!("({}) Acquiring request_limiter slot...", path);
let slot = self.request_limiter.acquire().await;
trace!("({}) Got slot, doing request to {}...", path, to_addr);
let resp = tokio::time::timeout(timeout, resp_fut)
.await
.map_err(|e| {
debug!(
"RPC timeout to {}: {}",
to_addr,
debug_serialize(msg.borrow())
);
e
})?
.map_err(|e| {
warn!(
"RPC HTTP client error when connecting to {}: {}",
to_addr, e
);
e
})?;
let status = resp.status();
trace!("({}) Request returned, got status {}", path, status);
let body = hyper::body::to_bytes(resp.into_body()).await?;
drop(slot);
match rmp_serde::decode::from_read::<_, Result<M, String>>(&body[..])? {
Err(e) => Ok(Err(Error::RemoteError(e, status))),
Ok(x) => Ok(Ok(x)),
}
}
}

206
src/rpc/rpc_helper.rs Normal file
View file

@ -0,0 +1,206 @@
//! Contain structs related to making RPCs
use std::sync::Arc;
use std::time::Duration;
use futures::future::join_all;
use futures::stream::futures_unordered::FuturesUnordered;
use futures::stream::StreamExt;
use futures_util::future::FutureExt;
use tokio::select;
pub use netapp::endpoint::{Endpoint, EndpointHandler, Message};
use netapp::peering::fullmesh::FullMeshPeeringStrategy;
pub use netapp::proto::*;
pub use netapp::{NetApp, NodeID};
use garage_util::background::BackgroundRunner;
use garage_util::error::{Error, RpcError};
const DEFAULT_TIMEOUT: Duration = Duration::from_secs(10);
/// Strategy to apply when making RPC
#[derive(Copy, Clone)]
pub struct RequestStrategy {
/// Max time to wait for reponse
pub rs_timeout: Duration,
/// Min number of response to consider the request successful
pub rs_quorum: Option<usize>,
/// Should requests be dropped after enough response are received
pub rs_interrupt_after_quorum: bool,
/// Request priority
pub rs_priority: RequestPriority,
}
impl RequestStrategy {
/// Create a RequestStrategy with default timeout and not interrupting when quorum reached
pub fn with_priority(prio: RequestPriority) -> Self {
RequestStrategy {
rs_timeout: DEFAULT_TIMEOUT,
rs_quorum: None,
rs_interrupt_after_quorum: false,
rs_priority: prio,
}
}
/// Set quorum to be reached for request
pub fn with_quorum(mut self, quorum: usize) -> Self {
self.rs_quorum = Some(quorum);
self
}
/// Set timeout of the strategy
pub fn with_timeout(mut self, timeout: Duration) -> Self {
self.rs_timeout = timeout;
self
}
/// Set if requests can be dropped after quorum has been reached
/// In general true for read requests, and false for write
pub fn interrupt_after_quorum(mut self, interrupt: bool) -> Self {
self.rs_interrupt_after_quorum = interrupt;
self
}
}
#[derive(Clone)]
pub struct RpcHelper {
pub(crate) fullmesh: Arc<FullMeshPeeringStrategy>,
pub(crate) background: Arc<BackgroundRunner>,
}
impl RpcHelper {
pub async fn call<M, H>(
&self,
endpoint: &Endpoint<M, H>,
to: NodeID,
msg: M,
strat: RequestStrategy,
) -> Result<M::Response, Error>
where
M: Message,
H: EndpointHandler<M>,
{
self.call_arc(endpoint, to, Arc::new(msg), strat).await
}
pub async fn call_arc<M, H>(
&self,
endpoint: &Endpoint<M, H>,
to: NodeID,
msg: Arc<M>,
strat: RequestStrategy,
) -> Result<M::Response, Error>
where
M: Message,
H: EndpointHandler<M>,
{
select! {
res = endpoint.call(&to, &msg, strat.rs_priority) => Ok(res?),
_ = tokio::time::sleep(strat.rs_timeout) => Err(Error::Rpc(RpcError::Timeout)),
}
}
pub async fn call_many<M, H>(
&self,
endpoint: &Endpoint<M, H>,
to: &[NodeID],
msg: M,
strat: RequestStrategy,
) -> Vec<(NodeID, Result<M::Response, Error>)>
where
M: Message,
H: EndpointHandler<M>,
{
let msg = Arc::new(msg);
let resps = join_all(
to.iter()
.map(|to| self.call_arc(endpoint, *to, msg.clone(), strat)),
)
.await;
to.iter()
.cloned()
.zip(resps.into_iter())
.collect::<Vec<_>>()
}
pub async fn broadcast<M, H>(
&self,
endpoint: &Endpoint<M, H>,
msg: M,
strat: RequestStrategy,
) -> Vec<(NodeID, Result<M::Response, Error>)>
where
M: Message,
H: EndpointHandler<M>,
{
let to = self
.fullmesh
.get_peer_list()
.iter()
.map(|p| p.id)
.collect::<Vec<_>>();
self.call_many(endpoint, &to[..], msg, strat).await
}
/// Make a RPC call to multiple servers, returning either a Vec of responses, or an error if
/// strategy could not be respected due to too many errors
pub async fn try_call_many<M, H>(
&self,
endpoint: &Arc<Endpoint<M, H>>,
to: &[NodeID],
msg: M,
strategy: RequestStrategy,
) -> Result<Vec<M::Response>, Error>
where
M: Message + 'static,
H: EndpointHandler<M> + 'static,
{
let msg = Arc::new(msg);
let mut resp_stream = to
.to_vec()
.into_iter()
.map(|to| {
let self2 = self.clone();
let msg = msg.clone();
let endpoint2 = endpoint.clone();
async move { self2.call_arc(&endpoint2, to, msg, strategy).await }
})
.collect::<FuturesUnordered<_>>();
let mut results = vec![];
let mut errors = vec![];
let quorum = strategy.rs_quorum.unwrap_or(to.len());
while let Some(resp) = resp_stream.next().await {
match resp {
Ok(msg) => {
results.push(msg);
if results.len() >= quorum {
break;
}
}
Err(e) => {
errors.push(e);
}
}
}
if results.len() >= quorum {
// Continue requests in background.
// Continue the remaining requests immediately using tokio::spawn
// but enqueue a task in the background runner
// to ensure that the process won't exit until the requests are done
// (if we had just enqueued the resp_stream.collect directly in the background runner,
// the requests might have been put on hold in the background runner's queue,
// in which case they might timeout or otherwise fail)
if !strategy.rs_interrupt_after_quorum {
let wait_finished_fut = tokio::spawn(async move {
resp_stream.collect::<Vec<_>>().await;
});
self.background.spawn(wait_finished_fut.map(|_| Ok(())));
}
Ok(results)
} else {
let errors = errors.iter().map(|e| format!("{}", e)).collect::<Vec<_>>();
Err(Error::from(RpcError::TooManyErrors(errors)))
}
}
}

View file

@ -1,247 +0,0 @@
//! Contains structs related to receiving RPCs
use std::collections::HashMap;
use std::net::SocketAddr;
use std::pin::Pin;
use std::sync::Arc;
use std::time::Instant;
use futures::future::Future;
use futures_util::future::*;
use futures_util::stream::*;
use hyper::server::conn::AddrStream;
use hyper::service::{make_service_fn, service_fn};
use hyper::{Body, Method, Request, Response, Server, StatusCode};
use serde::{Deserialize, Serialize};
use tokio::net::{TcpListener, TcpStream};
use tokio_rustls::server::TlsStream;
use tokio_rustls::TlsAcceptor;
use tokio_stream::wrappers::TcpListenerStream;
use garage_util::config::TlsConfig;
use garage_util::data::*;
use garage_util::error::Error;
use crate::tls_util;
/// Trait for messages that can be sent as RPC
pub trait RpcMessage: Serialize + for<'de> Deserialize<'de> + Send + Sync {}
type ResponseFuture = Pin<Box<dyn Future<Output = Result<Response<Body>, Error>> + Send>>;
type Handler = Box<dyn Fn(Request<Body>, SocketAddr) -> ResponseFuture + Send + Sync>;
/// Structure handling RPCs
pub struct RpcServer {
/// The address the RpcServer will bind
pub bind_addr: SocketAddr,
/// The tls configuration used for RPC
pub tls_config: Option<TlsConfig>,
handlers: HashMap<String, Handler>,
}
async fn handle_func<M, F, Fut>(
handler: Arc<F>,
req: Request<Body>,
sockaddr: SocketAddr,
name: Arc<String>,
) -> Result<Response<Body>, Error>
where
M: RpcMessage + 'static,
F: Fn(M, SocketAddr) -> Fut + Send + Sync + 'static,
Fut: Future<Output = Result<M, Error>> + Send + 'static,
{
let begin_time = Instant::now();
let whole_body = hyper::body::to_bytes(req.into_body()).await?;
let msg = rmp_serde::decode::from_read::<_, M>(&whole_body[..])?;
trace!(
"Request message: {}",
serde_json::to_string(&msg)
.unwrap_or_else(|_| "<json error>".into())
.chars()
.take(100)
.collect::<String>()
);
match handler(msg, sockaddr).await {
Ok(resp) => {
let resp_bytes = rmp_to_vec_all_named::<Result<M, String>>(&Ok(resp))?;
let rpc_duration = (Instant::now() - begin_time).as_millis();
if rpc_duration > 100 {
debug!("RPC {} ok, took long: {} ms", name, rpc_duration,);
}
Ok(Response::new(Body::from(resp_bytes)))
}
Err(e) => {
let err_str = format!("{}", e);
let rep_bytes = rmp_to_vec_all_named::<Result<M, String>>(&Err(err_str))?;
let mut err_response = Response::new(Body::from(rep_bytes));
*err_response.status_mut() = match e {
Error::BadRpc(_) => StatusCode::BAD_REQUEST,
_ => StatusCode::INTERNAL_SERVER_ERROR,
};
warn!(
"RPC error ({}): {} ({} ms)",
name,
e,
(Instant::now() - begin_time).as_millis(),
);
Ok(err_response)
}
}
}
impl RpcServer {
/// Create a new RpcServer
pub fn new(bind_addr: SocketAddr, tls_config: Option<TlsConfig>) -> Self {
Self {
bind_addr,
tls_config,
handlers: HashMap::new(),
}
}
/// Add handler handling request made to `name`
pub fn add_handler<M, F, Fut>(&mut self, name: String, handler: F)
where
M: RpcMessage + 'static,
F: Fn(M, SocketAddr) -> Fut + Send + Sync + 'static,
Fut: Future<Output = Result<M, Error>> + Send + 'static,
{
let name2 = Arc::new(name.clone());
let handler_arc = Arc::new(handler);
let handler = Box::new(move |req: Request<Body>, sockaddr: SocketAddr| {
let handler2 = handler_arc.clone();
let b: ResponseFuture = Box::pin(handle_func(handler2, req, sockaddr, name2.clone()));
b
});
self.handlers.insert(name, handler);
}
async fn handler(
self: Arc<Self>,
req: Request<Body>,
addr: SocketAddr,
) -> Result<Response<Body>, Error> {
if req.method() != Method::POST {
let mut bad_request = Response::default();
*bad_request.status_mut() = StatusCode::BAD_REQUEST;
return Ok(bad_request);
}
let path = &req.uri().path()[1..].to_string();
let handler = match self.handlers.get(path) {
Some(h) => h,
None => {
let mut not_found = Response::default();
*not_found.status_mut() = StatusCode::NOT_FOUND;
return Ok(not_found);
}
};
trace!("({}) Handling request", path);
let resp_waiter = tokio::spawn(handler(req, addr));
match resp_waiter.await {
Err(err) => {
warn!("Handler await error: {}", err);
let mut ise = Response::default();
*ise.status_mut() = StatusCode::INTERNAL_SERVER_ERROR;
Ok(ise)
}
Ok(Err(err)) => {
trace!("({}) Request handler failed: {}", path, err);
let mut bad_request = Response::new(Body::from(format!("{}", err)));
*bad_request.status_mut() = StatusCode::BAD_REQUEST;
Ok(bad_request)
}
Ok(Ok(resp)) => {
trace!("({}) Request handler succeeded", path);
Ok(resp)
}
}
}
/// Run the RpcServer
pub async fn run(
self: Arc<Self>,
shutdown_signal: impl Future<Output = ()>,
) -> Result<(), Error> {
if let Some(tls_config) = self.tls_config.as_ref() {
let ca_certs = tls_util::load_certs(&tls_config.ca_cert)?;
let node_certs = tls_util::load_certs(&tls_config.node_cert)?;
let node_key = tls_util::load_private_key(&tls_config.node_key)?;
let mut ca_store = rustls::RootCertStore::empty();
for crt in ca_certs.iter() {
ca_store.add(crt)?;
}
let mut config =
rustls::ServerConfig::new(rustls::AllowAnyAuthenticatedClient::new(ca_store));
config.set_single_cert([&node_certs[..], &ca_certs[..]].concat(), node_key)?;
let tls_acceptor = Arc::new(TlsAcceptor::from(Arc::new(config)));
let listener = TcpListener::bind(&self.bind_addr).await?;
let incoming = TcpListenerStream::new(listener).filter_map(|socket| async {
match socket {
Ok(stream) => match tls_acceptor.clone().accept(stream).await {
Ok(x) => Some(Ok::<_, hyper::Error>(x)),
Err(_e) => None,
},
Err(_) => None,
}
});
let incoming = hyper::server::accept::from_stream(incoming);
let self_arc = self.clone();
let service = make_service_fn(|conn: &TlsStream<TcpStream>| {
let client_addr = conn
.get_ref()
.0
.peer_addr()
.unwrap_or_else(|_| ([0, 0, 0, 0], 0).into());
let self_arc = self_arc.clone();
async move {
Ok::<_, Error>(service_fn(move |req: Request<Body>| {
self_arc.clone().handler(req, client_addr).map_err(|e| {
warn!("RPC handler error: {}", e);
e
})
}))
}
});
let server = Server::builder(incoming).serve(service);
let graceful = server.with_graceful_shutdown(shutdown_signal);
info!("RPC server listening on http://{}", self.bind_addr);
graceful.await?;
} else {
let self_arc = self.clone();
let service = make_service_fn(move |conn: &AddrStream| {
let client_addr = conn.remote_addr();
let self_arc = self_arc.clone();
async move {
Ok::<_, Error>(service_fn(move |req: Request<Body>| {
self_arc.clone().handler(req, client_addr).map_err(|e| {
warn!("RPC handler error: {}", e);
e
})
}))
}
});
let server = Server::bind(&self.bind_addr).serve(service);
let graceful = server.with_graceful_shutdown(shutdown_signal);
info!("RPC server listening on http://{}", self.bind_addr);
graceful.await?;
}
Ok(())
}
}

363
src/rpc/system.rs Normal file
View file

@ -0,0 +1,363 @@
//! Module containing structs related to membership management
use std::io::{Read, Write};
use std::net::SocketAddr;
use std::path::{Path, PathBuf};
use std::sync::Arc;
use std::time::Duration;
use arc_swap::ArcSwap;
use async_trait::async_trait;
use futures::{join, select};
use futures_util::future::*;
use serde::{Deserialize, Serialize};
use sodiumoxide::crypto::sign::ed25519;
use tokio::sync::watch;
use tokio::sync::Mutex;
use netapp::endpoint::{Endpoint, EndpointHandler, Message};
use netapp::peering::fullmesh::FullMeshPeeringStrategy;
use netapp::proto::*;
use netapp::{NetApp, NetworkKey, NodeID, NodeKey};
use garage_util::background::BackgroundRunner;
use garage_util::error::Error;
use garage_util::persister::Persister;
//use garage_util::time::*;
//use crate::consul::get_consul_nodes;
use crate::ring::*;
use crate::rpc_helper::{RequestStrategy, RpcHelper};
const DISCOVERY_INTERVAL: Duration = Duration::from_secs(60);
const PING_TIMEOUT: Duration = Duration::from_secs(2);
/// RPC endpoint used for calls related to membership
pub const SYSTEM_RPC_PATH: &str = "garage_rpc/membership.rs/SystemRpc";
/// RPC messages related to membership
#[derive(Debug, Serialize, Deserialize, Clone)]
pub enum SystemRpc {
/// Response to successfull advertisements
Ok,
/// Error response
Error(String),
/// Ask other node its config. Answered with AdvertiseConfig
PullConfig,
/// Advertise Garage status. Answered with another AdvertiseStatus.
/// Exchanged with every node on a regular basis.
AdvertiseStatus(StateInfo),
/// Advertisement of nodes config. Sent spontanously or in response to PullConfig
AdvertiseConfig(NetworkConfig),
/// Get known nodes states
GetKnownNodes,
/// Return known nodes
ReturnKnownNodes(Vec<(NodeID, SocketAddr, bool)>),
}
impl Message for SystemRpc {
type Response = SystemRpc;
}
/// This node's membership manager
pub struct System {
/// The id of this node
pub id: NodeID,
persist_config: Persister<NetworkConfig>,
state_info: ArcSwap<StateInfo>,
pub netapp: Arc<NetApp>,
fullmesh: Arc<FullMeshPeeringStrategy>,
pub rpc: RpcHelper,
system_endpoint: Arc<Endpoint<SystemRpc, System>>,
rpc_listen_addr: SocketAddr,
bootstrap_peers: Vec<(NodeID, SocketAddr)>,
consul_host: Option<String>,
consul_service_name: Option<String>,
replication_factor: usize,
/// The ring
pub ring: watch::Receiver<Arc<Ring>>,
update_ring: Mutex<watch::Sender<Arc<Ring>>>,
/// The job runner of this node
pub background: Arc<BackgroundRunner>,
}
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct StateInfo {
/// Hostname of the node
pub hostname: String,
/// Replication factor configured on the node
pub replication_factor: usize,
/// Configuration version
pub config_version: u64,
}
fn gen_node_key(metadata_dir: &Path) -> Result<NodeKey, Error> {
let mut id_file = metadata_dir.to_path_buf();
id_file.push("node_id");
if id_file.as_path().exists() {
let mut f = std::fs::File::open(id_file.as_path())?;
let mut d = vec![];
f.read_to_end(&mut d)?;
if d.len() != 64 {
return Err(Error::Message("Corrupt node_id file".to_string()));
}
let mut key = [0u8; 64];
key.copy_from_slice(&d[..]);
Ok(NodeKey::from_slice(&key[..]).unwrap())
} else {
let (key, _) = ed25519::gen_keypair();
let mut f = std::fs::File::create(id_file.as_path())?;
f.write_all(&key[..])?;
Ok(NodeKey::from_slice(&key[..]).unwrap())
}
}
impl System {
/// Create this node's membership manager
pub fn new(
network_key: NetworkKey,
metadata_dir: PathBuf,
background: Arc<BackgroundRunner>,
replication_factor: usize,
rpc_listen_addr: SocketAddr,
bootstrap_peers: Vec<(NodeID, SocketAddr)>,
consul_host: Option<String>,
consul_service_name: Option<String>,
) -> Arc<Self> {
let node_key = gen_node_key(&metadata_dir).expect("Unable to read or generate node ID");
info!("Node public key: {}", hex::encode(&node_key.public_key()));
let persist_config = Persister::new(&metadata_dir, "network_config");
let net_config = match persist_config.load() {
Ok(x) => x,
Err(e) => {
match Persister::<garage_rpc_021::ring::NetworkConfig>::new(
&metadata_dir,
"network_config",
)
.load()
{
Ok(old_config) => NetworkConfig::migrate_from_021(old_config),
Err(e2) => {
info!(
"No valid previous network configuration stored ({}, {}), starting fresh.",
e, e2
);
NetworkConfig::new()
}
}
}
};
let state_info = StateInfo {
hostname: gethostname::gethostname()
.into_string()
.unwrap_or_else(|_| "<invalid utf-8>".to_string()),
replication_factor: replication_factor,
config_version: net_config.version,
};
let ring = Ring::new(net_config, replication_factor);
let (update_ring, ring) = watch::channel(Arc::new(ring));
let netapp = NetApp::new(network_key, node_key);
let fullmesh = FullMeshPeeringStrategy::new(netapp.clone(), bootstrap_peers.clone());
let system_endpoint = netapp.endpoint(SYSTEM_RPC_PATH.into());
let sys = Arc::new(System {
id: netapp.id.clone(),
persist_config,
state_info: ArcSwap::new(Arc::new(state_info)),
netapp: netapp.clone(),
fullmesh: fullmesh.clone(),
rpc: RpcHelper {
fullmesh: fullmesh.clone(),
background: background.clone(),
},
system_endpoint,
replication_factor,
rpc_listen_addr,
bootstrap_peers,
consul_host,
consul_service_name,
ring,
update_ring: Mutex::new(update_ring),
background: background.clone(),
});
sys.system_endpoint.set_handler(sys.clone());
sys
}
/// Perform bootstraping, starting the ping loop
pub async fn run(self: Arc<Self>, must_exit: watch::Receiver<bool>) {
join!(
self.netapp
.clone()
.listen(self.rpc_listen_addr, None, must_exit.clone()),
self.fullmesh.clone().run(must_exit.clone()),
self.discovery_loop(must_exit.clone()),
);
}
// ---- INTERNALS ----
/// Save network configuration to disc
async fn save_network_config(self: Arc<Self>) -> Result<(), Error> {
let ring: Arc<Ring> = self.ring.borrow().clone();
self.persist_config
.save_async(&ring.config)
.await
.expect("Cannot save current cluster configuration");
Ok(())
}
fn update_state_info(&self) {
let mut new_si: StateInfo = self.state_info.load().as_ref().clone();
let ring = self.ring.borrow();
new_si.config_version = ring.config.version;
self.state_info.swap(Arc::new(new_si));
}
fn handle_pull_config(&self) -> SystemRpc {
let ring = self.ring.borrow().clone();
SystemRpc::AdvertiseConfig(ring.config.clone())
}
async fn handle_advertise_config(
self: Arc<Self>,
adv: &NetworkConfig,
) -> Result<SystemRpc, Error> {
let update_ring = self.update_ring.lock().await;
let ring: Arc<Ring> = self.ring.borrow().clone();
if adv.version > ring.config.version {
let ring = Ring::new(adv.clone(), self.replication_factor);
update_ring.send(Arc::new(ring))?;
drop(update_ring);
let self2 = self.clone();
let adv2 = adv.clone();
self.background.spawn_cancellable(async move {
self2
.rpc
.broadcast(
&self2.system_endpoint,
SystemRpc::AdvertiseConfig(adv2),
RequestStrategy::with_priority(PRIO_NORMAL),
)
.await;
Ok(())
});
self.background.spawn(self.clone().save_network_config());
}
Ok(SystemRpc::Ok)
}
async fn discovery_loop(&self, mut stop_signal: watch::Receiver<bool>) {
/* TODO
let consul_config = match (&self.consul_host, &self.consul_service_name) {
(Some(ch), Some(csn)) => Some((ch.clone(), csn.clone())),
_ => None,
};
*/
while !*stop_signal.borrow() {
let not_configured = self.ring.borrow().config.members.is_empty();
let no_peers = self.fullmesh.get_peer_list().len() < self.replication_factor;
let bad_peers = self
.fullmesh
.get_peer_list()
.iter()
.filter(|p| p.is_up())
.count() != self.ring.borrow().config.members.len();
if not_configured || no_peers || bad_peers {
info!("Doing a bootstrap/discovery step (not_configured: {}, no_peers: {}, bad_peers: {})", not_configured, no_peers, bad_peers);
let ping_list = self.bootstrap_peers.clone();
/*
*TODO bring this back: persisted list of peers
if let Ok(peers) = self.persist_status.load_async().await {
ping_list.extend(peers.iter().map(|x| (x.addr, Some(x.id))));
}
*/
/*
* TODO bring this back: get peers from consul
if let Some((consul_host, consul_service_name)) = &consul_config {
match get_consul_nodes(consul_host, consul_service_name).await {
Ok(node_list) => {
ping_list.extend(node_list.iter().map(|a| (*a, None)));
}
Err(e) => {
warn!("Could not retrieve node list from Consul: {}", e);
}
}
}
*/
for (node_id, node_addr) in ping_list {
tokio::spawn(self.netapp.clone().try_connect(node_addr, node_id));
}
}
let restart_at = tokio::time::sleep(DISCOVERY_INTERVAL);
select! {
_ = restart_at.fuse() => {},
_ = stop_signal.changed().fuse() => {},
}
}
}
async fn pull_config(self: Arc<Self>, peer: NodeID) {
let resp = self
.rpc
.call(
&self.system_endpoint,
peer,
SystemRpc::PullConfig,
RequestStrategy::with_priority(PRIO_HIGH).with_timeout(PING_TIMEOUT),
)
.await;
if let Ok(SystemRpc::AdvertiseConfig(config)) = resp {
let _: Result<_, _> = self.handle_advertise_config(&config).await;
}
}
}
#[async_trait]
impl EndpointHandler<SystemRpc> for System {
async fn handle(self: &Arc<Self>, msg: &SystemRpc, _from: NodeID) -> SystemRpc {
let resp = match msg {
SystemRpc::PullConfig => Ok(self.handle_pull_config()),
SystemRpc::AdvertiseConfig(adv) => self.clone().handle_advertise_config(&adv).await,
SystemRpc::GetKnownNodes => {
let known_nodes = self
.fullmesh
.get_peer_list()
.iter()
.map(|n| (n.id, n.addr, n.is_up()))
.collect::<Vec<_>>();
Ok(SystemRpc::ReturnKnownNodes(known_nodes))
}
_ => Err(Error::BadRpc("Unexpected RPC message".to_string())),
};
match resp {
Ok(r) => r,
Err(e) => SystemRpc::Error(format!("{}", e)),
}
}
}

View file

@ -1,140 +0,0 @@
use core::future::Future;
use core::task::{Context, Poll};
use std::pin::Pin;
use std::sync::Arc;
use std::{fs, io};
use futures_util::future::*;
use hyper::client::connect::Connection;
use hyper::client::HttpConnector;
use hyper::service::Service;
use hyper::Uri;
use hyper_rustls::MaybeHttpsStream;
use rustls::internal::pemfile;
use tokio::io::{AsyncRead, AsyncWrite};
use tokio_rustls::TlsConnector;
use webpki::DNSNameRef;
use garage_util::error::Error;
pub fn load_certs(filename: &str) -> Result<Vec<rustls::Certificate>, Error> {
let certfile = fs::File::open(&filename)?;
let mut reader = io::BufReader::new(certfile);
let certs = pemfile::certs(&mut reader).map_err(|_| {
Error::Message(format!(
"Could not deecode certificates from file: {}",
filename
))
})?;
if certs.is_empty() {
return Err(Error::Message(format!(
"Invalid certificate file: {}",
filename
)));
}
Ok(certs)
}
pub fn load_private_key(filename: &str) -> Result<rustls::PrivateKey, Error> {
let keydata = fs::read_to_string(filename)?;
let mut buf1 = keydata.as_bytes();
let rsa_keys = pemfile::rsa_private_keys(&mut buf1).unwrap_or_default();
let mut buf2 = keydata.as_bytes();
let pkcs8_keys = pemfile::pkcs8_private_keys(&mut buf2).unwrap_or_default();
let mut keys = rsa_keys;
keys.extend(pkcs8_keys.into_iter());
if keys.len() != 1 {
return Err(Error::Message(format!(
"Invalid private key file: {} ({} private keys)",
filename,
keys.len()
)));
}
Ok(keys[0].clone())
}
// ---- AWFUL COPYPASTA FROM HYPER-RUSTLS connector.rs
// ---- ALWAYS USE `garage` AS HOSTNAME FOR TLS VERIFICATION
#[derive(Clone)]
pub struct HttpsConnectorFixedDnsname<T> {
http: T,
tls_config: Arc<rustls::ClientConfig>,
fixed_dnsname: &'static str,
}
type BoxError = Box<dyn std::error::Error + Send + Sync>;
impl HttpsConnectorFixedDnsname<HttpConnector> {
pub fn new(mut tls_config: rustls::ClientConfig, fixed_dnsname: &'static str) -> Self {
let mut http = HttpConnector::new();
http.enforce_http(false);
tls_config.alpn_protocols = vec![b"h2".to_vec(), b"http/1.1".to_vec()];
Self {
http,
tls_config: Arc::new(tls_config),
fixed_dnsname,
}
}
}
impl<T> Service<Uri> for HttpsConnectorFixedDnsname<T>
where
T: Service<Uri>,
T::Response: Connection + AsyncRead + AsyncWrite + Send + Unpin + 'static,
T::Future: Send + 'static,
T::Error: Into<BoxError>,
{
type Response = MaybeHttpsStream<T::Response>;
type Error = BoxError;
#[allow(clippy::type_complexity)]
type Future =
Pin<Box<dyn Future<Output = Result<MaybeHttpsStream<T::Response>, BoxError>> + Send>>;
fn poll_ready(&mut self, cx: &mut Context<'_>) -> Poll<Result<(), Self::Error>> {
match self.http.poll_ready(cx) {
Poll::Ready(Ok(())) => Poll::Ready(Ok(())),
Poll::Ready(Err(e)) => Poll::Ready(Err(e.into())),
Poll::Pending => Poll::Pending,
}
}
fn call(&mut self, dst: Uri) -> Self::Future {
let is_https = dst.scheme_str() == Some("https");
if !is_https {
let connecting_future = self.http.call(dst);
let f = async move {
let tcp = connecting_future.await.map_err(Into::into)?;
Ok(MaybeHttpsStream::Http(tcp))
};
f.boxed()
} else {
let cfg = self.tls_config.clone();
let connecting_future = self.http.call(dst);
let dnsname =
DNSNameRef::try_from_ascii_str(self.fixed_dnsname).expect("Invalid fixed dnsname");
let f = async move {
let tcp = connecting_future.await.map_err(Into::into)?;
let connector = TlsConnector::from(cfg);
let tls = connector
.connect(dnsname, tcp)
.await
.map_err(|e| io::Error::new(io::ErrorKind::Other, e))?;
Ok(MaybeHttpsStream::Https(tls))
};
f.boxed()
}
}
}

View file

@ -1,6 +1,6 @@
[package]
name = "garage_table"
version = "0.3.0"
version = "0.4.0"
authors = ["Alex Auvolat <alex@adnab.me>"]
edition = "2018"
license = "AGPL-3.0"
@ -13,9 +13,10 @@ path = "lib.rs"
# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
[dependencies]
garage_rpc = { version = "0.3.0", path = "../rpc" }
garage_util = { version = "0.3.0", path = "../util" }
garage_rpc = { version = "0.4.0", path = "../rpc" }
garage_util = { version = "0.4.0", path = "../util" }
async-trait = "0.1.7"
bytes = "1.0"
hexdump = "0.1"
log = "0.4"
@ -30,4 +31,3 @@ serde_bytes = "0.11"
futures = "0.3"
futures-util = "0.3"
tokio = { version = "1.0", default-features = false, features = ["rt", "rt-multi-thread", "io-util", "net", "time", "macros", "sync", "signal", "fs"] }

View file

@ -9,7 +9,7 @@ use tokio::sync::Notify;
use garage_util::data::*;
use garage_util::error::*;
use garage_rpc::membership::System;
use garage_rpc::system::System;
use crate::crdt::Crdt;
use crate::replication::*;

View file

@ -2,6 +2,7 @@ use std::collections::HashMap;
use std::sync::Arc;
use std::time::Duration;
use async_trait::async_trait;
use serde::{Deserialize, Serialize};
use serde_bytes::ByteBuf;
@ -13,9 +14,8 @@ use tokio::sync::watch;
use garage_util::data::*;
use garage_util::error::Error;
use garage_rpc::membership::System;
use garage_rpc::rpc_client::*;
use garage_rpc::rpc_server::*;
use garage_rpc::system::System;
use garage_rpc::*;
use crate::data::*;
use crate::replication::*;
@ -24,11 +24,11 @@ use crate::schema::*;
const TABLE_GC_BATCH_SIZE: usize = 1024;
const TABLE_GC_RPC_TIMEOUT: Duration = Duration::from_secs(30);
pub struct TableGc<F: TableSchema, R: TableReplication> {
pub struct TableGc<F: TableSchema + 'static, R: TableReplication + 'static> {
system: Arc<System>,
data: Arc<TableData<F, R>>,
rpc_client: Arc<RpcClient<GcRpc>>,
endpoint: Arc<Endpoint<GcRpc, Self>>,
}
#[derive(Serialize, Deserialize)]
@ -36,30 +36,30 @@ enum GcRpc {
Update(Vec<ByteBuf>),
DeleteIfEqualHash(Vec<(ByteBuf, Hash)>),
Ok,
Error(String),
}
impl RpcMessage for GcRpc {}
impl Message for GcRpc {
type Response = GcRpc;
}
impl<F, R> TableGc<F, R>
where
F: TableSchema + 'static,
R: TableReplication + 'static,
{
pub(crate) fn launch(
system: Arc<System>,
data: Arc<TableData<F, R>>,
rpc_server: &mut RpcServer,
) -> Arc<Self> {
let rpc_path = format!("table_{}/gc", data.name);
let rpc_client = system.rpc_client::<GcRpc>(&rpc_path);
pub(crate) fn launch(system: Arc<System>, data: Arc<TableData<F, R>>) -> Arc<Self> {
let endpoint = system
.netapp
.endpoint(format!("garage_table/gc.rs/Rpc:{}", data.name));
let gc = Arc::new(Self {
system: system.clone(),
data: data.clone(),
rpc_client,
endpoint,
});
gc.register_handler(rpc_server, rpc_path);
gc.endpoint.set_handler(gc.clone());
let gc1 = gc.clone();
system.background.spawn_worker(
@ -168,7 +168,7 @@ where
async fn try_send_and_delete(
&self,
nodes: Vec<Uuid>,
nodes: Vec<NodeID>,
items: Vec<(ByteBuf, Hash, ByteBuf)>,
) -> Result<(), Error> {
let n_items = items.len();
@ -180,11 +180,15 @@ where
deletes.push((k, vhash));
}
self.rpc_client
self.system
.rpc
.try_call_many(
&self.endpoint,
&nodes[..],
GcRpc::Update(updates),
RequestStrategy::with_quorum(nodes.len()).with_timeout(TABLE_GC_RPC_TIMEOUT),
RequestStrategy::with_priority(PRIO_BACKGROUND)
.with_quorum(nodes.len())
.with_timeout(TABLE_GC_RPC_TIMEOUT),
)
.await?;
@ -193,11 +197,15 @@ where
self.data.name, n_items
);
self.rpc_client
self.system
.rpc
.try_call_many(
&self.endpoint,
&nodes[..],
GcRpc::DeleteIfEqualHash(deletes.clone()),
RequestStrategy::with_quorum(nodes.len()).with_timeout(TABLE_GC_RPC_TIMEOUT),
RequestStrategy::with_priority(PRIO_BACKGROUND)
.with_quorum(nodes.len())
.with_timeout(TABLE_GC_RPC_TIMEOUT),
)
.await?;
@ -217,24 +225,7 @@ where
Ok(())
}
// ---- RPC HANDLER ----
fn register_handler(self: &Arc<Self>, rpc_server: &mut RpcServer, path: String) {
let self2 = self.clone();
rpc_server.add_handler::<GcRpc, _, _>(path, move |msg, _addr| {
let self2 = self2.clone();
async move { self2.handle_rpc(&msg).await }
});
let self2 = self.clone();
self.rpc_client
.set_local_handler(self.system.id, move |msg| {
let self2 = self2.clone();
async move { self2.handle_rpc(&msg).await }
});
}
async fn handle_rpc(self: &Arc<Self>, message: &GcRpc) -> Result<GcRpc, Error> {
async fn handle_rpc(&self, message: &GcRpc) -> Result<GcRpc, Error> {
match message {
GcRpc::Update(items) => {
self.data.update_many(items)?;
@ -251,3 +242,16 @@ where
}
}
}
#[async_trait]
impl<F, R> EndpointHandler<GcRpc> for TableGc<F, R>
where
F: TableSchema + 'static,
R: TableReplication + 'static,
{
async fn handle(self: &Arc<Self>, message: &GcRpc, _from: NodeID) -> GcRpc {
self.handle_rpc(message)
.await
.unwrap_or_else(|e| GcRpc::Error(format!("{}", e)))
}
}

View file

@ -1,7 +1,8 @@
use std::sync::Arc;
use garage_rpc::membership::System;
use garage_rpc::ring::*;
use garage_rpc::system::System;
use garage_rpc::NodeID;
use garage_util::data::*;
use crate::replication::*;
@ -19,16 +20,20 @@ pub struct TableFullReplication {
}
impl TableReplication for TableFullReplication {
fn read_nodes(&self, _hash: &Hash) -> Vec<Uuid> {
fn read_nodes(&self, _hash: &Hash) -> Vec<NodeID> {
vec![self.system.id]
}
fn read_quorum(&self) -> usize {
1
}
fn write_nodes(&self, _hash: &Hash) -> Vec<Uuid> {
fn write_nodes(&self, _hash: &Hash) -> Vec<NodeID> {
let ring = self.system.ring.borrow();
ring.config.members.keys().cloned().collect::<Vec<_>>()
ring.config
.members
.keys()
.map(|id| NodeID::from_slice(id.as_slice()).unwrap())
.collect::<Vec<_>>()
}
fn write_quorum(&self) -> usize {
let nmembers = self.system.ring.borrow().config.members.len();

View file

@ -1,5 +1,5 @@
use garage_rpc::ring::*;
use garage_rpc::NodeID;
use garage_util::data::*;
/// Trait to describe how a table shall be replicated
@ -8,12 +8,12 @@ pub trait TableReplication: Send + Sync {
// To understand various replication methods
/// Which nodes to send read requests to
fn read_nodes(&self, hash: &Hash) -> Vec<Uuid>;
fn read_nodes(&self, hash: &Hash) -> Vec<NodeID>;
/// Responses needed to consider a read succesfull
fn read_quorum(&self) -> usize;
/// Which nodes to send writes to
fn write_nodes(&self, hash: &Hash) -> Vec<Uuid>;
fn write_nodes(&self, hash: &Hash) -> Vec<NodeID>;
/// Responses needed to consider a write succesfull
fn write_quorum(&self) -> usize;
fn max_write_errors(&self) -> usize;

View file

@ -1,7 +1,8 @@
use std::sync::Arc;
use garage_rpc::membership::System;
use garage_rpc::ring::*;
use garage_rpc::system::System;
use garage_rpc::NodeID;
use garage_util::data::*;
use crate::replication::*;
@ -25,7 +26,7 @@ pub struct TableShardedReplication {
}
impl TableReplication for TableShardedReplication {
fn read_nodes(&self, hash: &Hash) -> Vec<Uuid> {
fn read_nodes(&self, hash: &Hash) -> Vec<NodeID> {
let ring = self.system.ring.borrow();
ring.get_nodes(&hash, self.replication_factor)
}
@ -33,7 +34,7 @@ impl TableReplication for TableShardedReplication {
self.read_quorum
}
fn write_nodes(&self, hash: &Hash) -> Vec<Uuid> {
fn write_nodes(&self, hash: &Hash) -> Vec<NodeID> {
let ring = self.system.ring.borrow();
ring.get_nodes(&hash, self.replication_factor)
}

View file

@ -2,6 +2,7 @@ use std::collections::VecDeque;
use std::sync::{Arc, Mutex};
use std::time::{Duration, Instant};
use async_trait::async_trait;
use futures::select;
use futures_util::future::*;
use futures_util::stream::*;
@ -13,10 +14,9 @@ use tokio::sync::{mpsc, watch};
use garage_util::data::*;
use garage_util::error::Error;
use garage_rpc::membership::System;
use garage_rpc::ring::*;
use garage_rpc::rpc_client::*;
use garage_rpc::rpc_server::*;
use garage_rpc::system::System;
use garage_rpc::*;
use crate::data::*;
use crate::merkle::*;
@ -28,13 +28,13 @@ const TABLE_SYNC_RPC_TIMEOUT: Duration = Duration::from_secs(30);
// Do anti-entropy every 10 minutes
const ANTI_ENTROPY_INTERVAL: Duration = Duration::from_secs(10 * 60);
pub struct TableSyncer<F: TableSchema, R: TableReplication> {
pub struct TableSyncer<F: TableSchema + 'static, R: TableReplication + 'static> {
system: Arc<System>,
data: Arc<TableData<F, R>>,
merkle: Arc<MerkleUpdater<F, R>>,
todo: Mutex<SyncTodo>,
rpc_client: Arc<RpcClient<SyncRpc>>,
endpoint: Arc<Endpoint<SyncRpc, Self>>,
}
#[derive(Serialize, Deserialize)]
@ -45,9 +45,12 @@ pub(crate) enum SyncRpc {
Node(MerkleNodeKey, MerkleNode),
Items(Vec<Arc<ByteBuf>>),
Ok,
Error(String),
}
impl RpcMessage for SyncRpc {}
impl Message for SyncRpc {
type Response = SyncRpc;
}
struct SyncTodo {
todo: Vec<TodoPartition>,
@ -72,10 +75,10 @@ where
system: Arc<System>,
data: Arc<TableData<F, R>>,
merkle: Arc<MerkleUpdater<F, R>>,
rpc_server: &mut RpcServer,
) -> Arc<Self> {
let rpc_path = format!("table_{}/sync", data.name);
let rpc_client = system.rpc_client::<SyncRpc>(&rpc_path);
let endpoint = system
.netapp
.endpoint(format!("garage_table/sync.rs/Rpc:{}", data.name));
let todo = SyncTodo { todo: vec![] };
@ -84,10 +87,10 @@ where
data: data.clone(),
merkle,
todo: Mutex::new(todo),
rpc_client,
endpoint,
});
syncer.register_handler(rpc_server, rpc_path);
syncer.endpoint.set_handler(syncer.clone());
let (busy_tx, busy_rx) = mpsc::unbounded_channel();
@ -112,21 +115,6 @@ where
syncer
}
fn register_handler(self: &Arc<Self>, rpc_server: &mut RpcServer, path: String) {
let self2 = self.clone();
rpc_server.add_handler::<SyncRpc, _, _>(path, move |msg, _addr| {
let self2 = self2.clone();
async move { self2.handle_rpc(&msg).await }
});
let self2 = self.clone();
self.rpc_client
.set_local_handler(self.system.id, move |msg| {
let self2 = self2.clone();
async move { self2.handle_rpc(&msg).await }
});
}
async fn watcher_task(
self: Arc<Self>,
mut must_exit: watch::Receiver<bool>,
@ -317,15 +305,19 @@ where
async fn offload_items(
self: &Arc<Self>,
items: &[(Vec<u8>, Arc<ByteBuf>)],
nodes: &[Uuid],
nodes: &[NodeID],
) -> Result<(), Error> {
let values = items.iter().map(|(_k, v)| v.clone()).collect::<Vec<_>>();
self.rpc_client
self.system
.rpc
.try_call_many(
&self.endpoint,
nodes,
SyncRpc::Items(values),
RequestStrategy::with_quorum(nodes.len()).with_timeout(TABLE_SYNC_RPC_TIMEOUT),
RequestStrategy::with_priority(PRIO_BACKGROUND)
.with_quorum(nodes.len())
.with_timeout(TABLE_SYNC_RPC_TIMEOUT),
)
.await?;
@ -362,7 +354,7 @@ where
async fn do_sync_with(
self: Arc<Self>,
partition: TodoPartition,
who: Uuid,
who: NodeID,
must_exit: watch::Receiver<bool>,
) -> Result<(), Error> {
let (root_ck_key, root_ck) = self.get_root_ck(partition.partition)?;
@ -378,11 +370,14 @@ where
// Check if they have the same root checksum
// If so, do nothing.
let root_resp = self
.rpc_client
.system
.rpc
.call(
&self.endpoint,
who,
SyncRpc::RootCkHash(partition.partition, root_ck_hash),
TABLE_SYNC_RPC_TIMEOUT,
RequestStrategy::with_priority(PRIO_BACKGROUND)
.with_timeout(TABLE_SYNC_RPC_TIMEOUT),
)
.await?;
@ -430,8 +425,15 @@ where
// Get Merkle node for this tree position at remote node
// and compare it with local node
let remote_node = match self
.rpc_client
.call(who, SyncRpc::GetNode(key.clone()), TABLE_SYNC_RPC_TIMEOUT)
.system
.rpc
.call(
&self.endpoint,
who,
SyncRpc::GetNode(key.clone()),
RequestStrategy::with_priority(PRIO_BACKGROUND)
.with_timeout(TABLE_SYNC_RPC_TIMEOUT),
)
.await?
{
SyncRpc::Node(_, node) => node,
@ -478,7 +480,7 @@ where
Ok(())
}
async fn send_items(&self, who: Uuid, item_value_list: Vec<Vec<u8>>) -> Result<(), Error> {
async fn send_items(&self, who: NodeID, item_value_list: Vec<Vec<u8>>) -> Result<(), Error> {
info!(
"({}) Sending {} items to {:?}",
self.data.name,
@ -492,8 +494,15 @@ where
.collect::<Vec<_>>();
let rpc_resp = self
.rpc_client
.call(who, SyncRpc::Items(values), TABLE_SYNC_RPC_TIMEOUT)
.system
.rpc
.call(
&self.endpoint,
who,
SyncRpc::Items(values),
RequestStrategy::with_priority(PRIO_BACKGROUND)
.with_timeout(TABLE_SYNC_RPC_TIMEOUT),
)
.await?;
if let SyncRpc::Ok = rpc_resp {
Ok(())
@ -506,7 +515,6 @@ where
}
// ======= SYNCHRONIZATION PROCEDURE -- RECEIVER SIDE ======
async fn handle_rpc(self: &Arc<Self>, message: &SyncRpc) -> Result<SyncRpc, Error> {
match message {
SyncRpc::RootCkHash(range, h) => {
@ -527,6 +535,19 @@ where
}
}
#[async_trait]
impl<F, R> EndpointHandler<SyncRpc> for TableSyncer<F, R>
where
F: TableSchema + 'static,
R: TableReplication + 'static,
{
async fn handle(self: &Arc<Self>, message: &SyncRpc, _from: NodeID) -> SyncRpc {
self.handle_rpc(message)
.await
.unwrap_or_else(|e| SyncRpc::Error(format!("{}", e)))
}
}
impl SyncTodo {
fn add_full_sync<F: TableSchema, R: TableReplication>(
&mut self,

View file

@ -2,6 +2,7 @@ use std::collections::{BTreeMap, HashMap};
use std::sync::Arc;
use std::time::Duration;
use async_trait::async_trait;
use futures::stream::*;
use serde::{Deserialize, Serialize};
use serde_bytes::ByteBuf;
@ -9,9 +10,8 @@ use serde_bytes::ByteBuf;
use garage_util::data::*;
use garage_util::error::Error;
use garage_rpc::membership::System;
use garage_rpc::rpc_client::*;
use garage_rpc::rpc_server::*;
use garage_rpc::system::System;
use garage_rpc::*;
use crate::crdt::Crdt;
use crate::data::*;
@ -23,17 +23,18 @@ use crate::sync::*;
const TABLE_RPC_TIMEOUT: Duration = Duration::from_secs(10);
pub struct Table<F: TableSchema, R: TableReplication> {
pub struct Table<F: TableSchema + 'static, R: TableReplication + 'static> {
pub system: Arc<System>,
pub data: Arc<TableData<F, R>>,
pub merkle_updater: Arc<MerkleUpdater<F, R>>,
pub syncer: Arc<TableSyncer<F, R>>,
rpc_client: Arc<RpcClient<TableRpc<F>>>,
endpoint: Arc<Endpoint<TableRpc<F>, Self>>,
}
#[derive(Serialize, Deserialize)]
pub(crate) enum TableRpc<F: TableSchema> {
Ok,
Error(String),
ReadEntry(F::P, F::S),
ReadEntryResponse(Option<ByteBuf>),
@ -44,7 +45,9 @@ pub(crate) enum TableRpc<F: TableSchema> {
Update(Vec<Arc<ByteBuf>>),
}
impl<F: TableSchema> RpcMessage for TableRpc<F> {}
impl<F: TableSchema> Message for TableRpc<F> {
type Response = TableRpc<F>;
}
impl<F, R> Table<F, R>
where
@ -59,32 +62,27 @@ where
system: Arc<System>,
db: &sled::Db,
name: String,
rpc_server: &mut RpcServer,
) -> Arc<Self> {
let rpc_path = format!("table_{}", name);
let rpc_client = system.rpc_client::<TableRpc<F>>(&rpc_path);
let endpoint = system
.netapp
.endpoint(format!("garage_table/table.rs/Rpc:{}", name));
let data = TableData::new(system.clone(), name, instance, replication, db);
let merkle_updater = MerkleUpdater::launch(&system.background, data.clone());
let syncer = TableSyncer::launch(
system.clone(),
data.clone(),
merkle_updater.clone(),
rpc_server,
);
TableGc::launch(system.clone(), data.clone(), rpc_server);
let syncer = TableSyncer::launch(system.clone(), data.clone(), merkle_updater.clone());
TableGc::launch(system.clone(), data.clone());
let table = Arc::new(Self {
system,
data,
merkle_updater,
syncer,
rpc_client,
endpoint,
});
table.clone().register_handler(rpc_server, rpc_path);
table.endpoint.set_handler(table.clone());
table
}
@ -97,11 +95,14 @@ where
let e_enc = Arc::new(ByteBuf::from(rmp_to_vec_all_named(e)?));
let rpc = TableRpc::<F>::Update(vec![e_enc]);
self.rpc_client
self.system
.rpc
.try_call_many(
&self.endpoint,
&who[..],
rpc,
RequestStrategy::with_quorum(self.data.replication.write_quorum())
RequestStrategy::with_priority(PRIO_NORMAL)
.with_quorum(self.data.replication.write_quorum())
.with_timeout(TABLE_RPC_TIMEOUT),
)
.await?;
@ -123,7 +124,16 @@ where
let call_futures = call_list.drain().map(|(node, entries)| async move {
let rpc = TableRpc::<F>::Update(entries);
let resp = self.rpc_client.call(node, rpc, TABLE_RPC_TIMEOUT).await?;
let resp = self
.system
.rpc
.call(
&self.endpoint,
node,
rpc,
RequestStrategy::with_priority(PRIO_NORMAL).with_timeout(TABLE_RPC_TIMEOUT),
)
.await?;
Ok::<_, Error>((node, resp))
});
let mut resps = call_futures.collect::<FuturesUnordered<_>>();
@ -152,11 +162,14 @@ where
let rpc = TableRpc::<F>::ReadEntry(partition_key.clone(), sort_key.clone());
let resps = self
.rpc_client
.system
.rpc
.try_call_many(
&self.endpoint,
&who[..],
rpc,
RequestStrategy::with_quorum(self.data.replication.read_quorum())
RequestStrategy::with_priority(PRIO_NORMAL)
.with_quorum(self.data.replication.read_quorum())
.with_timeout(TABLE_RPC_TIMEOUT)
.interrupt_after_quorum(true),
)
@ -208,11 +221,14 @@ where
let rpc = TableRpc::<F>::ReadRange(partition_key.clone(), begin_sort_key, filter, limit);
let resps = self
.rpc_client
.system
.rpc
.try_call_many(
&self.endpoint,
&who[..],
rpc,
RequestStrategy::with_quorum(self.data.replication.read_quorum())
RequestStrategy::with_priority(PRIO_NORMAL)
.with_quorum(self.data.replication.read_quorum())
.with_timeout(TABLE_RPC_TIMEOUT)
.interrupt_after_quorum(true),
)
@ -261,36 +277,25 @@ where
// =============== UTILITY FUNCTION FOR CLIENT OPERATIONS ===============
async fn repair_on_read(&self, who: &[Uuid], what: F::E) -> Result<(), Error> {
async fn repair_on_read(&self, who: &[NodeID], what: F::E) -> Result<(), Error> {
let what_enc = Arc::new(ByteBuf::from(rmp_to_vec_all_named(&what)?));
self.rpc_client
self.system
.rpc
.try_call_many(
&self.endpoint,
who,
TableRpc::<F>::Update(vec![what_enc]),
RequestStrategy::with_quorum(who.len()).with_timeout(TABLE_RPC_TIMEOUT),
RequestStrategy::with_priority(PRIO_NORMAL)
.with_quorum(who.len())
.with_timeout(TABLE_RPC_TIMEOUT),
)
.await?;
Ok(())
}
// =============== HANDLERS FOR RPC OPERATIONS (SERVER SIDE) ==============
fn register_handler(self: Arc<Self>, rpc_server: &mut RpcServer, path: String) {
let self2 = self.clone();
rpc_server.add_handler::<TableRpc<F>, _, _>(path, move |msg, _addr| {
let self2 = self2.clone();
async move { self2.handle(&msg).await }
});
let self2 = self.clone();
self.rpc_client
.set_local_handler(self.system.id, move |msg| {
let self2 = self2.clone();
async move { self2.handle(&msg).await }
});
}
async fn handle(self: &Arc<Self>, msg: &TableRpc<F>) -> Result<TableRpc<F>, Error> {
// ====== RPC HANDLER =====
//
async fn handle_rpc(self: &Arc<Self>, msg: &TableRpc<F>) -> Result<TableRpc<F>, Error> {
match msg {
TableRpc::ReadEntry(key, sort_key) => {
let value = self.data.read_entry(key, sort_key)?;
@ -308,3 +313,16 @@ where
}
}
}
#[async_trait]
impl<F, R> EndpointHandler<TableRpc<F>> for Table<F, R>
where
F: TableSchema + 'static,
R: TableReplication + 'static,
{
async fn handle(self: &Arc<Self>, msg: &TableRpc<F>, _from: NodeID) -> TableRpc<F> {
self.handle_rpc(msg)
.await
.unwrap_or_else(|e| TableRpc::<F>::Error(format!("{}", e)))
}
}

View file

@ -1,6 +1,6 @@
[package]
name = "garage_util"
version = "0.3.0"
version = "0.4.0"
authors = ["Alex Auvolat <alex@adnab.me>"]
edition = "2018"
license = "AGPL-3.0"
@ -32,7 +32,6 @@ toml = "0.5"
futures = "0.3"
tokio = { version = "1.0", default-features = false, features = ["rt", "rt-multi-thread", "io-util", "net", "time", "macros", "sync", "signal", "fs"] }
netapp = { version = "0.3.0", git = "https://git.deuxfleurs.fr/lx/netapp" }
http = "0.2"
hyper = "0.14"
rustls = "0.19"
webpki = "0.21"

View file

@ -3,8 +3,11 @@ use std::io::Read;
use std::net::SocketAddr;
use std::path::PathBuf;
use serde::de::Error as SerdeError;
use serde::{de, Deserialize};
use netapp::NodeID;
use crate::error::Error;
/// Represent the whole configuration
@ -26,20 +29,20 @@ pub struct Config {
// (we can add more aliases for this later)
pub replication_mode: String,
/// RPC secret key: 32 bytes hex encoded
pub rpc_secret: String,
/// Address to bind for RPC
pub rpc_bind_addr: SocketAddr,
/// Bootstrap peers RPC address
#[serde(deserialize_with = "deserialize_vec_addr")]
pub bootstrap_peers: Vec<SocketAddr>,
pub bootstrap_peers: Vec<(NodeID, SocketAddr)>,
/// Consule host to connect to to discover more peers
pub consul_host: Option<String>,
/// Consul service name to use
pub consul_service_name: Option<String>,
/// Configuration for RPC TLS
pub rpc_tls: Option<TlsConfig>,
/// Max number of concurrent RPC request
#[serde(default = "default_max_concurrent_rpc_requests")]
pub max_concurrent_rpc_requests: usize,
@ -59,17 +62,6 @@ pub struct Config {
pub s3_web: WebConfig,
}
/// Configuration for RPC TLS
#[derive(Deserialize, Debug, Clone)]
pub struct TlsConfig {
/// Path to certificate autority used for all nodes
pub ca_cert: String,
/// Path to public certificate for this node
pub node_cert: String,
/// Path to private key for this node
pub node_key: String,
}
/// Configuration for S3 api
#[derive(Deserialize, Debug, Clone)]
pub struct ApiConfig {
@ -115,27 +107,32 @@ pub fn read_config(config_file: PathBuf) -> Result<Config, Error> {
Ok(toml::from_str(&config)?)
}
fn deserialize_vec_addr<'de, D>(deserializer: D) -> Result<Vec<SocketAddr>, D::Error>
fn deserialize_vec_addr<'de, D>(deserializer: D) -> Result<Vec<(NodeID, SocketAddr)>, D::Error>
where
D: de::Deserializer<'de>,
{
use std::net::ToSocketAddrs;
Ok(<Vec<&str>>::deserialize(deserializer)?
.iter()
.filter_map(|&name| {
name.to_socket_addrs()
.map(|iter| (name, iter))
.map_err(|_| warn!("Error resolving \"{}\"", name))
.ok()
})
.map(|(name, iter)| {
let v = iter.collect::<Vec<_>>();
if v.is_empty() {
warn!("Error resolving \"{}\"", name)
let mut ret = vec![];
for peer in <Vec<&str>>::deserialize(deserializer)? {
let delim = peer
.find('@')
.ok_or_else(|| D::Error::custom("Invalid bootstrap peer: public key not specified"))?;
let (key, host) = peer.split_at(delim);
let pubkey = NodeID::from_slice(&hex::decode(&key).map_err(D::Error::custom)?)
.ok_or_else(|| D::Error::custom("Invalid bootstrap peer public key"))?;
let hosts = host[1..]
.to_socket_addrs()
.map_err(D::Error::custom)?
.collect::<Vec<_>>();
if hosts.is_empty() {
return Err(D::Error::custom(format!("Error resolving {}", &host[1..])));
}
v
})
.flatten()
.collect())
for host in hosts {
ret.push((pubkey.clone(), host));
}
}
Ok(ret)
}

View file

@ -11,8 +11,8 @@ pub enum RpcError {
#[error(display = "Node is down: {:?}.", _0)]
NodeDown(Uuid),
#[error(display = "Timeout: {}", _0)]
Timeout(#[error(source)] tokio::time::error::Elapsed),
#[error(display = "Timeout")]
Timeout,
#[error(display = "HTTP error: {}", _0)]
Http(#[error(source)] http::Error),
@ -45,11 +45,8 @@ pub enum Error {
#[error(display = "Invalid HTTP header value: {}", _0)]
HttpHeader(#[error(source)] http::header::ToStrError),
#[error(display = "TLS error: {}", _0)]
Tls(#[error(source)] rustls::TLSError),
#[error(display = "PKI error: {}", _0)]
Pki(#[error(source)] webpki::Error),
#[error(display = "Netapp error: {}", _0)]
Netapp(#[error(source)] netapp::error::Error),
#[error(display = "Sled error: {}", _0)]
Sled(#[error(source)] sled::Error),

View file

@ -1,6 +1,6 @@
[package]
name = "garage_web"
version = "0.3.0"
version = "0.4.0"
authors = ["Alex Auvolat <alex@adnab.me>", "Quentin Dufour <quentin@dufour.io>"]
edition = "2018"
license = "AGPL-3.0"
@ -13,10 +13,10 @@ path = "lib.rs"
# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
[dependencies]
garage_api = { version = "0.3.0", path = "../api" }
garage_model = { version = "0.3.0", path = "../model" }
garage_util = { version = "0.3.0", path = "../util" }
garage_table = { version = "0.3.0", path = "../table" }
garage_api = { version = "0.4.0", path = "../api" }
garage_model = { version = "0.4.0", path = "../model" }
garage_util = { version = "0.4.0", path = "../util" }
garage_table = { version = "0.4.0", path = "../table" }
err-derive = "0.3"
idna = "0.2"