replace RPC stack with netapp #123
31 changed files with 1497 additions and 2289 deletions
705
Cargo.lock
generated
705
Cargo.lock
generated
File diff suppressed because it is too large
Load diff
|
@ -1,6 +1,6 @@
|
|||
[package]
|
||||
name = "garage_api"
|
||||
version = "0.3.0"
|
||||
version = "0.4.0"
|
||||
authors = ["Alex Auvolat <alex@adnab.me>"]
|
||||
edition = "2018"
|
||||
license = "AGPL-3.0"
|
||||
|
@ -13,9 +13,9 @@ path = "lib.rs"
|
|||
# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
|
||||
|
||||
[dependencies]
|
||||
garage_model = { version = "0.3.0", path = "../model" }
|
||||
garage_table = { version = "0.3.0", path = "../table" }
|
||||
garage_util = { version = "0.3.0", path = "../util" }
|
||||
garage_model = { version = "0.4.0", path = "../model" }
|
||||
garage_table = { version = "0.4.0", path = "../table" }
|
||||
garage_util = { version = "0.4.0", path = "../util" }
|
||||
|
||||
base64 = "0.13"
|
||||
bytes = "1.0"
|
||||
|
|
|
@ -1,6 +1,6 @@
|
|||
[package]
|
||||
name = "garage"
|
||||
version = "0.3.0"
|
||||
version = "0.4.0"
|
||||
authors = ["Alex Auvolat <alex@adnab.me>"]
|
||||
edition = "2018"
|
||||
license = "AGPL-3.0"
|
||||
|
@ -14,12 +14,12 @@ path = "main.rs"
|
|||
# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
|
||||
|
||||
[dependencies]
|
||||
garage_api = { version = "0.3.0", path = "../api" }
|
||||
garage_model = { version = "0.3.0", path = "../model" }
|
||||
garage_rpc = { version = "0.3.0", path = "../rpc" }
|
||||
garage_table = { version = "0.3.0", path = "../table" }
|
||||
garage_util = { version = "0.3.0", path = "../util" }
|
||||
garage_web = { version = "0.3.0", path = "../web" }
|
||||
garage_api = { version = "0.4.0", path = "../api" }
|
||||
garage_model = { version = "0.4.0", path = "../model" }
|
||||
garage_rpc = { version = "0.4.0", path = "../rpc" }
|
||||
garage_table = { version = "0.4.0", path = "../table" }
|
||||
garage_util = { version = "0.4.0", path = "../util" }
|
||||
garage_web = { version = "0.4.0", path = "../web" }
|
||||
|
||||
bytes = "1.0"
|
||||
git-version = "0.3.4"
|
||||
|
@ -27,6 +27,8 @@ hex = "0.4"
|
|||
log = "0.4"
|
||||
pretty_env_logger = "0.4"
|
||||
rand = "0.8"
|
||||
async-trait = "0.1.7"
|
||||
sodiumoxide = { version = "0.2.5-0", package = "kuska-sodiumoxide" }
|
||||
|
||||
sled = "0.34"
|
||||
|
||||
|
@ -38,3 +40,5 @@ toml = "0.5"
|
|||
futures = "0.3"
|
||||
futures-util = "0.3"
|
||||
tokio = { version = "1.0", default-features = false, features = ["rt", "rt-multi-thread", "io-util", "net", "time", "macros", "sync", "signal", "fs"] }
|
||||
|
||||
netapp = { version = "0.3.0", git = "https://git.deuxfleurs.fr/lx/netapp" }
|
||||
|
|
|
@ -2,6 +2,7 @@ use std::collections::HashMap;
|
|||
use std::fmt::Write;
|
||||
use std::sync::Arc;
|
||||
|
||||
use async_trait::async_trait;
|
||||
use serde::{Deserialize, Serialize};
|
||||
|
||||
use garage_util::error::Error;
|
||||
|
@ -10,8 +11,7 @@ use garage_table::crdt::Crdt;
|
|||
use garage_table::replication::*;
|
||||
use garage_table::*;
|
||||
|
||||
use garage_rpc::rpc_client::*;
|
||||
use garage_rpc::rpc_server::*;
|
||||
use garage_rpc::*;
|
||||
|
||||
use garage_model::bucket_table::*;
|
||||
use garage_model::garage::Garage;
|
||||
|
@ -19,10 +19,8 @@ use garage_model::key_table::*;
|
|||
|
||||
use crate::cli::*;
|
||||
use crate::repair::Repair;
|
||||
use crate::*;
|
||||
|
||||
pub const ADMIN_RPC_TIMEOUT: Duration = Duration::from_secs(30);
|
||||
pub const ADMIN_RPC_PATH: &str = "_admin";
|
||||
pub const ADMIN_RPC_PATH: &str = "garage/admin_rpc.rs/Rpc";
|
||||
|
||||
#[derive(Debug, Serialize, Deserialize)]
|
||||
pub enum AdminRpc {
|
||||
|
@ -33,41 +31,31 @@ pub enum AdminRpc {
|
|||
|
||||
// Replies
|
||||
Ok(String),
|
||||
Error(String),
|
||||
BucketList(Vec<String>),
|
||||
BucketInfo(Bucket),
|
||||
KeyList(Vec<(String, String)>),
|
||||
KeyInfo(Key),
|
||||
}
|
||||
|
||||
impl RpcMessage for AdminRpc {}
|
||||
/// Implements the `Message` trait for `AdminRpc`. The response type is
/// `AdminRpc` itself: the reply variants are part of the same enum as the
/// request variants.
impl Message for AdminRpc {
|
||||
type Response = AdminRpc;
|
||||
}
|
||||
|
||||
pub struct AdminRpcHandler {
|
||||
garage: Arc<Garage>,
|
||||
rpc_client: Arc<RpcClient<AdminRpc>>,
|
||||
endpoint: Arc<Endpoint<AdminRpc, Self>>,
|
||||
}
|
||||
|
||||
impl AdminRpcHandler {
|
||||
pub fn new(garage: Arc<Garage>) -> Arc<Self> {
|
||||
let rpc_client = garage.system.clone().rpc_client::<AdminRpc>(ADMIN_RPC_PATH);
|
||||
Arc::new(Self { garage, rpc_client })
|
||||
let endpoint = garage.system.netapp.endpoint(ADMIN_RPC_PATH.into());
|
||||
let admin = Arc::new(Self { garage, endpoint });
|
||||
admin.endpoint.set_handler(admin.clone());
|
||||
admin
|
||||
}
|
||||
|
||||
pub fn register_handler(self: Arc<Self>, rpc_server: &mut RpcServer) {
|
||||
rpc_server.add_handler::<AdminRpc, _, _>(ADMIN_RPC_PATH.to_string(), move |msg, _addr| {
|
||||
let self2 = self.clone();
|
||||
async move {
|
||||
match msg {
|
||||
AdminRpc::BucketOperation(bo) => self2.handle_bucket_cmd(bo).await,
|
||||
AdminRpc::KeyOperation(ko) => self2.handle_key_cmd(ko).await,
|
||||
AdminRpc::LaunchRepair(opt) => self2.handle_launch_repair(opt).await,
|
||||
AdminRpc::Stats(opt) => self2.handle_stats(opt).await,
|
||||
_ => Err(Error::BadRpc("Invalid RPC".to_string())),
|
||||
}
|
||||
}
|
||||
});
|
||||
}
|
||||
|
||||
async fn handle_bucket_cmd(&self, cmd: BucketOperation) -> Result<AdminRpc, Error> {
|
||||
async fn handle_bucket_cmd(&self, cmd: &BucketOperation) -> Result<AdminRpc, Error> {
|
||||
match cmd {
|
||||
BucketOperation::List => {
|
||||
let bucket_names = self
|
||||
|
@ -187,7 +175,7 @@ impl AdminRpcHandler {
|
|||
}
|
||||
}
|
||||
|
||||
async fn handle_key_cmd(&self, cmd: KeyOperation) -> Result<AdminRpc, Error> {
|
||||
async fn handle_key_cmd(&self, cmd: &KeyOperation) -> Result<AdminRpc, Error> {
|
||||
match cmd {
|
||||
KeyOperation::List => {
|
||||
let key_ids = self
|
||||
|
@ -210,13 +198,13 @@ impl AdminRpcHandler {
|
|||
Ok(AdminRpc::KeyInfo(key))
|
||||
}
|
||||
KeyOperation::New(query) => {
|
||||
let key = Key::new(query.name);
|
||||
let key = Key::new(query.name.clone());
|
||||
self.garage.key_table.insert(&key).await?;
|
||||
Ok(AdminRpc::KeyInfo(key))
|
||||
}
|
||||
KeyOperation::Rename(query) => {
|
||||
let mut key = self.get_existing_key(&query.key_pattern).await?;
|
||||
key.name.update(query.new_name);
|
||||
key.name.update(query.new_name.clone());
|
||||
self.garage.key_table.insert(&key).await?;
|
||||
Ok(AdminRpc::KeyInfo(key))
|
||||
}
|
||||
|
@ -353,17 +341,18 @@ impl AdminRpcHandler {
|
|||
let mut failures = vec![];
|
||||
let ring = self.garage.system.ring.borrow().clone();
|
||||
for node in ring.config.members.keys() {
|
||||
let node = NodeID::from_slice(node.as_slice()).unwrap();
|
||||
if self
|
||||
.rpc_client
|
||||
.endpoint
|
||||
.call(
|
||||
*node,
|
||||
AdminRpc::LaunchRepair(opt_to_send.clone()),
|
||||
ADMIN_RPC_TIMEOUT,
|
||||
&node,
|
||||
&AdminRpc::LaunchRepair(opt_to_send.clone()),
|
||||
PRIO_NORMAL,
|
||||
)
|
||||
.await
|
||||
.is_err()
|
||||
{
|
||||
failures.push(*node);
|
||||
failures.push(node);
|
||||
}
|
||||
}
|
||||
if failures.is_empty() {
|
||||
|
@ -397,14 +386,16 @@ impl AdminRpcHandler {
|
|||
let ring = self.garage.system.ring.borrow().clone();
|
||||
|
||||
for node in ring.config.members.keys() {
|
||||
let node = NodeID::from_slice(node.as_slice()).unwrap();
|
||||
|
||||
let mut opt = opt.clone();
|
||||
opt.all_nodes = false;
|
||||
|
||||
writeln!(&mut ret, "\n======================").unwrap();
|
||||
writeln!(&mut ret, "Stats for node {:?}:", node).unwrap();
|
||||
match self
|
||||
.rpc_client
|
||||
.call(*node, AdminRpc::Stats(opt), ADMIN_RPC_TIMEOUT)
|
||||
.endpoint
|
||||
.call(&node, &AdminRpc::Stats(opt), PRIO_NORMAL)
|
||||
.await
|
||||
{
|
||||
Ok(AdminRpc::Ok(s)) => writeln!(&mut ret, "{}", s).unwrap(),
|
||||
|
@ -495,4 +486,23 @@ impl AdminRpcHandler {
|
|||
.unwrap();
|
||||
writeln!(to, " GC todo queue length: {}", t.data.gc_todo_len()).unwrap();
|
||||
}
|
||||
|
||||
async fn handle_rpc(self: &Arc<Self>, msg: &AdminRpc) -> Result<AdminRpc, Error> {
|
||||
match msg {
|
||||
AdminRpc::BucketOperation(bo) => self.handle_bucket_cmd(bo).await,
|
||||
AdminRpc::KeyOperation(ko) => self.handle_key_cmd(ko).await,
|
||||
AdminRpc::LaunchRepair(opt) => self.handle_launch_repair(opt.clone()).await,
|
||||
AdminRpc::Stats(opt) => self.handle_stats(opt.clone()).await,
|
||||
_ => Err(Error::BadRpc("Invalid RPC".to_string())),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[async_trait]
|
||||
impl EndpointHandler<AdminRpc> for AdminRpcHandler {
|
||||
async fn handle(self: &Arc<Self>, message: &AdminRpc, _from: NodeID) -> AdminRpc {
|
||||
self.handle_rpc(message)
|
||||
.await
|
||||
.unwrap_or_else(|e| AdminRpc::Error(format!("{}", e)))
|
||||
}
|
||||
}
|
||||
|
|
|
@ -1,6 +1,5 @@
|
|||
use std::cmp::max;
|
||||
use std::collections::HashSet;
|
||||
use std::net::SocketAddr;
|
||||
//use std::cmp::max;
|
||||
//use std::collections::HashSet;
|
||||
use std::path::PathBuf;
|
||||
|
||||
use serde::{Deserialize, Serialize};
|
||||
|
@ -8,11 +7,11 @@ use structopt::StructOpt;
|
|||
|
||||
use garage_util::data::Uuid;
|
||||
use garage_util::error::Error;
|
||||
use garage_util::time::*;
|
||||
//use garage_util::time::*;
|
||||
|
||||
use garage_rpc::membership::*;
|
||||
use garage_rpc::ring::*;
|
||||
use garage_rpc::rpc_client::*;
|
||||
use garage_rpc::system::*;
|
||||
use garage_rpc::*;
|
||||
|
||||
use garage_model::bucket_table::*;
|
||||
use garage_model::key_table::*;
|
||||
|
@ -298,54 +297,65 @@ pub struct StatsOpt {
|
|||
|
||||
pub async fn cli_cmd(
|
||||
cmd: Command,
|
||||
membership_rpc_cli: RpcAddrClient<Message>,
|
||||
admin_rpc_cli: RpcAddrClient<AdminRpc>,
|
||||
rpc_host: SocketAddr,
|
||||
system_rpc_endpoint: &Endpoint<SystemRpc, ()>,
|
||||
admin_rpc_endpoint: &Endpoint<AdminRpc, ()>,
|
||||
rpc_host: NodeID,
|
||||
) -> Result<(), Error> {
|
||||
match cmd {
|
||||
Command::Status => cmd_status(membership_rpc_cli, rpc_host).await,
|
||||
Command::Status => cmd_status(system_rpc_endpoint, rpc_host).await,
|
||||
Command::Node(NodeOperation::Configure(configure_opt)) => {
|
||||
cmd_configure(membership_rpc_cli, rpc_host, configure_opt).await
|
||||
cmd_configure(system_rpc_endpoint, rpc_host, configure_opt).await
|
||||
}
|
||||
Command::Node(NodeOperation::Remove(remove_opt)) => {
|
||||
cmd_remove(membership_rpc_cli, rpc_host, remove_opt).await
|
||||
cmd_remove(system_rpc_endpoint, rpc_host, remove_opt).await
|
||||
}
|
||||
Command::Bucket(bo) => {
|
||||
cmd_admin(admin_rpc_cli, rpc_host, AdminRpc::BucketOperation(bo)).await
|
||||
cmd_admin(admin_rpc_endpoint, rpc_host, AdminRpc::BucketOperation(bo)).await
|
||||
}
|
||||
Command::Key(ko) => cmd_admin(admin_rpc_cli, rpc_host, AdminRpc::KeyOperation(ko)).await,
|
||||
Command::Repair(ro) => cmd_admin(admin_rpc_cli, rpc_host, AdminRpc::LaunchRepair(ro)).await,
|
||||
Command::Stats(so) => cmd_admin(admin_rpc_cli, rpc_host, AdminRpc::Stats(so)).await,
|
||||
Command::Key(ko) => {
|
||||
cmd_admin(admin_rpc_endpoint, rpc_host, AdminRpc::KeyOperation(ko)).await
|
||||
}
|
||||
Command::Repair(ro) => {
|
||||
cmd_admin(admin_rpc_endpoint, rpc_host, AdminRpc::LaunchRepair(ro)).await
|
||||
}
|
||||
Command::Stats(so) => cmd_admin(admin_rpc_endpoint, rpc_host, AdminRpc::Stats(so)).await,
|
||||
_ => unreachable!(),
|
||||
}
|
||||
}
|
||||
|
||||
pub async fn cmd_status(
|
||||
rpc_cli: RpcAddrClient<Message>,
|
||||
rpc_host: SocketAddr,
|
||||
) -> Result<(), Error> {
|
||||
pub async fn cmd_status(rpc_cli: &Endpoint<SystemRpc, ()>, rpc_host: NodeID) -> Result<(), Error> {
|
||||
let status = match rpc_cli
|
||||
.call(&rpc_host, &Message::PullStatus, ADMIN_RPC_TIMEOUT)
|
||||
.await??
|
||||
.call(&rpc_host, &SystemRpc::GetKnownNodes, PRIO_NORMAL)
|
||||
.await?
|
||||
{
|
||||
Message::AdvertiseNodesUp(nodes) => nodes,
|
||||
SystemRpc::ReturnKnownNodes(nodes) => nodes,
|
||||
resp => return Err(Error::Message(format!("Invalid RPC response: {:?}", resp))),
|
||||
};
|
||||
let config = match rpc_cli
|
||||
.call(&rpc_host, &Message::PullConfig, ADMIN_RPC_TIMEOUT)
|
||||
.await??
|
||||
.call(&rpc_host, &SystemRpc::PullConfig, PRIO_NORMAL)
|
||||
.await?
|
||||
{
|
||||
Message::AdvertiseConfig(cfg) => cfg,
|
||||
SystemRpc::AdvertiseConfig(cfg) => cfg,
|
||||
resp => return Err(Error::Message(format!("Invalid RPC response: {:?}", resp))),
|
||||
};
|
||||
|
||||
println!("STATUS:");
|
||||
for node in status {
|
||||
println!("{:?}", node);
|
||||
}
|
||||
println!("CONFIG: (v{})", config.version);
|
||||
for (id, node) in config.members {
|
||||
println!("{} {:?}", hex::encode(id.as_slice()), node);
|
||||
}
|
||||
|
||||
/* TODO
|
||||
let (hostname_len, addr_len, tag_len, zone_len) = status
|
||||
.iter()
|
||||
.map(|adv| (adv, config.members.get(&adv.id)))
|
||||
.map(|(adv, cfg)| {
|
||||
.map(|(id, addr, _)| (addr, config.members.get(&adv.id)))
|
||||
.map(|(addr, cfg)| {
|
||||
(
|
||||
adv.state_info.hostname.len(),
|
||||
adv.addr.to_string().len(),
|
||||
8,
|
||||
addr.to_string().len(),
|
||||
cfg.map(|c| c.tag.len()).unwrap_or(0),
|
||||
cfg.map(|c| c.zone.len()).unwrap_or(0),
|
||||
)
|
||||
|
@ -355,13 +365,13 @@ pub async fn cmd_status(
|
|||
});
|
||||
|
||||
println!("Healthy nodes:");
|
||||
for adv in status.iter().filter(|x| x.is_up) {
|
||||
for (id, addr, _) in status.iter().filter(|(id, addr, is_up)| is_up) {
|
||||
if let Some(cfg) = config.members.get(&adv.id) {
|
||||
println!(
|
||||
"{id:?}\t{host}{h_pad}\t{addr}{a_pad}\t[{tag}]{t_pad}\t{zone}{z_pad}\t{capacity}",
|
||||
id = adv.id,
|
||||
host = adv.state_info.hostname,
|
||||
addr = adv.addr,
|
||||
id = id,
|
||||
host = "",
|
||||
addr = addr,
|
||||
tag = cfg.tag,
|
||||
zone = cfg.zone,
|
||||
capacity = cfg.capacity_string(),
|
||||
|
@ -373,36 +383,36 @@ pub async fn cmd_status(
|
|||
} else {
|
||||
println!(
|
||||
"{id:?}\t{h}{h_pad}\t{addr}{a_pad}\tUNCONFIGURED/REMOVED",
|
||||
id = adv.id,
|
||||
h = adv.state_info.hostname,
|
||||
addr = adv.addr,
|
||||
h_pad = " ".repeat(hostname_len - adv.state_info.hostname.len()),
|
||||
a_pad = " ".repeat(addr_len - adv.addr.to_string().len()),
|
||||
id = id,
|
||||
h = "",
|
||||
addr = addr,
|
||||
h_pad = " ".repeat(hostname_len - "".len()),
|
||||
|
||||
a_pad = " ".repeat(addr_len - addr.to_string().len()),
|
||||
);
|
||||
}
|
||||
}
|
||||
|
||||
let status_keys = status.iter().map(|x| x.id).collect::<HashSet<_>>();
|
||||
let failure_case_1 = status.iter().any(|x| !x.is_up);
|
||||
let status_keys = status.iter().map(|(id, _, _)| id).collect::<HashSet<_>>();
|
||||
let failure_case_1 = status.iter().any(|(_, _, is_up)| !is_up);
|
||||
let failure_case_2 = config
|
||||
.members
|
||||
.iter()
|
||||
.any(|(id, _)| !status_keys.contains(id));
|
||||
if failure_case_1 || failure_case_2 {
|
||||
println!("\nFailed nodes:");
|
||||
for adv in status.iter().filter(|x| !x.is_up) {
|
||||
if let Some(cfg) = config.members.get(&adv.id) {
|
||||
for (id, addr) in status.iter().filter(|(_, _, is_up)| !is_up) {
|
||||
if let Some(cfg) = config.members.get(&id) {
|
||||
println!(
|
||||
"{id:?}\t{host}{h_pad}\t{addr}{a_pad}\t[{tag}]{t_pad}\t{zone}{z_pad}\t{capacity}\tlast seen: {last_seen}s ago",
|
||||
id=adv.id,
|
||||
host=adv.state_info.hostname,
|
||||
addr=adv.addr,
|
||||
id=id,
|
||||
host="",
|
||||
addr=addr,
|
||||
tag=cfg.tag,
|
||||
zone=cfg.zone,
|
||||
capacity=cfg.capacity_string(),
|
||||
last_seen=(now_msec() - adv.last_seen) / 1000,
|
||||
h_pad=" ".repeat(hostname_len - adv.state_info.hostname.len()),
|
||||
a_pad=" ".repeat(addr_len - adv.addr.to_string().len()),
|
||||
last_seen=(now_msec() - 0) / 1000,
|
||||
h_pad=" ".repeat(hostname_len - "".len()),
|
||||
a_pad=" ".repeat(addr_len - addr.to_string().len()),
|
||||
t_pad=" ".repeat(tag_len - cfg.tag.len()),
|
||||
z_pad=" ".repeat(zone_len - cfg.zone.len()),
|
||||
);
|
||||
|
@ -411,12 +421,12 @@ pub async fn cmd_status(
|
|||
let (tag_len, zone_len) = config
|
||||
.members
|
||||
.iter()
|
||||
.filter(|(&id, _)| !status.iter().any(|x| x.id == id))
|
||||
.filter(|(&id, _)| !status.iter().any(|(xid, _, _)| xid == id))
|
||||
.map(|(_, cfg)| (cfg.tag.len(), cfg.zone.len()))
|
||||
.fold((0, 0), |(t, z), (mt, mz)| (max(t, mt), max(z, mz)));
|
||||
|
||||
for (id, cfg) in config.members.iter() {
|
||||
if !status.iter().any(|x| x.id == *id) {
|
||||
if !status.iter().any(|(xid, _, _)| xid == *id) {
|
||||
println!(
|
||||
"{id:?}\t{tag}{t_pad}\t{zone}{z_pad}\t{capacity}\tnever seen",
|
||||
id = id,
|
||||
|
@ -429,6 +439,7 @@ pub async fn cmd_status(
|
|||
}
|
||||
}
|
||||
}
|
||||
*/
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
@ -455,25 +466,30 @@ pub fn find_matching_node(
|
|||
}
|
||||
|
||||
pub async fn cmd_configure(
|
||||
rpc_cli: RpcAddrClient<Message>,
|
||||
rpc_host: SocketAddr,
|
||||
rpc_cli: &Endpoint<SystemRpc, ()>,
|
||||
rpc_host: NodeID,
|
||||
args: ConfigureNodeOpt,
|
||||
) -> Result<(), Error> {
|
||||
let status = match rpc_cli
|
||||
.call(&rpc_host, &Message::PullStatus, ADMIN_RPC_TIMEOUT)
|
||||
.await??
|
||||
.call(&rpc_host, &SystemRpc::GetKnownNodes, PRIO_NORMAL)
|
||||
.await?
|
||||
{
|
||||
Message::AdvertiseNodesUp(nodes) => nodes,
|
||||
SystemRpc::ReturnKnownNodes(nodes) => nodes,
|
||||
resp => return Err(Error::Message(format!("Invalid RPC response: {:?}", resp))),
|
||||
};
|
||||
|
||||
let added_node = find_matching_node(status.iter().map(|x| x.id), &args.node_id)?;
|
||||
let added_node = find_matching_node(
|
||||
status
|
||||
.iter()
|
||||
.map(|(id, _, _)| Uuid::try_from(id.as_ref()).unwrap()),
|
||||
&args.node_id,
|
||||
)?;
|
||||
|
||||
let mut config = match rpc_cli
|
||||
.call(&rpc_host, &Message::PullConfig, ADMIN_RPC_TIMEOUT)
|
||||
.await??
|
||||
.call(&rpc_host, &SystemRpc::PullConfig, PRIO_NORMAL)
|
||||
.await?
|
||||
{
|
||||
Message::AdvertiseConfig(cfg) => cfg,
|
||||
SystemRpc::AdvertiseConfig(cfg) => cfg,
|
||||
resp => return Err(Error::Message(format!("Invalid RPC response: {:?}", resp))),
|
||||
};
|
||||
|
||||
|
@ -527,25 +543,21 @@ pub async fn cmd_configure(
|
|||
config.version += 1;
|
||||
|
||||
rpc_cli
|
||||
.call(
|
||||
&rpc_host,
|
||||
&Message::AdvertiseConfig(config),
|
||||
ADMIN_RPC_TIMEOUT,
|
||||
)
|
||||
.await??;
|
||||
.call(&rpc_host, &SystemRpc::AdvertiseConfig(config), PRIO_NORMAL)
|
||||
.await?;
|
||||
Ok(())
|
||||
}
|
||||
|
||||
pub async fn cmd_remove(
|
||||
rpc_cli: RpcAddrClient<Message>,
|
||||
rpc_host: SocketAddr,
|
||||
rpc_cli: &Endpoint<SystemRpc, ()>,
|
||||
rpc_host: NodeID,
|
||||
args: RemoveNodeOpt,
|
||||
) -> Result<(), Error> {
|
||||
let mut config = match rpc_cli
|
||||
.call(&rpc_host, &Message::PullConfig, ADMIN_RPC_TIMEOUT)
|
||||
.await??
|
||||
.call(&rpc_host, &SystemRpc::PullConfig, PRIO_NORMAL)
|
||||
.await?
|
||||
{
|
||||
Message::AdvertiseConfig(cfg) => cfg,
|
||||
SystemRpc::AdvertiseConfig(cfg) => cfg,
|
||||
resp => return Err(Error::Message(format!("Invalid RPC response: {:?}", resp))),
|
||||
};
|
||||
|
||||
|
@ -562,21 +574,17 @@ pub async fn cmd_remove(
|
|||
config.version += 1;
|
||||
|
||||
rpc_cli
|
||||
.call(
|
||||
&rpc_host,
|
||||
&Message::AdvertiseConfig(config),
|
||||
ADMIN_RPC_TIMEOUT,
|
||||
)
|
||||
.await??;
|
||||
.call(&rpc_host, &SystemRpc::AdvertiseConfig(config), PRIO_NORMAL)
|
||||
.await?;
|
||||
Ok(())
|
||||
}
|
||||
|
||||
pub async fn cmd_admin(
|
||||
rpc_cli: RpcAddrClient<AdminRpc>,
|
||||
rpc_host: SocketAddr,
|
||||
rpc_cli: &Endpoint<AdminRpc, ()>,
|
||||
rpc_host: NodeID,
|
||||
args: AdminRpc,
|
||||
) -> Result<(), Error> {
|
||||
match rpc_cli.call(&rpc_host, args, ADMIN_RPC_TIMEOUT).await?? {
|
||||
match rpc_cli.call(&rpc_host, &args, PRIO_NORMAL).await? {
|
||||
AdminRpc::Ok(msg) => {
|
||||
println!("{}", msg);
|
||||
}
|
||||
|
|
|
@ -10,16 +10,16 @@ mod repair;
|
|||
mod server;
|
||||
|
||||
use std::net::SocketAddr;
|
||||
use std::sync::Arc;
|
||||
use std::time::Duration;
|
||||
|
||||
use structopt::StructOpt;
|
||||
|
||||
use garage_util::config::TlsConfig;
|
||||
use netapp::util::parse_peer_addr;
|
||||
use netapp::NetworkKey;
|
||||
|
||||
use garage_util::error::Error;
|
||||
|
||||
use garage_rpc::membership::*;
|
||||
use garage_rpc::rpc_client::*;
|
||||
use garage_rpc::system::*;
|
||||
use garage_rpc::*;
|
||||
|
||||
use admin_rpc::*;
|
||||
use cli::*;
|
||||
|
@ -27,16 +27,14 @@ use cli::*;
|
|||
#[derive(StructOpt, Debug)]
|
||||
#[structopt(name = "garage")]
|
||||
struct Opt {
|
||||
/// RPC connect to this host to execute client operations
|
||||
#[structopt(short = "h", long = "rpc-host", default_value = "127.0.0.1:3901", parse(try_from_str = parse_address))]
|
||||
pub rpc_host: SocketAddr,
|
||||
/// Host to connect to for admin operations, in the format:
|
||||
/// <public-key>@<ip>:<port>
|
||||
#[structopt(short = "h", long = "rpc-host")]
|
||||
pub rpc_host: Option<String>,
|
||||
|
||||
#[structopt(long = "ca-cert")]
|
||||
pub ca_cert: Option<String>,
|
||||
#[structopt(long = "client-cert")]
|
||||
pub client_cert: Option<String>,
|
||||
#[structopt(long = "client-key")]
|
||||
pub client_key: Option<String>,
|
||||
/// RPC secret network key for admin operations
|
||||
#[structopt(short = "s", long = "rpc-secret")]
|
||||
pub rpc_secret: Option<String>,
|
||||
|
||||
#[structopt(subcommand)]
|
||||
cmd: Command,
|
||||
|
@ -66,33 +64,20 @@ async fn main() {
|
|||
}
|
||||
|
||||
async fn cli_command(opt: Opt) -> Result<(), Error> {
|
||||
let tls_config = match (opt.ca_cert, opt.client_cert, opt.client_key) {
|
||||
(Some(ca_cert), Some(client_cert), Some(client_key)) => Some(TlsConfig {
|
||||
ca_cert,
|
||||
node_cert: client_cert,
|
||||
node_key: client_key,
|
||||
}),
|
||||
(None, None, None) => None,
|
||||
_ => {
|
||||
warn!("Missing one of: --ca-cert, --node-cert, --node-key. Not using TLS.");
|
||||
None
|
||||
}
|
||||
};
|
||||
let net_key_hex_str = &opt.rpc_secret.expect("No RPC secret provided");
|
||||
let network_key = NetworkKey::from_slice(
|
||||
&hex::decode(net_key_hex_str).expect("Invalid RPC secret key (bad hex)")[..],
|
||||
)
|
||||
.expect("Invalid RPC secret provided (wrong length)");
|
||||
let (_pk, sk) = sodiumoxide::crypto::sign::ed25519::gen_keypair();
|
||||
|
||||
let rpc_http_cli =
|
||||
Arc::new(RpcHttpClient::new(8, &tls_config).expect("Could not create RPC client"));
|
||||
let membership_rpc_cli =
|
||||
RpcAddrClient::new(rpc_http_cli.clone(), MEMBERSHIP_RPC_PATH.to_string());
|
||||
let admin_rpc_cli = RpcAddrClient::new(rpc_http_cli.clone(), ADMIN_RPC_PATH.to_string());
|
||||
let netapp = NetApp::new(network_key, sk);
|
||||
let (id, addr) =
|
||||
parse_peer_addr(&opt.rpc_host.expect("No RPC host provided")).expect("Invalid RPC host");
|
||||
netapp.clone().try_connect(addr, id).await?;
|
||||
|
||||
cli_cmd(opt.cmd, membership_rpc_cli, admin_rpc_cli, opt.rpc_host).await
|
||||
}
|
||||
|
||||
fn parse_address(address: &str) -> Result<SocketAddr, String> {
|
||||
use std::net::ToSocketAddrs;
|
||||
address
|
||||
.to_socket_addrs()
|
||||
.map_err(|_| format!("Could not resolve {}", address))?
|
||||
.next()
|
||||
.ok_or_else(|| format!("Could not resolve {}", address))
|
||||
let system_rpc_endpoint = netapp.endpoint::<SystemRpc, ()>(SYSTEM_RPC_PATH.into());
|
||||
let admin_rpc_endpoint = netapp.endpoint::<AdminRpc, ()>(ADMIN_RPC_PATH.into());
|
||||
|
||||
cli_cmd(opt.cmd, &system_rpc_endpoint, &admin_rpc_endpoint, id).await
|
||||
}
|
||||
|
|
|
@ -1,7 +1,5 @@
|
|||
use std::path::PathBuf;
|
||||
use std::sync::Arc;
|
||||
|
||||
use futures_util::future::*;
|
||||
use tokio::sync::watch;
|
||||
|
||||
use garage_util::background::*;
|
||||
|
@ -10,21 +8,10 @@ use garage_util::error::Error;
|
|||
|
||||
use garage_api::run_api_server;
|
||||
use garage_model::garage::Garage;
|
||||
use garage_rpc::rpc_server::RpcServer;
|
||||
use garage_web::run_web_server;
|
||||
|
||||
use crate::admin_rpc::*;
|
||||
|
||||
async fn shutdown_signal(send_cancel: watch::Sender<bool>) -> Result<(), Error> {
|
||||
// Wait for the CTRL+C signal
|
||||
tokio::signal::ctrl_c()
|
||||
.await
|
||||
.expect("failed to install CTRL+C signal handler");
|
||||
info!("Received CTRL+C, shutting down.");
|
||||
send_cancel.send(true)?;
|
||||
Ok(())
|
||||
}
|
||||
|
||||
async fn wait_from(mut chan: watch::Receiver<bool>) {
|
||||
while !*chan.borrow() {
|
||||
if chan.changed().await.is_err() {
|
||||
|
@ -47,52 +34,46 @@ pub async fn run_server(config_file: PathBuf) -> Result<(), Error> {
|
|||
.open()
|
||||
.expect("Unable to open sled DB");
|
||||
|
||||
info!("Initialize RPC server...");
|
||||
let mut rpc_server = RpcServer::new(config.rpc_bind_addr, config.rpc_tls.clone());
|
||||
|
||||
info!("Initializing background runner...");
|
||||
let (send_cancel, watch_cancel) = watch::channel(false);
|
||||
let watch_cancel = netapp::util::watch_ctrl_c();
|
||||
let (background, await_background_done) = BackgroundRunner::new(16, watch_cancel.clone());
|
||||
|
||||
info!("Initializing Garage main data store...");
|
||||
let garage = Garage::new(config.clone(), db, background, &mut rpc_server);
|
||||
let bootstrap = garage.system.clone().bootstrap(
|
||||
config.bootstrap_peers,
|
||||
config.consul_host,
|
||||
config.consul_service_name,
|
||||
);
|
||||
let garage = Garage::new(config.clone(), db, background);
|
||||
|
||||
let run_system = tokio::spawn(garage.system.clone().run(watch_cancel.clone()));
|
||||
|
||||
info!("Crate admin RPC handler...");
|
||||
AdminRpcHandler::new(garage.clone()).register_handler(&mut rpc_server);
|
||||
AdminRpcHandler::new(garage.clone());
|
||||
|
||||
info!("Initializing RPC and API servers...");
|
||||
let run_rpc_server = Arc::new(rpc_server).run(wait_from(watch_cancel.clone()));
|
||||
let api_server = run_api_server(garage.clone(), wait_from(watch_cancel.clone()));
|
||||
let web_server = run_web_server(garage, wait_from(watch_cancel.clone()));
|
||||
info!("Initializing API server...");
|
||||
let api_server = tokio::spawn(run_api_server(
|
||||
garage.clone(),
|
||||
wait_from(watch_cancel.clone()),
|
||||
));
|
||||
|
||||
futures::try_join!(
|
||||
bootstrap.map(|()| {
|
||||
info!("Bootstrap done");
|
||||
Ok(())
|
||||
}),
|
||||
run_rpc_server.map(|rv| {
|
||||
info!("RPC server exited");
|
||||
rv
|
||||
}),
|
||||
api_server.map(|rv| {
|
||||
info!("API server exited");
|
||||
rv
|
||||
}),
|
||||
web_server.map(|rv| {
|
||||
info!("Web server exited");
|
||||
rv
|
||||
}),
|
||||
await_background_done.map(|rv| {
|
||||
info!("Background runner exited: {:?}", rv);
|
||||
Ok(())
|
||||
}),
|
||||
shutdown_signal(send_cancel),
|
||||
)?;
|
||||
info!("Initializing web server...");
|
||||
let web_server = tokio::spawn(run_web_server(
|
||||
garage.clone(),
|
||||
wait_from(watch_cancel.clone()),
|
||||
));
|
||||
|
||||
// Stuff runs
|
||||
|
||||
// When a cancel signal is sent, stuff stops
|
||||
if let Err(e) = api_server.await? {
|
||||
warn!("API server exited with error: {}", e);
|
||||
}
|
||||
if let Err(e) = web_server.await? {
|
||||
warn!("Web server exited with error: {}", e);
|
||||
}
|
||||
|
||||
// Remove RPC handlers for system to break reference cycles
|
||||
garage.system.netapp.drop_all_handlers();
|
||||
|
||||
// Wait for the last parts to end
|
||||
run_system.await?;
|
||||
await_background_done.await?;
|
||||
|
||||
info!("Cleaning up...");
|
||||
|
||||
|
|
|
@ -1,6 +1,6 @@
|
|||
[package]
|
||||
name = "garage_model"
|
||||
version = "0.3.0"
|
||||
version = "0.4.0"
|
||||
authors = ["Alex Auvolat <alex@adnab.me>"]
|
||||
edition = "2018"
|
||||
license = "AGPL-3.0"
|
||||
|
@ -13,10 +13,11 @@ path = "lib.rs"
|
|||
# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
|
||||
|
||||
[dependencies]
|
||||
garage_rpc = { version = "0.3.0", path = "../rpc" }
|
||||
garage_table = { version = "0.3.0", path = "../table" }
|
||||
garage_util = { version = "0.3.0", path = "../util" }
|
||||
garage_rpc = { version = "0.4.0", path = "../rpc" }
|
||||
garage_table = { version = "0.4.0", path = "../table" }
|
||||
garage_util = { version = "0.4.0", path = "../util" }
|
||||
|
||||
async-trait = "0.1.7"
|
||||
arc-swap = "1.0"
|
||||
hex = "0.4"
|
||||
log = "0.4"
|
||||
|
@ -31,3 +32,5 @@ serde_bytes = "0.11"
|
|||
futures = "0.3"
|
||||
futures-util = "0.3"
|
||||
tokio = { version = "1.0", default-features = false, features = ["rt", "rt-multi-thread", "io-util", "net", "time", "macros", "sync", "signal", "fs"] }
|
||||
|
||||
netapp = { version = "0.3.0", git = "https://git.deuxfleurs.fr/lx/netapp" }
|
||||
|
|
|
@ -3,6 +3,7 @@ use std::sync::Arc;
|
|||
use std::time::Duration;
|
||||
|
||||
use arc_swap::ArcSwapOption;
|
||||
use async_trait::async_trait;
|
||||
use futures::future::*;
|
||||
use futures::select;
|
||||
use serde::{Deserialize, Serialize};
|
||||
|
@ -14,9 +15,8 @@ use garage_util::data::*;
|
|||
use garage_util::error::Error;
|
||||
use garage_util::time::*;
|
||||
|
||||
use garage_rpc::membership::System;
|
||||
use garage_rpc::rpc_client::*;
|
||||
use garage_rpc::rpc_server::*;
|
||||
use garage_rpc::system::System;
|
||||
use garage_rpc::*;
|
||||
|
||||
use garage_table::replication::{TableReplication, TableShardedReplication};
|
||||
|
||||
|
@ -36,8 +36,9 @@ const RESYNC_RETRY_TIMEOUT: Duration = Duration::from_secs(10);
|
|||
|
||||
/// RPC messages used to share blocks of data between nodes
|
||||
#[derive(Debug, Serialize, Deserialize)]
|
||||
pub enum Message {
|
||||
pub enum BlockRpc {
|
||||
Ok,
|
||||
Error(String),
|
||||
/// Message to ask for a block of data, by hash
|
||||
GetBlock(Hash),
|
||||
/// Message to send a block of data, either because requested, or for first delivery of new
|
||||
|
@ -60,7 +61,9 @@ pub struct PutBlockMessage {
|
|||
pub data: Vec<u8>,
|
||||
}
|
||||
|
||||
impl RpcMessage for Message {}
|
||||
/// Implements the `Message` trait for `BlockRpc`; responses to a `BlockRpc`
/// message are themselves `BlockRpc` values.
impl Message for BlockRpc {
|
||||
type Response = BlockRpc;
|
||||
}
|
||||
|
||||
/// The block manager, handling block exchange between nodes, and block storage on local node
|
||||
pub struct BlockManager {
|
||||
|
@ -77,7 +80,7 @@ pub struct BlockManager {
|
|||
resync_notify: Notify,
|
||||
|
||||
system: Arc<System>,
|
||||
rpc_client: Arc<RpcClient<Message>>,
|
||||
endpoint: Arc<Endpoint<BlockRpc, Self>>,
|
||||
pub(crate) garage: ArcSwapOption<Garage>,
|
||||
}
|
||||
|
||||
|
@ -87,7 +90,6 @@ impl BlockManager {
|
|||
data_dir: PathBuf,
|
||||
replication: TableShardedReplication,
|
||||
system: Arc<System>,
|
||||
rpc_server: &mut RpcServer,
|
||||
) -> Arc<Self> {
|
||||
let rc = db
|
||||
.open_tree("block_local_rc")
|
||||
|
@ -97,8 +99,7 @@ impl BlockManager {
|
|||
.open_tree("block_local_resync_queue")
|
||||
.expect("Unable to open block_local_resync_queue tree");
|
||||
|
||||
let rpc_path = "block_manager";
|
||||
let rpc_client = system.rpc_client::<Message>(rpc_path);
|
||||
let endpoint = system.netapp.endpoint(format!("garage_model/block.rs/Rpc"));
|
||||
|
||||
let block_manager = Arc::new(Self {
|
||||
replication,
|
||||
|
@ -108,35 +109,19 @@ impl BlockManager {
|
|||
resync_queue,
|
||||
resync_notify: Notify::new(),
|
||||
system,
|
||||
rpc_client,
|
||||
endpoint,
|
||||
garage: ArcSwapOption::from(None),
|
||||
});
|
||||
block_manager
|
||||
.clone()
|
||||
.register_handler(rpc_server, rpc_path.into());
|
||||
block_manager.endpoint.set_handler(block_manager.clone());
|
||||
|
||||
block_manager
|
||||
}
|
||||
|
||||
fn register_handler(self: Arc<Self>, rpc_server: &mut RpcServer, path: String) {
|
||||
let self2 = self.clone();
|
||||
rpc_server.add_handler::<Message, _, _>(path, move |msg, _addr| {
|
||||
let self2 = self2.clone();
|
||||
async move { self2.handle(&msg).await }
|
||||
});
|
||||
|
||||
let self2 = self.clone();
|
||||
self.rpc_client
|
||||
.set_local_handler(self.system.id, move |msg| {
|
||||
let self2 = self2.clone();
|
||||
async move { self2.handle(&msg).await }
|
||||
});
|
||||
}
|
||||
|
||||
async fn handle(self: Arc<Self>, msg: &Message) -> Result<Message, Error> {
|
||||
async fn handle_rpc(self: Arc<Self>, msg: &BlockRpc) -> Result<BlockRpc, Error> {
|
||||
match msg {
|
||||
Message::PutBlock(m) => self.write_block(&m.hash, &m.data).await,
|
||||
Message::GetBlock(h) => self.read_block(h).await,
|
||||
Message::NeedBlockQuery(h) => self.need_block(h).await.map(Message::NeedBlockReply),
|
||||
BlockRpc::PutBlock(m) => self.write_block(&m.hash, &m.data).await,
|
||||
BlockRpc::GetBlock(h) => self.read_block(h).await,
|
||||
BlockRpc::NeedBlockQuery(h) => self.need_block(h).await.map(BlockRpc::NeedBlockReply),
|
||||
_ => Err(Error::BadRpc("Unexpected RPC message".to_string())),
|
||||
}
|
||||
}
|
||||
|
@ -157,7 +142,7 @@ impl BlockManager {
|
|||
}
|
||||
|
||||
/// Write a block to disk
|
||||
async fn write_block(&self, hash: &Hash, data: &[u8]) -> Result<Message, Error> {
|
||||
async fn write_block(&self, hash: &Hash, data: &[u8]) -> Result<BlockRpc, Error> {
|
||||
let _lock = self.data_dir_lock.lock().await;
|
||||
|
||||
let mut path = self.block_dir(hash);
|
||||
|
@ -165,18 +150,18 @@ impl BlockManager {
|
|||
|
||||
path.push(hex::encode(hash));
|
||||
if fs::metadata(&path).await.is_ok() {
|
||||
return Ok(Message::Ok);
|
||||
return Ok(BlockRpc::Ok);
|
||||
}
|
||||
|
||||
let mut f = fs::File::create(path).await?;
|
||||
f.write_all(data).await?;
|
||||
drop(f);
|
||||
|
||||
Ok(Message::Ok)
|
||||
Ok(BlockRpc::Ok)
|
||||
}
|
||||
|
||||
/// Read block from disk, verifying its integrity
|
||||
async fn read_block(&self, hash: &Hash) -> Result<Message, Error> {
|
||||
async fn read_block(&self, hash: &Hash) -> Result<BlockRpc, Error> {
|
||||
let path = self.block_path(hash);
|
||||
|
||||
let mut f = match fs::File::open(&path).await {
|
||||
|
@ -204,7 +189,7 @@ impl BlockManager {
|
|||
return Err(Error::CorruptData(*hash));
|
||||
}
|
||||
|
||||
Ok(Message::PutBlock(PutBlockMessage { hash: *hash, data }))
|
||||
Ok(BlockRpc::PutBlock(PutBlockMessage { hash: *hash, data }))
|
||||
}
|
||||
|
||||
/// Check if this node should have a block, but doesn't actually have it
|
||||
|
@ -346,17 +331,22 @@ impl BlockManager {
|
|||
}
|
||||
who.retain(|id| *id != self.system.id);
|
||||
|
||||
let msg = Arc::new(Message::NeedBlockQuery(*hash));
|
||||
let msg = Arc::new(BlockRpc::NeedBlockQuery(*hash));
|
||||
let who_needs_fut = who.iter().map(|to| {
|
||||
self.rpc_client
|
||||
.call_arc(*to, msg.clone(), NEED_BLOCK_QUERY_TIMEOUT)
|
||||
self.system.rpc.call_arc(
|
||||
&self.endpoint,
|
||||
*to,
|
||||
msg.clone(),
|
||||
RequestStrategy::with_priority(PRIO_NORMAL)
|
||||
.with_timeout(NEED_BLOCK_QUERY_TIMEOUT),
|
||||
)
|
||||
});
|
||||
let who_needs_resps = join_all(who_needs_fut).await;
|
||||
|
||||
let mut need_nodes = vec![];
|
||||
for (node, needed) in who.iter().zip(who_needs_resps.into_iter()) {
|
||||
match needed? {
|
||||
Message::NeedBlockReply(needed) => {
|
||||
BlockRpc::NeedBlockReply(needed) => {
|
||||
if needed {
|
||||
need_nodes.push(*node);
|
||||
}
|
||||
|
@ -377,11 +367,14 @@ impl BlockManager {
|
|||
);
|
||||
|
||||
let put_block_message = self.read_block(hash).await?;
|
||||
self.rpc_client
|
||||
self.system
|
||||
.rpc
|
||||
.try_call_many(
|
||||
&self.endpoint,
|
||||
&need_nodes[..],
|
||||
put_block_message,
|
||||
RequestStrategy::with_quorum(need_nodes.len())
|
||||
RequestStrategy::with_priority(PRIO_NORMAL)
|
||||
.with_quorum(need_nodes.len())
|
||||
.with_timeout(BLOCK_RW_TIMEOUT),
|
||||
)
|
||||
.await?;
|
||||
|
@ -413,18 +406,21 @@ impl BlockManager {
|
|||
pub async fn rpc_get_block(&self, hash: &Hash) -> Result<Vec<u8>, Error> {
|
||||
let who = self.replication.read_nodes(&hash);
|
||||
let resps = self
|
||||
.rpc_client
|
||||
.system
|
||||
.rpc
|
||||
.try_call_many(
|
||||
&self.endpoint,
|
||||
&who[..],
|
||||
Message::GetBlock(*hash),
|
||||
RequestStrategy::with_quorum(1)
|
||||
BlockRpc::GetBlock(*hash),
|
||||
RequestStrategy::with_priority(PRIO_NORMAL)
|
||||
.with_quorum(1)
|
||||
.with_timeout(BLOCK_RW_TIMEOUT)
|
||||
.interrupt_after_quorum(true),
|
||||
)
|
||||
.await?;
|
||||
|
||||
for resp in resps {
|
||||
if let Message::PutBlock(msg) = resp {
|
||||
if let BlockRpc::PutBlock(msg) = resp {
|
||||
return Ok(msg.data);
|
||||
}
|
||||
}
|
||||
|
@ -437,11 +433,14 @@ impl BlockManager {
|
|||
/// Send block to nodes that should have it
|
||||
pub async fn rpc_put_block(&self, hash: Hash, data: Vec<u8>) -> Result<(), Error> {
|
||||
let who = self.replication.write_nodes(&hash);
|
||||
self.rpc_client
|
||||
self.system
|
||||
.rpc
|
||||
.try_call_many(
|
||||
&self.endpoint,
|
||||
&who[..],
|
||||
Message::PutBlock(PutBlockMessage { hash, data }),
|
||||
RequestStrategy::with_quorum(self.replication.write_quorum())
|
||||
BlockRpc::PutBlock(PutBlockMessage { hash, data }),
|
||||
RequestStrategy::with_priority(PRIO_NORMAL)
|
||||
.with_quorum(self.replication.write_quorum())
|
||||
.with_timeout(BLOCK_RW_TIMEOUT),
|
||||
)
|
||||
.await?;
|
||||
|
@ -531,6 +530,16 @@ impl BlockManager {
|
|||
}
|
||||
}
|
||||
|
||||
#[async_trait]
|
||||
impl EndpointHandler<BlockRpc> for BlockManager {
|
||||
async fn handle(self: &Arc<Self>, message: &BlockRpc, _from: NodeID) -> BlockRpc {
|
||||
self.clone()
|
||||
.handle_rpc(message)
|
||||
.await
|
||||
.unwrap_or_else(|e| BlockRpc::Error(format!("{}", e)))
|
||||
}
|
||||
}
|
||||
|
||||
fn u64_from_be_bytes<T: AsRef<[u8]>>(bytes: T) -> u64 {
|
||||
assert!(bytes.as_ref().len() == 8);
|
||||
let mut x8 = [0u8; 8];
|
||||
|
|
|
@ -1,11 +1,11 @@
|
|||
use std::sync::Arc;
|
||||
|
||||
use netapp::NetworkKey;
|
||||
|
||||
use garage_util::background::*;
|
||||
use garage_util::config::*;
|
||||
|
||||
use garage_rpc::membership::System;
|
||||
use garage_rpc::rpc_client::RpcHttpClient;
|
||||
use garage_rpc::rpc_server::RpcServer;
|
||||
use garage_rpc::system::System;
|
||||
|
||||
use garage_table::replication::ReplicationMode;
|
||||
use garage_table::replication::TableFullReplication;
|
||||
|
@ -45,26 +45,25 @@ pub struct Garage {
|
|||
|
||||
impl Garage {
|
||||
/// Create and run garage
|
||||
pub fn new(
|
||||
config: Config,
|
||||
db: sled::Db,
|
||||
background: Arc<BackgroundRunner>,
|
||||
rpc_server: &mut RpcServer,
|
||||
) -> Arc<Self> {
|
||||
pub fn new(config: Config, db: sled::Db, background: Arc<BackgroundRunner>) -> Arc<Self> {
|
||||
let network_key = NetworkKey::from_slice(
|
||||
&hex::decode(&config.rpc_secret).expect("Invalid RPC secret key")[..],
|
||||
)
|
||||
.expect("Invalid RPC secret key");
|
||||
|
||||
let replication_mode = ReplicationMode::parse(&config.replication_mode)
|
||||
.expect("Invalid replication_mode in config file.");
|
||||
|
||||
info!("Initialize membership management system...");
|
||||
let rpc_http_client = Arc::new(
|
||||
RpcHttpClient::new(config.max_concurrent_rpc_requests, &config.rpc_tls)
|
||||
.expect("Could not create RPC client"),
|
||||
);
|
||||
let system = System::new(
|
||||
network_key,
|
||||
config.metadata_dir.clone(),
|
||||
rpc_http_client,
|
||||
background.clone(),
|
||||
rpc_server,
|
||||
replication_mode.replication_factor(),
|
||||
config.rpc_bind_addr,
|
||||
config.bootstrap_peers.clone(),
|
||||
config.consul_host.clone(),
|
||||
config.consul_service_name.clone(),
|
||||
);
|
||||
|
||||
let data_rep_param = TableShardedReplication {
|
||||
|
@ -87,13 +86,8 @@ impl Garage {
|
|||
};
|
||||
|
||||
info!("Initialize block manager...");
|
||||
let block_manager = BlockManager::new(
|
||||
&db,
|
||||
config.data_dir.clone(),
|
||||
data_rep_param,
|
||||
system.clone(),
|
||||
rpc_server,
|
||||
);
|
||||
let block_manager =
|
||||
BlockManager::new(&db, config.data_dir.clone(), data_rep_param, system.clone());
|
||||
|
||||
info!("Initialize block_ref_table...");
|
||||
let block_ref_table = Table::new(
|
||||
|
@ -104,7 +98,6 @@ impl Garage {
|
|||
system.clone(),
|
||||
&db,
|
||||
"block_ref".to_string(),
|
||||
rpc_server,
|
||||
);
|
||||
|
||||
info!("Initialize version_table...");
|
||||
|
@ -117,7 +110,6 @@ impl Garage {
|
|||
system.clone(),
|
||||
&db,
|
||||
"version".to_string(),
|
||||
rpc_server,
|
||||
);
|
||||
|
||||
info!("Initialize object_table...");
|
||||
|
@ -130,7 +122,6 @@ impl Garage {
|
|||
system.clone(),
|
||||
&db,
|
||||
"object".to_string(),
|
||||
rpc_server,
|
||||
);
|
||||
|
||||
info!("Initialize bucket_table...");
|
||||
|
@ -140,7 +131,6 @@ impl Garage {
|
|||
system.clone(),
|
||||
&db,
|
||||
"bucket".to_string(),
|
||||
rpc_server,
|
||||
);
|
||||
|
||||
info!("Initialize key_table_table...");
|
||||
|
@ -150,7 +140,6 @@ impl Garage {
|
|||
system.clone(),
|
||||
&db,
|
||||
"key".to_string(),
|
||||
rpc_server,
|
||||
);
|
||||
|
||||
info!("Initialize Garage...");
|
||||
|
|
|
@ -1,6 +1,6 @@
|
|||
[package]
|
||||
name = "garage_rpc"
|
||||
version = "0.3.0"
|
||||
version = "0.4.0"
|
||||
authors = ["Alex Auvolat <alex@adnab.me>"]
|
||||
edition = "2018"
|
||||
license = "AGPL-3.0"
|
||||
|
@ -13,7 +13,7 @@ path = "lib.rs"
|
|||
# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
|
||||
|
||||
[dependencies]
|
||||
garage_util = { version = "0.3.0", path = "../util" }
|
||||
garage_util = { version = "0.4.0", path = "../util" }
|
||||
|
||||
garage_rpc_021 = { package = "garage_rpc", version = "0.2.1" }
|
||||
|
||||
|
@ -22,7 +22,10 @@ bytes = "1.0"
|
|||
gethostname = "0.2"
|
||||
hex = "0.4"
|
||||
log = "0.4"
|
||||
rand = "0.8"
|
||||
sodiumoxide = { version = "0.2.5-0", package = "kuska-sodiumoxide" }
|
||||
|
||||
async-trait = "0.1.7"
|
||||
rmp-serde = "0.15"
|
||||
serde = { version = "1.0", default-features = false, features = ["derive", "rc"] }
|
||||
serde_json = "1.0"
|
||||
|
@ -32,11 +35,6 @@ futures-util = "0.3"
|
|||
tokio = { version = "1.0", default-features = false, features = ["rt", "rt-multi-thread", "io-util", "net", "time", "macros", "sync", "signal", "fs"] }
|
||||
tokio-stream = { version = "0.1", features = ["net"] }
|
||||
|
||||
http = "0.2"
|
||||
hyper = { version = "0.14", features = ["full"] }
|
||||
hyper-rustls = { version = "0.22", default-features = false }
|
||||
rustls = "0.19"
|
||||
tokio-rustls = "0.22"
|
||||
webpki = "0.21"
|
||||
|
||||
netapp = { version = "0.3.0", git = "https://git.deuxfleurs.fr/lx/netapp" }
|
||||
hyper = "0.14"
|
||||
|
||||
|
|
|
@ -4,10 +4,10 @@
|
|||
extern crate log;
|
||||
|
||||
mod consul;
|
||||
pub(crate) mod tls_util;
|
||||
|
||||
pub mod membership;
|
||||
pub mod ring;
|
||||
pub mod system;
|
||||
|
||||
pub mod rpc_client;
|
||||
pub mod rpc_server;
|
||||
pub mod rpc_helper;
|
||||
|
||||
pub use rpc_helper::*;
|
||||
|
|
|
@ -1,722 +0,0 @@
|
|||
//! Module containing structs related to membership management
|
||||
use std::collections::HashMap;
|
||||
use std::fmt::Write as FmtWrite;
|
||||
use std::io::{Read, Write};
|
||||
use std::net::{IpAddr, SocketAddr};
|
||||
use std::path::{Path, PathBuf};
|
||||
use std::sync::atomic::{AtomicUsize, Ordering};
|
||||
use std::sync::Arc;
|
||||
use std::time::Duration;
|
||||
|
||||
use futures::future::join_all;
|
||||
use futures::select;
|
||||
use futures_util::future::*;
|
||||
use serde::{Deserialize, Serialize};
|
||||
use tokio::sync::watch;
|
||||
use tokio::sync::Mutex;
|
||||
|
||||
use garage_util::background::BackgroundRunner;
|
||||
use garage_util::data::*;
|
||||
use garage_util::error::Error;
|
||||
use garage_util::persister::Persister;
|
||||
use garage_util::time::*;
|
||||
|
||||
use crate::consul::get_consul_nodes;
|
||||
use crate::ring::*;
|
||||
use crate::rpc_client::*;
|
||||
use crate::rpc_server::*;
|
||||
|
||||
const PING_INTERVAL: Duration = Duration::from_secs(10);
|
||||
const DISCOVERY_INTERVAL: Duration = Duration::from_secs(60);
|
||||
const PING_TIMEOUT: Duration = Duration::from_secs(2);
|
||||
const MAX_FAILURES_BEFORE_CONSIDERED_DOWN: usize = 5;
|
||||
|
||||
/// RPC endpoint used for calls related to membership
|
||||
pub const MEMBERSHIP_RPC_PATH: &str = "_membership";
|
||||
|
||||
/// RPC messages related to membership
|
||||
#[derive(Debug, Serialize, Deserialize)]
|
||||
pub enum Message {
|
||||
/// Response to successful advertisements
|
||||
Ok,
|
||||
/// Message sent to detect other nodes' status
|
||||
Ping(PingMessage),
|
||||
/// Ask other node for the nodes it knows. Answered with AdvertiseNodesUp
|
||||
PullStatus,
|
||||
/// Ask other node its config. Answered with AdvertiseConfig
|
||||
PullConfig,
|
||||
/// Advertisement of the nodes this host knows to be up. Sent spontaneously or in response to PullStatus
|
||||
AdvertiseNodesUp(Vec<AdvertisedNode>),
|
||||
/// Advertisement of the nodes' config. Sent spontaneously or in response to PullConfig
|
||||
AdvertiseConfig(NetworkConfig),
|
||||
}
|
||||
|
||||
impl RpcMessage for Message {}
|
||||
|
||||
/// A ping, containing information about status and config
|
||||
#[derive(Debug, Serialize, Deserialize)]
|
||||
pub struct PingMessage {
|
||||
id: Uuid,
|
||||
rpc_port: u16,
|
||||
|
||||
status_hash: Hash,
|
||||
config_version: u64,
|
||||
|
||||
state_info: StateInfo,
|
||||
}
|
||||
|
||||
/// A node advertisement
|
||||
#[derive(Clone, Debug, Serialize, Deserialize)]
|
||||
pub struct AdvertisedNode {
|
||||
/// Id of the node this advertisement relates to
|
||||
pub id: Uuid,
|
||||
/// IP and port of the node
|
||||
pub addr: SocketAddr,
|
||||
|
||||
/// Is the node considered up
|
||||
pub is_up: bool,
|
||||
/// When was the node last seen up, in milliseconds since UNIX epoch
|
||||
pub last_seen: u64,
|
||||
|
||||
pub state_info: StateInfo,
|
||||
}
|
||||
|
||||
/// This node's membership manager
|
||||
pub struct System {
|
||||
/// The id of this node
|
||||
pub id: Uuid,
|
||||
|
||||
persist_config: Persister<NetworkConfig>,
|
||||
persist_status: Persister<Vec<AdvertisedNode>>,
|
||||
rpc_local_port: u16,
|
||||
|
||||
state_info: StateInfo,
|
||||
|
||||
rpc_http_client: Arc<RpcHttpClient>,
|
||||
rpc_client: Arc<RpcClient<Message>>,
|
||||
|
||||
replication_factor: usize,
|
||||
pub(crate) status: watch::Receiver<Arc<Status>>,
|
||||
/// The ring
|
||||
pub ring: watch::Receiver<Arc<Ring>>,
|
||||
|
||||
update_lock: Mutex<Updaters>,
|
||||
|
||||
/// The job runner of this node
|
||||
pub background: Arc<BackgroundRunner>,
|
||||
}
|
||||
|
||||
struct Updaters {
|
||||
update_status: watch::Sender<Arc<Status>>,
|
||||
update_ring: watch::Sender<Arc<Ring>>,
|
||||
}
|
||||
|
||||
/// The status of each node, as viewed by this node
|
||||
#[derive(Debug, Clone)]
|
||||
pub struct Status {
|
||||
/// Mapping of each node id to its known status
|
||||
pub nodes: HashMap<Uuid, Arc<StatusEntry>>,
|
||||
/// Hash of `nodes`, used to detect when nodes have different views of the cluster
|
||||
pub hash: Hash,
|
||||
}
|
||||
|
||||
/// The status of a single node
|
||||
#[derive(Debug)]
|
||||
pub struct StatusEntry {
|
||||
/// The IP and port used to connect to this node
|
||||
pub addr: SocketAddr,
|
||||
/// Last time this node was seen
|
||||
pub last_seen: u64,
|
||||
/// Number of consecutive pings sent without reply to this node
|
||||
pub num_failures: AtomicUsize,
|
||||
pub state_info: StateInfo,
|
||||
}
|
||||
|
||||
impl StatusEntry {
|
||||
/// Whether the node associated with this entry is considered up
|
||||
pub fn is_up(&self) -> bool {
|
||||
self.num_failures.load(Ordering::SeqCst) < MAX_FAILURES_BEFORE_CONSIDERED_DOWN
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, Serialize, Deserialize)]
|
||||
pub struct StateInfo {
|
||||
/// Hostname of the node
|
||||
pub hostname: String,
|
||||
/// Replication factor configured on the node
|
||||
pub replication_factor: Option<usize>, // TODO Option is just for retrocompatibility. It should become a simple usize at some point
|
||||
}
|
||||
|
||||
impl Status {
|
||||
fn handle_ping(&mut self, ip: IpAddr, info: &PingMessage) -> bool {
|
||||
let addr = SocketAddr::new(ip, info.rpc_port);
|
||||
let old_status = self.nodes.insert(
|
||||
info.id,
|
||||
Arc::new(StatusEntry {
|
||||
addr,
|
||||
last_seen: now_msec(),
|
||||
num_failures: AtomicUsize::from(0),
|
||||
state_info: info.state_info.clone(),
|
||||
}),
|
||||
);
|
||||
match old_status {
|
||||
None => {
|
||||
info!("Newly pingable node: {}", hex::encode(&info.id));
|
||||
true
|
||||
}
|
||||
Some(x) => x.addr != addr,
|
||||
}
|
||||
}
|
||||
|
||||
fn recalculate_hash(&mut self) {
|
||||
let mut nodes = self.nodes.iter().collect::<Vec<_>>();
|
||||
nodes.sort_unstable_by_key(|(id, _status)| *id);
|
||||
|
||||
let mut nodes_txt = String::new();
|
||||
debug!("Current set of pingable nodes: --");
|
||||
for (id, status) in nodes {
|
||||
debug!("{} {}", hex::encode(&id), status.addr);
|
||||
writeln!(&mut nodes_txt, "{} {}", hex::encode(&id), status.addr).unwrap();
|
||||
}
|
||||
debug!("END --");
|
||||
self.hash = blake2sum(nodes_txt.as_bytes());
|
||||
}
|
||||
|
||||
fn to_serializable_membership(&self, system: &System) -> Vec<AdvertisedNode> {
|
||||
let mut mem = vec![];
|
||||
for (node, status) in self.nodes.iter() {
|
||||
let state_info = if *node == system.id {
|
||||
system.state_info.clone()
|
||||
} else {
|
||||
status.state_info.clone()
|
||||
};
|
||||
mem.push(AdvertisedNode {
|
||||
id: *node,
|
||||
addr: status.addr,
|
||||
is_up: status.is_up(),
|
||||
last_seen: status.last_seen,
|
||||
state_info,
|
||||
});
|
||||
}
|
||||
mem
|
||||
}
|
||||
}
|
||||
|
||||
fn gen_node_id(metadata_dir: &Path) -> Result<Uuid, Error> {
|
||||
let mut id_file = metadata_dir.to_path_buf();
|
||||
id_file.push("node_id");
|
||||
if id_file.as_path().exists() {
|
||||
let mut f = std::fs::File::open(id_file.as_path())?;
|
||||
let mut d = vec![];
|
||||
f.read_to_end(&mut d)?;
|
||||
if d.len() != 32 {
|
||||
return Err(Error::Message("Corrupt node_id file".to_string()));
|
||||
}
|
||||
|
||||
let mut id = [0u8; 32];
|
||||
id.copy_from_slice(&d[..]);
|
||||
Ok(id.into())
|
||||
} else {
|
||||
let id = gen_uuid();
|
||||
|
||||
let mut f = std::fs::File::create(id_file.as_path())?;
|
||||
f.write_all(id.as_slice())?;
|
||||
Ok(id)
|
||||
}
|
||||
}
|
||||
|
||||
impl System {
|
||||
/// Create this node's membership manager
|
||||
pub fn new(
|
||||
metadata_dir: PathBuf,
|
||||
rpc_http_client: Arc<RpcHttpClient>,
|
||||
background: Arc<BackgroundRunner>,
|
||||
rpc_server: &mut RpcServer,
|
||||
replication_factor: usize,
|
||||
) -> Arc<Self> {
|
||||
let id = gen_node_id(&metadata_dir).expect("Unable to read or generate node ID");
|
||||
info!("Node ID: {}", hex::encode(&id));
|
||||
|
||||
let persist_config = Persister::new(&metadata_dir, "network_config");
|
||||
let persist_status = Persister::new(&metadata_dir, "peer_info");
|
||||
|
||||
let net_config = match persist_config.load() {
|
||||
Ok(x) => x,
|
||||
Err(e) => {
|
||||
match Persister::<garage_rpc_021::ring::NetworkConfig>::new(
|
||||
&metadata_dir,
|
||||
"network_config",
|
||||
)
|
||||
.load()
|
||||
{
|
||||
Ok(old_config) => NetworkConfig::migrate_from_021(old_config),
|
||||
Err(e2) => {
|
||||
info!(
|
||||
"No valid previous network configuration stored ({}, {}), starting fresh.",
|
||||
e, e2
|
||||
);
|
||||
NetworkConfig::new()
|
||||
}
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
let mut status = Status {
|
||||
nodes: HashMap::new(),
|
||||
hash: Hash::default(),
|
||||
};
|
||||
status.recalculate_hash();
|
||||
let (update_status, status) = watch::channel(Arc::new(status));
|
||||
|
||||
let state_info = StateInfo {
|
||||
hostname: gethostname::gethostname()
|
||||
.into_string()
|
||||
.unwrap_or_else(|_| "<invalid utf-8>".to_string()),
|
||||
replication_factor: Some(replication_factor),
|
||||
};
|
||||
|
||||
let ring = Ring::new(net_config, replication_factor);
|
||||
let (update_ring, ring) = watch::channel(Arc::new(ring));
|
||||
|
||||
let rpc_path = MEMBERSHIP_RPC_PATH.to_string();
|
||||
let rpc_client = RpcClient::new(
|
||||
RpcAddrClient::<Message>::new(rpc_http_client.clone(), rpc_path.clone()),
|
||||
background.clone(),
|
||||
status.clone(),
|
||||
);
|
||||
|
||||
let sys = Arc::new(System {
|
||||
id,
|
||||
persist_config,
|
||||
persist_status,
|
||||
rpc_local_port: rpc_server.bind_addr.port(),
|
||||
state_info,
|
||||
rpc_http_client,
|
||||
rpc_client,
|
||||
replication_factor,
|
||||
status,
|
||||
ring,
|
||||
update_lock: Mutex::new(Updaters {
|
||||
update_status,
|
||||
update_ring,
|
||||
}),
|
||||
background,
|
||||
});
|
||||
sys.clone().register_handler(rpc_server, rpc_path);
|
||||
sys
|
||||
}
|
||||
|
||||
fn register_handler(self: Arc<Self>, rpc_server: &mut RpcServer, path: String) {
|
||||
rpc_server.add_handler::<Message, _, _>(path, move |msg, addr| {
|
||||
let self2 = self.clone();
|
||||
async move {
|
||||
match msg {
|
||||
Message::Ping(ping) => self2.handle_ping(&addr, &ping).await,
|
||||
|
||||
Message::PullStatus => Ok(self2.handle_pull_status()),
|
||||
Message::PullConfig => Ok(self2.handle_pull_config()),
|
||||
Message::AdvertiseNodesUp(adv) => self2.handle_advertise_nodes_up(&adv).await,
|
||||
Message::AdvertiseConfig(adv) => self2.handle_advertise_config(&adv).await,
|
||||
|
||||
_ => Err(Error::BadRpc("Unexpected RPC message".to_string())),
|
||||
}
|
||||
}
|
||||
});
|
||||
}
|
||||
|
||||
/// Get an RPC client
|
||||
pub fn rpc_client<M: RpcMessage + 'static>(self: &Arc<Self>, path: &str) -> Arc<RpcClient<M>> {
|
||||
RpcClient::new(
|
||||
RpcAddrClient::new(self.rpc_http_client.clone(), path.to_string()),
|
||||
self.background.clone(),
|
||||
self.status.clone(),
|
||||
)
|
||||
}
|
||||
|
||||
/// Save network configuration to disk
|
||||
async fn save_network_config(self: Arc<Self>) -> Result<(), Error> {
|
||||
let ring = self.ring.borrow().clone();
|
||||
self.persist_config
|
||||
.save_async(&ring.config)
|
||||
.await
|
||||
.expect("Cannot save current cluster configuration");
|
||||
Ok(())
|
||||
}
|
||||
|
||||
fn make_ping(&self) -> Message {
|
||||
let status = self.status.borrow().clone();
|
||||
let ring = self.ring.borrow().clone();
|
||||
Message::Ping(PingMessage {
|
||||
id: self.id,
|
||||
rpc_port: self.rpc_local_port,
|
||||
status_hash: status.hash,
|
||||
config_version: ring.config.version,
|
||||
state_info: self.state_info.clone(),
|
||||
})
|
||||
}
|
||||
|
||||
async fn broadcast(self: Arc<Self>, msg: Message, timeout: Duration) {
|
||||
let status = self.status.borrow().clone();
|
||||
let to = status
|
||||
.nodes
|
||||
.keys()
|
||||
.filter(|x| **x != self.id)
|
||||
.cloned()
|
||||
.collect::<Vec<_>>();
|
||||
self.rpc_client.call_many(&to[..], msg, timeout).await;
|
||||
}
|
||||
|
||||
/// Perform bootstrapping, starting the discovery loop and the ping loop
|
||||
pub async fn bootstrap(
|
||||
self: Arc<Self>,
|
||||
peers: Vec<SocketAddr>,
|
||||
consul_host: Option<String>,
|
||||
consul_service_name: Option<String>,
|
||||
) {
|
||||
let self2 = self.clone();
|
||||
self.background
|
||||
.spawn_worker("discovery loop".to_string(), |stop_signal| {
|
||||
self2.discovery_loop(peers, consul_host, consul_service_name, stop_signal)
|
||||
});
|
||||
|
||||
let self2 = self.clone();
|
||||
self.background
|
||||
.spawn_worker("ping loop".to_string(), |stop_signal| {
|
||||
self2.ping_loop(stop_signal)
|
||||
});
|
||||
}
|
||||
|
||||
async fn ping_nodes(self: Arc<Self>, peers: Vec<(SocketAddr, Option<Uuid>)>) {
|
||||
let ping_msg = self.make_ping();
|
||||
let ping_resps = join_all(peers.iter().map(|(addr, id_option)| {
|
||||
let sys = self.clone();
|
||||
let ping_msg_ref = &ping_msg;
|
||||
async move {
|
||||
(
|
||||
id_option,
|
||||
addr,
|
||||
sys.rpc_client
|
||||
.by_addr()
|
||||
.call(&addr, ping_msg_ref, PING_TIMEOUT)
|
||||
.await,
|
||||
)
|
||||
}
|
||||
}))
|
||||
.await;
|
||||
|
||||
let update_locked = self.update_lock.lock().await;
|
||||
let mut status: Status = self.status.borrow().as_ref().clone();
|
||||
let ring = self.ring.borrow().clone();
|
||||
|
||||
let mut has_changes = false;
|
||||
let mut to_advertise = vec![];
|
||||
|
||||
for (id_option, addr, ping_resp) in ping_resps {
|
||||
if let Ok(Ok(Message::Ping(info))) = ping_resp {
|
||||
let is_new = status.handle_ping(addr.ip(), &info);
|
||||
if is_new {
|
||||
has_changes = true;
|
||||
to_advertise.push(AdvertisedNode {
|
||||
id: info.id,
|
||||
addr: *addr,
|
||||
is_up: true,
|
||||
last_seen: now_msec(),
|
||||
state_info: info.state_info.clone(),
|
||||
});
|
||||
}
|
||||
if is_new || status.hash != info.status_hash {
|
||||
self.background
|
||||
.spawn_cancellable(self.clone().pull_status(info.id).map(Ok));
|
||||
}
|
||||
if is_new || ring.config.version < info.config_version {
|
||||
self.background
|
||||
.spawn_cancellable(self.clone().pull_config(info.id).map(Ok));
|
||||
}
|
||||
} else if let Some(id) = id_option {
|
||||
if let Some(st) = status.nodes.get_mut(id) {
|
||||
// we need to increment failure counter as call was done using by_addr so the
|
||||
// counter was not auto-incremented
|
||||
st.num_failures.fetch_add(1, Ordering::SeqCst);
|
||||
if !st.is_up() {
|
||||
warn!("Node {:?} seems to be down.", id);
|
||||
if !ring.config.members.contains_key(id) {
|
||||
info!("Removing node {:?} from status (not in config and not responding to pings anymore)", id);
|
||||
status.nodes.remove(&id);
|
||||
has_changes = true;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
if has_changes {
|
||||
status.recalculate_hash();
|
||||
}
|
||||
self.update_status(&update_locked, status).await;
|
||||
drop(update_locked);
|
||||
|
||||
if !to_advertise.is_empty() {
|
||||
self.broadcast(Message::AdvertiseNodesUp(to_advertise), PING_TIMEOUT)
|
||||
.await;
|
||||
}
|
||||
}
|
||||
|
||||
async fn handle_ping(
|
||||
self: Arc<Self>,
|
||||
from: &SocketAddr,
|
||||
ping: &PingMessage,
|
||||
) -> Result<Message, Error> {
|
||||
let update_locked = self.update_lock.lock().await;
|
||||
let mut status: Status = self.status.borrow().as_ref().clone();
|
||||
|
||||
let is_new = status.handle_ping(from.ip(), ping);
|
||||
if is_new {
|
||||
status.recalculate_hash();
|
||||
}
|
||||
let status_hash = status.hash;
|
||||
let config_version = self.ring.borrow().config.version;
|
||||
|
||||
self.update_status(&update_locked, status).await;
|
||||
drop(update_locked);
|
||||
|
||||
if is_new || status_hash != ping.status_hash {
|
||||
self.background
|
||||
.spawn_cancellable(self.clone().pull_status(ping.id).map(Ok));
|
||||
}
|
||||
if is_new || config_version < ping.config_version {
|
||||
self.background
|
||||
.spawn_cancellable(self.clone().pull_config(ping.id).map(Ok));
|
||||
}
|
||||
|
||||
Ok(self.make_ping())
|
||||
}
|
||||
|
||||
fn handle_pull_status(&self) -> Message {
|
||||
Message::AdvertiseNodesUp(self.status.borrow().to_serializable_membership(self))
|
||||
}
|
||||
|
||||
fn handle_pull_config(&self) -> Message {
|
||||
let ring = self.ring.borrow().clone();
|
||||
Message::AdvertiseConfig(ring.config.clone())
|
||||
}
|
||||
|
||||
async fn handle_advertise_nodes_up(
|
||||
self: Arc<Self>,
|
||||
adv: &[AdvertisedNode],
|
||||
) -> Result<Message, Error> {
|
||||
let mut to_ping = vec![];
|
||||
|
||||
let update_lock = self.update_lock.lock().await;
|
||||
let mut status: Status = self.status.borrow().as_ref().clone();
|
||||
let mut has_changed = false;
|
||||
let mut max_replication_factor = 0;
|
||||
|
||||
for node in adv.iter() {
|
||||
if node.id == self.id {
|
||||
// learn our own ip address
|
||||
let self_addr = SocketAddr::new(node.addr.ip(), self.rpc_local_port);
|
||||
let old_self = status.nodes.insert(
|
||||
node.id,
|
||||
Arc::new(StatusEntry {
|
||||
addr: self_addr,
|
||||
last_seen: now_msec(),
|
||||
num_failures: AtomicUsize::from(0),
|
||||
state_info: self.state_info.clone(),
|
||||
}),
|
||||
);
|
||||
has_changed = match old_self {
|
||||
None => true,
|
||||
Some(x) => x.addr != self_addr,
|
||||
};
|
||||
} else {
|
||||
let ping_them = match status.nodes.get(&node.id) {
|
||||
// Case 1: new node
|
||||
None => true,
|
||||
// Case 2: the node might have changed address
|
||||
Some(our_node) => node.is_up && !our_node.is_up() && our_node.addr != node.addr,
|
||||
};
|
||||
max_replication_factor = std::cmp::max(
|
||||
max_replication_factor,
|
||||
node.state_info.replication_factor.unwrap_or_default(),
|
||||
);
|
||||
if ping_them {
|
||||
to_ping.push((node.addr, Some(node.id)));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if self.replication_factor < max_replication_factor {
|
||||
error!("Some node have a higher replication factor ({}) than this one ({}). This is not supported and might lead to bugs",
|
||||
max_replication_factor,
|
||||
self.replication_factor);
|
||||
std::process::exit(1);
|
||||
}
|
||||
if has_changed {
|
||||
status.recalculate_hash();
|
||||
}
|
||||
self.update_status(&update_lock, status).await;
|
||||
drop(update_lock);
|
||||
|
||||
if !to_ping.is_empty() {
|
||||
self.background
|
||||
.spawn_cancellable(self.clone().ping_nodes(to_ping).map(Ok));
|
||||
}
|
||||
|
||||
Ok(Message::Ok)
|
||||
}
|
||||
|
||||
async fn handle_advertise_config(
|
||||
self: Arc<Self>,
|
||||
adv: &NetworkConfig,
|
||||
) -> Result<Message, Error> {
|
||||
let update_lock = self.update_lock.lock().await;
|
||||
let ring: Arc<Ring> = self.ring.borrow().clone();
|
||||
|
||||
if adv.version > ring.config.version {
|
||||
let ring = Ring::new(adv.clone(), self.replication_factor);
|
||||
update_lock.update_ring.send(Arc::new(ring))?;
|
||||
drop(update_lock);
|
||||
|
||||
self.background.spawn_cancellable(
|
||||
self.clone()
|
||||
.broadcast(Message::AdvertiseConfig(adv.clone()), PING_TIMEOUT)
|
||||
.map(Ok),
|
||||
);
|
||||
self.background.spawn(self.clone().save_network_config());
|
||||
}
|
||||
|
||||
Ok(Message::Ok)
|
||||
}
|
||||
|
||||
async fn ping_loop(self: Arc<Self>, mut stop_signal: watch::Receiver<bool>) {
|
||||
while !*stop_signal.borrow() {
|
||||
let restart_at = tokio::time::sleep(PING_INTERVAL);
|
||||
|
||||
let status = self.status.borrow().clone();
|
||||
let ping_addrs = status
|
||||
.nodes
|
||||
.iter()
|
||||
.filter(|(id, _)| **id != self.id)
|
||||
.map(|(id, status)| (status.addr, Some(*id)))
|
||||
.collect::<Vec<_>>();
|
||||
|
||||
self.clone().ping_nodes(ping_addrs).await;
|
||||
|
||||
select! {
|
||||
_ = restart_at.fuse() => {},
|
||||
_ = stop_signal.changed().fuse() => {},
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
async fn discovery_loop(
|
||||
self: Arc<Self>,
|
||||
bootstrap_peers: Vec<SocketAddr>,
|
||||
consul_host: Option<String>,
|
||||
consul_service_name: Option<String>,
|
||||
mut stop_signal: watch::Receiver<bool>,
|
||||
) {
|
||||
let consul_config = match (consul_host, consul_service_name) {
|
||||
(Some(ch), Some(csn)) => Some((ch, csn)),
|
||||
_ => None,
|
||||
};
|
||||
|
||||
while !*stop_signal.borrow() {
|
||||
let not_configured = self.ring.borrow().config.members.is_empty();
|
||||
let no_peers = self.status.borrow().nodes.len() < 3;
|
||||
let bad_peers = self
|
||||
.status
|
||||
.borrow()
|
||||
.nodes
|
||||
.iter()
|
||||
.filter(|(_, v)| v.is_up())
|
||||
.count() != self.ring.borrow().config.members.len();
|
||||
|
||||
if not_configured || no_peers || bad_peers {
|
||||
info!("Doing a bootstrap/discovery step (not_configured: {}, no_peers: {}, bad_peers: {})", not_configured, no_peers, bad_peers);
|
||||
|
||||
let mut ping_list = bootstrap_peers
|
||||
.iter()
|
||||
.map(|ip| (*ip, None))
|
||||
.collect::<Vec<_>>();
|
||||
|
||||
if let Ok(peers) = self.persist_status.load_async().await {
|
||||
ping_list.extend(peers.iter().map(|x| (x.addr, Some(x.id))));
|
||||
}
|
||||
|
||||
if let Some((consul_host, consul_service_name)) = &consul_config {
|
||||
match get_consul_nodes(consul_host, consul_service_name).await {
|
||||
Ok(node_list) => {
|
||||
ping_list.extend(node_list.iter().map(|a| (*a, None)));
|
||||
}
|
||||
Err(e) => {
|
||||
warn!("Could not retrieve node list from Consul: {}", e);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
self.clone().ping_nodes(ping_list).await;
|
||||
}
|
||||
|
||||
let restart_at = tokio::time::sleep(DISCOVERY_INTERVAL);
|
||||
select! {
|
||||
_ = restart_at.fuse() => {},
|
||||
_ = stop_signal.changed().fuse() => {},
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// for some reason fixing this is causing compilation error, see https://github.com/rust-lang/rust-clippy/issues/7052
|
||||
#[allow(clippy::manual_async_fn)]
|
||||
fn pull_status(
|
||||
self: Arc<Self>,
|
||||
peer: Uuid,
|
||||
) -> impl futures::future::Future<Output = ()> + Send + 'static {
|
||||
async move {
|
||||
let resp = self
|
||||
.rpc_client
|
||||
.call(peer, Message::PullStatus, PING_TIMEOUT)
|
||||
.await;
|
||||
if let Ok(Message::AdvertiseNodesUp(nodes)) = resp {
|
||||
let _: Result<_, _> = self.handle_advertise_nodes_up(&nodes).await;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
async fn pull_config(self: Arc<Self>, peer: Uuid) {
|
||||
let resp = self
|
||||
.rpc_client
|
||||
.call(peer, Message::PullConfig, PING_TIMEOUT)
|
||||
.await;
|
||||
if let Ok(Message::AdvertiseConfig(config)) = resp {
|
||||
let _: Result<_, _> = self.handle_advertise_config(&config).await;
|
||||
}
|
||||
}
|
||||
|
||||
async fn update_status(self: &Arc<Self>, updaters: &Updaters, status: Status) {
|
||||
if status.hash != self.status.borrow().hash {
|
||||
let mut list = status.to_serializable_membership(&self);
|
||||
|
||||
// Combine with old peer list to make sure no peer is lost
|
||||
if let Ok(old_list) = self.persist_status.load_async().await {
|
||||
for pp in old_list {
|
||||
if !list.iter().any(|np| pp.id == np.id) {
|
||||
list.push(pp);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if !list.is_empty() {
|
||||
info!("Persisting new peer list ({} peers)", list.len());
|
||||
self.persist_status
|
||||
.save_async(&list)
|
||||
.await
|
||||
.expect("Unable to persist peer list");
|
||||
}
|
||||
}
|
||||
|
||||
updaters
|
||||
.update_status
|
||||
.send(Arc::new(status))
|
||||
.expect("Could not update internal membership status");
|
||||
}
|
||||
}
|
|
@ -3,6 +3,8 @@
|
|||
use std::collections::{HashMap, HashSet};
|
||||
use std::convert::TryInto;
|
||||
|
||||
use netapp::NodeID;
|
||||
|
||||
use serde::{Deserialize, Serialize};
|
||||
|
||||
use garage_util::data::*;
|
||||
|
@ -98,7 +100,7 @@ pub struct Ring {
|
|||
pub config: NetworkConfig,
|
||||
|
||||
// Internal order of nodes used to make a more compact representation of the ring
|
||||
nodes: Vec<Uuid>,
|
||||
nodes: Vec<NodeID>,
|
||||
|
||||
// The list of entries in the ring
|
||||
ring: Vec<RingEntry>,
|
||||
|
@ -260,6 +262,11 @@ impl Ring {
|
|||
})
|
||||
.collect::<Vec<_>>();
|
||||
|
||||
let nodes = nodes
|
||||
.iter()
|
||||
.map(|id| NodeID::from_slice(id.as_slice()).unwrap())
|
||||
.collect::<Vec<_>>();
|
||||
|
||||
Self {
|
||||
replication_factor,
|
||||
config,
|
||||
|
@ -291,7 +298,7 @@ impl Ring {
|
|||
}
|
||||
|
||||
/// Walk the ring to find the n servers in which data should be replicated
|
||||
pub fn get_nodes(&self, position: &Hash, n: usize) -> Vec<Uuid> {
|
||||
pub fn get_nodes(&self, position: &Hash, n: usize) -> Vec<NodeID> {
|
||||
if self.ring.len() != 1 << PARTITION_BITS {
|
||||
warn!("Ring not yet ready, read/writes will be lost!");
|
||||
return vec![];
|
||||
|
|
|
@ -1,369 +0,0 @@
|
|||
//! Contain structs related to making RPCs
|
||||
use std::borrow::Borrow;
|
||||
use std::marker::PhantomData;
|
||||
use std::net::SocketAddr;
|
||||
use std::pin::Pin;
|
||||
use std::sync::atomic::Ordering;
|
||||
use std::sync::Arc;
|
||||
use std::time::Duration;
|
||||
|
||||
use arc_swap::ArcSwapOption;
|
||||
use futures::future::Future;
|
||||
use futures::stream::futures_unordered::FuturesUnordered;
|
||||
use futures::stream::StreamExt;
|
||||
use futures_util::future::FutureExt;
|
||||
use hyper::client::{Client, HttpConnector};
|
||||
use hyper::{Body, Method, Request};
|
||||
use tokio::sync::{watch, Semaphore};
|
||||
|
||||
use garage_util::background::BackgroundRunner;
|
||||
use garage_util::config::TlsConfig;
|
||||
use garage_util::data::*;
|
||||
use garage_util::error::{Error, RpcError};
|
||||
|
||||
use crate::membership::Status;
|
||||
use crate::rpc_server::RpcMessage;
|
||||
use crate::tls_util;
|
||||
|
||||
/// Timeout used by [`RequestStrategy::with_quorum`] until overridden with
/// [`RequestStrategy::with_timeout`].
const DEFAULT_TIMEOUT: Duration = Duration::from_secs(10);

/// Strategy to apply when making RPC
#[derive(Copy, Clone, Debug)]
pub struct RequestStrategy {
    /// Max time to wait for response
    pub rs_timeout: Duration,
    /// Min number of responses to consider the request successful
    pub rs_quorum: usize,
    /// Should requests be dropped after enough responses are received
    pub rs_interrupt_after_quorum: bool,
}

impl RequestStrategy {
    /// Create a RequestStrategy with default timeout and not interrupting when quorum reached
    pub fn with_quorum(quorum: usize) -> Self {
        RequestStrategy {
            rs_timeout: DEFAULT_TIMEOUT,
            rs_quorum: quorum,
            rs_interrupt_after_quorum: false,
        }
    }

    /// Set timeout of the strategy
    pub fn with_timeout(mut self, timeout: Duration) -> Self {
        self.rs_timeout = timeout;
        self
    }

    /// Set if requests can be dropped after quorum has been reached
    /// In general true for read requests, and false for write
    pub fn interrupt_after_quorum(mut self, interrupt: bool) -> Self {
        self.rs_interrupt_after_quorum = interrupt;
        self
    }
}
|
||||
|
||||
/// Shortcut for a boxed async function taking a message, and resolving to another message or an
|
||||
/// error
|
||||
pub type LocalHandlerFn<M> =
|
||||
Box<dyn Fn(Arc<M>) -> Pin<Box<dyn Future<Output = Result<M, Error>> + Send>> + Send + Sync>;
|
||||
|
||||
/// Client used to send RPC
|
||||
pub struct RpcClient<M: RpcMessage> {
|
||||
status: watch::Receiver<Arc<Status>>,
|
||||
background: Arc<BackgroundRunner>,
|
||||
|
||||
local_handler: ArcSwapOption<(Uuid, LocalHandlerFn<M>)>,
|
||||
|
||||
rpc_addr_client: RpcAddrClient<M>,
|
||||
}
|
||||
|
||||
impl<M: RpcMessage + 'static> RpcClient<M> {
|
||||
/// Create a new RpcClient from an address, a job runner, and the status of all RPC servers
|
||||
pub fn new(
|
||||
rac: RpcAddrClient<M>,
|
||||
background: Arc<BackgroundRunner>,
|
||||
status: watch::Receiver<Arc<Status>>,
|
||||
) -> Arc<Self> {
|
||||
Arc::new(Self {
|
||||
rpc_addr_client: rac,
|
||||
background,
|
||||
status,
|
||||
local_handler: ArcSwapOption::new(None),
|
||||
})
|
||||
}
|
||||
|
||||
/// Set the local handler, to process RPC to this node without network usage
|
||||
pub fn set_local_handler<F, Fut>(&self, my_id: Uuid, handler: F)
|
||||
where
|
||||
F: Fn(Arc<M>) -> Fut + Send + Sync + 'static,
|
||||
Fut: Future<Output = Result<M, Error>> + Send + 'static,
|
||||
{
|
||||
let handler_arc = Arc::new(handler);
|
||||
let handler: LocalHandlerFn<M> = Box::new(move |msg| {
|
||||
let handler_arc2 = handler_arc.clone();
|
||||
Box::pin(async move { handler_arc2(msg).await })
|
||||
});
|
||||
self.local_handler.swap(Some(Arc::new((my_id, handler))));
|
||||
}
|
||||
|
||||
/// Get a RPC client to make calls using node's SocketAddr instead of its ID
|
||||
pub fn by_addr(&self) -> &RpcAddrClient<M> {
|
||||
&self.rpc_addr_client
|
||||
}
|
||||
|
||||
/// Make a RPC call
|
||||
pub async fn call(&self, to: Uuid, msg: M, timeout: Duration) -> Result<M, Error> {
|
||||
self.call_arc(to, Arc::new(msg), timeout).await
|
||||
}
|
||||
|
||||
/// Make a RPC call from a message stored in an Arc
|
||||
pub async fn call_arc(&self, to: Uuid, msg: Arc<M>, timeout: Duration) -> Result<M, Error> {
|
||||
if let Some(lh) = self.local_handler.load_full() {
|
||||
let (my_id, local_handler) = lh.as_ref();
|
||||
if to.borrow() == my_id {
|
||||
return local_handler(msg).await;
|
||||
}
|
||||
}
|
||||
let status = self.status.borrow().clone();
|
||||
let node_status = match status.nodes.get(&to) {
|
||||
Some(node_status) => {
|
||||
if node_status.is_up() {
|
||||
node_status
|
||||
} else {
|
||||
return Err(Error::from(RpcError::NodeDown(to)));
|
||||
}
|
||||
}
|
||||
None => {
|
||||
return Err(Error::Message(format!(
|
||||
"Peer ID not found: {:?}",
|
||||
to.borrow()
|
||||
)))
|
||||
}
|
||||
};
|
||||
match self
|
||||
.rpc_addr_client
|
||||
.call(&node_status.addr, msg, timeout)
|
||||
.await
|
||||
{
|
||||
Err(rpc_error) => {
|
||||
node_status.num_failures.fetch_add(1, Ordering::SeqCst);
|
||||
Err(Error::from(rpc_error))
|
||||
}
|
||||
Ok(x) => x,
|
||||
}
|
||||
}
|
||||
|
||||
/// Make a RPC call to multiple servers, returning a Vec containing each result
|
||||
pub async fn call_many(&self, to: &[Uuid], msg: M, timeout: Duration) -> Vec<Result<M, Error>> {
|
||||
let msg = Arc::new(msg);
|
||||
let mut resp_stream = to
|
||||
.iter()
|
||||
.map(|to| self.call_arc(*to, msg.clone(), timeout))
|
||||
.collect::<FuturesUnordered<_>>();
|
||||
|
||||
let mut results = vec![];
|
||||
while let Some(resp) = resp_stream.next().await {
|
||||
results.push(resp);
|
||||
}
|
||||
results
|
||||
}
|
||||
|
||||
/// Make a RPC call to multiple servers, returning either a Vec of responses, or an error if
|
||||
/// strategy could not be respected due to too many errors
|
||||
pub async fn try_call_many(
|
||||
self: &Arc<Self>,
|
||||
to: &[Uuid],
|
||||
msg: M,
|
||||
strategy: RequestStrategy,
|
||||
) -> Result<Vec<M>, Error> {
|
||||
let timeout = strategy.rs_timeout;
|
||||
|
||||
let msg = Arc::new(msg);
|
||||
let mut resp_stream = to
|
||||
.to_vec()
|
||||
.into_iter()
|
||||
.map(|to| {
|
||||
let self2 = self.clone();
|
||||
let msg = msg.clone();
|
||||
async move { self2.call_arc(to, msg, timeout).await }
|
||||
})
|
||||
.collect::<FuturesUnordered<_>>();
|
||||
|
||||
let mut results = vec![];
|
||||
let mut errors = vec![];
|
||||
|
||||
while let Some(resp) = resp_stream.next().await {
|
||||
match resp {
|
||||
Ok(msg) => {
|
||||
results.push(msg);
|
||||
if results.len() >= strategy.rs_quorum {
|
||||
break;
|
||||
}
|
||||
}
|
||||
Err(e) => {
|
||||
errors.push(e);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if results.len() >= strategy.rs_quorum {
|
||||
// Continue requests in background.
|
||||
// Continue the remaining requests immediately using tokio::spawn
|
||||
// but enqueue a task in the background runner
|
||||
// to ensure that the process won't exit until the requests are done
|
||||
// (if we had just enqueued the resp_stream.collect directly in the background runner,
|
||||
// the requests might have been put on hold in the background runner's queue,
|
||||
// in which case they might timeout or otherwise fail)
|
||||
if !strategy.rs_interrupt_after_quorum {
|
||||
let wait_finished_fut = tokio::spawn(async move {
|
||||
resp_stream.collect::<Vec<_>>().await;
|
||||
});
|
||||
self.background.spawn(wait_finished_fut.map(|_| Ok(())));
|
||||
}
|
||||
|
||||
Ok(results)
|
||||
} else {
|
||||
let errors = errors.iter().map(|e| format!("{}", e)).collect::<Vec<_>>();
|
||||
Err(Error::from(RpcError::TooManyErrors(errors)))
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Thin wrapper arround an `RpcHttpClient` specifying the path of the request
|
||||
pub struct RpcAddrClient<M: RpcMessage> {
|
||||
phantom: PhantomData<M>,
|
||||
|
||||
http_client: Arc<RpcHttpClient>,
|
||||
path: String,
|
||||
}
|
||||
|
||||
impl<M: RpcMessage> RpcAddrClient<M> {
|
||||
/// Create an RpcAddrClient from an HTTP client and the endpoint to reach for RPCs
|
||||
pub fn new(http_client: Arc<RpcHttpClient>, path: String) -> Self {
|
||||
Self {
|
||||
phantom: PhantomData::default(),
|
||||
http_client,
|
||||
path,
|
||||
}
|
||||
}
|
||||
|
||||
/// Make a RPC
|
||||
pub async fn call<MB>(
|
||||
&self,
|
||||
to_addr: &SocketAddr,
|
||||
msg: MB,
|
||||
timeout: Duration,
|
||||
) -> Result<Result<M, Error>, RpcError>
|
||||
where
|
||||
MB: Borrow<M>,
|
||||
{
|
||||
self.http_client
|
||||
.call(&self.path, to_addr, msg, timeout)
|
||||
.await
|
||||
}
|
||||
}
|
||||
|
||||
/// HTTP client used to make RPCs
|
||||
pub struct RpcHttpClient {
|
||||
request_limiter: Semaphore,
|
||||
method: ClientMethod,
|
||||
}
|
||||
|
||||
enum ClientMethod {
|
||||
Http(Client<HttpConnector, hyper::Body>),
|
||||
Https(Client<tls_util::HttpsConnectorFixedDnsname<HttpConnector>, hyper::Body>),
|
||||
}
|
||||
|
||||
impl RpcHttpClient {
|
||||
/// Create a new RpcHttpClient
|
||||
pub fn new(
|
||||
max_concurrent_requests: usize,
|
||||
tls_config: &Option<TlsConfig>,
|
||||
) -> Result<Self, Error> {
|
||||
let method = if let Some(cf) = tls_config {
|
||||
let ca_certs = tls_util::load_certs(&cf.ca_cert).map_err(|e| {
|
||||
Error::Message(format!("Failed to open CA certificate file: {:?}", e))
|
||||
})?;
|
||||
let node_certs = tls_util::load_certs(&cf.node_cert)
|
||||
.map_err(|e| Error::Message(format!("Failed to open certificate file: {:?}", e)))?;
|
||||
let node_key = tls_util::load_private_key(&cf.node_key)
|
||||
.map_err(|e| Error::Message(format!("Failed to open private key file: {:?}", e)))?;
|
||||
|
||||
let mut config = rustls::ClientConfig::new();
|
||||
|
||||
for crt in ca_certs.iter() {
|
||||
config.root_store.add(crt)?;
|
||||
}
|
||||
|
||||
config.set_single_client_cert([&node_certs[..], &ca_certs[..]].concat(), node_key)?;
|
||||
|
||||
let connector =
|
||||
tls_util::HttpsConnectorFixedDnsname::<HttpConnector>::new(config, "garage");
|
||||
|
||||
ClientMethod::Https(Client::builder().build(connector))
|
||||
} else {
|
||||
ClientMethod::Http(Client::new())
|
||||
};
|
||||
Ok(RpcHttpClient {
|
||||
method,
|
||||
request_limiter: Semaphore::new(max_concurrent_requests),
|
||||
})
|
||||
}
|
||||
|
||||
/// Make a RPC
|
||||
async fn call<M, MB>(
|
||||
&self,
|
||||
path: &str,
|
||||
to_addr: &SocketAddr,
|
||||
msg: MB,
|
||||
timeout: Duration,
|
||||
) -> Result<Result<M, Error>, RpcError>
|
||||
where
|
||||
MB: Borrow<M>,
|
||||
M: RpcMessage,
|
||||
{
|
||||
let uri = match self.method {
|
||||
ClientMethod::Http(_) => format!("http://{}/{}", to_addr, path),
|
||||
ClientMethod::Https(_) => format!("https://{}/{}", to_addr, path),
|
||||
};
|
||||
|
||||
let req = Request::builder()
|
||||
.method(Method::POST)
|
||||
.uri(uri)
|
||||
.body(Body::from(rmp_to_vec_all_named(msg.borrow())?))?;
|
||||
|
||||
let resp_fut = match &self.method {
|
||||
ClientMethod::Http(client) => client.request(req).fuse(),
|
||||
ClientMethod::Https(client) => client.request(req).fuse(),
|
||||
};
|
||||
|
||||
trace!("({}) Acquiring request_limiter slot...", path);
|
||||
let slot = self.request_limiter.acquire().await;
|
||||
trace!("({}) Got slot, doing request to {}...", path, to_addr);
|
||||
let resp = tokio::time::timeout(timeout, resp_fut)
|
||||
.await
|
||||
.map_err(|e| {
|
||||
debug!(
|
||||
"RPC timeout to {}: {}",
|
||||
to_addr,
|
||||
debug_serialize(msg.borrow())
|
||||
);
|
||||
e
|
||||
})?
|
||||
.map_err(|e| {
|
||||
warn!(
|
||||
"RPC HTTP client error when connecting to {}: {}",
|
||||
to_addr, e
|
||||
);
|
||||
e
|
||||
})?;
|
||||
|
||||
let status = resp.status();
|
||||
trace!("({}) Request returned, got status {}", path, status);
|
||||
let body = hyper::body::to_bytes(resp.into_body()).await?;
|
||||
drop(slot);
|
||||
|
||||
match rmp_serde::decode::from_read::<_, Result<M, String>>(&body[..])? {
|
||||
Err(e) => Ok(Err(Error::RemoteError(e, status))),
|
||||
Ok(x) => Ok(Ok(x)),
|
||||
}
|
||||
}
|
||||
}
|
206
src/rpc/rpc_helper.rs
Normal file
206
src/rpc/rpc_helper.rs
Normal file
|
@ -0,0 +1,206 @@
|
|||
//! Contain structs related to making RPCs
|
||||
use std::sync::Arc;
|
||||
use std::time::Duration;
|
||||
|
||||
use futures::future::join_all;
|
||||
use futures::stream::futures_unordered::FuturesUnordered;
|
||||
use futures::stream::StreamExt;
|
||||
use futures_util::future::FutureExt;
|
||||
use tokio::select;
|
||||
|
||||
pub use netapp::endpoint::{Endpoint, EndpointHandler, Message};
|
||||
use netapp::peering::fullmesh::FullMeshPeeringStrategy;
|
||||
pub use netapp::proto::*;
|
||||
pub use netapp::{NetApp, NodeID};
|
||||
|
||||
use garage_util::background::BackgroundRunner;
|
||||
use garage_util::error::{Error, RpcError};
|
||||
|
||||
const DEFAULT_TIMEOUT: Duration = Duration::from_secs(10);
|
||||
|
||||
/// Strategy to apply when making RPC
|
||||
#[derive(Copy, Clone)]
|
||||
pub struct RequestStrategy {
|
||||
/// Max time to wait for reponse
|
||||
pub rs_timeout: Duration,
|
||||
/// Min number of response to consider the request successful
|
||||
pub rs_quorum: Option<usize>,
|
||||
/// Should requests be dropped after enough response are received
|
||||
pub rs_interrupt_after_quorum: bool,
|
||||
/// Request priority
|
||||
pub rs_priority: RequestPriority,
|
||||
}
|
||||
|
||||
impl RequestStrategy {
|
||||
/// Create a RequestStrategy with default timeout and not interrupting when quorum reached
|
||||
pub fn with_priority(prio: RequestPriority) -> Self {
|
||||
RequestStrategy {
|
||||
rs_timeout: DEFAULT_TIMEOUT,
|
||||
rs_quorum: None,
|
||||
rs_interrupt_after_quorum: false,
|
||||
rs_priority: prio,
|
||||
}
|
||||
}
|
||||
/// Set quorum to be reached for request
|
||||
pub fn with_quorum(mut self, quorum: usize) -> Self {
|
||||
self.rs_quorum = Some(quorum);
|
||||
self
|
||||
}
|
||||
/// Set timeout of the strategy
|
||||
pub fn with_timeout(mut self, timeout: Duration) -> Self {
|
||||
self.rs_timeout = timeout;
|
||||
self
|
||||
}
|
||||
/// Set if requests can be dropped after quorum has been reached
|
||||
/// In general true for read requests, and false for write
|
||||
pub fn interrupt_after_quorum(mut self, interrupt: bool) -> Self {
|
||||
self.rs_interrupt_after_quorum = interrupt;
|
||||
self
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Clone)]
|
||||
pub struct RpcHelper {
|
||||
pub(crate) fullmesh: Arc<FullMeshPeeringStrategy>,
|
||||
pub(crate) background: Arc<BackgroundRunner>,
|
||||
}
|
||||
|
||||
impl RpcHelper {
|
||||
pub async fn call<M, H>(
|
||||
&self,
|
||||
endpoint: &Endpoint<M, H>,
|
||||
to: NodeID,
|
||||
msg: M,
|
||||
strat: RequestStrategy,
|
||||
) -> Result<M::Response, Error>
|
||||
where
|
||||
M: Message,
|
||||
H: EndpointHandler<M>,
|
||||
{
|
||||
self.call_arc(endpoint, to, Arc::new(msg), strat).await
|
||||
}
|
||||
|
||||
pub async fn call_arc<M, H>(
|
||||
&self,
|
||||
endpoint: &Endpoint<M, H>,
|
||||
to: NodeID,
|
||||
msg: Arc<M>,
|
||||
strat: RequestStrategy,
|
||||
) -> Result<M::Response, Error>
|
||||
where
|
||||
M: Message,
|
||||
H: EndpointHandler<M>,
|
||||
{
|
||||
select! {
|
||||
res = endpoint.call(&to, &msg, strat.rs_priority) => Ok(res?),
|
||||
_ = tokio::time::sleep(strat.rs_timeout) => Err(Error::Rpc(RpcError::Timeout)),
|
||||
}
|
||||
}
|
||||
|
||||
pub async fn call_many<M, H>(
|
||||
&self,
|
||||
endpoint: &Endpoint<M, H>,
|
||||
to: &[NodeID],
|
||||
msg: M,
|
||||
strat: RequestStrategy,
|
||||
) -> Vec<(NodeID, Result<M::Response, Error>)>
|
||||
where
|
||||
M: Message,
|
||||
H: EndpointHandler<M>,
|
||||
{
|
||||
let msg = Arc::new(msg);
|
||||
let resps = join_all(
|
||||
to.iter()
|
||||
.map(|to| self.call_arc(endpoint, *to, msg.clone(), strat)),
|
||||
)
|
||||
.await;
|
||||
to.iter()
|
||||
.cloned()
|
||||
.zip(resps.into_iter())
|
||||
.collect::<Vec<_>>()
|
||||
}
|
||||
|
||||
pub async fn broadcast<M, H>(
|
||||
&self,
|
||||
endpoint: &Endpoint<M, H>,
|
||||
msg: M,
|
||||
strat: RequestStrategy,
|
||||
) -> Vec<(NodeID, Result<M::Response, Error>)>
|
||||
where
|
||||
M: Message,
|
||||
H: EndpointHandler<M>,
|
||||
{
|
||||
let to = self
|
||||
.fullmesh
|
||||
.get_peer_list()
|
||||
.iter()
|
||||
.map(|p| p.id)
|
||||
.collect::<Vec<_>>();
|
||||
self.call_many(endpoint, &to[..], msg, strat).await
|
||||
}
|
||||
|
||||
/// Make a RPC call to multiple servers, returning either a Vec of responses, or an error if
|
||||
/// strategy could not be respected due to too many errors
|
||||
pub async fn try_call_many<M, H>(
|
||||
&self,
|
||||
endpoint: &Arc<Endpoint<M, H>>,
|
||||
to: &[NodeID],
|
||||
msg: M,
|
||||
strategy: RequestStrategy,
|
||||
) -> Result<Vec<M::Response>, Error>
|
||||
where
|
||||
M: Message + 'static,
|
||||
H: EndpointHandler<M> + 'static,
|
||||
{
|
||||
let msg = Arc::new(msg);
|
||||
let mut resp_stream = to
|
||||
.to_vec()
|
||||
.into_iter()
|
||||
.map(|to| {
|
||||
let self2 = self.clone();
|
||||
let msg = msg.clone();
|
||||
let endpoint2 = endpoint.clone();
|
||||
async move { self2.call_arc(&endpoint2, to, msg, strategy).await }
|
||||
})
|
||||
.collect::<FuturesUnordered<_>>();
|
||||
|
||||
let mut results = vec![];
|
||||
let mut errors = vec![];
|
||||
let quorum = strategy.rs_quorum.unwrap_or(to.len());
|
||||
|
||||
while let Some(resp) = resp_stream.next().await {
|
||||
match resp {
|
||||
Ok(msg) => {
|
||||
results.push(msg);
|
||||
if results.len() >= quorum {
|
||||
break;
|
||||
}
|
||||
}
|
||||
Err(e) => {
|
||||
errors.push(e);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if results.len() >= quorum {
|
||||
// Continue requests in background.
|
||||
// Continue the remaining requests immediately using tokio::spawn
|
||||
// but enqueue a task in the background runner
|
||||
// to ensure that the process won't exit until the requests are done
|
||||
// (if we had just enqueued the resp_stream.collect directly in the background runner,
|
||||
// the requests might have been put on hold in the background runner's queue,
|
||||
// in which case they might timeout or otherwise fail)
|
||||
if !strategy.rs_interrupt_after_quorum {
|
||||
let wait_finished_fut = tokio::spawn(async move {
|
||||
resp_stream.collect::<Vec<_>>().await;
|
||||
});
|
||||
self.background.spawn(wait_finished_fut.map(|_| Ok(())));
|
||||
}
|
||||
|
||||
Ok(results)
|
||||
} else {
|
||||
let errors = errors.iter().map(|e| format!("{}", e)).collect::<Vec<_>>();
|
||||
Err(Error::from(RpcError::TooManyErrors(errors)))
|
||||
}
|
||||
}
|
||||
}
|
|
@ -1,247 +0,0 @@
|
|||
//! Contains structs related to receiving RPCs
|
||||
use std::collections::HashMap;
|
||||
use std::net::SocketAddr;
|
||||
use std::pin::Pin;
|
||||
use std::sync::Arc;
|
||||
use std::time::Instant;
|
||||
|
||||
use futures::future::Future;
|
||||
use futures_util::future::*;
|
||||
use futures_util::stream::*;
|
||||
use hyper::server::conn::AddrStream;
|
||||
use hyper::service::{make_service_fn, service_fn};
|
||||
use hyper::{Body, Method, Request, Response, Server, StatusCode};
|
||||
use serde::{Deserialize, Serialize};
|
||||
use tokio::net::{TcpListener, TcpStream};
|
||||
use tokio_rustls::server::TlsStream;
|
||||
use tokio_rustls::TlsAcceptor;
|
||||
use tokio_stream::wrappers::TcpListenerStream;
|
||||
|
||||
use garage_util::config::TlsConfig;
|
||||
use garage_util::data::*;
|
||||
use garage_util::error::Error;
|
||||
|
||||
use crate::tls_util;
|
||||
|
||||
/// Trait for messages that can be sent as RPC
|
||||
pub trait RpcMessage: Serialize + for<'de> Deserialize<'de> + Send + Sync {}
|
||||
|
||||
type ResponseFuture = Pin<Box<dyn Future<Output = Result<Response<Body>, Error>> + Send>>;
|
||||
type Handler = Box<dyn Fn(Request<Body>, SocketAddr) -> ResponseFuture + Send + Sync>;
|
||||
|
||||
/// Structure handling RPCs
|
||||
pub struct RpcServer {
|
||||
/// The address the RpcServer will bind
|
||||
pub bind_addr: SocketAddr,
|
||||
/// The tls configuration used for RPC
|
||||
pub tls_config: Option<TlsConfig>,
|
||||
|
||||
handlers: HashMap<String, Handler>,
|
||||
}
|
||||
|
||||
async fn handle_func<M, F, Fut>(
|
||||
handler: Arc<F>,
|
||||
req: Request<Body>,
|
||||
sockaddr: SocketAddr,
|
||||
name: Arc<String>,
|
||||
) -> Result<Response<Body>, Error>
|
||||
where
|
||||
M: RpcMessage + 'static,
|
||||
F: Fn(M, SocketAddr) -> Fut + Send + Sync + 'static,
|
||||
Fut: Future<Output = Result<M, Error>> + Send + 'static,
|
||||
{
|
||||
let begin_time = Instant::now();
|
||||
let whole_body = hyper::body::to_bytes(req.into_body()).await?;
|
||||
let msg = rmp_serde::decode::from_read::<_, M>(&whole_body[..])?;
|
||||
|
||||
trace!(
|
||||
"Request message: {}",
|
||||
serde_json::to_string(&msg)
|
||||
.unwrap_or_else(|_| "<json error>".into())
|
||||
.chars()
|
||||
.take(100)
|
||||
.collect::<String>()
|
||||
);
|
||||
|
||||
match handler(msg, sockaddr).await {
|
||||
Ok(resp) => {
|
||||
let resp_bytes = rmp_to_vec_all_named::<Result<M, String>>(&Ok(resp))?;
|
||||
let rpc_duration = (Instant::now() - begin_time).as_millis();
|
||||
if rpc_duration > 100 {
|
||||
debug!("RPC {} ok, took long: {} ms", name, rpc_duration,);
|
||||
}
|
||||
Ok(Response::new(Body::from(resp_bytes)))
|
||||
}
|
||||
Err(e) => {
|
||||
let err_str = format!("{}", e);
|
||||
let rep_bytes = rmp_to_vec_all_named::<Result<M, String>>(&Err(err_str))?;
|
||||
let mut err_response = Response::new(Body::from(rep_bytes));
|
||||
*err_response.status_mut() = match e {
|
||||
Error::BadRpc(_) => StatusCode::BAD_REQUEST,
|
||||
_ => StatusCode::INTERNAL_SERVER_ERROR,
|
||||
};
|
||||
warn!(
|
||||
"RPC error ({}): {} ({} ms)",
|
||||
name,
|
||||
e,
|
||||
(Instant::now() - begin_time).as_millis(),
|
||||
);
|
||||
Ok(err_response)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl RpcServer {
|
||||
/// Create a new RpcServer
|
||||
pub fn new(bind_addr: SocketAddr, tls_config: Option<TlsConfig>) -> Self {
|
||||
Self {
|
||||
bind_addr,
|
||||
tls_config,
|
||||
handlers: HashMap::new(),
|
||||
}
|
||||
}
|
||||
|
||||
/// Add handler handling request made to `name`
|
||||
pub fn add_handler<M, F, Fut>(&mut self, name: String, handler: F)
|
||||
where
|
||||
M: RpcMessage + 'static,
|
||||
F: Fn(M, SocketAddr) -> Fut + Send + Sync + 'static,
|
||||
Fut: Future<Output = Result<M, Error>> + Send + 'static,
|
||||
{
|
||||
let name2 = Arc::new(name.clone());
|
||||
let handler_arc = Arc::new(handler);
|
||||
let handler = Box::new(move |req: Request<Body>, sockaddr: SocketAddr| {
|
||||
let handler2 = handler_arc.clone();
|
||||
let b: ResponseFuture = Box::pin(handle_func(handler2, req, sockaddr, name2.clone()));
|
||||
b
|
||||
});
|
||||
self.handlers.insert(name, handler);
|
||||
}
|
||||
|
||||
async fn handler(
|
||||
self: Arc<Self>,
|
||||
req: Request<Body>,
|
||||
addr: SocketAddr,
|
||||
) -> Result<Response<Body>, Error> {
|
||||
if req.method() != Method::POST {
|
||||
let mut bad_request = Response::default();
|
||||
*bad_request.status_mut() = StatusCode::BAD_REQUEST;
|
||||
return Ok(bad_request);
|
||||
}
|
||||
|
||||
let path = &req.uri().path()[1..].to_string();
|
||||
|
||||
let handler = match self.handlers.get(path) {
|
||||
Some(h) => h,
|
||||
None => {
|
||||
let mut not_found = Response::default();
|
||||
*not_found.status_mut() = StatusCode::NOT_FOUND;
|
||||
return Ok(not_found);
|
||||
}
|
||||
};
|
||||
|
||||
trace!("({}) Handling request", path);
|
||||
|
||||
let resp_waiter = tokio::spawn(handler(req, addr));
|
||||
match resp_waiter.await {
|
||||
Err(err) => {
|
||||
warn!("Handler await error: {}", err);
|
||||
let mut ise = Response::default();
|
||||
*ise.status_mut() = StatusCode::INTERNAL_SERVER_ERROR;
|
||||
Ok(ise)
|
||||
}
|
||||
Ok(Err(err)) => {
|
||||
trace!("({}) Request handler failed: {}", path, err);
|
||||
let mut bad_request = Response::new(Body::from(format!("{}", err)));
|
||||
*bad_request.status_mut() = StatusCode::BAD_REQUEST;
|
||||
Ok(bad_request)
|
||||
}
|
||||
Ok(Ok(resp)) => {
|
||||
trace!("({}) Request handler succeeded", path);
|
||||
Ok(resp)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Run the RpcServer
|
||||
pub async fn run(
|
||||
self: Arc<Self>,
|
||||
shutdown_signal: impl Future<Output = ()>,
|
||||
) -> Result<(), Error> {
|
||||
if let Some(tls_config) = self.tls_config.as_ref() {
|
||||
let ca_certs = tls_util::load_certs(&tls_config.ca_cert)?;
|
||||
let node_certs = tls_util::load_certs(&tls_config.node_cert)?;
|
||||
let node_key = tls_util::load_private_key(&tls_config.node_key)?;
|
||||
|
||||
let mut ca_store = rustls::RootCertStore::empty();
|
||||
for crt in ca_certs.iter() {
|
||||
ca_store.add(crt)?;
|
||||
}
|
||||
|
||||
let mut config =
|
||||
rustls::ServerConfig::new(rustls::AllowAnyAuthenticatedClient::new(ca_store));
|
||||
config.set_single_cert([&node_certs[..], &ca_certs[..]].concat(), node_key)?;
|
||||
let tls_acceptor = Arc::new(TlsAcceptor::from(Arc::new(config)));
|
||||
|
||||
let listener = TcpListener::bind(&self.bind_addr).await?;
|
||||
let incoming = TcpListenerStream::new(listener).filter_map(|socket| async {
|
||||
match socket {
|
||||
Ok(stream) => match tls_acceptor.clone().accept(stream).await {
|
||||
Ok(x) => Some(Ok::<_, hyper::Error>(x)),
|
||||
Err(_e) => None,
|
||||
},
|
||||
Err(_) => None,
|
||||
}
|
||||
});
|
||||
let incoming = hyper::server::accept::from_stream(incoming);
|
||||
|
||||
let self_arc = self.clone();
|
||||
let service = make_service_fn(|conn: &TlsStream<TcpStream>| {
|
||||
let client_addr = conn
|
||||
.get_ref()
|
||||
.0
|
||||
.peer_addr()
|
||||
.unwrap_or_else(|_| ([0, 0, 0, 0], 0).into());
|
||||
let self_arc = self_arc.clone();
|
||||
async move {
|
||||
Ok::<_, Error>(service_fn(move |req: Request<Body>| {
|
||||
self_arc.clone().handler(req, client_addr).map_err(|e| {
|
||||
warn!("RPC handler error: {}", e);
|
||||
e
|
||||
})
|
||||
}))
|
||||
}
|
||||
});
|
||||
|
||||
let server = Server::builder(incoming).serve(service);
|
||||
|
||||
let graceful = server.with_graceful_shutdown(shutdown_signal);
|
||||
info!("RPC server listening on http://{}", self.bind_addr);
|
||||
|
||||
graceful.await?;
|
||||
} else {
|
||||
let self_arc = self.clone();
|
||||
let service = make_service_fn(move |conn: &AddrStream| {
|
||||
let client_addr = conn.remote_addr();
|
||||
let self_arc = self_arc.clone();
|
||||
async move {
|
||||
Ok::<_, Error>(service_fn(move |req: Request<Body>| {
|
||||
self_arc.clone().handler(req, client_addr).map_err(|e| {
|
||||
warn!("RPC handler error: {}", e);
|
||||
e
|
||||
})
|
||||
}))
|
||||
}
|
||||
});
|
||||
|
||||
let server = Server::bind(&self.bind_addr).serve(service);
|
||||
|
||||
let graceful = server.with_graceful_shutdown(shutdown_signal);
|
||||
info!("RPC server listening on http://{}", self.bind_addr);
|
||||
|
||||
graceful.await?;
|
||||
}
|
||||
|
||||
Ok(())
|
||||
}
|
||||
}
|
363
src/rpc/system.rs
Normal file
363
src/rpc/system.rs
Normal file
|
@ -0,0 +1,363 @@
|
|||
//! Module containing structs related to membership management
|
||||
use std::io::{Read, Write};
|
||||
use std::net::SocketAddr;
|
||||
use std::path::{Path, PathBuf};
|
||||
use std::sync::Arc;
|
||||
use std::time::Duration;
|
||||
|
||||
use arc_swap::ArcSwap;
|
||||
use async_trait::async_trait;
|
||||
use futures::{join, select};
|
||||
use futures_util::future::*;
|
||||
use serde::{Deserialize, Serialize};
|
||||
use sodiumoxide::crypto::sign::ed25519;
|
||||
use tokio::sync::watch;
|
||||
use tokio::sync::Mutex;
|
||||
|
||||
use netapp::endpoint::{Endpoint, EndpointHandler, Message};
|
||||
use netapp::peering::fullmesh::FullMeshPeeringStrategy;
|
||||
use netapp::proto::*;
|
||||
use netapp::{NetApp, NetworkKey, NodeID, NodeKey};
|
||||
|
||||
use garage_util::background::BackgroundRunner;
|
||||
use garage_util::error::Error;
|
||||
use garage_util::persister::Persister;
|
||||
//use garage_util::time::*;
|
||||
|
||||
//use crate::consul::get_consul_nodes;
|
||||
use crate::ring::*;
|
||||
use crate::rpc_helper::{RequestStrategy, RpcHelper};
|
||||
|
||||
/// Discovery period, in seconds: 60
const DISCOVERY_INTERVAL: Duration = Duration::from_secs(60);
/// Ping timeout, in seconds: 2
const PING_TIMEOUT: Duration = Duration::from_secs(2);

/// Path of the RPC endpoint used for calls related to membership
pub const SYSTEM_RPC_PATH: &str = "garage_rpc/membership.rs/SystemRpc";
|
||||
|
||||
/// RPC messages related to membership
|
||||
#[derive(Debug, Serialize, Deserialize, Clone)]
|
||||
pub enum SystemRpc {
|
||||
/// Response to successfull advertisements
|
||||
Ok,
|
||||
/// Error response
|
||||
Error(String),
|
||||
/// Ask other node its config. Answered with AdvertiseConfig
|
||||
PullConfig,
|
||||
/// Advertise Garage status. Answered with another AdvertiseStatus.
|
||||
/// Exchanged with every node on a regular basis.
|
||||
AdvertiseStatus(StateInfo),
|
||||
/// Advertisement of nodes config. Sent spontaneously or in response to PullConfig
|
||||
AdvertiseConfig(NetworkConfig),
|
||||
/// Get known nodes states
|
||||
GetKnownNodes,
|
||||
/// Return known nodes
|
||||
ReturnKnownNodes(Vec<(NodeID, SocketAddr, bool)>),
|
||||
}
|
||||
|
||||
impl Message for SystemRpc {
|
||||
type Response = SystemRpc;
|
||||
}
|
||||
|
||||
/// This node's membership manager
|
||||
pub struct System {
|
||||
/// The id of this node
|
||||
pub id: NodeID,
|
||||
|
||||
persist_config: Persister<NetworkConfig>,
|
||||
|
||||
state_info: ArcSwap<StateInfo>,
|
||||
|
||||
pub netapp: Arc<NetApp>,
|
||||
fullmesh: Arc<FullMeshPeeringStrategy>,
|
||||
pub rpc: RpcHelper,
|
||||
|
||||
system_endpoint: Arc<Endpoint<SystemRpc, System>>,
|
||||
|
||||
rpc_listen_addr: SocketAddr,
|
||||
bootstrap_peers: Vec<(NodeID, SocketAddr)>,
|
||||
consul_host: Option<String>,
|
||||
consul_service_name: Option<String>,
|
||||
replication_factor: usize,
|
||||
|
||||
/// The ring
|
||||
pub ring: watch::Receiver<Arc<Ring>>,
|
||||
update_ring: Mutex<watch::Sender<Arc<Ring>>>,
|
||||
|
||||
/// The job runner of this node
|
||||
pub background: Arc<BackgroundRunner>,
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, Serialize, Deserialize)]
|
||||
pub struct StateInfo {
|
||||
/// Hostname of the node
|
||||
pub hostname: String,
|
||||
/// Replication factor configured on the node
|
||||
pub replication_factor: usize,
|
||||
/// Configuration version
|
||||
pub config_version: u64,
|
||||
}
|
||||
|
||||
fn gen_node_key(metadata_dir: &Path) -> Result<NodeKey, Error> {
|
||||
let mut id_file = metadata_dir.to_path_buf();
|
||||
id_file.push("node_id");
|
||||
if id_file.as_path().exists() {
|
||||
let mut f = std::fs::File::open(id_file.as_path())?;
|
||||
let mut d = vec![];
|
||||
f.read_to_end(&mut d)?;
|
||||
if d.len() != 64 {
|
||||
return Err(Error::Message("Corrupt node_id file".to_string()));
|
||||
}
|
||||
|
||||
let mut key = [0u8; 64];
|
||||
key.copy_from_slice(&d[..]);
|
||||
Ok(NodeKey::from_slice(&key[..]).unwrap())
|
||||
} else {
|
||||
let (key, _) = ed25519::gen_keypair();
|
||||
|
||||
let mut f = std::fs::File::create(id_file.as_path())?;
|
||||
f.write_all(&key[..])?;
|
||||
Ok(NodeKey::from_slice(&key[..]).unwrap())
|
||||
}
|
||||
}
|
||||
|
||||
impl System {
|
||||
/// Create this node's membership manager
|
||||
pub fn new(
|
||||
network_key: NetworkKey,
|
||||
metadata_dir: PathBuf,
|
||||
background: Arc<BackgroundRunner>,
|
||||
replication_factor: usize,
|
||||
rpc_listen_addr: SocketAddr,
|
||||
bootstrap_peers: Vec<(NodeID, SocketAddr)>,
|
||||
quentin
commented
Steps to reproduce:
1. Create a file `/etc/garage.toml` with the content given in the Quickstart
2. Never start the daemon (check that no meta or data folder has been created)
3. Run `garage node-id` (this is similar to the steps advertised in "Cookbook > Deploying Garage")
4. Get an error:
```
strace -e open ./result/bin/garage node-id
open("/proc/self/cgroup", O_RDONLY|O_CLOEXEC) = 3
open("/proc/self/mountinfo", O_RDONLY|O_CLOEXEC) = 3
open("/sys/fs/cgroup/cpu,cpuacct/user.slice/user-1000.slice/user@1000.service/cpu.cfs_quota_us", O_RDONLY|O_CLOEXEC) = 3
open("/etc/garage.toml", O_RDONLY|O_CLOEXEC) = 9
open("/tmp/meta/node_key", O_WRONLY|O_CREAT|O_TRUNC|O_CLOEXEC, 0666) = -1 ENOENT (Aucun fichier ou dossier de ce type)
ERROR garage > Unable to read or generate node key: IO error: No such file or directory (os error 2)
+++ exited with 1 +++
```
2 points:
1. We need to recursively create folders before trying to write the key
2. We might want to improve our error reporting by specifying the failed path.
|
||||
consul_host: Option<String>,
|
||||
consul_service_name: Option<String>,
|
||||
quentin
commented
```
$ RUST_LOG=garage=debug,netapp=debug strace -e open ./result/bin/garage node-id
open("/proc/self/cgroup", O_RDONLY|O_CLOEXEC) = 3
open("/proc/self/mountinfo", O_RDONLY|O_CLOEXEC) = 3
open("/sys/fs/cgroup/cpu,cpuacct/user.slice/user-1000.slice/user@1000.service/cpu.cfs_quota_us", O_RDONLY|O_CLOEXEC) = 3
open("/etc/garage.toml", O_RDONLY|O_CLOEXEC) = 9
open("/tmp/meta/node_key", O_WRONLY|O_CREAT|O_TRUNC|O_CLOEXEC, 0666) = -1 ENOENT (Aucun fichier ou dossier de ce type)
ERROR garage > Unable to read or generate node key: IO error: No such file or directory (os error 2)
+++ exited with 1 +++
```
There is a good chance that the `garage node-id` command will fail, because the key is stored in the `meta` folder, which very likely has not been created yet; it then throws the cryptic error above, which I diagnosed through strace.
|
||||
) -> Arc<Self> {
|
||||
let node_key = gen_node_key(&metadata_dir).expect("Unable to read or generate node ID");
|
||||
info!("Node public key: {}", hex::encode(&node_key.public_key()));
|
||||
|
||||
let persist_config = Persister::new(&metadata_dir, "network_config");
|
||||
|
||||
let net_config = match persist_config.load() {
|
||||
Ok(x) => x,
|
||||
Err(e) => {
|
||||
match Persister::<garage_rpc_021::ring::NetworkConfig>::new(
|
||||
&metadata_dir,
|
||||
"network_config",
|
||||
)
|
||||
.load()
|
||||
{
|
||||
Ok(old_config) => NetworkConfig::migrate_from_021(old_config),
|
||||
Err(e2) => {
|
||||
info!(
|
||||
"No valid previous network configuration stored ({}, {}), starting fresh.",
|
||||
e, e2
|
||||
);
|
||||
NetworkConfig::new()
|
||||
}
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
let state_info = StateInfo {
|
||||
hostname: gethostname::gethostname()
|
||||
.into_string()
|
||||
.unwrap_or_else(|_| "<invalid utf-8>".to_string()),
|
||||
replication_factor: replication_factor,
|
||||
config_version: net_config.version,
|
||||
};
|
||||
|
||||
let ring = Ring::new(net_config, replication_factor);
|
||||
let (update_ring, ring) = watch::channel(Arc::new(ring));
|
||||
|
||||
let netapp = NetApp::new(network_key, node_key);
|
||||
let fullmesh = FullMeshPeeringStrategy::new(netapp.clone(), bootstrap_peers.clone());
|
||||
|
||||
let system_endpoint = netapp.endpoint(SYSTEM_RPC_PATH.into());
|
||||
|
||||
let sys = Arc::new(System {
|
||||
id: netapp.id.clone(),
|
||||
persist_config,
|
||||
state_info: ArcSwap::new(Arc::new(state_info)),
|
||||
netapp: netapp.clone(),
|
||||
fullmesh: fullmesh.clone(),
|
||||
rpc: RpcHelper {
|
||||
fullmesh: fullmesh.clone(),
|
||||
background: background.clone(),
|
||||
},
|
||||
system_endpoint,
|
||||
replication_factor,
|
||||
rpc_listen_addr,
|
||||
bootstrap_peers,
|
||||
consul_host,
|
||||
consul_service_name,
|
||||
ring,
|
||||
update_ring: Mutex::new(update_ring),
|
||||
background: background.clone(),
|
||||
});
|
||||
sys.system_endpoint.set_handler(sys.clone());
|
||||
sys
|
||||
}
|
||||
|
||||
/// Perform bootstraping, starting the ping loop
|
||||
pub async fn run(self: Arc<Self>, must_exit: watch::Receiver<bool>) {
|
||||
join!(
|
||||
self.netapp
|
||||
.clone()
|
||||
.listen(self.rpc_listen_addr, None, must_exit.clone()),
|
||||
self.fullmesh.clone().run(must_exit.clone()),
|
||||
self.discovery_loop(must_exit.clone()),
|
||||
);
|
||||
}
|
||||
|
||||
// ---- INTERNALS ----
|
||||
|
||||
/// Save network configuration to disc
|
||||
async fn save_network_config(self: Arc<Self>) -> Result<(), Error> {
|
||||
let ring: Arc<Ring> = self.ring.borrow().clone();
|
||||
self.persist_config
|
||||
.save_async(&ring.config)
|
||||
.await
|
||||
.expect("Cannot save current cluster configuration");
|
||||
Ok(())
|
||||
}
|
||||
|
||||
fn update_state_info(&self) {
|
||||
let mut new_si: StateInfo = self.state_info.load().as_ref().clone();
|
||||
|
||||
let ring = self.ring.borrow();
|
||||
new_si.config_version = ring.config.version;
|
||||
self.state_info.swap(Arc::new(new_si));
|
||||
}
|
||||
|
||||
fn handle_pull_config(&self) -> SystemRpc {
|
||||
let ring = self.ring.borrow().clone();
|
||||
SystemRpc::AdvertiseConfig(ring.config.clone())
|
||||
}
|
||||
|
||||
async fn handle_advertise_config(
|
||||
self: Arc<Self>,
|
||||
adv: &NetworkConfig,
|
||||
) -> Result<SystemRpc, Error> {
|
||||
let update_ring = self.update_ring.lock().await;
|
||||
let ring: Arc<Ring> = self.ring.borrow().clone();
|
||||
|
||||
if adv.version > ring.config.version {
|
||||
let ring = Ring::new(adv.clone(), self.replication_factor);
|
||||
update_ring.send(Arc::new(ring))?;
|
||||
drop(update_ring);
|
||||
|
||||
let self2 = self.clone();
|
||||
let adv2 = adv.clone();
|
||||
self.background.spawn_cancellable(async move {
|
||||
self2
|
||||
.rpc
|
||||
.broadcast(
|
||||
&self2.system_endpoint,
|
||||
SystemRpc::AdvertiseConfig(adv2),
|
||||
RequestStrategy::with_priority(PRIO_NORMAL),
|
||||
)
|
||||
.await;
|
||||
Ok(())
|
||||
});
|
||||
self.background.spawn(self.clone().save_network_config());
|
||||
}
|
||||
|
||||
Ok(SystemRpc::Ok)
|
||||
}
|
||||
|
||||
async fn discovery_loop(&self, mut stop_signal: watch::Receiver<bool>) {
|
||||
/* TODO
|
||||
let consul_config = match (&self.consul_host, &self.consul_service_name) {
|
||||
(Some(ch), Some(csn)) => Some((ch.clone(), csn.clone())),
|
||||
_ => None,
|
||||
};
|
||||
*/
|
||||
|
||||
while !*stop_signal.borrow() {
|
||||
let not_configured = self.ring.borrow().config.members.is_empty();
|
||||
let no_peers = self.fullmesh.get_peer_list().len() < self.replication_factor;
|
||||
let bad_peers = self
|
||||
.fullmesh
|
||||
.get_peer_list()
|
||||
.iter()
|
||||
.filter(|p| p.is_up())
|
||||
.count() != self.ring.borrow().config.members.len();
|
||||
|
||||
if not_configured || no_peers || bad_peers {
|
||||
info!("Doing a bootstrap/discovery step (not_configured: {}, no_peers: {}, bad_peers: {})", not_configured, no_peers, bad_peers);
|
||||
|
||||
let ping_list = self.bootstrap_peers.clone();
|
||||
|
||||
/*
|
||||
*TODO bring this back: persisted list of peers
|
||||
if let Ok(peers) = self.persist_status.load_async().await {
|
||||
ping_list.extend(peers.iter().map(|x| (x.addr, Some(x.id))));
|
||||
}
|
||||
*/
|
||||
|
||||
/*
|
||||
* TODO bring this back: get peers from consul
|
||||
if let Some((consul_host, consul_service_name)) = &consul_config {
|
||||
match get_consul_nodes(consul_host, consul_service_name).await {
|
||||
Ok(node_list) => {
|
||||
ping_list.extend(node_list.iter().map(|a| (*a, None)));
|
||||
}
|
||||
Err(e) => {
|
||||
warn!("Could not retrieve node list from Consul: {}", e);
|
||||
}
|
||||
}
|
||||
}
|
||||
*/
|
||||
|
||||
for (node_id, node_addr) in ping_list {
|
||||
tokio::spawn(self.netapp.clone().try_connect(node_addr, node_id));
|
||||
}
|
||||
}
|
||||
|
||||
let restart_at = tokio::time::sleep(DISCOVERY_INTERVAL);
|
||||
select! {
|
||||
_ = restart_at.fuse() => {},
|
||||
_ = stop_signal.changed().fuse() => {},
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
async fn pull_config(self: Arc<Self>, peer: NodeID) {
|
||||
let resp = self
|
||||
.rpc
|
||||
.call(
|
||||
&self.system_endpoint,
|
||||
peer,
|
||||
SystemRpc::PullConfig,
|
||||
RequestStrategy::with_priority(PRIO_HIGH).with_timeout(PING_TIMEOUT),
|
||||
)
|
||||
.await;
|
||||
if let Ok(SystemRpc::AdvertiseConfig(config)) = resp {
|
||||
let _: Result<_, _> = self.handle_advertise_config(&config).await;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[async_trait]
|
||||
impl EndpointHandler<SystemRpc> for System {
|
||||
async fn handle(self: &Arc<Self>, msg: &SystemRpc, _from: NodeID) -> SystemRpc {
|
||||
let resp = match msg {
|
||||
SystemRpc::PullConfig => Ok(self.handle_pull_config()),
|
||||
SystemRpc::AdvertiseConfig(adv) => self.clone().handle_advertise_config(&adv).await,
|
||||
SystemRpc::GetKnownNodes => {
|
||||
let known_nodes = self
|
||||
.fullmesh
|
||||
.get_peer_list()
|
||||
.iter()
|
||||
.map(|n| (n.id, n.addr, n.is_up()))
|
||||
.collect::<Vec<_>>();
|
||||
Ok(SystemRpc::ReturnKnownNodes(known_nodes))
|
||||
}
|
||||
_ => Err(Error::BadRpc("Unexpected RPC message".to_string())),
|
||||
};
|
||||
match resp {
|
||||
Ok(r) => r,
|
||||
Err(e) => SystemRpc::Error(format!("{}", e)),
|
||||
}
|
||||
}
|
||||
}
|
|
@ -1,140 +0,0 @@
|
|||
use core::future::Future;
|
||||
use core::task::{Context, Poll};
|
||||
use std::pin::Pin;
|
||||
use std::sync::Arc;
|
||||
use std::{fs, io};
|
||||
|
||||
use futures_util::future::*;
|
||||
use hyper::client::connect::Connection;
|
||||
use hyper::client::HttpConnector;
|
||||
use hyper::service::Service;
|
||||
use hyper::Uri;
|
||||
use hyper_rustls::MaybeHttpsStream;
|
||||
use rustls::internal::pemfile;
|
||||
use tokio::io::{AsyncRead, AsyncWrite};
|
||||
use tokio_rustls::TlsConnector;
|
||||
use webpki::DNSNameRef;
|
||||
|
||||
use garage_util::error::Error;
|
||||
|
||||
pub fn load_certs(filename: &str) -> Result<Vec<rustls::Certificate>, Error> {
|
||||
let certfile = fs::File::open(&filename)?;
|
||||
let mut reader = io::BufReader::new(certfile);
|
||||
|
||||
let certs = pemfile::certs(&mut reader).map_err(|_| {
|
||||
Error::Message(format!(
|
||||
"Could not deecode certificates from file: {}",
|
||||
filename
|
||||
))
|
||||
})?;
|
||||
|
||||
if certs.is_empty() {
|
||||
return Err(Error::Message(format!(
|
||||
"Invalid certificate file: {}",
|
||||
filename
|
||||
)));
|
||||
}
|
||||
Ok(certs)
|
||||
}
|
||||
|
||||
pub fn load_private_key(filename: &str) -> Result<rustls::PrivateKey, Error> {
|
||||
let keydata = fs::read_to_string(filename)?;
|
||||
|
||||
let mut buf1 = keydata.as_bytes();
|
||||
let rsa_keys = pemfile::rsa_private_keys(&mut buf1).unwrap_or_default();
|
||||
|
||||
let mut buf2 = keydata.as_bytes();
|
||||
let pkcs8_keys = pemfile::pkcs8_private_keys(&mut buf2).unwrap_or_default();
|
||||
|
||||
let mut keys = rsa_keys;
|
||||
keys.extend(pkcs8_keys.into_iter());
|
||||
|
||||
if keys.len() != 1 {
|
||||
return Err(Error::Message(format!(
|
||||
"Invalid private key file: {} ({} private keys)",
|
||||
filename,
|
||||
keys.len()
|
||||
)));
|
||||
}
|
||||
Ok(keys[0].clone())
|
||||
}
|
||||
|
||||
// ---- AWFUL COPYPASTA FROM HYPER-RUSTLS connector.rs
|
||||
// ---- ALWAYS USE `garage` AS HOSTNAME FOR TLS VERIFICATION
|
||||
|
||||
#[derive(Clone)]
|
||||
pub struct HttpsConnectorFixedDnsname<T> {
|
||||
http: T,
|
||||
tls_config: Arc<rustls::ClientConfig>,
|
||||
fixed_dnsname: &'static str,
|
||||
}
|
||||
|
||||
type BoxError = Box<dyn std::error::Error + Send + Sync>;
|
||||
|
||||
impl HttpsConnectorFixedDnsname<HttpConnector> {
|
||||
pub fn new(mut tls_config: rustls::ClientConfig, fixed_dnsname: &'static str) -> Self {
|
||||
let mut http = HttpConnector::new();
|
||||
http.enforce_http(false);
|
||||
tls_config.alpn_protocols = vec![b"h2".to_vec(), b"http/1.1".to_vec()];
|
||||
Self {
|
||||
http,
|
||||
tls_config: Arc::new(tls_config),
|
||||
fixed_dnsname,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl<T> Service<Uri> for HttpsConnectorFixedDnsname<T>
|
||||
where
|
||||
T: Service<Uri>,
|
||||
T::Response: Connection + AsyncRead + AsyncWrite + Send + Unpin + 'static,
|
||||
T::Future: Send + 'static,
|
||||
T::Error: Into<BoxError>,
|
||||
{
|
||||
type Response = MaybeHttpsStream<T::Response>;
|
||||
type Error = BoxError;
|
||||
|
||||
#[allow(clippy::type_complexity)]
|
||||
type Future =
|
||||
Pin<Box<dyn Future<Output = Result<MaybeHttpsStream<T::Response>, BoxError>> + Send>>;
|
||||
|
||||
fn poll_ready(&mut self, cx: &mut Context<'_>) -> Poll<Result<(), Self::Error>> {
|
||||
match self.http.poll_ready(cx) {
|
||||
Poll::Ready(Ok(())) => Poll::Ready(Ok(())),
|
||||
Poll::Ready(Err(e)) => Poll::Ready(Err(e.into())),
|
||||
Poll::Pending => Poll::Pending,
|
||||
}
|
||||
}
|
||||
|
||||
fn call(&mut self, dst: Uri) -> Self::Future {
|
||||
let is_https = dst.scheme_str() == Some("https");
|
||||
|
||||
if !is_https {
|
||||
let connecting_future = self.http.call(dst);
|
||||
|
||||
let f = async move {
|
||||
let tcp = connecting_future.await.map_err(Into::into)?;
|
||||
|
||||
Ok(MaybeHttpsStream::Http(tcp))
|
||||
};
|
||||
f.boxed()
|
||||
} else {
|
||||
let cfg = self.tls_config.clone();
|
||||
let connecting_future = self.http.call(dst);
|
||||
|
||||
let dnsname =
|
||||
DNSNameRef::try_from_ascii_str(self.fixed_dnsname).expect("Invalid fixed dnsname");
|
||||
|
||||
let f = async move {
|
||||
let tcp = connecting_future.await.map_err(Into::into)?;
|
||||
let connector = TlsConnector::from(cfg);
|
||||
let tls = connector
|
||||
.connect(dnsname, tcp)
|
||||
.await
|
||||
.map_err(|e| io::Error::new(io::ErrorKind::Other, e))?;
|
||||
Ok(MaybeHttpsStream::Https(tls))
|
||||
};
|
||||
f.boxed()
|
||||
}
|
||||
}
|
||||
}
|
|
@ -1,6 +1,6 @@
|
|||
[package]
|
||||
name = "garage_table"
|
||||
version = "0.3.0"
|
||||
version = "0.4.0"
|
||||
authors = ["Alex Auvolat <alex@adnab.me>"]
|
||||
edition = "2018"
|
||||
license = "AGPL-3.0"
|
||||
|
@ -13,9 +13,10 @@ path = "lib.rs"
|
|||
# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
|
||||
|
||||
[dependencies]
|
||||
garage_rpc = { version = "0.3.0", path = "../rpc" }
|
||||
garage_util = { version = "0.3.0", path = "../util" }
|
||||
garage_rpc = { version = "0.4.0", path = "../rpc" }
|
||||
garage_util = { version = "0.4.0", path = "../util" }
|
||||
|
||||
async-trait = "0.1.7"
|
||||
bytes = "1.0"
|
||||
hexdump = "0.1"
|
||||
log = "0.4"
|
||||
|
@ -30,4 +31,3 @@ serde_bytes = "0.11"
|
|||
futures = "0.3"
|
||||
futures-util = "0.3"
|
||||
tokio = { version = "1.0", default-features = false, features = ["rt", "rt-multi-thread", "io-util", "net", "time", "macros", "sync", "signal", "fs"] }
|
||||
|
||||
|
|
|
@ -9,7 +9,7 @@ use tokio::sync::Notify;
|
|||
use garage_util::data::*;
|
||||
use garage_util::error::*;
|
||||
|
||||
use garage_rpc::membership::System;
|
||||
use garage_rpc::system::System;
|
||||
|
||||
use crate::crdt::Crdt;
|
||||
use crate::replication::*;
|
||||
|
|
|
@ -2,6 +2,7 @@ use std::collections::HashMap;
|
|||
use std::sync::Arc;
|
||||
use std::time::Duration;
|
||||
|
||||
use async_trait::async_trait;
|
||||
use serde::{Deserialize, Serialize};
|
||||
use serde_bytes::ByteBuf;
|
||||
|
||||
|
@ -13,9 +14,8 @@ use tokio::sync::watch;
|
|||
use garage_util::data::*;
|
||||
use garage_util::error::Error;
|
||||
|
||||
use garage_rpc::membership::System;
|
||||
use garage_rpc::rpc_client::*;
|
||||
use garage_rpc::rpc_server::*;
|
||||
use garage_rpc::system::System;
|
||||
use garage_rpc::*;
|
||||
|
||||
use crate::data::*;
|
||||
use crate::replication::*;
|
||||
|
@ -24,11 +24,11 @@ use crate::schema::*;
|
|||
const TABLE_GC_BATCH_SIZE: usize = 1024;
|
||||
const TABLE_GC_RPC_TIMEOUT: Duration = Duration::from_secs(30);
|
||||
|
||||
pub struct TableGc<F: TableSchema, R: TableReplication> {
|
||||
pub struct TableGc<F: TableSchema + 'static, R: TableReplication + 'static> {
|
||||
system: Arc<System>,
|
||||
data: Arc<TableData<F, R>>,
|
||||
|
||||
rpc_client: Arc<RpcClient<GcRpc>>,
|
||||
endpoint: Arc<Endpoint<GcRpc, Self>>,
|
||||
}
|
||||
|
||||
#[derive(Serialize, Deserialize)]
|
||||
|
@ -36,30 +36,30 @@ enum GcRpc {
|
|||
Update(Vec<ByteBuf>),
|
||||
DeleteIfEqualHash(Vec<(ByteBuf, Hash)>),
|
||||
Ok,
|
||||
Error(String),
|
||||
}
|
||||
|
||||
impl RpcMessage for GcRpc {}
|
||||
impl Message for GcRpc {
|
||||
type Response = GcRpc;
|
||||
}
|
||||
|
||||
impl<F, R> TableGc<F, R>
|
||||
where
|
||||
F: TableSchema + 'static,
|
||||
R: TableReplication + 'static,
|
||||
{
|
||||
pub(crate) fn launch(
|
||||
system: Arc<System>,
|
||||
data: Arc<TableData<F, R>>,
|
||||
rpc_server: &mut RpcServer,
|
||||
) -> Arc<Self> {
|
||||
let rpc_path = format!("table_{}/gc", data.name);
|
||||
let rpc_client = system.rpc_client::<GcRpc>(&rpc_path);
|
||||
pub(crate) fn launch(system: Arc<System>, data: Arc<TableData<F, R>>) -> Arc<Self> {
|
||||
let endpoint = system
|
||||
.netapp
|
||||
.endpoint(format!("garage_table/gc.rs/Rpc:{}", data.name));
|
||||
|
||||
let gc = Arc::new(Self {
|
||||
system: system.clone(),
|
||||
data: data.clone(),
|
||||
rpc_client,
|
||||
endpoint,
|
||||
});
|
||||
|
||||
gc.register_handler(rpc_server, rpc_path);
|
||||
gc.endpoint.set_handler(gc.clone());
|
||||
|
||||
let gc1 = gc.clone();
|
||||
system.background.spawn_worker(
|
||||
|
@ -168,7 +168,7 @@ where
|
|||
|
||||
async fn try_send_and_delete(
|
||||
&self,
|
||||
nodes: Vec<Uuid>,
|
||||
nodes: Vec<NodeID>,
|
||||
items: Vec<(ByteBuf, Hash, ByteBuf)>,
|
||||
) -> Result<(), Error> {
|
||||
let n_items = items.len();
|
||||
|
@ -180,11 +180,15 @@ where
|
|||
deletes.push((k, vhash));
|
||||
}
|
||||
|
||||
self.rpc_client
|
||||
self.system
|
||||
.rpc
|
||||
.try_call_many(
|
||||
&self.endpoint,
|
||||
&nodes[..],
|
||||
GcRpc::Update(updates),
|
||||
RequestStrategy::with_quorum(nodes.len()).with_timeout(TABLE_GC_RPC_TIMEOUT),
|
||||
RequestStrategy::with_priority(PRIO_BACKGROUND)
|
||||
.with_quorum(nodes.len())
|
||||
.with_timeout(TABLE_GC_RPC_TIMEOUT),
|
||||
)
|
||||
.await?;
|
||||
|
||||
|
@ -193,11 +197,15 @@ where
|
|||
self.data.name, n_items
|
||||
);
|
||||
|
||||
self.rpc_client
|
||||
self.system
|
||||
.rpc
|
||||
.try_call_many(
|
||||
&self.endpoint,
|
||||
&nodes[..],
|
||||
GcRpc::DeleteIfEqualHash(deletes.clone()),
|
||||
RequestStrategy::with_quorum(nodes.len()).with_timeout(TABLE_GC_RPC_TIMEOUT),
|
||||
RequestStrategy::with_priority(PRIO_BACKGROUND)
|
||||
.with_quorum(nodes.len())
|
||||
.with_timeout(TABLE_GC_RPC_TIMEOUT),
|
||||
)
|
||||
.await?;
|
||||
|
||||
|
@ -217,24 +225,7 @@ where
|
|||
Ok(())
|
||||
}
|
||||
|
||||
// ---- RPC HANDLER ----
|
||||
|
||||
fn register_handler(self: &Arc<Self>, rpc_server: &mut RpcServer, path: String) {
|
||||
let self2 = self.clone();
|
||||
rpc_server.add_handler::<GcRpc, _, _>(path, move |msg, _addr| {
|
||||
let self2 = self2.clone();
|
||||
async move { self2.handle_rpc(&msg).await }
|
||||
});
|
||||
|
||||
let self2 = self.clone();
|
||||
self.rpc_client
|
||||
.set_local_handler(self.system.id, move |msg| {
|
||||
let self2 = self2.clone();
|
||||
async move { self2.handle_rpc(&msg).await }
|
||||
});
|
||||
}
|
||||
|
||||
async fn handle_rpc(self: &Arc<Self>, message: &GcRpc) -> Result<GcRpc, Error> {
|
||||
async fn handle_rpc(&self, message: &GcRpc) -> Result<GcRpc, Error> {
|
||||
match message {
|
||||
GcRpc::Update(items) => {
|
||||
self.data.update_many(items)?;
|
||||
|
@ -251,3 +242,16 @@ where
|
|||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[async_trait]
|
||||
impl<F, R> EndpointHandler<GcRpc> for TableGc<F, R>
|
||||
where
|
||||
F: TableSchema + 'static,
|
||||
R: TableReplication + 'static,
|
||||
{
|
||||
async fn handle(self: &Arc<Self>, message: &GcRpc, _from: NodeID) -> GcRpc {
|
||||
self.handle_rpc(message)
|
||||
.await
|
||||
.unwrap_or_else(|e| GcRpc::Error(format!("{}", e)))
|
||||
}
|
||||
}
|
||||
|
|
|
@ -1,7 +1,8 @@
|
|||
use std::sync::Arc;
|
||||
|
||||
use garage_rpc::membership::System;
|
||||
use garage_rpc::ring::*;
|
||||
use garage_rpc::system::System;
|
||||
use garage_rpc::NodeID;
|
||||
use garage_util::data::*;
|
||||
|
||||
use crate::replication::*;
|
||||
|
@ -19,16 +20,20 @@ pub struct TableFullReplication {
|
|||
}
|
||||
|
||||
impl TableReplication for TableFullReplication {
|
||||
fn read_nodes(&self, _hash: &Hash) -> Vec<Uuid> {
|
||||
fn read_nodes(&self, _hash: &Hash) -> Vec<NodeID> {
|
||||
vec![self.system.id]
|
||||
}
|
||||
fn read_quorum(&self) -> usize {
|
||||
1
|
||||
}
|
||||
|
||||
fn write_nodes(&self, _hash: &Hash) -> Vec<Uuid> {
|
||||
fn write_nodes(&self, _hash: &Hash) -> Vec<NodeID> {
|
||||
let ring = self.system.ring.borrow();
|
||||
ring.config.members.keys().cloned().collect::<Vec<_>>()
|
||||
ring.config
|
||||
.members
|
||||
.keys()
|
||||
.map(|id| NodeID::from_slice(id.as_slice()).unwrap())
|
||||
.collect::<Vec<_>>()
|
||||
}
|
||||
fn write_quorum(&self) -> usize {
|
||||
let nmembers = self.system.ring.borrow().config.members.len();
|
||||
|
|
|
@ -1,5 +1,5 @@
|
|||
use garage_rpc::ring::*;
|
||||
|
||||
use garage_rpc::NodeID;
|
||||
use garage_util::data::*;
|
||||
|
||||
/// Trait to describe how a table shall be replicated
|
||||
|
@ -8,12 +8,12 @@ pub trait TableReplication: Send + Sync {
|
|||
// To understand various replication methods
|
||||
|
||||
/// Which nodes to send read requests to
|
||||
fn read_nodes(&self, hash: &Hash) -> Vec<Uuid>;
|
||||
fn read_nodes(&self, hash: &Hash) -> Vec<NodeID>;
|
||||
/// Responses needed to consider a read successful
|
||||
fn read_quorum(&self) -> usize;
|
||||
|
||||
/// Which nodes to send writes to
|
||||
fn write_nodes(&self, hash: &Hash) -> Vec<Uuid>;
|
||||
fn write_nodes(&self, hash: &Hash) -> Vec<NodeID>;
|
||||
/// Responses needed to consider a write successful
|
||||
fn write_quorum(&self) -> usize;
|
||||
fn max_write_errors(&self) -> usize;
|
||||
|
|
|
@ -1,7 +1,8 @@
|
|||
use std::sync::Arc;
|
||||
|
||||
use garage_rpc::membership::System;
|
||||
use garage_rpc::ring::*;
|
||||
use garage_rpc::system::System;
|
||||
use garage_rpc::NodeID;
|
||||
use garage_util::data::*;
|
||||
|
||||
use crate::replication::*;
|
||||
|
@ -25,7 +26,7 @@ pub struct TableShardedReplication {
|
|||
}
|
||||
|
||||
impl TableReplication for TableShardedReplication {
|
||||
fn read_nodes(&self, hash: &Hash) -> Vec<Uuid> {
|
||||
fn read_nodes(&self, hash: &Hash) -> Vec<NodeID> {
|
||||
let ring = self.system.ring.borrow();
|
||||
ring.get_nodes(&hash, self.replication_factor)
|
||||
}
|
||||
|
@ -33,7 +34,7 @@ impl TableReplication for TableShardedReplication {
|
|||
self.read_quorum
|
||||
}
|
||||
|
||||
fn write_nodes(&self, hash: &Hash) -> Vec<Uuid> {
|
||||
fn write_nodes(&self, hash: &Hash) -> Vec<NodeID> {
|
||||
let ring = self.system.ring.borrow();
|
||||
ring.get_nodes(&hash, self.replication_factor)
|
||||
}
|
||||
|
|
|
@ -2,6 +2,7 @@ use std::collections::VecDeque;
|
|||
use std::sync::{Arc, Mutex};
|
||||
use std::time::{Duration, Instant};
|
||||
|
||||
use async_trait::async_trait;
|
||||
use futures::select;
|
||||
use futures_util::future::*;
|
||||
use futures_util::stream::*;
|
||||
|
@ -13,10 +14,9 @@ use tokio::sync::{mpsc, watch};
|
|||
use garage_util::data::*;
|
||||
use garage_util::error::Error;
|
||||
|
||||
use garage_rpc::membership::System;
|
||||
use garage_rpc::ring::*;
|
||||
use garage_rpc::rpc_client::*;
|
||||
use garage_rpc::rpc_server::*;
|
||||
use garage_rpc::system::System;
|
||||
use garage_rpc::*;
|
||||
|
||||
use crate::data::*;
|
||||
use crate::merkle::*;
|
||||
|
@ -28,13 +28,13 @@ const TABLE_SYNC_RPC_TIMEOUT: Duration = Duration::from_secs(30);
|
|||
// Do anti-entropy every 10 minutes
|
||||
const ANTI_ENTROPY_INTERVAL: Duration = Duration::from_secs(10 * 60);
|
||||
|
||||
pub struct TableSyncer<F: TableSchema, R: TableReplication> {
|
||||
pub struct TableSyncer<F: TableSchema + 'static, R: TableReplication + 'static> {
|
||||
system: Arc<System>,
|
||||
data: Arc<TableData<F, R>>,
|
||||
merkle: Arc<MerkleUpdater<F, R>>,
|
||||
|
||||
todo: Mutex<SyncTodo>,
|
||||
rpc_client: Arc<RpcClient<SyncRpc>>,
|
||||
endpoint: Arc<Endpoint<SyncRpc, Self>>,
|
||||
}
|
||||
|
||||
#[derive(Serialize, Deserialize)]
|
||||
|
@ -45,9 +45,12 @@ pub(crate) enum SyncRpc {
|
|||
Node(MerkleNodeKey, MerkleNode),
|
||||
Items(Vec<Arc<ByteBuf>>),
|
||||
Ok,
|
||||
Error(String),
|
||||
}
|
||||
|
||||
impl RpcMessage for SyncRpc {}
|
||||
impl Message for SyncRpc {
|
||||
type Response = SyncRpc;
|
||||
}
|
||||
|
||||
struct SyncTodo {
|
||||
todo: Vec<TodoPartition>,
|
||||
|
@ -72,10 +75,10 @@ where
|
|||
system: Arc<System>,
|
||||
data: Arc<TableData<F, R>>,
|
||||
merkle: Arc<MerkleUpdater<F, R>>,
|
||||
rpc_server: &mut RpcServer,
|
||||
) -> Arc<Self> {
|
||||
let rpc_path = format!("table_{}/sync", data.name);
|
||||
let rpc_client = system.rpc_client::<SyncRpc>(&rpc_path);
|
||||
let endpoint = system
|
||||
.netapp
|
||||
.endpoint(format!("garage_table/sync.rs/Rpc:{}", data.name));
|
||||
|
||||
let todo = SyncTodo { todo: vec![] };
|
||||
|
||||
|
@ -84,10 +87,10 @@ where
|
|||
data: data.clone(),
|
||||
merkle,
|
||||
todo: Mutex::new(todo),
|
||||
rpc_client,
|
||||
endpoint,
|
||||
});
|
||||
|
||||
syncer.register_handler(rpc_server, rpc_path);
|
||||
syncer.endpoint.set_handler(syncer.clone());
|
||||
|
||||
let (busy_tx, busy_rx) = mpsc::unbounded_channel();
|
||||
|
||||
|
@ -112,21 +115,6 @@ where
|
|||
syncer
|
||||
}
|
||||
|
||||
fn register_handler(self: &Arc<Self>, rpc_server: &mut RpcServer, path: String) {
|
||||
let self2 = self.clone();
|
||||
rpc_server.add_handler::<SyncRpc, _, _>(path, move |msg, _addr| {
|
||||
let self2 = self2.clone();
|
||||
async move { self2.handle_rpc(&msg).await }
|
||||
});
|
||||
|
||||
let self2 = self.clone();
|
||||
self.rpc_client
|
||||
.set_local_handler(self.system.id, move |msg| {
|
||||
let self2 = self2.clone();
|
||||
async move { self2.handle_rpc(&msg).await }
|
||||
});
|
||||
}
|
||||
|
||||
async fn watcher_task(
|
||||
self: Arc<Self>,
|
||||
mut must_exit: watch::Receiver<bool>,
|
||||
|
@ -317,15 +305,19 @@ where
|
|||
async fn offload_items(
|
||||
self: &Arc<Self>,
|
||||
items: &[(Vec<u8>, Arc<ByteBuf>)],
|
||||
nodes: &[Uuid],
|
||||
nodes: &[NodeID],
|
||||
) -> Result<(), Error> {
|
||||
let values = items.iter().map(|(_k, v)| v.clone()).collect::<Vec<_>>();
|
||||
|
||||
self.rpc_client
|
||||
self.system
|
||||
.rpc
|
||||
.try_call_many(
|
||||
&self.endpoint,
|
||||
nodes,
|
||||
SyncRpc::Items(values),
|
||||
RequestStrategy::with_quorum(nodes.len()).with_timeout(TABLE_SYNC_RPC_TIMEOUT),
|
||||
RequestStrategy::with_priority(PRIO_BACKGROUND)
|
||||
.with_quorum(nodes.len())
|
||||
.with_timeout(TABLE_SYNC_RPC_TIMEOUT),
|
||||
)
|
||||
.await?;
|
||||
|
||||
|
@ -362,7 +354,7 @@ where
|
|||
async fn do_sync_with(
|
||||
self: Arc<Self>,
|
||||
partition: TodoPartition,
|
||||
who: Uuid,
|
||||
who: NodeID,
|
||||
must_exit: watch::Receiver<bool>,
|
||||
) -> Result<(), Error> {
|
||||
let (root_ck_key, root_ck) = self.get_root_ck(partition.partition)?;
|
||||
|
@ -378,11 +370,14 @@ where
|
|||
// Check if they have the same root checksum
|
||||
// If so, do nothing.
|
||||
let root_resp = self
|
||||
.rpc_client
|
||||
.system
|
||||
.rpc
|
||||
.call(
|
||||
&self.endpoint,
|
||||
who,
|
||||
SyncRpc::RootCkHash(partition.partition, root_ck_hash),
|
||||
TABLE_SYNC_RPC_TIMEOUT,
|
||||
RequestStrategy::with_priority(PRIO_BACKGROUND)
|
||||
.with_timeout(TABLE_SYNC_RPC_TIMEOUT),
|
||||
)
|
||||
.await?;
|
||||
|
||||
|
@ -430,8 +425,15 @@ where
|
|||
// Get Merkle node for this tree position at remote node
|
||||
// and compare it with local node
|
||||
let remote_node = match self
|
||||
.rpc_client
|
||||
.call(who, SyncRpc::GetNode(key.clone()), TABLE_SYNC_RPC_TIMEOUT)
|
||||
.system
|
||||
.rpc
|
||||
.call(
|
||||
&self.endpoint,
|
||||
who,
|
||||
SyncRpc::GetNode(key.clone()),
|
||||
RequestStrategy::with_priority(PRIO_BACKGROUND)
|
||||
.with_timeout(TABLE_SYNC_RPC_TIMEOUT),
|
||||
)
|
||||
.await?
|
||||
{
|
||||
SyncRpc::Node(_, node) => node,
|
||||
|
@ -478,7 +480,7 @@ where
|
|||
Ok(())
|
||||
}
|
||||
|
||||
async fn send_items(&self, who: Uuid, item_value_list: Vec<Vec<u8>>) -> Result<(), Error> {
|
||||
async fn send_items(&self, who: NodeID, item_value_list: Vec<Vec<u8>>) -> Result<(), Error> {
|
||||
info!(
|
||||
"({}) Sending {} items to {:?}",
|
||||
self.data.name,
|
||||
|
@ -492,8 +494,15 @@ where
|
|||
.collect::<Vec<_>>();
|
||||
|
||||
let rpc_resp = self
|
||||
.rpc_client
|
||||
.call(who, SyncRpc::Items(values), TABLE_SYNC_RPC_TIMEOUT)
|
||||
.system
|
||||
.rpc
|
||||
.call(
|
||||
&self.endpoint,
|
||||
who,
|
||||
SyncRpc::Items(values),
|
||||
RequestStrategy::with_priority(PRIO_BACKGROUND)
|
||||
.with_timeout(TABLE_SYNC_RPC_TIMEOUT),
|
||||
)
|
||||
.await?;
|
||||
if let SyncRpc::Ok = rpc_resp {
|
||||
Ok(())
|
||||
|
@ -506,7 +515,6 @@ where
|
|||
}
|
||||
|
||||
// ======= SYNCHRONIZATION PROCEDURE -- RECEIVER SIDE ======
|
||||
|
||||
async fn handle_rpc(self: &Arc<Self>, message: &SyncRpc) -> Result<SyncRpc, Error> {
|
||||
match message {
|
||||
SyncRpc::RootCkHash(range, h) => {
|
||||
|
@ -527,6 +535,19 @@ where
|
|||
}
|
||||
}
|
||||
|
||||
#[async_trait]
|
||||
impl<F, R> EndpointHandler<SyncRpc> for TableSyncer<F, R>
|
||||
where
|
||||
F: TableSchema + 'static,
|
||||
R: TableReplication + 'static,
|
||||
{
|
||||
async fn handle(self: &Arc<Self>, message: &SyncRpc, _from: NodeID) -> SyncRpc {
|
||||
self.handle_rpc(message)
|
||||
.await
|
||||
.unwrap_or_else(|e| SyncRpc::Error(format!("{}", e)))
|
||||
}
|
||||
}
|
||||
|
||||
impl SyncTodo {
|
||||
fn add_full_sync<F: TableSchema, R: TableReplication>(
|
||||
&mut self,
|
||||
|
|
|
@ -2,6 +2,7 @@ use std::collections::{BTreeMap, HashMap};
|
|||
use std::sync::Arc;
|
||||
use std::time::Duration;
|
||||
|
||||
use async_trait::async_trait;
|
||||
use futures::stream::*;
|
||||
use serde::{Deserialize, Serialize};
|
||||
use serde_bytes::ByteBuf;
|
||||
|
@ -9,9 +10,8 @@ use serde_bytes::ByteBuf;
|
|||
use garage_util::data::*;
|
||||
use garage_util::error::Error;
|
||||
|
||||
use garage_rpc::membership::System;
|
||||
use garage_rpc::rpc_client::*;
|
||||
use garage_rpc::rpc_server::*;
|
||||
use garage_rpc::system::System;
|
||||
use garage_rpc::*;
|
||||
|
||||
use crate::crdt::Crdt;
|
||||
use crate::data::*;
|
||||
|
@ -23,17 +23,18 @@ use crate::sync::*;
|
|||
|
||||
const TABLE_RPC_TIMEOUT: Duration = Duration::from_secs(10);
|
||||
|
||||
pub struct Table<F: TableSchema, R: TableReplication> {
|
||||
pub struct Table<F: TableSchema + 'static, R: TableReplication + 'static> {
|
||||
pub system: Arc<System>,
|
||||
pub data: Arc<TableData<F, R>>,
|
||||
pub merkle_updater: Arc<MerkleUpdater<F, R>>,
|
||||
pub syncer: Arc<TableSyncer<F, R>>,
|
||||
rpc_client: Arc<RpcClient<TableRpc<F>>>,
|
||||
endpoint: Arc<Endpoint<TableRpc<F>, Self>>,
|
||||
}
|
||||
|
||||
#[derive(Serialize, Deserialize)]
|
||||
pub(crate) enum TableRpc<F: TableSchema> {
|
||||
Ok,
|
||||
Error(String),
|
||||
|
||||
ReadEntry(F::P, F::S),
|
||||
ReadEntryResponse(Option<ByteBuf>),
|
||||
|
@ -44,7 +45,9 @@ pub(crate) enum TableRpc<F: TableSchema> {
|
|||
Update(Vec<Arc<ByteBuf>>),
|
||||
}
|
||||
|
||||
impl<F: TableSchema> RpcMessage for TableRpc<F> {}
|
||||
impl<F: TableSchema> Message for TableRpc<F> {
|
||||
type Response = TableRpc<F>;
|
||||
}
|
||||
|
||||
impl<F, R> Table<F, R>
|
||||
where
|
||||
|
@ -59,32 +62,27 @@ where
|
|||
system: Arc<System>,
|
||||
db: &sled::Db,
|
||||
name: String,
|
||||
rpc_server: &mut RpcServer,
|
||||
) -> Arc<Self> {
|
||||
let rpc_path = format!("table_{}", name);
|
||||
let rpc_client = system.rpc_client::<TableRpc<F>>(&rpc_path);
|
||||
let endpoint = system
|
||||
.netapp
|
||||
.endpoint(format!("garage_table/table.rs/Rpc:{}", name));
|
||||
|
||||
let data = TableData::new(system.clone(), name, instance, replication, db);
|
||||
|
||||
let merkle_updater = MerkleUpdater::launch(&system.background, data.clone());
|
||||
|
||||
let syncer = TableSyncer::launch(
|
||||
system.clone(),
|
||||
data.clone(),
|
||||
merkle_updater.clone(),
|
||||
rpc_server,
|
||||
);
|
||||
TableGc::launch(system.clone(), data.clone(), rpc_server);
|
||||
let syncer = TableSyncer::launch(system.clone(), data.clone(), merkle_updater.clone());
|
||||
TableGc::launch(system.clone(), data.clone());
|
||||
|
||||
let table = Arc::new(Self {
|
||||
system,
|
||||
data,
|
||||
merkle_updater,
|
||||
syncer,
|
||||
rpc_client,
|
||||
endpoint,
|
||||
});
|
||||
|
||||
table.clone().register_handler(rpc_server, rpc_path);
|
||||
table.endpoint.set_handler(table.clone());
|
||||
|
||||
table
|
||||
}
|
||||
|
@ -97,11 +95,14 @@ where
|
|||
let e_enc = Arc::new(ByteBuf::from(rmp_to_vec_all_named(e)?));
|
||||
let rpc = TableRpc::<F>::Update(vec![e_enc]);
|
||||
|
||||
self.rpc_client
|
||||
self.system
|
||||
.rpc
|
||||
.try_call_many(
|
||||
&self.endpoint,
|
||||
&who[..],
|
||||
rpc,
|
||||
RequestStrategy::with_quorum(self.data.replication.write_quorum())
|
||||
RequestStrategy::with_priority(PRIO_NORMAL)
|
||||
.with_quorum(self.data.replication.write_quorum())
|
||||
.with_timeout(TABLE_RPC_TIMEOUT),
|
||||
)
|
||||
.await?;
|
||||
|
@ -123,7 +124,16 @@ where
|
|||
let call_futures = call_list.drain().map(|(node, entries)| async move {
|
||||
let rpc = TableRpc::<F>::Update(entries);
|
||||
|
||||
let resp = self.rpc_client.call(node, rpc, TABLE_RPC_TIMEOUT).await?;
|
||||
let resp = self
|
||||
.system
|
||||
.rpc
|
||||
.call(
|
||||
&self.endpoint,
|
||||
node,
|
||||
rpc,
|
||||
RequestStrategy::with_priority(PRIO_NORMAL).with_timeout(TABLE_RPC_TIMEOUT),
|
||||
)
|
||||
.await?;
|
||||
Ok::<_, Error>((node, resp))
|
||||
});
|
||||
let mut resps = call_futures.collect::<FuturesUnordered<_>>();
|
||||
|
@ -152,11 +162,14 @@ where
|
|||
|
||||
let rpc = TableRpc::<F>::ReadEntry(partition_key.clone(), sort_key.clone());
|
||||
let resps = self
|
||||
.rpc_client
|
||||
.system
|
||||
.rpc
|
||||
.try_call_many(
|
||||
&self.endpoint,
|
||||
&who[..],
|
||||
rpc,
|
||||
RequestStrategy::with_quorum(self.data.replication.read_quorum())
|
||||
RequestStrategy::with_priority(PRIO_NORMAL)
|
||||
.with_quorum(self.data.replication.read_quorum())
|
||||
.with_timeout(TABLE_RPC_TIMEOUT)
|
||||
.interrupt_after_quorum(true),
|
||||
)
|
||||
|
@ -208,11 +221,14 @@ where
|
|||
let rpc = TableRpc::<F>::ReadRange(partition_key.clone(), begin_sort_key, filter, limit);
|
||||
|
||||
let resps = self
|
||||
.rpc_client
|
||||
.system
|
||||
.rpc
|
||||
.try_call_many(
|
||||
&self.endpoint,
|
||||
&who[..],
|
||||
rpc,
|
||||
RequestStrategy::with_quorum(self.data.replication.read_quorum())
|
||||
RequestStrategy::with_priority(PRIO_NORMAL)
|
||||
.with_quorum(self.data.replication.read_quorum())
|
||||
.with_timeout(TABLE_RPC_TIMEOUT)
|
||||
.interrupt_after_quorum(true),
|
||||
)
|
||||
|
@ -261,36 +277,25 @@ where
|
|||
|
||||
// =============== UTILITY FUNCTION FOR CLIENT OPERATIONS ===============
|
||||
|
||||
async fn repair_on_read(&self, who: &[Uuid], what: F::E) -> Result<(), Error> {
|
||||
async fn repair_on_read(&self, who: &[NodeID], what: F::E) -> Result<(), Error> {
|
||||
let what_enc = Arc::new(ByteBuf::from(rmp_to_vec_all_named(&what)?));
|
||||
self.rpc_client
|
||||
self.system
|
||||
.rpc
|
||||
.try_call_many(
|
||||
&self.endpoint,
|
||||
who,
|
||||
TableRpc::<F>::Update(vec![what_enc]),
|
||||
RequestStrategy::with_quorum(who.len()).with_timeout(TABLE_RPC_TIMEOUT),
|
||||
RequestStrategy::with_priority(PRIO_NORMAL)
|
||||
.with_quorum(who.len())
|
||||
.with_timeout(TABLE_RPC_TIMEOUT),
|
||||
)
|
||||
.await?;
|
||||
Ok(())
|
||||
}
|
||||
|
||||
// =============== HANDLERS FOR RPC OPERATIONS (SERVER SIDE) ==============
|
||||
|
||||
fn register_handler(self: Arc<Self>, rpc_server: &mut RpcServer, path: String) {
|
||||
let self2 = self.clone();
|
||||
rpc_server.add_handler::<TableRpc<F>, _, _>(path, move |msg, _addr| {
|
||||
let self2 = self2.clone();
|
||||
async move { self2.handle(&msg).await }
|
||||
});
|
||||
|
||||
let self2 = self.clone();
|
||||
self.rpc_client
|
||||
.set_local_handler(self.system.id, move |msg| {
|
||||
let self2 = self2.clone();
|
||||
async move { self2.handle(&msg).await }
|
||||
});
|
||||
}
|
||||
|
||||
async fn handle(self: &Arc<Self>, msg: &TableRpc<F>) -> Result<TableRpc<F>, Error> {
|
||||
// ====== RPC HANDLER =====
|
||||
//
|
||||
async fn handle_rpc(self: &Arc<Self>, msg: &TableRpc<F>) -> Result<TableRpc<F>, Error> {
|
||||
match msg {
|
||||
TableRpc::ReadEntry(key, sort_key) => {
|
||||
let value = self.data.read_entry(key, sort_key)?;
|
||||
|
@ -308,3 +313,16 @@ where
|
|||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[async_trait]
|
||||
impl<F, R> EndpointHandler<TableRpc<F>> for Table<F, R>
|
||||
where
|
||||
F: TableSchema + 'static,
|
||||
R: TableReplication + 'static,
|
||||
{
|
||||
async fn handle(self: &Arc<Self>, msg: &TableRpc<F>, _from: NodeID) -> TableRpc<F> {
|
||||
self.handle_rpc(msg)
|
||||
.await
|
||||
.unwrap_or_else(|e| TableRpc::<F>::Error(format!("{}", e)))
|
||||
}
|
||||
}
|
||||
|
|
|
@ -1,6 +1,6 @@
|
|||
[package]
|
||||
name = "garage_util"
|
||||
version = "0.3.0"
|
||||
version = "0.4.0"
|
||||
authors = ["Alex Auvolat <alex@adnab.me>"]
|
||||
edition = "2018"
|
||||
license = "AGPL-3.0"
|
||||
|
@ -32,7 +32,6 @@ toml = "0.5"
|
|||
futures = "0.3"
|
||||
tokio = { version = "1.0", default-features = false, features = ["rt", "rt-multi-thread", "io-util", "net", "time", "macros", "sync", "signal", "fs"] }
|
||||
|
||||
netapp = { version = "0.3.0", git = "https://git.deuxfleurs.fr/lx/netapp" }
|
||||
http = "0.2"
|
||||
hyper = "0.14"
|
||||
rustls = "0.19"
|
||||
webpki = "0.21"
|
||||
|
|
|
@ -3,8 +3,11 @@ use std::io::Read;
|
|||
use std::net::SocketAddr;
|
||||
use std::path::PathBuf;
|
||||
|
||||
use serde::de::Error as SerdeError;
|
||||
use serde::{de, Deserialize};
|
||||
|
||||
use netapp::NodeID;
|
||||
|
||||
use crate::error::Error;
|
||||
|
||||
/// Represent the whole configuration
|
||||
|
@ -26,20 +29,20 @@ pub struct Config {
|
|||
// (we can add more aliases for this later)
|
||||
pub replication_mode: String,
|
||||
|
||||
/// RPC secret key: 32 bytes hex encoded
|
||||
pub rpc_secret: String,
|
||||
|
||||
/// Address to bind for RPC
|
||||
pub rpc_bind_addr: SocketAddr,
|
||||
|
||||
/// Bootstrap peers RPC address
|
||||
#[serde(deserialize_with = "deserialize_vec_addr")]
|
||||
pub bootstrap_peers: Vec<SocketAddr>,
|
||||
pub bootstrap_peers: Vec<(NodeID, SocketAddr)>,
|
||||
/// Consul host to connect to in order to discover more peers
|
||||
pub consul_host: Option<String>,
|
||||
/// Consul service name to use
|
||||
pub consul_service_name: Option<String>,
|
||||
|
||||
/// Configuration for RPC TLS
|
||||
pub rpc_tls: Option<TlsConfig>,
|
||||
|
||||
/// Max number of concurrent RPC requests
|
||||
#[serde(default = "default_max_concurrent_rpc_requests")]
|
||||
pub max_concurrent_rpc_requests: usize,
|
||||
|
@ -59,17 +62,6 @@ pub struct Config {
|
|||
pub s3_web: WebConfig,
|
||||
}
|
||||
|
||||
/// Configuration for RPC TLS
|
||||
#[derive(Deserialize, Debug, Clone)]
|
||||
pub struct TlsConfig {
|
||||
/// Path to certificate autority used for all nodes
|
||||
pub ca_cert: String,
|
||||
/// Path to public certificate for this node
|
||||
pub node_cert: String,
|
||||
/// Path to private key for this node
|
||||
pub node_key: String,
|
||||
}
|
||||
|
||||
/// Configuration for S3 api
|
||||
#[derive(Deserialize, Debug, Clone)]
|
||||
pub struct ApiConfig {
|
||||
|
@ -115,27 +107,32 @@ pub fn read_config(config_file: PathBuf) -> Result<Config, Error> {
|
|||
Ok(toml::from_str(&config)?)
|
||||
}
|
||||
|
||||
fn deserialize_vec_addr<'de, D>(deserializer: D) -> Result<Vec<SocketAddr>, D::Error>
|
||||
fn deserialize_vec_addr<'de, D>(deserializer: D) -> Result<Vec<(NodeID, SocketAddr)>, D::Error>
|
||||
where
|
||||
D: de::Deserializer<'de>,
|
||||
{
|
||||
use std::net::ToSocketAddrs;
|
||||
|
||||
Ok(<Vec<&str>>::deserialize(deserializer)?
|
||||
.iter()
|
||||
.filter_map(|&name| {
|
||||
name.to_socket_addrs()
|
||||
.map(|iter| (name, iter))
|
||||
.map_err(|_| warn!("Error resolving \"{}\"", name))
|
||||
.ok()
|
||||
})
|
||||
.map(|(name, iter)| {
|
||||
let v = iter.collect::<Vec<_>>();
|
||||
if v.is_empty() {
|
||||
warn!("Error resolving \"{}\"", name)
|
||||
let mut ret = vec![];
|
||||
|
||||
for peer in <Vec<&str>>::deserialize(deserializer)? {
|
||||
let delim = peer
|
||||
.find('@')
|
||||
.ok_or_else(|| D::Error::custom("Invalid bootstrap peer: public key not specified"))?;
|
||||
let (key, host) = peer.split_at(delim);
|
||||
let pubkey = NodeID::from_slice(&hex::decode(&key).map_err(D::Error::custom)?)
|
||||
.ok_or_else(|| D::Error::custom("Invalid bootstrap peer public key"))?;
|
||||
let hosts = host[1..]
|
||||
.to_socket_addrs()
|
||||
.map_err(D::Error::custom)?
|
||||
.collect::<Vec<_>>();
|
||||
if hosts.is_empty() {
|
||||
return Err(D::Error::custom(format!("Error resolving {}", &host[1..])));
|
||||
}
|
||||
v
|
||||
})
|
||||
.flatten()
|
||||
.collect())
|
||||
for host in hosts {
|
||||
ret.push((pubkey.clone(), host));
|
||||
}
|
||||
}
|
||||
|
||||
Ok(ret)
|
||||
}
|
||||
|
|
|
@ -11,8 +11,8 @@ pub enum RpcError {
|
|||
#[error(display = "Node is down: {:?}.", _0)]
|
||||
NodeDown(Uuid),
|
||||
|
||||
#[error(display = "Timeout: {}", _0)]
|
||||
Timeout(#[error(source)] tokio::time::error::Elapsed),
|
||||
#[error(display = "Timeout")]
|
||||
Timeout,
|
||||
|
||||
#[error(display = "HTTP error: {}", _0)]
|
||||
Http(#[error(source)] http::Error),
|
||||
|
@ -45,11 +45,8 @@ pub enum Error {
|
|||
#[error(display = "Invalid HTTP header value: {}", _0)]
|
||||
HttpHeader(#[error(source)] http::header::ToStrError),
|
||||
|
||||
#[error(display = "TLS error: {}", _0)]
|
||||
Tls(#[error(source)] rustls::TLSError),
|
||||
|
||||
#[error(display = "PKI error: {}", _0)]
|
||||
Pki(#[error(source)] webpki::Error),
|
||||
#[error(display = "Netapp error: {}", _0)]
|
||||
Netapp(#[error(source)] netapp::error::Error),
|
||||
|
||||
quentin
commented
We discussed renaming this error to "FailedQuorumError" or something similar, as this is the only case in which it is fired.
|
||||
#[error(display = "Sled error: {}", _0)]
|
||||
Sled(#[error(source)] sled::Error),
|
||||
|
|
|
@ -1,6 +1,6 @@
|
|||
[package]
|
||||
name = "garage_web"
|
||||
version = "0.3.0"
|
||||
version = "0.4.0"
|
||||
authors = ["Alex Auvolat <alex@adnab.me>", "Quentin Dufour <quentin@dufour.io>"]
|
||||
edition = "2018"
|
||||
license = "AGPL-3.0"
|
||||
|
@ -13,10 +13,10 @@ path = "lib.rs"
|
|||
# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
|
||||
|
||||
[dependencies]
|
||||
garage_api = { version = "0.3.0", path = "../api" }
|
||||
garage_model = { version = "0.3.0", path = "../model" }
|
||||
garage_util = { version = "0.3.0", path = "../util" }
|
||||
garage_table = { version = "0.3.0", path = "../table" }
|
||||
garage_api = { version = "0.4.0", path = "../api" }
|
||||
garage_model = { version = "0.4.0", path = "../model" }
|
||||
garage_util = { version = "0.4.0", path = "../util" }
|
||||
garage_table = { version = "0.4.0", path = "../table" }
|
||||
|
||||
err-derive = "0.3"
|
||||
idna = "0.2"
|
||||
|
|
Loading…
Reference in a new issue
If we are on
failure_case_2
we display the failed nodes section, but it is empty. It can be reproduced by spawning 2 garage instances, then connecting the first instance to the second one.
eg: