garage/src/rpc/rpc_client.rs

366 lines
9.8 KiB
Rust
Raw Normal View History

2021-03-21 23:00:09 +00:00
//! Contains structs related to making RPCs
use std::borrow::Borrow;
2020-04-18 17:21:34 +00:00
use std::marker::PhantomData;
use std::net::SocketAddr;
use std::pin::Pin;
use std::sync::atomic::Ordering;
use std::sync::Arc;
use std::time::Duration;
use arc_swap::ArcSwapOption;
use futures::future::Future;
use futures::stream::futures_unordered::FuturesUnordered;
use futures::stream::StreamExt;
use futures_util::future::FutureExt;
2020-04-12 13:51:19 +00:00
use hyper::client::{Client, HttpConnector};
use hyper::{Body, Method, Request};
use tokio::sync::{watch, Semaphore};
2020-04-24 10:10:01 +00:00
use garage_util::background::BackgroundRunner;
use garage_util::config::TlsConfig;
use garage_util::data::*;
2021-05-02 21:13:08 +00:00
use garage_util::error::{Error, RpcError};
2020-04-23 17:05:46 +00:00
2020-04-24 10:10:01 +00:00
use crate::membership::Status;
use crate::rpc_server::RpcMessage;
use crate::tls_util;
/// Timeout used by [`RequestStrategy::with_quorum`] until the caller overrides it.
const DEFAULT_TIMEOUT: Duration = Duration::from_secs(10);

/// Strategy to apply when making RPC
#[derive(Debug, Copy, Clone, PartialEq, Eq)]
pub struct RequestStrategy {
    /// Max time to wait for a response
    pub rs_timeout: Duration,
    /// Min number of responses needed to consider the request successful
    pub rs_quorum: usize,
    /// Should the remaining requests be dropped once enough responses are received
    pub rs_interrupt_after_quorum: bool,
}

impl RequestStrategy {
    /// Create a RequestStrategy with the default timeout and not interrupting when quorum is
    /// reached
    #[must_use]
    pub fn with_quorum(quorum: usize) -> Self {
        RequestStrategy {
            rs_timeout: DEFAULT_TIMEOUT,
            rs_quorum: quorum,
            rs_interrupt_after_quorum: false,
        }
    }

    /// Set the timeout of the strategy
    ///
    /// The strategy is taken and returned by value: the result must be used, the receiver is
    /// not modified in place.
    #[must_use]
    pub fn with_timeout(mut self, timeout: Duration) -> Self {
        self.rs_timeout = timeout;
        self
    }

    /// Set if requests can be dropped after quorum has been reached
    ///
    /// In general true for read requests, and false for write.
    /// The strategy is taken and returned by value: the result must be used.
    #[must_use]
    pub fn interrupt_after_quorum(mut self, interrupt: bool) -> Self {
        self.rs_interrupt_after_quorum = interrupt;
        self
    }
}
2021-03-21 23:00:09 +00:00
/// Shortcut for a boxed async function taking a message, and resolving to another message or an
/// error.
///
/// The function receives the request wrapped in an `Arc` and returns a pinned, boxed, `Send`
/// future whose output is `Result<M, Error>`. The boxed function itself is `Send + Sync`.
pub type LocalHandlerFn<M> =
Box<dyn Fn(Arc<M>) -> Pin<Box<dyn Future<Output = Result<M, Error>> + Send>> + Send + Sync>;
2021-03-21 23:00:09 +00:00
/// Client used to send RPC
2020-04-18 17:21:34 +00:00
pub struct RpcClient<M: RpcMessage> {
status: watch::Receiver<Arc<Status>>,
background: Arc<BackgroundRunner>,
2021-05-02 21:13:08 +00:00
local_handler: ArcSwapOption<(Uuid, LocalHandlerFn<M>)>,
2020-12-12 16:58:19 +00:00
rpc_addr_client: RpcAddrClient<M>,
}
2020-04-18 17:21:34 +00:00
impl<M: RpcMessage + 'static> RpcClient<M> {
2021-03-21 23:00:09 +00:00
/// Create a new RpcClient from an address, a job runner, and the status of all RPC servers
2020-04-18 17:21:34 +00:00
pub fn new(
rac: RpcAddrClient<M>,
background: Arc<BackgroundRunner>,
status: watch::Receiver<Arc<Status>>,
) -> Arc<Self> {
Arc::new(Self {
rpc_addr_client: rac,
background,
status,
local_handler: ArcSwapOption::new(None),
2020-04-18 17:21:34 +00:00
})
}
2021-03-21 23:00:09 +00:00
/// Set the local handler, to process RPC to this node without network usage
2021-05-02 21:13:08 +00:00
pub fn set_local_handler<F, Fut>(&self, my_id: Uuid, handler: F)
where
F: Fn(Arc<M>) -> Fut + Send + Sync + 'static,
Fut: Future<Output = Result<M, Error>> + Send + 'static,
{
let handler_arc = Arc::new(handler);
let handler: LocalHandlerFn<M> = Box::new(move |msg| {
let handler_arc2 = handler_arc.clone();
Box::pin(async move { handler_arc2(msg).await })
});
self.local_handler.swap(Some(Arc::new((my_id, handler))));
}
2021-04-06 03:25:28 +00:00
/// Get a RPC client to make calls using node's SocketAddr instead of its ID
pub fn by_addr(&self) -> &RpcAddrClient<M> {
2020-04-18 17:21:34 +00:00
&self.rpc_addr_client
}
2021-03-21 23:00:09 +00:00
/// Make a RPC call
2021-05-02 21:13:08 +00:00
pub async fn call(&self, to: Uuid, msg: M, timeout: Duration) -> Result<M, Error> {
self.call_arc(to, Arc::new(msg), timeout).await
}
2021-03-21 23:00:09 +00:00
/// Make a RPC call from a message stored in an Arc
2021-05-02 21:13:08 +00:00
pub async fn call_arc(&self, to: Uuid, msg: Arc<M>, timeout: Duration) -> Result<M, Error> {
if let Some(lh) = self.local_handler.load_full() {
let (my_id, local_handler) = lh.as_ref();
if to.borrow() == my_id {
return local_handler(msg).await;
}
}
let status = self.status.borrow().clone();
let node_status = match status.nodes.get(&to) {
Some(node_status) => {
if node_status.is_up() {
node_status
} else {
2021-05-02 21:13:08 +00:00
return Err(Error::from(RpcError::NodeDown(to)));
}
}
None => {
return Err(Error::Message(format!(
"Peer ID not found: {:?}",
to.borrow()
)))
}
2020-04-18 17:21:34 +00:00
};
match self
.rpc_addr_client
.call(&node_status.addr, msg, timeout)
.await
{
Err(rpc_error) => {
node_status.num_failures.fetch_add(1, Ordering::SeqCst);
Err(Error::from(rpc_error))
}
Ok(x) => x,
}
2020-04-18 17:21:34 +00:00
}
2021-03-21 23:00:09 +00:00
/// Make a RPC call to multiple servers, returning a Vec containing each result
2021-05-02 21:13:08 +00:00
pub async fn call_many(&self, to: &[Uuid], msg: M, timeout: Duration) -> Vec<Result<M, Error>> {
2020-04-18 17:21:34 +00:00
let msg = Arc::new(msg);
let mut resp_stream = to
.iter()
.map(|to| self.call_arc(*to, msg.clone(), timeout))
2020-04-18 17:21:34 +00:00
.collect::<FuturesUnordered<_>>();
let mut results = vec![];
while let Some(resp) = resp_stream.next().await {
results.push(resp);
}
2020-04-18 17:21:34 +00:00
results
}
2021-03-21 23:00:09 +00:00
/// Make a RPC call to multiple servers, returning either a Vec of responses, or an error if
2021-04-06 03:25:28 +00:00
/// strategy could not be respected due to too many errors
2020-04-18 17:21:34 +00:00
pub async fn try_call_many(
self: &Arc<Self>,
2021-05-02 21:13:08 +00:00
to: &[Uuid],
2020-04-18 17:21:34 +00:00
msg: M,
strategy: RequestStrategy,
2020-04-18 17:21:34 +00:00
) -> Result<Vec<M>, Error> {
let timeout = strategy.rs_timeout;
2020-04-18 17:21:34 +00:00
let msg = Arc::new(msg);
let mut resp_stream = to
.to_vec()
.into_iter()
.map(|to| {
let self2 = self.clone();
let msg = msg.clone();
async move { self2.call_arc(to, msg, timeout).await }
2020-04-18 17:21:34 +00:00
})
.collect::<FuturesUnordered<_>>();
let mut results = vec![];
let mut errors = vec![];
while let Some(resp) = resp_stream.next().await {
match resp {
Ok(msg) => {
results.push(msg);
if results.len() >= strategy.rs_quorum {
2020-04-18 17:21:34 +00:00
break;
}
}
Err(e) => {
errors.push(e);
}
}
}
if results.len() >= strategy.rs_quorum {
// Continue requests in background.
// Continue the remaining requests immediately using tokio::spawn
// but enqueue a task in the background runner
// to ensure that the process won't exit until the requests are done
// (if we had just enqueued the resp_stream.collect directly in the background runner,
// the requests might have been put on hold in the background runner's queue,
// in which case they might timeout or otherwise fail)
if !strategy.rs_interrupt_after_quorum {
let wait_finished_fut = tokio::spawn(async move {
resp_stream.collect::<Vec<_>>().await;
});
self.background.spawn(wait_finished_fut.map(|_| Ok(())));
}
2020-04-18 17:21:34 +00:00
Ok(results)
} else {
2020-04-23 16:23:06 +00:00
let errors = errors.iter().map(|e| format!("{}", e)).collect::<Vec<_>>();
2021-05-02 21:13:08 +00:00
Err(Error::from(RpcError::TooManyErrors(errors)))
}
}
}
2021-04-06 03:25:28 +00:00
/// Thin wrapper arround an `RpcHttpClient` specifying the path of the request
2020-04-18 17:21:34 +00:00
pub struct RpcAddrClient<M: RpcMessage> {
phantom: PhantomData<M>,
2020-12-12 16:58:19 +00:00
http_client: Arc<RpcHttpClient>,
path: String,
2020-04-18 17:21:34 +00:00
}
impl<M: RpcMessage> RpcAddrClient<M> {
2021-03-21 23:00:09 +00:00
/// Create an RpcAddrClient from an HTTP client and the endpoint to reach for RPCs
2020-04-18 17:21:34 +00:00
pub fn new(http_client: Arc<RpcHttpClient>, path: String) -> Self {
Self {
phantom: PhantomData::default(),
2021-04-09 00:32:42 +00:00
http_client,
2020-04-18 17:21:34 +00:00
path,
}
2020-04-18 17:21:34 +00:00
}
2021-03-21 23:00:09 +00:00
/// Make a RPC
2020-04-18 17:21:34 +00:00
pub async fn call<MB>(
&self,
to_addr: &SocketAddr,
msg: MB,
timeout: Duration,
2021-05-02 21:13:08 +00:00
) -> Result<Result<M, Error>, RpcError>
2020-04-18 17:21:34 +00:00
where
MB: Borrow<M>,
{
self.http_client
.call(&self.path, to_addr, msg, timeout)
.await
}
}
2021-03-21 23:00:09 +00:00
/// HTTP client used to make RPCs
pub struct RpcHttpClient {
/// Semaphore created with `max_concurrent_requests` permits; a permit is acquired before a
/// request is awaited and released once its response body has been read
request_limiter: Semaphore,
/// Underlying hyper client, plain HTTP or HTTPS depending on whether a TLS configuration was
/// given at construction
method: ClientMethod,
}
enum ClientMethod {
2021-05-02 21:13:08 +00:00
Http(Client<HttpConnector, hyper::Body>),
Https(Client<tls_util::HttpsConnectorFixedDnsname<HttpConnector>, hyper::Body>),
}
2020-04-18 17:21:34 +00:00
impl RpcHttpClient {
2021-03-21 23:00:09 +00:00
/// Create a new RpcHttpClient
pub fn new(
max_concurrent_requests: usize,
tls_config: &Option<TlsConfig>,
) -> Result<Self, Error> {
let method = if let Some(cf) = tls_config {
2020-04-12 13:51:19 +00:00
let ca_certs = tls_util::load_certs(&cf.ca_cert)?;
let node_certs = tls_util::load_certs(&cf.node_cert)?;
let node_key = tls_util::load_private_key(&cf.node_key)?;
let mut config = rustls::ClientConfig::new();
for crt in ca_certs.iter() {
config.root_store.add(crt)?;
}
config.set_single_client_cert([&node_certs[..], &ca_certs[..]].concat(), node_key)?;
2020-04-12 13:51:19 +00:00
let connector =
tls_util::HttpsConnectorFixedDnsname::<HttpConnector>::new(config, "garage");
2020-04-12 13:51:19 +00:00
2021-05-02 21:13:08 +00:00
ClientMethod::Https(Client::builder().build(connector))
2020-04-12 13:51:19 +00:00
} else {
2021-05-02 21:13:08 +00:00
ClientMethod::Http(Client::new())
};
Ok(RpcHttpClient {
method,
request_limiter: Semaphore::new(max_concurrent_requests),
})
}
2021-03-21 23:00:09 +00:00
/// Make a RPC
2020-04-18 17:21:34 +00:00
async fn call<M, MB>(
&self,
2020-04-18 17:21:34 +00:00
path: &str,
to_addr: &SocketAddr,
2020-04-18 17:21:34 +00:00
msg: MB,
timeout: Duration,
2021-05-02 21:13:08 +00:00
) -> Result<Result<M, Error>, RpcError>
2020-04-18 17:21:34 +00:00
where
MB: Borrow<M>,
M: RpcMessage,
{
let uri = match self.method {
2021-05-02 21:13:08 +00:00
ClientMethod::Http(_) => format!("http://{}/{}", to_addr, path),
ClientMethod::Https(_) => format!("https://{}/{}", to_addr, path),
2020-04-12 13:51:19 +00:00
};
let req = Request::builder()
.method(Method::POST)
.uri(uri)
.body(Body::from(rmp_to_vec_all_named(msg.borrow())?))?;
let resp_fut = match &self.method {
2021-05-02 21:13:08 +00:00
ClientMethod::Http(client) => client.request(req).fuse(),
ClientMethod::Https(client) => client.request(req).fuse(),
2020-04-12 13:51:19 +00:00
};
trace!("({}) Acquiring request_limiter slot...", path);
let slot = self.request_limiter.acquire().await;
trace!("({}) Got slot, doing request to {}...", path, to_addr);
2020-04-12 13:51:19 +00:00
let resp = tokio::time::timeout(timeout, resp_fut)
.await
.map_err(|e| {
debug!(
"RPC timeout to {}: {}",
to_addr,
debug_serialize(msg.borrow())
);
e
})?
2020-04-12 13:51:19 +00:00
.map_err(|e| {
2020-04-21 12:54:55 +00:00
warn!(
2020-04-16 12:50:49 +00:00
"RPC HTTP client error when connecting to {}: {}",
to_addr, e
);
2020-04-12 13:51:19 +00:00
e
})?;
let status = resp.status();
trace!("({}) Request returned, got status {}", path, status);
let body = hyper::body::to_bytes(resp.into_body()).await?;
drop(slot);
2021-03-16 14:58:40 +00:00
match rmp_serde::decode::from_read::<_, Result<M, String>>(&body[..])? {
Err(e) => Ok(Err(Error::RemoteError(e, status))),
Ok(x) => Ok(Ok(x)),
}
}
}