forked from Deuxfleurs/garage
RequestStrategy with possible interruption or not
This commit is contained in:
parent
73574ab43e
commit
37f880bc09
4 changed files with 75 additions and 46 deletions
4
TODO
4
TODO
|
@ -12,10 +12,6 @@ Membership: keep IP addresses of failed nodes and try to reping them regularly
|
||||||
|
|
||||||
RPC client/server: do not go through the serialization+HTTP+TLS+deserialization when doing a request to ourself.
|
RPC client/server: do not go through the serialization+HTTP+TLS+deserialization when doing a request to ourself.
|
||||||
|
|
||||||
RPC requests: unify quorum + timeout in a "RequestStrategy" class,
|
|
||||||
and add to the request strategy whether or not the request should continue in the background
|
|
||||||
once `quorum` valid responses have been received
|
|
||||||
|
|
||||||
|
|
||||||
Attaining S3 compatibility
|
Attaining S3 compatibility
|
||||||
--------------------------
|
--------------------------
|
||||||
|
|
42
src/block.rs
42
src/block.rs
|
@ -317,7 +317,7 @@ impl BlockManager {
|
||||||
e
|
e
|
||||||
)));
|
)));
|
||||||
}
|
}
|
||||||
_ => {
|
Ok(_) => {
|
||||||
return Err(Error::Message(format!(
|
return Err(Error::Message(format!(
|
||||||
"Unexpected response to NeedBlockQuery RPC"
|
"Unexpected response to NeedBlockQuery RPC"
|
||||||
)));
|
)));
|
||||||
|
@ -327,13 +327,14 @@ impl BlockManager {
|
||||||
|
|
||||||
if need_nodes.len() > 0 {
|
if need_nodes.len() > 0 {
|
||||||
let put_block_message = self.read_block(hash).await?;
|
let put_block_message = self.read_block(hash).await?;
|
||||||
let put_responses = self
|
self.rpc_client
|
||||||
.rpc_client
|
.try_call_many(
|
||||||
.call_many(&need_nodes[..], put_block_message, BLOCK_RW_TIMEOUT)
|
&need_nodes[..],
|
||||||
.await;
|
put_block_message,
|
||||||
for resp in put_responses {
|
RequestStrategy::with_quorum(need_nodes.len())
|
||||||
resp?;
|
.with_timeout(BLOCK_RW_TIMEOUT),
|
||||||
}
|
)
|
||||||
|
.await?;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
fs::remove_file(path).await?;
|
fs::remove_file(path).await?;
|
||||||
|
@ -354,19 +355,22 @@ impl BlockManager {
|
||||||
pub async fn rpc_get_block(&self, hash: &Hash) -> Result<Vec<u8>, Error> {
|
pub async fn rpc_get_block(&self, hash: &Hash) -> Result<Vec<u8>, Error> {
|
||||||
let ring = self.system.ring.borrow().clone();
|
let ring = self.system.ring.borrow().clone();
|
||||||
let who = ring.walk_ring(&hash, self.system.config.data_replication_factor);
|
let who = ring.walk_ring(&hash, self.system.config.data_replication_factor);
|
||||||
let msg = Arc::new(Message::GetBlock(*hash));
|
let resps = self
|
||||||
let mut resp_stream = who
|
.rpc_client
|
||||||
.iter()
|
.try_call_many(
|
||||||
.map(|to| self.rpc_client.call(to, msg.clone(), BLOCK_RW_TIMEOUT))
|
&who[..],
|
||||||
.collect::<FuturesUnordered<_>>();
|
Message::GetBlock(*hash),
|
||||||
|
RequestStrategy::with_quorum(1)
|
||||||
|
.with_timeout(BLOCK_RW_TIMEOUT)
|
||||||
|
.interrupt_after_quorum(true),
|
||||||
|
)
|
||||||
|
.await?;
|
||||||
|
|
||||||
while let Some(resp) = resp_stream.next().await {
|
for resp in resps {
|
||||||
if let Ok(Message::PutBlock(msg)) = resp {
|
if let Message::PutBlock(msg) = resp {
|
||||||
if data::hash(&msg.data[..]) == *hash {
|
|
||||||
return Ok(msg.data);
|
return Ok(msg.data);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
|
||||||
Err(Error::Message(format!(
|
Err(Error::Message(format!(
|
||||||
"Unable to read block {:?}: no valid blocks returned",
|
"Unable to read block {:?}: no valid blocks returned",
|
||||||
hash
|
hash
|
||||||
|
@ -380,8 +384,8 @@ impl BlockManager {
|
||||||
.try_call_many(
|
.try_call_many(
|
||||||
&who[..],
|
&who[..],
|
||||||
Message::PutBlock(PutBlockMessage { hash, data }),
|
Message::PutBlock(PutBlockMessage { hash, data }),
|
||||||
(self.system.config.data_replication_factor + 1) / 2,
|
RequestStrategy::with_quorum((self.system.config.data_replication_factor + 1) / 2)
|
||||||
BLOCK_RW_TIMEOUT,
|
.with_timeout(BLOCK_RW_TIMEOUT),
|
||||||
)
|
)
|
||||||
.await?;
|
.await?;
|
||||||
Ok(())
|
Ok(())
|
||||||
|
|
|
@ -20,6 +20,33 @@ use crate::rpc_server::RpcMessage;
|
||||||
use crate::server::TlsConfig;
|
use crate::server::TlsConfig;
|
||||||
use crate::tls_util;
|
use crate::tls_util;
|
||||||
|
|
||||||
|
const DEFAULT_TIMEOUT: Duration = Duration::from_secs(10);
|
||||||
|
|
||||||
|
#[derive(Copy, Clone)]
|
||||||
|
pub struct RequestStrategy {
|
||||||
|
pub rs_timeout: Duration,
|
||||||
|
pub rs_quorum: usize,
|
||||||
|
pub rs_interrupt_after_quorum: bool,
|
||||||
|
}
|
||||||
|
|
||||||
|
impl RequestStrategy {
|
||||||
|
pub fn with_quorum(quorum: usize) -> Self {
|
||||||
|
RequestStrategy {
|
||||||
|
rs_timeout: DEFAULT_TIMEOUT,
|
||||||
|
rs_quorum: quorum,
|
||||||
|
rs_interrupt_after_quorum: false,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
pub fn with_timeout(mut self, timeout: Duration) -> Self {
|
||||||
|
self.rs_timeout = timeout;
|
||||||
|
self
|
||||||
|
}
|
||||||
|
pub fn interrupt_after_quorum(mut self, interrupt: bool) -> Self {
|
||||||
|
self.rs_interrupt_after_quorum = interrupt;
|
||||||
|
self
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
pub struct RpcClient<M: RpcMessage> {
|
pub struct RpcClient<M: RpcMessage> {
|
||||||
status: watch::Receiver<Arc<Status>>,
|
status: watch::Receiver<Arc<Status>>,
|
||||||
background: Arc<BackgroundRunner>,
|
background: Arc<BackgroundRunner>,
|
||||||
|
@ -83,9 +110,10 @@ impl<M: RpcMessage + 'static> RpcClient<M> {
|
||||||
self: &Arc<Self>,
|
self: &Arc<Self>,
|
||||||
to: &[UUID],
|
to: &[UUID],
|
||||||
msg: M,
|
msg: M,
|
||||||
stop_after: usize,
|
strategy: RequestStrategy,
|
||||||
timeout: Duration,
|
|
||||||
) -> Result<Vec<M>, Error> {
|
) -> Result<Vec<M>, Error> {
|
||||||
|
let timeout = strategy.rs_timeout;
|
||||||
|
|
||||||
let msg = Arc::new(msg);
|
let msg = Arc::new(msg);
|
||||||
let mut resp_stream = to
|
let mut resp_stream = to
|
||||||
.to_vec()
|
.to_vec()
|
||||||
|
@ -104,7 +132,7 @@ impl<M: RpcMessage + 'static> RpcClient<M> {
|
||||||
match resp {
|
match resp {
|
||||||
Ok(msg) => {
|
Ok(msg) => {
|
||||||
results.push(msg);
|
results.push(msg);
|
||||||
if results.len() >= stop_after {
|
if results.len() >= strategy.rs_quorum {
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -114,16 +142,15 @@ impl<M: RpcMessage + 'static> RpcClient<M> {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
if results.len() >= stop_after {
|
if results.len() >= strategy.rs_quorum {
|
||||||
// Continue requests in background
|
// Continue requests in background.
|
||||||
// TODO: make this optionnal (only usefull for write requests)
|
|
||||||
|
|
||||||
// Continue the remaining requests immediately using tokio::spawn
|
// Continue the remaining requests immediately using tokio::spawn
|
||||||
// but enqueue a task in the background runner
|
// but enqueue a task in the background runner
|
||||||
// to ensure that the process won't exit until the requests are done
|
// to ensure that the process won't exit until the requests are done
|
||||||
// (if we had just enqueued the resp_stream.collect directly in the background runner,
|
// (if we had just enqueued the resp_stream.collect directly in the background runner,
|
||||||
// the requests might have been put on hold in the background runner's queue,
|
// the requests might have been put on hold in the background runner's queue,
|
||||||
// in which case they might timeout or otherwise fail)
|
// in which case they might timeout or otherwise fail)
|
||||||
|
if !strategy.rs_interrupt_after_quorum {
|
||||||
let wait_finished_fut = tokio::spawn(async move {
|
let wait_finished_fut = tokio::spawn(async move {
|
||||||
resp_stream.collect::<Vec<_>>().await;
|
resp_stream.collect::<Vec<_>>().await;
|
||||||
Ok(())
|
Ok(())
|
||||||
|
@ -131,6 +158,7 @@ impl<M: RpcMessage + 'static> RpcClient<M> {
|
||||||
self.clone().background.spawn(wait_finished_fut.map(|x| {
|
self.clone().background.spawn(wait_finished_fut.map(|x| {
|
||||||
x.unwrap_or_else(|e| Err(Error::Message(format!("Await failed: {}", e))))
|
x.unwrap_or_else(|e| Err(Error::Message(format!("Await failed: {}", e))))
|
||||||
}));
|
}));
|
||||||
|
}
|
||||||
|
|
||||||
Ok(results)
|
Ok(results)
|
||||||
} else {
|
} else {
|
||||||
|
|
17
src/table.rs
17
src/table.rs
|
@ -179,8 +179,8 @@ where
|
||||||
.try_call_many(
|
.try_call_many(
|
||||||
&who[..],
|
&who[..],
|
||||||
rpc,
|
rpc,
|
||||||
self.replication.write_quorum(),
|
RequestStrategy::with_quorum(self.replication.write_quorum())
|
||||||
TABLE_RPC_TIMEOUT,
|
.with_timeout(TABLE_RPC_TIMEOUT),
|
||||||
)
|
)
|
||||||
.await?;
|
.await?;
|
||||||
Ok(())
|
Ok(())
|
||||||
|
@ -237,8 +237,9 @@ where
|
||||||
.try_call_many(
|
.try_call_many(
|
||||||
&who[..],
|
&who[..],
|
||||||
rpc,
|
rpc,
|
||||||
self.replication.read_quorum(),
|
RequestStrategy::with_quorum(self.replication.read_quorum())
|
||||||
TABLE_RPC_TIMEOUT,
|
.with_timeout(TABLE_RPC_TIMEOUT)
|
||||||
|
.interrupt_after_quorum(true),
|
||||||
)
|
)
|
||||||
.await?;
|
.await?;
|
||||||
|
|
||||||
|
@ -292,8 +293,9 @@ where
|
||||||
.try_call_many(
|
.try_call_many(
|
||||||
&who[..],
|
&who[..],
|
||||||
rpc,
|
rpc,
|
||||||
self.replication.read_quorum(),
|
RequestStrategy::with_quorum(self.replication.read_quorum())
|
||||||
TABLE_RPC_TIMEOUT,
|
.with_timeout(TABLE_RPC_TIMEOUT)
|
||||||
|
.interrupt_after_quorum(true),
|
||||||
)
|
)
|
||||||
.await?;
|
.await?;
|
||||||
|
|
||||||
|
@ -347,8 +349,7 @@ where
|
||||||
.try_call_many(
|
.try_call_many(
|
||||||
&who[..],
|
&who[..],
|
||||||
TableRPC::<F>::Update(vec![what_enc]),
|
TableRPC::<F>::Update(vec![what_enc]),
|
||||||
who.len(),
|
RequestStrategy::with_quorum(who.len()).with_timeout(TABLE_RPC_TIMEOUT),
|
||||||
TABLE_RPC_TIMEOUT,
|
|
||||||
)
|
)
|
||||||
.await?;
|
.await?;
|
||||||
Ok(())
|
Ok(())
|
||||||
|
|
Loading…
Reference in a new issue