From bc977f9a7a7a5bd87ccf5fe96d64b397591f8ba0 Mon Sep 17 00:00:00 2001 From: Alex Auvolat Date: Thu, 1 Sep 2022 12:58:20 +0200 Subject: [PATCH] Update to Netapp with OrderTag support and exploit OrderTags --- Cargo.lock | 2 +- src/api/s3/copy.rs | 10 ++++++-- src/api/s3/get.rs | 21 ++++++++++++----- src/block/manager.rs | 55 ++++++++++++++++++++++++++++++------------- src/rpc/rpc_helper.rs | 2 +- 5 files changed, 64 insertions(+), 26 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 7b5f6984..bcb8f215 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -2176,7 +2176,7 @@ dependencies = [ [[package]] name = "netapp" version = "0.5.0" -source = "git+https://git.deuxfleurs.fr/lx/netapp?branch=stream-body#3fd30c6e280fba41377c8b563352d756e8bc1caf" +source = "git+https://git.deuxfleurs.fr/lx/netapp?branch=stream-body#4a59b73d7bfd0f136f654e874afb5d2a9bf4df2e" dependencies = [ "arc-swap", "async-trait", diff --git a/src/api/s3/copy.rs b/src/api/s3/copy.rs index b54cbd23..10cf5935 100644 --- a/src/api/s3/copy.rs +++ b/src/api/s3/copy.rs @@ -9,6 +9,7 @@ use bytes::Bytes; use hyper::{Body, Request, Response}; use serde::Serialize; +use garage_rpc::rpc_helper::OrderTag; use garage_table::*; use garage_util::data::*; use garage_util::time::*; @@ -306,11 +307,16 @@ pub async fn handle_upload_part_copy( // if and only if the block returned is a block that already existed // in the Garage data store (thus we don't need to save it again). let garage2 = garage.clone(); + let order_stream = OrderTag::stream(); let source_blocks = stream::iter(blocks_to_copy) - .flat_map(|(block_hash, range_to_copy)| { + .enumerate() + .flat_map(|(i, (block_hash, range_to_copy))| { let garage3 = garage2.clone(); stream::once(async move { - let data = garage3.block_manager.rpc_get_block(&block_hash).await?; + let data = garage3 + .block_manager + .rpc_get_block(&block_hash, Some(order_stream.order(i as u64))) + .await?; match range_to_copy { Some(r) => Ok((data.slice(r), None)), None => Ok((data, Some(block_hash))), diff --git a/src/api/s3/get.rs b/src/api/s3/get.rs index c7621ade..dfc284fe 100644 --- a/src/api/s3/get.rs +++ b/src/api/s3/get.rs @@ -10,6 +10,7 @@ use http::header::{ use hyper::body::Bytes; use hyper::{Body, Request, Response, StatusCode}; +use garage_rpc::rpc_helper::OrderTag; use garage_table::EmptyKey; use garage_util::data::*; @@ -242,9 +243,11 @@ pub async fn handle_get( Ok(resp_builder.body(body)?) } ObjectVersionData::FirstBlock(_, first_block_hash) => { + let order_stream = OrderTag::stream(); + let read_first_block = garage .block_manager - .rpc_get_block_streaming(first_block_hash); + .rpc_get_block_streaming(first_block_hash, Some(order_stream.order(0))); let get_next_blocks = garage.version_table.get(&last_v.uuid, &EmptyKey); let (first_block_stream, version) = @@ -260,7 +263,8 @@ pub async fn handle_get( blocks[0].1 = Some(first_block_stream); let body_stream = futures::stream::iter(blocks) - .map(move |(hash, stream_opt)| { + .enumerate() + .map(move |(i, (hash, stream_opt))| { let garage = garage.clone(); async move { if let Some(stream) = stream_opt { @@ -268,7 +272,7 @@ pub async fn handle_get( } else { garage .block_manager - .rpc_get_block_streaming(&hash) + .rpc_get_block_streaming(&hash, Some(order_stream.order(i as u64))) .await .unwrap_or_else(|_| { Box::pin(futures::stream::once(async move { @@ -281,7 +285,7 @@ pub async fn handle_get( } } }) - .buffered(3) + .buffered(2) .flatten(); let body = hyper::body::Body::wrap_stream(body_stream); @@ -445,11 +449,16 @@ fn body_from_blocks_range( true_offset += b.size; } + let order_stream = OrderTag::stream(); let body_stream = futures::stream::iter(blocks) - .map(move |(block, true_offset)| { + .enumerate() + .map(move |(i, (block, true_offset))| { let garage = garage.clone(); async move { - let data = garage.block_manager.rpc_get_block(&block.hash).await?; + let data = garage + .block_manager + .rpc_get_block(&block.hash, Some(order_stream.order(i as u64))) + .await?; let start_in_block = if true_offset > begin { 0 } else { diff --git a/src/block/manager.rs b/src/block/manager.rs index b8fe4c74..b9f6fc0f 100644 --- a/src/block/manager.rs +++ b/src/block/manager.rs @@ -33,6 +33,7 @@ use garage_util::metrics::RecordDuration; use garage_util::time::*; use garage_util::tranquilizer::Tranquilizer; +use garage_rpc::rpc_helper::OrderTag; use garage_rpc::system::System; use garage_rpc::*; @@ -70,7 +71,7 @@ pub(crate) const BLOCK_GC_DELAY: Duration = Duration::from_secs(600); pub enum BlockRpc { Ok, /// Message to ask for a block of data, by hash - GetBlock(Hash), + GetBlock(Hash, Option), /// Message to send a block of data, either because requested, of for first delivery of new /// block PutBlock { @@ -183,15 +184,18 @@ impl BlockManager { async fn rpc_get_raw_block_streaming( &self, hash: &Hash, + order_tag: Option, ) -> Result<(DataBlockHeader, ByteStream), Error> { let who = self.replication.read_nodes(hash); //let who = self.system.rpc.request_order(&who); for node in who.iter() { let node_id = NodeID::from(*node); - let rpc = - self.endpoint - .call_streaming(&node_id, BlockRpc::GetBlock(*hash), PRIO_NORMAL); + let rpc = self.endpoint.call_streaming( + &node_id, + BlockRpc::GetBlock(*hash, order_tag), + PRIO_NORMAL, + ); tokio::select! { res = rpc => { let res = match res { @@ -224,15 +228,21 @@ impl BlockManager { /// Ask nodes that might have a (possibly compressed) block for it /// Return its entire body - async fn rpc_get_raw_block(&self, hash: &Hash) -> Result { + async fn rpc_get_raw_block( + &self, + hash: &Hash, + order_tag: Option, + ) -> Result { let who = self.replication.read_nodes(hash); //let who = self.system.rpc.request_order(&who); for node in who.iter() { let node_id = NodeID::from(*node); - let rpc = - self.endpoint - .call_streaming(&node_id, BlockRpc::GetBlock(*hash), PRIO_NORMAL); + let rpc = self.endpoint.call_streaming( + &node_id, + BlockRpc::GetBlock(*hash, order_tag), + PRIO_NORMAL, + ); tokio::select! { res = rpc => { let res = match res { @@ -275,11 +285,12 @@ impl BlockManager { pub async fn rpc_get_block_streaming( &self, hash: &Hash, + order_tag: Option, ) -> Result< Pin> + Send + Sync + 'static>>, Error, > { - let (header, stream) = self.rpc_get_raw_block_streaming(hash).await?; + let (header, stream) = self.rpc_get_raw_block_streaming(hash, order_tag).await?; match header { DataBlockHeader::Plain => Ok(Box::pin(stream.map_err(|_| { std::io::Error::new(std::io::ErrorKind::Other, "netapp stream error") @@ -295,8 +306,14 @@ impl BlockManager { } /// Ask nodes that might have a block for it - pub async fn rpc_get_block(&self, hash: &Hash) -> Result { - self.rpc_get_raw_block(hash).await?.verify_get(*hash) + pub async fn rpc_get_block( + &self, + hash: &Hash, + order_tag: Option, + ) -> Result { + self.rpc_get_raw_block(hash, order_tag) + .await? + .verify_get(*hash) } /// Send block to nodes that should have it @@ -441,7 +458,7 @@ impl BlockManager { Ok(()) } - async fn handle_get_block(&self, hash: &Hash) -> Resp { + async fn handle_get_block(&self, hash: &Hash, order_tag: Option) -> Resp { let block = match self.read_block(hash).await { Ok(data) => data, Err(e) => return Resp::new(Err(e)), @@ -449,11 +466,17 @@ impl BlockManager { let (header, data) = block.into_parts(); - Resp::new(Ok(BlockRpc::PutBlock { + let resp = Resp::new(Ok(BlockRpc::PutBlock { hash: *hash, header, })) - .with_stream_from_buffer(data) + .with_stream_from_buffer(data); + + if let Some(order_tag) = order_tag { + resp.with_order_tag(order_tag) + } else { + resp + } } /// Read block from disk, verifying it's integrity @@ -841,7 +864,7 @@ impl BlockManager { hash ); - let block_data = self.rpc_get_raw_block(hash).await?; + let block_data = self.rpc_get_raw_block(hash, None).await?; self.metrics.resync_recv_counter.add(1); @@ -861,7 +884,7 @@ impl StreamingEndpointHandler for BlockManager { .await .map(|_| BlockRpc::Ok), ), - BlockRpc::GetBlock(h) => self.handle_get_block(h).await, + BlockRpc::GetBlock(h, order_tag) => self.handle_get_block(h, *order_tag).await, BlockRpc::NeedBlockQuery(h) => { Resp::new(self.need_block(h).await.map(BlockRpc::NeedBlockReply)) } diff --git a/src/rpc/rpc_helper.rs b/src/rpc/rpc_helper.rs index 216fffd4..6c79c502 100644 --- a/src/rpc/rpc_helper.rs +++ b/src/rpc/rpc_helper.rs @@ -18,7 +18,7 @@ use opentelemetry::{ pub use netapp::endpoint::{Endpoint, EndpointHandler, StreamingEndpointHandler}; use netapp::message::IntoReq; pub use netapp::message::{ - Message as Rpc, Req, RequestPriority, Resp, PRIO_BACKGROUND, PRIO_HIGH, PRIO_NORMAL, + Message as Rpc, OrderTag, Req, RequestPriority, Resp, PRIO_BACKGROUND, PRIO_HIGH, PRIO_NORMAL, }; use netapp::peering::fullmesh::FullMeshPeeringStrategy; pub use netapp::{self, NetApp, NodeID};