Fix sync: use max root checksum level

This commit is contained in:
Alex 2020-04-17 21:59:07 +02:00
parent f62b54f1df
commit 3f40ef149f
4 changed files with 114 additions and 81 deletions

View file

@ -86,16 +86,14 @@ async fn handler_inner(
.to_string(); .to_string();
let version_uuid = let version_uuid =
handle_put(garage, &mime_type, &bucket, &key, req.into_body()).await?; handle_put(garage, &mime_type, &bucket, &key, req.into_body()).await?;
Ok(Response::new(Box::new(BytesBody::from(format!( Ok(Response::new(Box::new(BytesBody::from(hex::encode(
"{:?}\n", version_uuid,
version_uuid
))))) )))))
} }
&Method::DELETE => { &Method::DELETE => {
let version_uuid = handle_delete(garage, &bucket, &key).await?; let version_uuid = handle_delete(garage, &bucket, &key).await?;
Ok(Response::new(Box::new(BytesBody::from(format!( Ok(Response::new(Box::new(BytesBody::from(hex::encode(
"{:?}\n", version_uuid,
version_uuid
))))) )))))
} }
_ => Err(Error::BadRequest(format!("Invalid method"))), _ => Err(Error::BadRequest(format!("Invalid method"))),

View file

@ -242,14 +242,14 @@ impl BlockManager {
if need_nodes.len() > 0 { if need_nodes.len() > 0 {
let put_block_message = self.read_block(hash).await?; let put_block_message = self.read_block(hash).await?;
for resp in rpc_call_many( let put_responses = rpc_call_many(
garage.system.clone(), garage.system.clone(),
&need_nodes[..], &need_nodes[..],
put_block_message, put_block_message,
BLOCK_RW_TIMEOUT, BLOCK_RW_TIMEOUT,
) )
.await .await;
{ for resp in put_responses {
resp?; resp?;
} }
} }

View file

@ -155,7 +155,7 @@ impl<F: TableSchema + 'static> Table<F> {
let hash = e.partition_key().hash(); let hash = e.partition_key().hash();
let ring = self.system.ring.borrow().clone(); let ring = self.system.ring.borrow().clone();
let who = ring.walk_ring(&hash, self.param.replication_factor); let who = ring.walk_ring(&hash, self.param.replication_factor);
eprintln!("insert who: {:?}", who); //eprintln!("insert who: {:?}", who);
let e_enc = Arc::new(ByteBuf::from(rmp_to_vec_all_named(e)?)); let e_enc = Arc::new(ByteBuf::from(rmp_to_vec_all_named(e)?));
let rpc = &TableRPC::<F>::Update(vec![e_enc]); let rpc = &TableRPC::<F>::Update(vec![e_enc]);
@ -212,7 +212,7 @@ impl<F: TableSchema + 'static> Table<F> {
let hash = partition_key.hash(); let hash = partition_key.hash();
let ring = self.system.ring.borrow().clone(); let ring = self.system.ring.borrow().clone();
let who = ring.walk_ring(&hash, self.param.replication_factor); let who = ring.walk_ring(&hash, self.param.replication_factor);
eprintln!("get who: {:?}", who); //eprintln!("get who: {:?}", who);
let rpc = &TableRPC::<F>::ReadEntry(partition_key.clone(), sort_key.clone()); let rpc = &TableRPC::<F>::ReadEntry(partition_key.clone(), sort_key.clone());
let resps = self let resps = self

View file

@ -29,6 +29,8 @@ pub struct TableSyncer<F: TableSchema> {
#[derive(Serialize, Deserialize)] #[derive(Serialize, Deserialize)]
pub enum SyncRPC { pub enum SyncRPC {
GetRootChecksumRange(Hash, Hash),
RootChecksumRange(SyncRange),
Checksums(Vec<RangeChecksum>, bool), Checksums(Vec<RangeChecksum>, bool),
Difference(Vec<SyncRange>, Vec<Arc<ByteBuf>>), Difference(Vec<SyncRange>, Vec<Arc<ByteBuf>>),
} }
@ -180,6 +182,7 @@ impl<F: TableSchema + 'static> TableSyncer<F> {
.filter(|node| **node != my_id) .filter(|node| **node != my_id)
.map(|node| { .map(|node| {
self.clone().do_sync_with( self.clone().do_sync_with(
partition.clone(),
root_cks.clone(), root_cks.clone(),
node.clone(), node.clone(),
partition.retain, partition.retain,
@ -346,13 +349,33 @@ impl<F: TableSchema + 'static> TableSyncer<F> {
async fn do_sync_with( async fn do_sync_with(
self: Arc<Self>, self: Arc<Self>,
partition: Partition,
root_ck: RangeChecksum, root_ck: RangeChecksum,
who: UUID, who: UUID,
retain: bool, retain: bool,
mut must_exit: watch::Receiver<bool>, mut must_exit: watch::Receiver<bool>,
) -> Result<(), Error> { ) -> Result<(), Error> {
let mut todo = VecDeque::new(); let mut todo = VecDeque::new();
todo.push_back(root_ck);
// If their root checksum has a higher level than ours, use that as a reference
let root_cks_resp = self
.table
.rpc_call(
&who,
&TableRPC::<F>::SyncRPC(SyncRPC::GetRootChecksumRange(
partition.begin.clone(),
partition.end.clone(),
)),
)
.await?;
if let TableRPC::<F>::SyncRPC(SyncRPC::RootChecksumRange(range)) = root_cks_resp {
if range.level > root_ck.bounds.level {
let their_root_range_ck = self.range_checksum(&range, &mut must_exit).await?;
todo.push_back(their_root_range_ck);
} else {
todo.push_back(root_ck);
}
}
while !todo.is_empty() && !*must_exit.borrow() { while !todo.is_empty() && !*must_exit.borrow() {
let total_children = todo.iter().map(|x| x.children.len()).fold(0, |x, y| x + y); let total_children = todo.iter().map(|x| x.children.len()).fold(0, |x, y| x + y);
@ -445,83 +468,95 @@ impl<F: TableSchema + 'static> TableSyncer<F> {
message: &SyncRPC, message: &SyncRPC,
mut must_exit: watch::Receiver<bool>, mut must_exit: watch::Receiver<bool>,
) -> Result<SyncRPC, Error> { ) -> Result<SyncRPC, Error> {
if let SyncRPC::Checksums(checksums, retain) = message { match message {
let mut ret_ranges = vec![]; SyncRPC::GetRootChecksumRange(begin, end) => {
let mut ret_items = vec![]; let root_cks = self.root_checksum(&begin, &end, &mut must_exit).await?;
for ckr in checksums.iter() { Ok(SyncRPC::RootChecksumRange(root_cks.bounds))
let our_ckr = self.range_checksum(&ckr.bounds, &mut must_exit).await?; }
for (range, hash) in ckr.children.iter() { SyncRPC::Checksums(checksums, retain) => {
// Only consider items that are in the intersection of the two ranges self.handle_checksums_rpc(&checksums[..], *retain, &mut must_exit)
// (other ranges will be exchanged at some point) .await
if our_ckr }
.found_limit _ => Err(Error::Message(format!("Unexpected sync RPC"))),
.as_ref() }
.map(|x| range.begin.as_slice() >= x.as_slice()) }
.unwrap_or(false)
{
break;
}
let differs = match our_ckr pub async fn handle_checksums_rpc(
.children self: &Arc<Self>,
.binary_search_by(|(our_range, _)| our_range.begin.cmp(&range.begin)) checksums: &[RangeChecksum],
{ retain: bool,
Err(_) => true, must_exit: &mut watch::Receiver<bool>,
Ok(i) => our_ckr.children[i].1 != *hash, ) -> Result<SyncRPC, Error> {
}; let mut ret_ranges = vec![];
if differs { let mut ret_items = vec![];
ret_ranges.push(range.clone()); for ckr in checksums.iter() {
if *retain && range.level == 0 { let our_ckr = self.range_checksum(&ckr.bounds, must_exit).await?;
if let Some(item_bytes) = for (range, hash) in ckr.children.iter() {
self.table.store.get(range.begin.as_slice())? // Only consider items that are in the intersection of the two ranges
{ // (other ranges will be exchanged at some point)
ret_items.push(Arc::new(ByteBuf::from(item_bytes.to_vec()))); if our_ckr
} .found_limit
} .as_ref()
} .map(|x| range.begin.as_slice() >= x.as_slice())
.unwrap_or(false)
{
break;
} }
for (range, _hash) in our_ckr.children.iter() {
if ckr
.found_limit
.as_ref()
.map(|x| range.begin.as_slice() >= x.as_slice())
.unwrap_or(false)
{
break;
}
let not_present = ckr let differs = match our_ckr
.children .children
.binary_search_by(|(their_range, _)| their_range.begin.cmp(&range.begin)) .binary_search_by(|(our_range, _)| our_range.begin.cmp(&range.begin))
.is_err(); {
if not_present { Err(_) => true,
if range.level > 0 { Ok(i) => our_ckr.children[i].1 != *hash,
ret_ranges.push(range.clone()); };
} if differs {
if *retain && range.level == 0 { ret_ranges.push(range.clone());
if let Some(item_bytes) = if retain && range.level == 0 {
self.table.store.get(range.begin.as_slice())? if let Some(item_bytes) = self.table.store.get(range.begin.as_slice())? {
{ ret_items.push(Arc::new(ByteBuf::from(item_bytes.to_vec())));
ret_items.push(Arc::new(ByteBuf::from(item_bytes.to_vec()))); }
} }
}
}
for (range, _hash) in our_ckr.children.iter() {
if ckr
.found_limit
.as_ref()
.map(|x| range.begin.as_slice() >= x.as_slice())
.unwrap_or(false)
{
break;
}
let not_present = ckr
.children
.binary_search_by(|(their_range, _)| their_range.begin.cmp(&range.begin))
.is_err();
if not_present {
if range.level > 0 {
ret_ranges.push(range.clone());
}
if retain && range.level == 0 {
if let Some(item_bytes) = self.table.store.get(range.begin.as_slice())? {
ret_items.push(Arc::new(ByteBuf::from(item_bytes.to_vec())));
} }
} }
} }
} }
let n_checksums = checksums
.iter()
.map(|x| x.children.len())
.fold(0, |x, y| x + y);
eprintln!(
"({}) Checksum comparison RPC: {} different + {} items for {} received",
self.table.name,
ret_ranges.len(),
ret_items.len(),
n_checksums
);
return Ok(SyncRPC::Difference(ret_ranges, ret_items));
} }
Err(Error::Message(format!("Unexpected sync RPC"))) let n_checksums = checksums
.iter()
.map(|x| x.children.len())
.fold(0, |x, y| x + y);
eprintln!(
"({}) Checksum comparison RPC: {} different + {} items for {} received",
self.table.name,
ret_ranges.len(),
ret_items.len(),
n_checksums
);
Ok(SyncRPC::Difference(ret_ranges, ret_items))
} }
pub async fn invalidate(self: Arc<Self>, item_key: Vec<u8>) -> Result<(), Error> { pub async fn invalidate(self: Arc<Self>, item_key: Vec<u8>) -> Result<(), Error> {