forked from Deuxfleurs/garage
Fix sync: use max root checksum level
This commit is contained in:
parent
f62b54f1df
commit
3f40ef149f
4 changed files with 114 additions and 81 deletions
|
@ -86,16 +86,14 @@ async fn handler_inner(
|
||||||
.to_string();
|
.to_string();
|
||||||
let version_uuid =
|
let version_uuid =
|
||||||
handle_put(garage, &mime_type, &bucket, &key, req.into_body()).await?;
|
handle_put(garage, &mime_type, &bucket, &key, req.into_body()).await?;
|
||||||
Ok(Response::new(Box::new(BytesBody::from(format!(
|
Ok(Response::new(Box::new(BytesBody::from(hex::encode(
|
||||||
"{:?}\n",
|
version_uuid,
|
||||||
version_uuid
|
|
||||||
)))))
|
)))))
|
||||||
}
|
}
|
||||||
&Method::DELETE => {
|
&Method::DELETE => {
|
||||||
let version_uuid = handle_delete(garage, &bucket, &key).await?;
|
let version_uuid = handle_delete(garage, &bucket, &key).await?;
|
||||||
Ok(Response::new(Box::new(BytesBody::from(format!(
|
Ok(Response::new(Box::new(BytesBody::from(hex::encode(
|
||||||
"{:?}\n",
|
version_uuid,
|
||||||
version_uuid
|
|
||||||
)))))
|
)))))
|
||||||
}
|
}
|
||||||
_ => Err(Error::BadRequest(format!("Invalid method"))),
|
_ => Err(Error::BadRequest(format!("Invalid method"))),
|
||||||
|
|
|
@ -242,14 +242,14 @@ impl BlockManager {
|
||||||
|
|
||||||
if need_nodes.len() > 0 {
|
if need_nodes.len() > 0 {
|
||||||
let put_block_message = self.read_block(hash).await?;
|
let put_block_message = self.read_block(hash).await?;
|
||||||
for resp in rpc_call_many(
|
let put_responses = rpc_call_many(
|
||||||
garage.system.clone(),
|
garage.system.clone(),
|
||||||
&need_nodes[..],
|
&need_nodes[..],
|
||||||
put_block_message,
|
put_block_message,
|
||||||
BLOCK_RW_TIMEOUT,
|
BLOCK_RW_TIMEOUT,
|
||||||
)
|
)
|
||||||
.await
|
.await;
|
||||||
{
|
for resp in put_responses {
|
||||||
resp?;
|
resp?;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -155,7 +155,7 @@ impl<F: TableSchema + 'static> Table<F> {
|
||||||
let hash = e.partition_key().hash();
|
let hash = e.partition_key().hash();
|
||||||
let ring = self.system.ring.borrow().clone();
|
let ring = self.system.ring.borrow().clone();
|
||||||
let who = ring.walk_ring(&hash, self.param.replication_factor);
|
let who = ring.walk_ring(&hash, self.param.replication_factor);
|
||||||
eprintln!("insert who: {:?}", who);
|
//eprintln!("insert who: {:?}", who);
|
||||||
|
|
||||||
let e_enc = Arc::new(ByteBuf::from(rmp_to_vec_all_named(e)?));
|
let e_enc = Arc::new(ByteBuf::from(rmp_to_vec_all_named(e)?));
|
||||||
let rpc = &TableRPC::<F>::Update(vec![e_enc]);
|
let rpc = &TableRPC::<F>::Update(vec![e_enc]);
|
||||||
|
@ -212,7 +212,7 @@ impl<F: TableSchema + 'static> Table<F> {
|
||||||
let hash = partition_key.hash();
|
let hash = partition_key.hash();
|
||||||
let ring = self.system.ring.borrow().clone();
|
let ring = self.system.ring.borrow().clone();
|
||||||
let who = ring.walk_ring(&hash, self.param.replication_factor);
|
let who = ring.walk_ring(&hash, self.param.replication_factor);
|
||||||
eprintln!("get who: {:?}", who);
|
//eprintln!("get who: {:?}", who);
|
||||||
|
|
||||||
let rpc = &TableRPC::<F>::ReadEntry(partition_key.clone(), sort_key.clone());
|
let rpc = &TableRPC::<F>::ReadEntry(partition_key.clone(), sort_key.clone());
|
||||||
let resps = self
|
let resps = self
|
||||||
|
|
|
@ -29,6 +29,8 @@ pub struct TableSyncer<F: TableSchema> {
|
||||||
|
|
||||||
#[derive(Serialize, Deserialize)]
|
#[derive(Serialize, Deserialize)]
|
||||||
pub enum SyncRPC {
|
pub enum SyncRPC {
|
||||||
|
GetRootChecksumRange(Hash, Hash),
|
||||||
|
RootChecksumRange(SyncRange),
|
||||||
Checksums(Vec<RangeChecksum>, bool),
|
Checksums(Vec<RangeChecksum>, bool),
|
||||||
Difference(Vec<SyncRange>, Vec<Arc<ByteBuf>>),
|
Difference(Vec<SyncRange>, Vec<Arc<ByteBuf>>),
|
||||||
}
|
}
|
||||||
|
@ -180,6 +182,7 @@ impl<F: TableSchema + 'static> TableSyncer<F> {
|
||||||
.filter(|node| **node != my_id)
|
.filter(|node| **node != my_id)
|
||||||
.map(|node| {
|
.map(|node| {
|
||||||
self.clone().do_sync_with(
|
self.clone().do_sync_with(
|
||||||
|
partition.clone(),
|
||||||
root_cks.clone(),
|
root_cks.clone(),
|
||||||
node.clone(),
|
node.clone(),
|
||||||
partition.retain,
|
partition.retain,
|
||||||
|
@ -346,13 +349,33 @@ impl<F: TableSchema + 'static> TableSyncer<F> {
|
||||||
|
|
||||||
async fn do_sync_with(
|
async fn do_sync_with(
|
||||||
self: Arc<Self>,
|
self: Arc<Self>,
|
||||||
|
partition: Partition,
|
||||||
root_ck: RangeChecksum,
|
root_ck: RangeChecksum,
|
||||||
who: UUID,
|
who: UUID,
|
||||||
retain: bool,
|
retain: bool,
|
||||||
mut must_exit: watch::Receiver<bool>,
|
mut must_exit: watch::Receiver<bool>,
|
||||||
) -> Result<(), Error> {
|
) -> Result<(), Error> {
|
||||||
let mut todo = VecDeque::new();
|
let mut todo = VecDeque::new();
|
||||||
todo.push_back(root_ck);
|
|
||||||
|
// If their root checksum is at a higher level than ours, use theirs as the reference
|
||||||
|
let root_cks_resp = self
|
||||||
|
.table
|
||||||
|
.rpc_call(
|
||||||
|
&who,
|
||||||
|
&TableRPC::<F>::SyncRPC(SyncRPC::GetRootChecksumRange(
|
||||||
|
partition.begin.clone(),
|
||||||
|
partition.end.clone(),
|
||||||
|
)),
|
||||||
|
)
|
||||||
|
.await?;
|
||||||
|
if let TableRPC::<F>::SyncRPC(SyncRPC::RootChecksumRange(range)) = root_cks_resp {
|
||||||
|
if range.level > root_ck.bounds.level {
|
||||||
|
let their_root_range_ck = self.range_checksum(&range, &mut must_exit).await?;
|
||||||
|
todo.push_back(their_root_range_ck);
|
||||||
|
} else {
|
||||||
|
todo.push_back(root_ck);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
while !todo.is_empty() && !*must_exit.borrow() {
|
while !todo.is_empty() && !*must_exit.borrow() {
|
||||||
let total_children = todo.iter().map(|x| x.children.len()).fold(0, |x, y| x + y);
|
let total_children = todo.iter().map(|x| x.children.len()).fold(0, |x, y| x + y);
|
||||||
|
@ -445,83 +468,95 @@ impl<F: TableSchema + 'static> TableSyncer<F> {
|
||||||
message: &SyncRPC,
|
message: &SyncRPC,
|
||||||
mut must_exit: watch::Receiver<bool>,
|
mut must_exit: watch::Receiver<bool>,
|
||||||
) -> Result<SyncRPC, Error> {
|
) -> Result<SyncRPC, Error> {
|
||||||
if let SyncRPC::Checksums(checksums, retain) = message {
|
match message {
|
||||||
let mut ret_ranges = vec![];
|
SyncRPC::GetRootChecksumRange(begin, end) => {
|
||||||
let mut ret_items = vec![];
|
let root_cks = self.root_checksum(&begin, &end, &mut must_exit).await?;
|
||||||
for ckr in checksums.iter() {
|
Ok(SyncRPC::RootChecksumRange(root_cks.bounds))
|
||||||
let our_ckr = self.range_checksum(&ckr.bounds, &mut must_exit).await?;
|
}
|
||||||
for (range, hash) in ckr.children.iter() {
|
SyncRPC::Checksums(checksums, retain) => {
|
||||||
// Only consider items that are in the intersection of the two ranges
|
self.handle_checksums_rpc(&checksums[..], *retain, &mut must_exit)
|
||||||
// (other ranges will be exchanged at some point)
|
.await
|
||||||
if our_ckr
|
}
|
||||||
.found_limit
|
_ => Err(Error::Message(format!("Unexpected sync RPC"))),
|
||||||
.as_ref()
|
}
|
||||||
.map(|x| range.begin.as_slice() >= x.as_slice())
|
}
|
||||||
.unwrap_or(false)
|
|
||||||
{
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
|
|
||||||
let differs = match our_ckr
|
pub async fn handle_checksums_rpc(
|
||||||
.children
|
self: &Arc<Self>,
|
||||||
.binary_search_by(|(our_range, _)| our_range.begin.cmp(&range.begin))
|
checksums: &[RangeChecksum],
|
||||||
{
|
retain: bool,
|
||||||
Err(_) => true,
|
must_exit: &mut watch::Receiver<bool>,
|
||||||
Ok(i) => our_ckr.children[i].1 != *hash,
|
) -> Result<SyncRPC, Error> {
|
||||||
};
|
let mut ret_ranges = vec![];
|
||||||
if differs {
|
let mut ret_items = vec![];
|
||||||
ret_ranges.push(range.clone());
|
for ckr in checksums.iter() {
|
||||||
if *retain && range.level == 0 {
|
let our_ckr = self.range_checksum(&ckr.bounds, must_exit).await?;
|
||||||
if let Some(item_bytes) =
|
for (range, hash) in ckr.children.iter() {
|
||||||
self.table.store.get(range.begin.as_slice())?
|
// Only consider items that are in the intersection of the two ranges
|
||||||
{
|
// (other ranges will be exchanged at some point)
|
||||||
ret_items.push(Arc::new(ByteBuf::from(item_bytes.to_vec())));
|
if our_ckr
|
||||||
}
|
.found_limit
|
||||||
}
|
.as_ref()
|
||||||
}
|
.map(|x| range.begin.as_slice() >= x.as_slice())
|
||||||
|
.unwrap_or(false)
|
||||||
|
{
|
||||||
|
break;
|
||||||
}
|
}
|
||||||
for (range, _hash) in our_ckr.children.iter() {
|
|
||||||
if ckr
|
|
||||||
.found_limit
|
|
||||||
.as_ref()
|
|
||||||
.map(|x| range.begin.as_slice() >= x.as_slice())
|
|
||||||
.unwrap_or(false)
|
|
||||||
{
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
|
|
||||||
let not_present = ckr
|
let differs = match our_ckr
|
||||||
.children
|
.children
|
||||||
.binary_search_by(|(their_range, _)| their_range.begin.cmp(&range.begin))
|
.binary_search_by(|(our_range, _)| our_range.begin.cmp(&range.begin))
|
||||||
.is_err();
|
{
|
||||||
if not_present {
|
Err(_) => true,
|
||||||
if range.level > 0 {
|
Ok(i) => our_ckr.children[i].1 != *hash,
|
||||||
ret_ranges.push(range.clone());
|
};
|
||||||
}
|
if differs {
|
||||||
if *retain && range.level == 0 {
|
ret_ranges.push(range.clone());
|
||||||
if let Some(item_bytes) =
|
if retain && range.level == 0 {
|
||||||
self.table.store.get(range.begin.as_slice())?
|
if let Some(item_bytes) = self.table.store.get(range.begin.as_slice())? {
|
||||||
{
|
ret_items.push(Arc::new(ByteBuf::from(item_bytes.to_vec())));
|
||||||
ret_items.push(Arc::new(ByteBuf::from(item_bytes.to_vec())));
|
}
|
||||||
}
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
for (range, _hash) in our_ckr.children.iter() {
|
||||||
|
if ckr
|
||||||
|
.found_limit
|
||||||
|
.as_ref()
|
||||||
|
.map(|x| range.begin.as_slice() >= x.as_slice())
|
||||||
|
.unwrap_or(false)
|
||||||
|
{
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
|
||||||
|
let not_present = ckr
|
||||||
|
.children
|
||||||
|
.binary_search_by(|(their_range, _)| their_range.begin.cmp(&range.begin))
|
||||||
|
.is_err();
|
||||||
|
if not_present {
|
||||||
|
if range.level > 0 {
|
||||||
|
ret_ranges.push(range.clone());
|
||||||
|
}
|
||||||
|
if retain && range.level == 0 {
|
||||||
|
if let Some(item_bytes) = self.table.store.get(range.begin.as_slice())? {
|
||||||
|
ret_items.push(Arc::new(ByteBuf::from(item_bytes.to_vec())));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
let n_checksums = checksums
|
|
||||||
.iter()
|
|
||||||
.map(|x| x.children.len())
|
|
||||||
.fold(0, |x, y| x + y);
|
|
||||||
eprintln!(
|
|
||||||
"({}) Checksum comparison RPC: {} different + {} items for {} received",
|
|
||||||
self.table.name,
|
|
||||||
ret_ranges.len(),
|
|
||||||
ret_items.len(),
|
|
||||||
n_checksums
|
|
||||||
);
|
|
||||||
return Ok(SyncRPC::Difference(ret_ranges, ret_items));
|
|
||||||
}
|
}
|
||||||
Err(Error::Message(format!("Unexpected sync RPC")))
|
let n_checksums = checksums
|
||||||
|
.iter()
|
||||||
|
.map(|x| x.children.len())
|
||||||
|
.fold(0, |x, y| x + y);
|
||||||
|
eprintln!(
|
||||||
|
"({}) Checksum comparison RPC: {} different + {} items for {} received",
|
||||||
|
self.table.name,
|
||||||
|
ret_ranges.len(),
|
||||||
|
ret_items.len(),
|
||||||
|
n_checksums
|
||||||
|
);
|
||||||
|
Ok(SyncRPC::Difference(ret_ranges, ret_items))
|
||||||
}
|
}
|
||||||
|
|
||||||
pub async fn invalidate(self: Arc<Self>, item_key: Vec<u8>) -> Result<(), Error> {
|
pub async fn invalidate(self: Arc<Self>, item_key: Vec<u8>) -> Result<(), Error> {
|
||||||
|
|
Loading…
Reference in a new issue