forked from Deuxfleurs/garage
Make sync send data both ways
This commit is contained in:
parent
69f1d8fef2
commit
b780f6485d
3 changed files with 59 additions and 25 deletions
|
@ -25,7 +25,7 @@ impl Eq for FixedBytes32 {}
|
||||||
|
|
||||||
impl fmt::Debug for FixedBytes32 {
|
impl fmt::Debug for FixedBytes32 {
|
||||||
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
|
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
|
||||||
write!(f, "{}", hex::encode(self.0))
|
write!(f, "{}…", hex::encode(&self.0[..8]))
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
14
src/table.rs
14
src/table.rs
|
@ -6,7 +6,7 @@ use async_trait::async_trait;
|
||||||
use futures::stream::*;
|
use futures::stream::*;
|
||||||
use serde::{Deserialize, Serialize};
|
use serde::{Deserialize, Serialize};
|
||||||
use serde_bytes::ByteBuf;
|
use serde_bytes::ByteBuf;
|
||||||
use tokio::sync::RwLock;
|
use arc_swap::ArcSwapOption;
|
||||||
|
|
||||||
use crate::data::*;
|
use crate::data::*;
|
||||||
use crate::error::Error;
|
use crate::error::Error;
|
||||||
|
@ -22,7 +22,7 @@ pub struct Table<F: TableSchema> {
|
||||||
|
|
||||||
pub system: Arc<System>,
|
pub system: Arc<System>,
|
||||||
pub store: sled::Tree,
|
pub store: sled::Tree,
|
||||||
pub syncer: RwLock<Option<Arc<TableSyncer<F>>>>,
|
pub syncer: ArcSwapOption<TableSyncer<F>>,
|
||||||
|
|
||||||
pub param: TableReplicationParams,
|
pub param: TableReplicationParams,
|
||||||
}
|
}
|
||||||
|
@ -142,10 +142,10 @@ impl<F: TableSchema + 'static> Table<F> {
|
||||||
system,
|
system,
|
||||||
store,
|
store,
|
||||||
param,
|
param,
|
||||||
syncer: RwLock::new(None),
|
syncer: ArcSwapOption::from(None),
|
||||||
});
|
});
|
||||||
let syncer = TableSyncer::launch(table.clone()).await;
|
let syncer = TableSyncer::launch(table.clone()).await;
|
||||||
*table.syncer.write().await = Some(syncer);
|
table.syncer.swap(Some(syncer));
|
||||||
table
|
table
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -389,7 +389,7 @@ impl<F: TableSchema + 'static> Table<F> {
|
||||||
Ok(TableRPC::Ok)
|
Ok(TableRPC::Ok)
|
||||||
}
|
}
|
||||||
TableRPC::SyncRPC(rpc) => {
|
TableRPC::SyncRPC(rpc) => {
|
||||||
let syncer = self.syncer.read().await.as_ref().unwrap().clone();
|
let syncer = self.syncer.load_full().unwrap();
|
||||||
let response = syncer
|
let response = syncer
|
||||||
.handle_rpc(&rpc, self.system.background.stop_signal.clone())
|
.handle_rpc(&rpc, self.system.background.stop_signal.clone())
|
||||||
.await?;
|
.await?;
|
||||||
|
@ -408,7 +408,7 @@ impl<F: TableSchema + 'static> Table<F> {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
async fn handle_update(self: &Arc<Self>, mut entries: Vec<Arc<ByteBuf>>) -> Result<(), Error> {
|
pub async fn handle_update(self: &Arc<Self>, mut entries: Vec<Arc<ByteBuf>>) -> Result<(), Error> {
|
||||||
for update_bytes in entries.drain(..) {
|
for update_bytes in entries.drain(..) {
|
||||||
let update = rmp_serde::decode::from_read_ref::<_, F::E>(update_bytes.as_slice())?;
|
let update = rmp_serde::decode::from_read_ref::<_, F::E>(update_bytes.as_slice())?;
|
||||||
|
|
||||||
|
@ -437,7 +437,7 @@ impl<F: TableSchema + 'static> Table<F> {
|
||||||
if old_entry != new_entry {
|
if old_entry != new_entry {
|
||||||
self.instance.updated(old_entry, new_entry).await;
|
self.instance.updated(old_entry, new_entry).await;
|
||||||
|
|
||||||
let syncer = self.syncer.read().await.as_ref().unwrap().clone();
|
let syncer = self.syncer.load_full().unwrap();
|
||||||
self.system.background.spawn(syncer.invalidate(tree_key));
|
self.system.background.spawn(syncer.invalidate(tree_key));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -30,7 +30,7 @@ pub struct TableSyncer<F: TableSchema> {
|
||||||
#[derive(Serialize, Deserialize)]
|
#[derive(Serialize, Deserialize)]
|
||||||
pub enum SyncRPC {
|
pub enum SyncRPC {
|
||||||
Checksums(Vec<RangeChecksum>),
|
Checksums(Vec<RangeChecksum>),
|
||||||
DifferentSet(Vec<SyncRange>),
|
Difference(Vec<SyncRange>, Vec<Arc<ByteBuf>>),
|
||||||
}
|
}
|
||||||
|
|
||||||
pub struct SyncTodo {
|
pub struct SyncTodo {
|
||||||
|
@ -172,10 +172,12 @@ impl<F: TableSchema + 'static> TableSyncer<F> {
|
||||||
.root_checksum(&partition.begin, &partition.end, must_exit)
|
.root_checksum(&partition.begin, &partition.end, must_exit)
|
||||||
.await?;
|
.await?;
|
||||||
|
|
||||||
|
let my_id = self.table.system.id.clone();
|
||||||
let ring = self.table.system.ring.borrow().clone();
|
let ring = self.table.system.ring.borrow().clone();
|
||||||
let nodes = ring.walk_ring(&partition.begin, self.table.param.replication_factor);
|
let nodes = ring.walk_ring(&partition.begin, self.table.param.replication_factor);
|
||||||
let mut sync_futures = nodes
|
let mut sync_futures = nodes
|
||||||
.iter()
|
.iter()
|
||||||
|
.filter(|node| **node != my_id)
|
||||||
.map(|node| {
|
.map(|node| {
|
||||||
self.clone()
|
self.clone()
|
||||||
.do_sync_with(root_cks.clone(), node.clone(), must_exit.clone())
|
.do_sync_with(root_cks.clone(), node.clone(), must_exit.clone())
|
||||||
|
@ -364,21 +366,25 @@ impl<F: TableSchema + 'static> TableSyncer<F> {
|
||||||
.table
|
.table
|
||||||
.rpc_call(&who, &TableRPC::<F>::SyncRPC(SyncRPC::Checksums(step)))
|
.rpc_call(&who, &TableRPC::<F>::SyncRPC(SyncRPC::Checksums(step)))
|
||||||
.await?;
|
.await?;
|
||||||
if let TableRPC::<F>::SyncRPC(SyncRPC::DifferentSet(mut s)) = rpc_resp {
|
if let TableRPC::<F>::SyncRPC(SyncRPC::Difference(mut diff_ranges, diff_items)) = rpc_resp {
|
||||||
let mut items = vec![];
|
eprintln!("({}) Sync with {:?}: difference {} ranges, {} items", self.table.name, who, diff_ranges.len(), diff_items.len());
|
||||||
for differing in s.drain(..) {
|
let mut items_to_send = vec![];
|
||||||
|
for differing in diff_ranges.drain(..) {
|
||||||
if differing.level == 0 {
|
if differing.level == 0 {
|
||||||
items.push(differing.begin);
|
items_to_send.push(differing.begin);
|
||||||
} else {
|
} else {
|
||||||
let checksum = self.range_checksum(&differing, &mut must_exit).await?;
|
let checksum = self.range_checksum(&differing, &mut must_exit).await?;
|
||||||
todo.push_back(checksum);
|
todo.push_back(checksum);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
if items.len() > 0 {
|
if diff_items.len() > 0 {
|
||||||
|
self.table.handle_update(diff_items).await?;
|
||||||
|
}
|
||||||
|
if items_to_send.len() > 0 {
|
||||||
self.table
|
self.table
|
||||||
.system
|
.system
|
||||||
.background
|
.background
|
||||||
.spawn(self.clone().send_items(who.clone(), items));
|
.spawn(self.clone().send_items(who.clone(), items_to_send));
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
return Err(Error::Message(format!(
|
return Err(Error::Message(format!(
|
||||||
|
@ -424,20 +430,47 @@ impl<F: TableSchema + 'static> TableSyncer<F> {
|
||||||
mut must_exit: watch::Receiver<bool>,
|
mut must_exit: watch::Receiver<bool>,
|
||||||
) -> Result<SyncRPC, Error> {
|
) -> Result<SyncRPC, Error> {
|
||||||
if let SyncRPC::Checksums(checksums) = message {
|
if let SyncRPC::Checksums(checksums) = message {
|
||||||
let mut ret = vec![];
|
let mut ret_ranges = vec![];
|
||||||
|
let mut ret_items = vec![];
|
||||||
for ckr in checksums.iter() {
|
for ckr in checksums.iter() {
|
||||||
let our_ckr = self.range_checksum(&ckr.bounds, &mut must_exit).await?;
|
let our_ckr = self.range_checksum(&ckr.bounds, &mut must_exit).await?;
|
||||||
for (range, hash) in ckr.children.iter() {
|
for (range, hash) in ckr.children.iter() {
|
||||||
match our_ckr
|
// Only consider items that are in the intersection of the two ranges
|
||||||
|
// (other ranges will be exchanged at some point)
|
||||||
|
if our_ckr.found_limit.as_ref().map(|x| range.begin.as_slice() >= x.as_slice()).unwrap_or(false) {
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
|
||||||
|
let differs = match our_ckr
|
||||||
.children
|
.children
|
||||||
.binary_search_by(|(our_range, _)| our_range.begin.cmp(&range.begin))
|
.binary_search_by(|(our_range, _)| our_range.begin.cmp(&range.begin))
|
||||||
{
|
{
|
||||||
Err(_) => {
|
Err(_) => true,
|
||||||
ret.push(range.clone());
|
Ok(i) => our_ckr.children[i].1 != *hash,
|
||||||
|
};
|
||||||
|
if differs {
|
||||||
|
ret_ranges.push(range.clone());
|
||||||
|
if range.level == 0 {
|
||||||
|
if let Some(item_bytes) = self.table.store.get(range.begin.as_slice())? {
|
||||||
|
ret_items.push(Arc::new(ByteBuf::from(item_bytes.to_vec())));
|
||||||
|
}
|
||||||
}
|
}
|
||||||
Ok(i) => {
|
}
|
||||||
if our_ckr.children[i].1 != *hash {
|
}
|
||||||
ret.push(range.clone());
|
for (range, _hash) in our_ckr.children.iter() {
|
||||||
|
if ckr.found_limit.as_ref().map(|x| range.begin.as_slice() >= x.as_slice()).unwrap_or(false) {
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
|
||||||
|
let not_present = ckr
|
||||||
|
.children
|
||||||
|
.binary_search_by(|(their_range, _)| their_range.begin.cmp(&range.begin))
|
||||||
|
.is_err();
|
||||||
|
if not_present {
|
||||||
|
ret_ranges.push(range.clone());
|
||||||
|
if range.level == 0 {
|
||||||
|
if let Some(item_bytes) = self.table.store.get(range.begin.as_slice())? {
|
||||||
|
ret_items.push(Arc::new(ByteBuf::from(item_bytes.to_vec())));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -448,12 +481,13 @@ impl<F: TableSchema + 'static> TableSyncer<F> {
|
||||||
.map(|x| x.children.len())
|
.map(|x| x.children.len())
|
||||||
.fold(0, |x, y| x + y);
|
.fold(0, |x, y| x + y);
|
||||||
eprintln!(
|
eprintln!(
|
||||||
"({}) Checksum comparison RPC: {} different out of {}",
|
"({}) Checksum comparison RPC: {} different + {} items for {} received",
|
||||||
self.table.name,
|
self.table.name,
|
||||||
ret.len(),
|
ret_ranges.len(),
|
||||||
|
ret_items.len(),
|
||||||
n_checksums
|
n_checksums
|
||||||
);
|
);
|
||||||
return Ok(SyncRPC::DifferentSet(ret));
|
return Ok(SyncRPC::Difference(ret_ranges, ret_items));
|
||||||
}
|
}
|
||||||
Err(Error::Message(format!("Unexpected sync RPC")))
|
Err(Error::Message(format!("Unexpected sync RPC")))
|
||||||
}
|
}
|
||||||
|
|
Loading…
Reference in a new issue