layout/sync: fix bugs and add tracing
This commit is contained in:
parent
ce89d1ddab
commit
df24bb806d
3 changed files with 48 additions and 25 deletions
|
@ -131,7 +131,8 @@ impl LayoutHistory {
|
||||||
pub(crate) fn cleanup_old_versions(&mut self) {
|
pub(crate) fn cleanup_old_versions(&mut self) {
|
||||||
let min_sync_ack = self.calculate_global_min(&self.update_trackers.sync_ack_map);
|
let min_sync_ack = self.calculate_global_min(&self.update_trackers.sync_ack_map);
|
||||||
while self.versions.first().as_ref().unwrap().version < min_sync_ack {
|
while self.versions.first().as_ref().unwrap().version < min_sync_ack {
|
||||||
self.versions.remove(0);
|
let removed = self.versions.remove(0);
|
||||||
|
info!("Layout history: pruning old version {}", removed.version);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -133,7 +133,7 @@ impl LayoutManager {
|
||||||
pub fn sync_table_until(self: &Arc<Self>, table_name: &'static str, version: u64) {
|
pub fn sync_table_until(self: &Arc<Self>, table_name: &'static str, version: u64) {
|
||||||
let mut table_sync_version = self.table_sync_version.lock().unwrap();
|
let mut table_sync_version = self.table_sync_version.lock().unwrap();
|
||||||
*table_sync_version.get_mut(table_name).unwrap() = version;
|
*table_sync_version.get_mut(table_name).unwrap() = version;
|
||||||
let sync_until = table_sync_version.iter().map(|(_, v)| *v).max().unwrap();
|
let sync_until = table_sync_version.iter().map(|(_, v)| *v).min().unwrap();
|
||||||
drop(table_sync_version);
|
drop(table_sync_version);
|
||||||
|
|
||||||
let mut layout = self.layout.write().unwrap();
|
let mut layout = self.layout.write().unwrap();
|
||||||
|
@ -142,6 +142,7 @@ impl LayoutManager {
|
||||||
.sync_map
|
.sync_map
|
||||||
.set_max(self.node_id, sync_until)
|
.set_max(self.node_id, sync_until)
|
||||||
{
|
{
|
||||||
|
debug!("sync_until updated to {}", sync_until);
|
||||||
layout.update_hashes();
|
layout.update_hashes();
|
||||||
self.broadcast_update(SystemRpc::AdvertiseClusterLayoutTrackers(
|
self.broadcast_update(SystemRpc::AdvertiseClusterLayoutTrackers(
|
||||||
layout.update_trackers.clone(),
|
layout.update_trackers.clone(),
|
||||||
|
@ -277,7 +278,12 @@ impl LayoutManager {
|
||||||
self: &Arc<Self>,
|
self: &Arc<Self>,
|
||||||
adv: &LayoutHistory,
|
adv: &LayoutHistory,
|
||||||
) -> Result<SystemRpc, Error> {
|
) -> Result<SystemRpc, Error> {
|
||||||
debug!("handle_advertise_cluster_layout: {:?}", adv);
|
debug!(
|
||||||
|
"handle_advertise_cluster_layout: {} versions, last={}, trackers={:?}",
|
||||||
|
adv.versions.len(),
|
||||||
|
adv.current().version,
|
||||||
|
adv.update_trackers
|
||||||
|
);
|
||||||
|
|
||||||
if adv.current().replication_factor != self.replication_factor {
|
if adv.current().replication_factor != self.replication_factor {
|
||||||
let msg = format!(
|
let msg = format!(
|
||||||
|
|
|
@ -488,8 +488,29 @@ struct SyncWorker<F: TableSchema, R: TableReplication> {
|
||||||
}
|
}
|
||||||
|
|
||||||
impl<F: TableSchema, R: TableReplication> SyncWorker<F, R> {
|
impl<F: TableSchema, R: TableReplication> SyncWorker<F, R> {
|
||||||
|
fn check_add_full_sync(&mut self) {
|
||||||
|
let layout_versions = self.syncer.system.cluster_layout().sync_versions();
|
||||||
|
if layout_versions != self.layout_versions {
|
||||||
|
self.layout_versions = layout_versions;
|
||||||
|
info!(
|
||||||
|
"({}) Layout versions changed (max={}, ack={}, min stored={}), adding full sync to syncer todo list",
|
||||||
|
F::TABLE_NAME,
|
||||||
|
layout_versions.0,
|
||||||
|
layout_versions.1,
|
||||||
|
layout_versions.2
|
||||||
|
);
|
||||||
|
self.add_full_sync();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
fn add_full_sync(&mut self) {
|
fn add_full_sync(&mut self) {
|
||||||
let mut partitions = self.syncer.data.replication.sync_partitions();
|
let mut partitions = self.syncer.data.replication.sync_partitions();
|
||||||
|
info!(
|
||||||
|
"{}: Adding full sync for ack layout version {}",
|
||||||
|
F::TABLE_NAME,
|
||||||
|
partitions.layout_version
|
||||||
|
);
|
||||||
|
|
||||||
partitions.partitions.shuffle(&mut thread_rng());
|
partitions.partitions.shuffle(&mut thread_rng());
|
||||||
self.todo = Some(partitions);
|
self.todo = Some(partitions);
|
||||||
self.next_full_sync = Instant::now() + ANTI_ENTROPY_INTERVAL;
|
self.next_full_sync = Instant::now() + ANTI_ENTROPY_INTERVAL;
|
||||||
|
@ -510,6 +531,8 @@ impl<F: TableSchema, R: TableReplication> Worker for SyncWorker<F, R> {
|
||||||
}
|
}
|
||||||
|
|
||||||
async fn work(&mut self, must_exit: &mut watch::Receiver<bool>) -> Result<WorkerState, Error> {
|
async fn work(&mut self, must_exit: &mut watch::Receiver<bool>) -> Result<WorkerState, Error> {
|
||||||
|
self.check_add_full_sync();
|
||||||
|
|
||||||
if let Some(todo) = &mut self.todo {
|
if let Some(todo) = &mut self.todo {
|
||||||
let partition = todo.partitions.pop().unwrap();
|
let partition = todo.partitions.pop().unwrap();
|
||||||
|
|
||||||
|
@ -531,20 +554,24 @@ impl<F: TableSchema, R: TableReplication> Worker for SyncWorker<F, R> {
|
||||||
return Err(e);
|
return Err(e);
|
||||||
}
|
}
|
||||||
|
|
||||||
// done
|
if todo.partitions.is_empty() {
|
||||||
if !todo.partitions.is_empty() {
|
info!(
|
||||||
return Ok(WorkerState::Busy);
|
"{}: Completed full sync for ack layout version {}",
|
||||||
}
|
F::TABLE_NAME,
|
||||||
|
todo.layout_version
|
||||||
|
);
|
||||||
self.syncer
|
self.syncer
|
||||||
.system
|
.system
|
||||||
.layout_manager
|
.layout_manager
|
||||||
.sync_table_until(F::TABLE_NAME, todo.layout_version);
|
.sync_table_until(F::TABLE_NAME, todo.layout_version);
|
||||||
|
self.todo = None;
|
||||||
}
|
}
|
||||||
|
|
||||||
self.todo = None;
|
Ok(WorkerState::Busy)
|
||||||
|
} else {
|
||||||
Ok(WorkerState::Idle)
|
Ok(WorkerState::Idle)
|
||||||
}
|
}
|
||||||
|
}
|
||||||
|
|
||||||
async fn wait_for_work(&mut self) -> WorkerState {
|
async fn wait_for_work(&mut self) -> WorkerState {
|
||||||
select! {
|
select! {
|
||||||
|
@ -554,18 +581,7 @@ impl<F: TableSchema, R: TableReplication> Worker for SyncWorker<F, R> {
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
_ = self.layout_notify.notified() => {
|
_ = self.layout_notify.notified() => {
|
||||||
let layout_versions = self.syncer.system.cluster_layout().sync_versions();
|
self.check_add_full_sync();
|
||||||
if layout_versions != self.layout_versions {
|
|
||||||
self.layout_versions = layout_versions;
|
|
||||||
debug!(
|
|
||||||
"({}) Layout versions changed (max={}, ack={}, min stored={}), adding full sync to syncer todo list",
|
|
||||||
F::TABLE_NAME,
|
|
||||||
layout_versions.0,
|
|
||||||
layout_versions.1,
|
|
||||||
layout_versions.2
|
|
||||||
);
|
|
||||||
self.add_full_sync();
|
|
||||||
}
|
|
||||||
},
|
},
|
||||||
_ = tokio::time::sleep_until(self.next_full_sync.into()) => {
|
_ = tokio::time::sleep_until(self.next_full_sync.into()) => {
|
||||||
self.add_full_sync();
|
self.add_full_sync();
|
||||||
|
|
Loading…
Reference in a new issue