Block repair with new worker semantics
This commit is contained in:
parent
a855c54bdb
commit
ba1ace6cf6
8 changed files with 237 additions and 184 deletions
1
Cargo.lock
generated
1
Cargo.lock
generated
|
@ -959,6 +959,7 @@ dependencies = [
|
||||||
"futures",
|
"futures",
|
||||||
"futures-util",
|
"futures-util",
|
||||||
"garage_api",
|
"garage_api",
|
||||||
|
"garage_block",
|
||||||
"garage_db",
|
"garage_db",
|
||||||
"garage_model 0.7.0",
|
"garage_model 0.7.0",
|
||||||
"garage_rpc 0.7.0",
|
"garage_rpc 0.7.0",
|
||||||
|
|
|
@ -2,6 +2,7 @@
|
||||||
extern crate tracing;
|
extern crate tracing;
|
||||||
|
|
||||||
pub mod manager;
|
pub mod manager;
|
||||||
|
pub mod repair;
|
||||||
|
|
||||||
mod block;
|
mod block;
|
||||||
mod metrics;
|
mod metrics;
|
||||||
|
|
|
@ -1,7 +1,5 @@
|
||||||
use core::ops::Bound;
|
|
||||||
|
|
||||||
use std::convert::TryInto;
|
use std::convert::TryInto;
|
||||||
use std::path::{Path, PathBuf};
|
use std::path::PathBuf;
|
||||||
use std::sync::Arc;
|
use std::sync::Arc;
|
||||||
use std::time::Duration;
|
use std::time::Duration;
|
||||||
|
|
||||||
|
@ -94,7 +92,7 @@ pub struct BlockManager {
|
||||||
|
|
||||||
mutation_lock: Mutex<BlockManagerLocked>,
|
mutation_lock: Mutex<BlockManagerLocked>,
|
||||||
|
|
||||||
rc: BlockRc,
|
pub(crate) rc: BlockRc,
|
||||||
|
|
||||||
resync_queue: CountedTree,
|
resync_queue: CountedTree,
|
||||||
resync_notify: Notify,
|
resync_notify: Notify,
|
||||||
|
@ -225,90 +223,6 @@ impl BlockManager {
|
||||||
Ok(())
|
Ok(())
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Launch the repair procedure on the data store
|
|
||||||
///
|
|
||||||
/// This will list all blocks locally present, as well as those
|
|
||||||
/// that are required because of refcount > 0, and will try
|
|
||||||
/// to fix any mismatch between the two.
|
|
||||||
pub async fn repair_data_store(&self, must_exit: &watch::Receiver<bool>) -> Result<(), Error> {
|
|
||||||
// 1. Repair blocks from RC table.
|
|
||||||
let mut next_start: Option<Hash> = None;
|
|
||||||
loop {
|
|
||||||
// We have to do this complicated two-step process where we first read a bunch
|
|
||||||
// of hashes from the RC table, and then insert them in the to-resync queue,
|
|
||||||
// because of SQLite. Basically, as long as we have an iterator on a DB table,
|
|
||||||
// we can't do anything else on the DB. The naive approach (which we had previously)
|
|
||||||
// of just iterating on the RC table and inserting items one to one in the resync
|
|
||||||
// queue can't work here, it would just provoke a deadlock in the SQLite adapter code.
|
|
||||||
// This is mostly because the Rust bindings for SQLite assume a worst-case scenario
|
|
||||||
// where SQLite is not compiled in thread-safe mode, so we have to wrap everything
|
|
||||||
// in a mutex (see db/sqlite_adapter.rs and discussion in PR #322).
|
|
||||||
let mut batch_of_hashes = vec![];
|
|
||||||
let start_bound = match next_start.as_ref() {
|
|
||||||
None => Bound::Unbounded,
|
|
||||||
Some(x) => Bound::Excluded(x.as_slice()),
|
|
||||||
};
|
|
||||||
for entry in self
|
|
||||||
.rc
|
|
||||||
.rc
|
|
||||||
.range::<&[u8], _>((start_bound, Bound::Unbounded))?
|
|
||||||
{
|
|
||||||
let (hash, _) = entry?;
|
|
||||||
let hash = Hash::try_from(&hash[..]).unwrap();
|
|
||||||
batch_of_hashes.push(hash);
|
|
||||||
if batch_of_hashes.len() >= 1000 {
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
if batch_of_hashes.is_empty() {
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
|
|
||||||
for hash in batch_of_hashes.into_iter() {
|
|
||||||
self.put_to_resync(&hash, Duration::from_secs(0))?;
|
|
||||||
next_start = Some(hash)
|
|
||||||
}
|
|
||||||
|
|
||||||
if *must_exit.borrow() {
|
|
||||||
return Ok(());
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// 2. Repair blocks actually on disk
|
|
||||||
// Lists all blocks on disk and adds them to the resync queue.
|
|
||||||
// This allows us to find blocks we are storing but don't actually need,
|
|
||||||
// so that we can offload them if necessary and then delete them locally.
|
|
||||||
self.for_each_file(
|
|
||||||
(),
|
|
||||||
move |_, hash| async move {
|
|
||||||
self.put_to_resync(&hash, Duration::from_secs(0))
|
|
||||||
.map_err(Into::into)
|
|
||||||
},
|
|
||||||
must_exit,
|
|
||||||
)
|
|
||||||
.await
|
|
||||||
}
|
|
||||||
|
|
||||||
/// Verify integrity of each block on disk. Use `speed_limit` to limit the load generated by
|
|
||||||
/// this function.
|
|
||||||
pub async fn scrub_data_store(
|
|
||||||
&self,
|
|
||||||
must_exit: &watch::Receiver<bool>,
|
|
||||||
tranquility: u32,
|
|
||||||
) -> Result<(), Error> {
|
|
||||||
let tranquilizer = Tranquilizer::new(30);
|
|
||||||
self.for_each_file(
|
|
||||||
tranquilizer,
|
|
||||||
move |mut tranquilizer, hash| async move {
|
|
||||||
let _ = self.read_block(&hash).await;
|
|
||||||
tranquilizer.tranquilize(tranquility).await;
|
|
||||||
Ok(tranquilizer)
|
|
||||||
},
|
|
||||||
must_exit,
|
|
||||||
)
|
|
||||||
.await
|
|
||||||
}
|
|
||||||
|
|
||||||
/// Get lenght of resync queue
|
/// Get lenght of resync queue
|
||||||
pub fn resync_queue_len(&self) -> Result<usize, Error> {
|
pub fn resync_queue_len(&self) -> Result<usize, Error> {
|
||||||
// This currently can't return an error because the CountedTree hack
|
// This currently can't return an error because the CountedTree hack
|
||||||
|
@ -397,7 +311,7 @@ impl BlockManager {
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Read block from disk, verifying it's integrity
|
/// Read block from disk, verifying it's integrity
|
||||||
async fn read_block(&self, hash: &Hash) -> Result<BlockRpc, Error> {
|
pub(crate) async fn read_block(&self, hash: &Hash) -> Result<BlockRpc, Error> {
|
||||||
let data = self
|
let data = self
|
||||||
.read_block_internal(hash)
|
.read_block_internal(hash)
|
||||||
.bound_record_duration(&self.metrics.block_read_duration)
|
.bound_record_duration(&self.metrics.block_read_duration)
|
||||||
|
@ -575,7 +489,7 @@ impl BlockManager {
|
||||||
});
|
});
|
||||||
}
|
}
|
||||||
|
|
||||||
fn put_to_resync(&self, hash: &Hash, delay: Duration) -> db::Result<()> {
|
pub(crate) fn put_to_resync(&self, hash: &Hash, delay: Duration) -> db::Result<()> {
|
||||||
let when = now_msec() + delay.as_millis() as u64;
|
let when = now_msec() + delay.as_millis() as u64;
|
||||||
self.put_to_resync_at(hash, when)
|
self.put_to_resync_at(hash, when)
|
||||||
}
|
}
|
||||||
|
@ -784,72 +698,6 @@ impl BlockManager {
|
||||||
|
|
||||||
Ok(())
|
Ok(())
|
||||||
}
|
}
|
||||||
|
|
||||||
// ---- Utility: iteration on files in the data directory ----
|
|
||||||
|
|
||||||
async fn for_each_file<F, Fut, State>(
|
|
||||||
&self,
|
|
||||||
state: State,
|
|
||||||
mut f: F,
|
|
||||||
must_exit: &watch::Receiver<bool>,
|
|
||||||
) -> Result<(), Error>
|
|
||||||
where
|
|
||||||
F: FnMut(State, Hash) -> Fut + Send,
|
|
||||||
Fut: Future<Output = Result<State, Error>> + Send,
|
|
||||||
State: Send,
|
|
||||||
{
|
|
||||||
self.for_each_file_rec(&self.data_dir, state, &mut f, must_exit)
|
|
||||||
.await
|
|
||||||
.map(|_| ())
|
|
||||||
}
|
|
||||||
|
|
||||||
fn for_each_file_rec<'a, F, Fut, State>(
|
|
||||||
&'a self,
|
|
||||||
path: &'a Path,
|
|
||||||
mut state: State,
|
|
||||||
f: &'a mut F,
|
|
||||||
must_exit: &'a watch::Receiver<bool>,
|
|
||||||
) -> BoxFuture<'a, Result<State, Error>>
|
|
||||||
where
|
|
||||||
F: FnMut(State, Hash) -> Fut + Send,
|
|
||||||
Fut: Future<Output = Result<State, Error>> + Send,
|
|
||||||
State: Send + 'a,
|
|
||||||
{
|
|
||||||
async move {
|
|
||||||
let mut ls_data_dir = fs::read_dir(path).await?;
|
|
||||||
while let Some(data_dir_ent) = ls_data_dir.next_entry().await? {
|
|
||||||
if *must_exit.borrow() {
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
|
|
||||||
let name = data_dir_ent.file_name();
|
|
||||||
let name = if let Ok(n) = name.into_string() {
|
|
||||||
n
|
|
||||||
} else {
|
|
||||||
continue;
|
|
||||||
};
|
|
||||||
let ent_type = data_dir_ent.file_type().await?;
|
|
||||||
|
|
||||||
let name = name.strip_suffix(".zst").unwrap_or(&name);
|
|
||||||
if name.len() == 2 && hex::decode(&name).is_ok() && ent_type.is_dir() {
|
|
||||||
state = self
|
|
||||||
.for_each_file_rec(&data_dir_ent.path(), state, f, must_exit)
|
|
||||||
.await?;
|
|
||||||
} else if name.len() == 64 {
|
|
||||||
let hash_bytes = if let Ok(h) = hex::decode(&name) {
|
|
||||||
h
|
|
||||||
} else {
|
|
||||||
continue;
|
|
||||||
};
|
|
||||||
let mut hash = [0u8; 32];
|
|
||||||
hash.copy_from_slice(&hash_bytes[..]);
|
|
||||||
state = f(state, hash.into()).await?;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
Ok(state)
|
|
||||||
}
|
|
||||||
.boxed()
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
#[async_trait]
|
#[async_trait]
|
||||||
|
|
204
src/block/repair.rs
Normal file
204
src/block/repair.rs
Normal file
|
@ -0,0 +1,204 @@
|
||||||
|
use core::ops::Bound;
|
||||||
|
|
||||||
|
use std::sync::Arc;
|
||||||
|
use std::time::Duration;
|
||||||
|
|
||||||
|
use async_trait::async_trait;
|
||||||
|
use tokio::fs;
|
||||||
|
use tokio::sync::watch;
|
||||||
|
|
||||||
|
use garage_util::background::*;
|
||||||
|
use garage_util::data::*;
|
||||||
|
use garage_util::error::*;
|
||||||
|
use garage_util::tranquilizer::Tranquilizer;
|
||||||
|
|
||||||
|
use crate::manager::*;
|
||||||
|
|
||||||
|
pub struct RepairWorker {
|
||||||
|
manager: Arc<BlockManager>,
|
||||||
|
next_start: Option<Hash>,
|
||||||
|
block_iter: Option<BlockStoreIterator>,
|
||||||
|
}
|
||||||
|
|
||||||
|
impl RepairWorker {
|
||||||
|
pub fn new(manager: Arc<BlockManager>) -> Self {
|
||||||
|
Self {
|
||||||
|
manager,
|
||||||
|
next_start: None,
|
||||||
|
block_iter: None,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
#[async_trait]
|
||||||
|
impl Worker for RepairWorker {
|
||||||
|
fn name(&self) -> String {
|
||||||
|
"Block repair worker".into()
|
||||||
|
}
|
||||||
|
|
||||||
|
async fn work(
|
||||||
|
&mut self,
|
||||||
|
_must_exit: &mut watch::Receiver<bool>,
|
||||||
|
) -> Result<WorkerStatus, Error> {
|
||||||
|
match self.block_iter.as_mut() {
|
||||||
|
None => {
|
||||||
|
// Phase 1: Repair blocks from RC table.
|
||||||
|
|
||||||
|
// We have to do this complicated two-step process where we first read a bunch
|
||||||
|
// of hashes from the RC table, and then insert them in the to-resync queue,
|
||||||
|
// because of SQLite. Basically, as long as we have an iterator on a DB table,
|
||||||
|
// we can't do anything else on the DB. The naive approach (which we had previously)
|
||||||
|
// of just iterating on the RC table and inserting items one to one in the resync
|
||||||
|
// queue can't work here, it would just provoke a deadlock in the SQLite adapter code.
|
||||||
|
// This is mostly because the Rust bindings for SQLite assume a worst-case scenario
|
||||||
|
// where SQLite is not compiled in thread-safe mode, so we have to wrap everything
|
||||||
|
// in a mutex (see db/sqlite_adapter.rs and discussion in PR #322).
|
||||||
|
let mut batch_of_hashes = vec![];
|
||||||
|
let start_bound = match self.next_start.as_ref() {
|
||||||
|
None => Bound::Unbounded,
|
||||||
|
Some(x) => Bound::Excluded(x.as_slice()),
|
||||||
|
};
|
||||||
|
for entry in self
|
||||||
|
.manager
|
||||||
|
.rc
|
||||||
|
.rc
|
||||||
|
.range::<&[u8], _>((start_bound, Bound::Unbounded))?
|
||||||
|
{
|
||||||
|
let (hash, _) = entry?;
|
||||||
|
let hash = Hash::try_from(&hash[..]).unwrap();
|
||||||
|
batch_of_hashes.push(hash);
|
||||||
|
if batch_of_hashes.len() >= 1000 {
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if batch_of_hashes.is_empty() {
|
||||||
|
// move on to phase 2
|
||||||
|
self.block_iter = Some(BlockStoreIterator::new(&self.manager).await?);
|
||||||
|
return Ok(WorkerStatus::Busy);
|
||||||
|
}
|
||||||
|
|
||||||
|
for hash in batch_of_hashes.into_iter() {
|
||||||
|
self.manager.put_to_resync(&hash, Duration::from_secs(0))?;
|
||||||
|
self.next_start = Some(hash)
|
||||||
|
}
|
||||||
|
|
||||||
|
Ok(WorkerStatus::Busy)
|
||||||
|
}
|
||||||
|
Some(bi) => {
|
||||||
|
// Phase 2: Repair blocks actually on disk
|
||||||
|
// Lists all blocks on disk and adds them to the resync queue.
|
||||||
|
// This allows us to find blocks we are storing but don't actually need,
|
||||||
|
// so that we can offload them if necessary and then delete them locally.
|
||||||
|
if let Some(hash) = bi.next().await? {
|
||||||
|
self.manager.put_to_resync(&hash, Duration::from_secs(0))?;
|
||||||
|
Ok(WorkerStatus::Busy)
|
||||||
|
} else {
|
||||||
|
Ok(WorkerStatus::Done)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
async fn wait_for_work(&mut self, _must_exit: &watch::Receiver<bool>) -> WorkerStatus {
|
||||||
|
unreachable!()
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// ----
|
||||||
|
|
||||||
|
pub struct ScrubWorker {
|
||||||
|
manager: Arc<BlockManager>,
|
||||||
|
iterator: BlockStoreIterator,
|
||||||
|
tranquilizer: Tranquilizer,
|
||||||
|
tranquility: u32,
|
||||||
|
}
|
||||||
|
|
||||||
|
impl ScrubWorker {
|
||||||
|
pub async fn new(manager: Arc<BlockManager>, tranquility: u32) -> Result<Self, Error> {
|
||||||
|
let iterator = BlockStoreIterator::new(&manager).await?;
|
||||||
|
Ok(Self {
|
||||||
|
manager,
|
||||||
|
iterator,
|
||||||
|
tranquilizer: Tranquilizer::new(30),
|
||||||
|
tranquility,
|
||||||
|
})
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
#[async_trait]
|
||||||
|
impl Worker for ScrubWorker {
|
||||||
|
fn name(&self) -> String {
|
||||||
|
"Block scrub worker".into()
|
||||||
|
}
|
||||||
|
|
||||||
|
async fn work(
|
||||||
|
&mut self,
|
||||||
|
_must_exit: &mut watch::Receiver<bool>,
|
||||||
|
) -> Result<WorkerStatus, Error> {
|
||||||
|
self.tranquilizer.reset();
|
||||||
|
if let Some(hash) = self.iterator.next().await? {
|
||||||
|
let _ = self.manager.read_block(&hash).await;
|
||||||
|
self.tranquilizer.tranquilize(self.tranquility).await;
|
||||||
|
Ok(WorkerStatus::Busy)
|
||||||
|
} else {
|
||||||
|
Ok(WorkerStatus::Done)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
async fn wait_for_work(&mut self, _must_exit: &watch::Receiver<bool>) -> WorkerStatus {
|
||||||
|
unreachable!()
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// ----
|
||||||
|
|
||||||
|
struct BlockStoreIterator {
|
||||||
|
path: Vec<fs::ReadDir>,
|
||||||
|
}
|
||||||
|
|
||||||
|
impl BlockStoreIterator {
|
||||||
|
async fn new(manager: &BlockManager) -> Result<Self, Error> {
|
||||||
|
let root_dir = manager.data_dir.clone();
|
||||||
|
let read_root_dir = fs::read_dir(&root_dir).await?;
|
||||||
|
Ok(Self {
|
||||||
|
path: vec![read_root_dir],
|
||||||
|
})
|
||||||
|
}
|
||||||
|
|
||||||
|
async fn next(&mut self) -> Result<Option<Hash>, Error> {
|
||||||
|
loop {
|
||||||
|
if let Some(reader) = self.path.last_mut() {
|
||||||
|
if let Some(data_dir_ent) = reader.next_entry().await? {
|
||||||
|
let name = data_dir_ent.file_name();
|
||||||
|
let name = if let Ok(n) = name.into_string() {
|
||||||
|
n
|
||||||
|
} else {
|
||||||
|
continue;
|
||||||
|
};
|
||||||
|
let ent_type = data_dir_ent.file_type().await?;
|
||||||
|
|
||||||
|
let name = name.strip_suffix(".zst").unwrap_or(&name);
|
||||||
|
if name.len() == 2 && hex::decode(&name).is_ok() && ent_type.is_dir() {
|
||||||
|
let read_child_dir = fs::read_dir(&data_dir_ent.path()).await?;
|
||||||
|
self.path.push(read_child_dir);
|
||||||
|
continue;
|
||||||
|
} else if name.len() == 64 {
|
||||||
|
let hash_bytes = if let Ok(h) = hex::decode(&name) {
|
||||||
|
h
|
||||||
|
} else {
|
||||||
|
continue;
|
||||||
|
};
|
||||||
|
let mut hash = [0u8; 32];
|
||||||
|
hash.copy_from_slice(&hash_bytes[..]);
|
||||||
|
return Ok(Some(hash.into()));
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
self.path.pop();
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
return Ok(None);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
|
@ -23,6 +23,7 @@ path = "tests/lib.rs"
|
||||||
[dependencies]
|
[dependencies]
|
||||||
garage_db = { version = "0.8.0", path = "../db" }
|
garage_db = { version = "0.8.0", path = "../db" }
|
||||||
garage_api = { version = "0.7.0", path = "../api" }
|
garage_api = { version = "0.7.0", path = "../api" }
|
||||||
|
garage_block = { version = "0.7.0", path = "../block" }
|
||||||
garage_model = { version = "0.7.0", path = "../model" }
|
garage_model = { version = "0.7.0", path = "../model" }
|
||||||
garage_rpc = { version = "0.7.0", path = "../rpc" }
|
garage_rpc = { version = "0.7.0", path = "../rpc" }
|
||||||
garage_table = { version = "0.7.0", path = "../table" }
|
garage_table = { version = "0.7.0", path = "../table" }
|
||||||
|
|
|
@ -693,7 +693,7 @@ impl AdminRpcHandler {
|
||||||
)))
|
)))
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
launch_online_repair(self.garage.clone(), opt)?;
|
launch_online_repair(self.garage.clone(), opt).await?;
|
||||||
Ok(AdminRpc::Ok(format!(
|
Ok(AdminRpc::Ok(format!(
|
||||||
"Repair launched on {:?}",
|
"Repair launched on {:?}",
|
||||||
self.garage.system.id
|
self.garage.system.id
|
||||||
|
|
|
@ -13,7 +13,7 @@ use garage_util::error::Error;
|
||||||
|
|
||||||
use crate::*;
|
use crate::*;
|
||||||
|
|
||||||
pub fn launch_online_repair(garage: Arc<Garage>, opt: RepairOpt) -> Result<(), Error> {
|
pub async fn launch_online_repair(garage: Arc<Garage>, opt: RepairOpt) -> Result<(), Error> {
|
||||||
match opt.what {
|
match opt.what {
|
||||||
RepairWhat::Tables => {
|
RepairWhat::Tables => {
|
||||||
info!("Launching a full sync of tables");
|
info!("Launching a full sync of tables");
|
||||||
|
@ -36,24 +36,19 @@ pub fn launch_online_repair(garage: Arc<Garage>, opt: RepairOpt) -> Result<(), E
|
||||||
.spawn_worker(RepairBlockrefsWorker::new(garage.clone()));
|
.spawn_worker(RepairBlockrefsWorker::new(garage.clone()));
|
||||||
}
|
}
|
||||||
RepairWhat::Blocks => {
|
RepairWhat::Blocks => {
|
||||||
unimplemented!()
|
|
||||||
/*
|
|
||||||
info!("Repairing the stored blocks");
|
info!("Repairing the stored blocks");
|
||||||
self.garage
|
garage
|
||||||
.block_manager
|
.background
|
||||||
.repair_data_store(&must_exit)
|
.spawn_worker(garage_block::repair::RepairWorker::new(
|
||||||
.await?;
|
garage.block_manager.clone(),
|
||||||
*/
|
));
|
||||||
}
|
}
|
||||||
RepairWhat::Scrub { tranquility } => {
|
RepairWhat::Scrub { tranquility } => {
|
||||||
unimplemented!()
|
|
||||||
/*
|
|
||||||
info!("Verifying integrity of stored blocks");
|
info!("Verifying integrity of stored blocks");
|
||||||
self.garage
|
garage.background.spawn_worker(
|
||||||
.block_manager
|
garage_block::repair::ScrubWorker::new(garage.block_manager.clone(), tranquility)
|
||||||
.scrub_data_store(&must_exit, tranquility)
|
.await?,
|
||||||
.await?;
|
);
|
||||||
*/
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
Ok(())
|
Ok(())
|
||||||
|
@ -64,7 +59,7 @@ pub fn launch_online_repair(garage: Arc<Garage>, opt: RepairOpt) -> Result<(), E
|
||||||
struct RepairVersionsWorker {
|
struct RepairVersionsWorker {
|
||||||
garage: Arc<Garage>,
|
garage: Arc<Garage>,
|
||||||
pos: Vec<u8>,
|
pos: Vec<u8>,
|
||||||
iter: usize,
|
counter: usize,
|
||||||
}
|
}
|
||||||
|
|
||||||
impl RepairVersionsWorker {
|
impl RepairVersionsWorker {
|
||||||
|
@ -72,7 +67,7 @@ impl RepairVersionsWorker {
|
||||||
Self {
|
Self {
|
||||||
garage,
|
garage,
|
||||||
pos: vec![],
|
pos: vec![],
|
||||||
iter: 0,
|
counter: 0,
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -93,14 +88,14 @@ impl Worker for RepairVersionsWorker {
|
||||||
v
|
v
|
||||||
}
|
}
|
||||||
None => {
|
None => {
|
||||||
info!("repair_versions: finished, done {}", self.iter);
|
info!("repair_versions: finished, done {}", self.counter);
|
||||||
return Ok(WorkerStatus::Done);
|
return Ok(WorkerStatus::Done);
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
self.iter += 1;
|
self.counter += 1;
|
||||||
if self.iter % 1000 == 0 {
|
if self.counter % 1000 == 0 {
|
||||||
info!("repair_versions: {}", self.iter);
|
info!("repair_versions: {}", self.counter);
|
||||||
}
|
}
|
||||||
|
|
||||||
let version = rmp_serde::decode::from_read_ref::<_, Version>(&item_bytes)?;
|
let version = rmp_serde::decode::from_read_ref::<_, Version>(&item_bytes)?;
|
||||||
|
@ -144,7 +139,7 @@ impl Worker for RepairVersionsWorker {
|
||||||
struct RepairBlockrefsWorker {
|
struct RepairBlockrefsWorker {
|
||||||
garage: Arc<Garage>,
|
garage: Arc<Garage>,
|
||||||
pos: Vec<u8>,
|
pos: Vec<u8>,
|
||||||
iter: usize,
|
counter: usize,
|
||||||
}
|
}
|
||||||
|
|
||||||
impl RepairBlockrefsWorker {
|
impl RepairBlockrefsWorker {
|
||||||
|
@ -152,7 +147,7 @@ impl RepairBlockrefsWorker {
|
||||||
Self {
|
Self {
|
||||||
garage,
|
garage,
|
||||||
pos: vec![],
|
pos: vec![],
|
||||||
iter: 0,
|
counter: 0,
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -173,14 +168,14 @@ impl Worker for RepairBlockrefsWorker {
|
||||||
v
|
v
|
||||||
}
|
}
|
||||||
None => {
|
None => {
|
||||||
info!("repair_block_ref: finished, done {}", self.iter);
|
info!("repair_block_ref: finished, done {}", self.counter);
|
||||||
return Ok(WorkerStatus::Done);
|
return Ok(WorkerStatus::Done);
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
self.iter += 1;
|
self.counter += 1;
|
||||||
if self.iter % 1000 == 0 {
|
if self.counter % 1000 == 0 {
|
||||||
info!("repair_block_ref: {}", self.iter);
|
info!("repair_block_ref: {}", self.counter);
|
||||||
}
|
}
|
||||||
|
|
||||||
let block_ref = rmp_serde::decode::from_read_ref::<_, BlockRef>(&item_bytes)?;
|
let block_ref = rmp_serde::decode::from_read_ref::<_, BlockRef>(&item_bytes)?;
|
||||||
|
|
|
@ -159,6 +159,9 @@ impl WorkerHandler {
|
||||||
self.task_id,
|
self.task_id,
|
||||||
e
|
e
|
||||||
);
|
);
|
||||||
|
// Sleep a bit so that error won't repeat immediately
|
||||||
|
// (TODO good way to handle errors)
|
||||||
|
tokio::time::sleep(Duration::from_secs(10)).await;
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
WorkerStatus::Idle => {
|
WorkerStatus::Idle => {
|
||||||
|
|
Loading…
Reference in a new issue