Report progress of scrub and block repair
Some checks failed
continuous-integration/drone/push Build is failing
continuous-integration/drone/pr Build is passing

Alex 2022-06-27 16:52:46 +02:00
parent fc50724256
commit 0e5175abee
Signed by: lx
GPG key ID: 0E496D15096376BE
4 changed files with 133 additions and 53 deletions

View file

@@ -3,26 +3,25 @@
 all:
 	clear; cargo build --all-features
-doc:
-	cd doc/book; mdbook build
 release:
 	nix-build --arg release true
 shell:
 	nix-shell
+# ----
 run1:
-	RUST_LOG=garage=debug ./target/debug/garage -c tmp/config.1.toml server
+	RUST_LOG=garage=debug ./target/debug/garage -c tmp/config1.toml server
 run1rel:
-	RUST_LOG=garage=debug ./target/release/garage -c tmp/config.1.toml server
+	RUST_LOG=garage=debug ./target/release/garage -c tmp/config1.toml server
 run2:
-	RUST_LOG=garage=debug ./target/debug/garage -c tmp/config.2.toml server
+	RUST_LOG=garage=debug ./target/debug/garage -c tmp/config2.toml server
 run2rel:
-	RUST_LOG=garage=debug ./target/release/garage -c tmp/config.2.toml server
+	RUST_LOG=garage=debug ./target/release/garage -c tmp/config2.toml server
 run3:
-	RUST_LOG=garage=debug ./target/debug/garage -c tmp/config.3.toml server
+	RUST_LOG=garage=debug ./target/debug/garage -c tmp/config3.toml server
 run3rel:
-	RUST_LOG=garage=debug ./target/release/garage -c tmp/config.3.toml server
+	RUST_LOG=garage=debug ./target/release/garage -c tmp/config3.toml server

View file

@@ -1,4 +1,5 @@
 use core::ops::Bound;
+use std::path::PathBuf;
 use std::sync::Arc;
 use std::time::Duration;
@@ -36,6 +37,25 @@ impl Worker for RepairWorker {
 		"Block repair worker".into()
 	}
 
+	fn info(&self) -> Option<String> {
+		match self.block_iter.as_ref() {
+			None => {
+				let idx_bytes = self
+					.next_start
+					.as_ref()
+					.map(|x| x.as_slice())
+					.unwrap_or(&[]);
+				let idx_bytes = if idx_bytes.len() > 4 {
+					&idx_bytes[..4]
+				} else {
+					idx_bytes
+				};
+				Some(format!("Phase 1: {}", hex::encode(idx_bytes)))
+			}
+			Some(bi) => Some(format!("Phase 2: {:.2}% done", bi.progress() * 100.)),
+		}
+	}
+
 	async fn work(
 		&mut self,
 		_must_exit: &mut watch::Receiver<bool>,
@@ -74,7 +94,7 @@ impl Worker for RepairWorker {
 		}
 
 		if batch_of_hashes.is_empty() {
 			// move on to phase 2
-			self.block_iter = Some(BlockStoreIterator::new(&self.manager).await?);
+			self.block_iter = Some(BlockStoreIterator::new(&self.manager));
 			return Ok(WorkerStatus::Busy);
 		}
@@ -115,14 +135,14 @@ pub struct ScrubWorker {
 }
 
 impl ScrubWorker {
-	pub async fn new(manager: Arc<BlockManager>, tranquility: u32) -> Result<Self, Error> {
-		let iterator = BlockStoreIterator::new(&manager).await?;
-		Ok(Self {
+	pub fn new(manager: Arc<BlockManager>, tranquility: u32) -> Self {
+		let iterator = BlockStoreIterator::new(&manager);
+		Self {
 			manager,
 			iterator,
 			tranquilizer: Tranquilizer::new(30),
 			tranquility,
-		})
+		}
 	}
 }
@@ -132,6 +152,10 @@ impl Worker for ScrubWorker {
 		"Block scrub worker".into()
 	}
 
+	fn info(&self) -> Option<String> {
+		Some(format!("{:.2}% done", self.iterator.progress() * 100.))
+	}
+
 	async fn work(
 		&mut self,
 		_must_exit: &mut watch::Receiver<bool>,
@@ -153,51 +177,107 @@
 // ----
 
 struct BlockStoreIterator {
-	path: Vec<fs::ReadDir>,
+	path: Vec<ReadingDir>,
+}
+
+enum ReadingDir {
+	Pending(PathBuf),
+	Read {
+		subpaths: Vec<fs::DirEntry>,
+		pos: usize,
+	},
 }
 
 impl BlockStoreIterator {
-	async fn new(manager: &BlockManager) -> Result<Self, Error> {
+	fn new(manager: &BlockManager) -> Self {
 		let root_dir = manager.data_dir.clone();
-		let read_root_dir = fs::read_dir(&root_dir).await?;
-		Ok(Self {
-			path: vec![read_root_dir],
-		})
+		Self {
+			path: vec![ReadingDir::Pending(root_dir)],
+		}
+	}
+
+	/// Returns progress done, between 0% and 1%
+	fn progress(&self) -> f32 {
+		if self.path.is_empty() {
+			1.0
+		} else {
+			let mut ret = 0.0;
+			let mut next_div = 1;
+			for p in self.path.iter() {
+				match p {
+					ReadingDir::Pending(_) => break,
+					ReadingDir::Read { subpaths, pos } => {
+						next_div *= subpaths.len();
+						ret += ((*pos - 1) as f32) / (next_div as f32);
+					}
+				}
+			}
+			ret
+		}
 	}
 
 	async fn next(&mut self) -> Result<Option<Hash>, Error> {
 		loop {
-			if let Some(reader) = self.path.last_mut() {
-				if let Some(data_dir_ent) = reader.next_entry().await? {
-					let name = data_dir_ent.file_name();
-					let name = if let Ok(n) = name.into_string() {
-						n
-					} else {
-						continue;
-					};
-					let ent_type = data_dir_ent.file_type().await?;
-
-					let name = name.strip_suffix(".zst").unwrap_or(&name);
-					if name.len() == 2 && hex::decode(&name).is_ok() && ent_type.is_dir() {
-						let read_child_dir = fs::read_dir(&data_dir_ent.path()).await?;
-						self.path.push(read_child_dir);
-						continue;
-					} else if name.len() == 64 {
-						let hash_bytes = if let Ok(h) = hex::decode(&name) {
-							h
-						} else {
-							continue;
-						};
-						let mut hash = [0u8; 32];
-						hash.copy_from_slice(&hash_bytes[..]);
-						return Ok(Some(hash.into()));
-					}
-				} else {
-					self.path.pop();
-					continue;
-				}
-			} else {
-				return Ok(None);
+			let last_path = match self.path.last_mut() {
+				None => return Ok(None),
+				Some(lp) => lp,
+			};
+
+			if let ReadingDir::Pending(path) = last_path {
+				let mut reader = fs::read_dir(&path).await?;
+				let mut subpaths = vec![];
+				while let Some(ent) = reader.next_entry().await? {
+					subpaths.push(ent);
+				}
+				*last_path = ReadingDir::Read { subpaths, pos: 0 };
+			}
+
+			let (subpaths, pos) = match *last_path {
+				ReadingDir::Read {
+					ref subpaths,
+					ref mut pos,
+				} => (subpaths, pos),
+				ReadingDir::Pending(_) => unreachable!(),
+			};
+			if *pos >= subpaths.len() {
+				self.path.pop();
+				continue;
+			}
+
+			let data_dir_ent = match subpaths.get(*pos) {
+				None => {
+					self.path.pop();
+					continue;
+				}
+				Some(ent) => {
+					*pos += 1;
+					ent
+				}
+			};
+
+			let name = data_dir_ent.file_name();
+			let name = if let Ok(n) = name.into_string() {
+				n
+			} else {
+				continue;
+			};
+			let ent_type = data_dir_ent.file_type().await?;
+
+			let name = name.strip_suffix(".zst").unwrap_or(&name);
+			if name.len() == 2 && hex::decode(&name).is_ok() && ent_type.is_dir() {
+				let path = data_dir_ent.path();
+				self.path.push(ReadingDir::Pending(path));
+				continue;
+			} else if name.len() == 64 {
+				let hash_bytes = if let Ok(h) = hex::decode(&name) {
+					h
+				} else {
+					continue;
+				};
+				let mut hash = [0u8; 32];
+				hash.copy_from_slice(&hash_bytes[..]);
+				return Ok(Some(hash.into()));
 			}
 		}
 	}
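
The progress() estimate added above treats the block store as a fixed-depth prefix tree: each level of the directory stack contributes its position divided by the product of the directory sizes seen so far. A rough standalone sketch of the same idea (illustrative only, not taken from the Garage sources; the function name and the equal-fan-out assumption are mine):

// Sketch: estimate how far a depth-first walk of a directory tree has
// progressed, assuming directories at the same depth have similar sizes.
fn walk_progress(levels: &[(usize, usize)]) -> f32 {
	// `levels` lists, from the root downward, (entries already finished,
	// total entries) for each directory currently open on the stack.
	let mut progress = 0.0;
	let mut scale = 1.0;
	for &(done, total) in levels {
		if total == 0 {
			break;
		}
		// One entry at this level accounts for `scale / total` of the whole tree.
		scale /= total as f32;
		progress += done as f32 * scale;
	}
	progress
}

fn main() {
	// One of 256 top-level prefix directories fully done, plus 128 of 256
	// entries done inside the current one: roughly 0.59% of the whole store.
	let p = walk_progress(&[(1, 256), (128, 256)]);
	println!("{:.2}% done", p * 100.0);
}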

View file

@@ -698,7 +698,7 @@ impl AdminRpcHandler {
 				)))
 			}
 		} else {
-			launch_online_repair(self.garage.clone(), opt).await?;
+			launch_online_repair(self.garage.clone(), opt);
 			Ok(AdminRpc::Ok(format!(
 				"Repair launched on {:?}",
 				self.garage.system.id

View file

@@ -13,7 +13,7 @@ use garage_util::error::Error;
 
 use crate::*;
 
-pub async fn launch_online_repair(garage: Arc<Garage>, opt: RepairOpt) -> Result<(), Error> {
+pub fn launch_online_repair(garage: Arc<Garage>, opt: RepairOpt) {
 	match opt.what {
 		RepairWhat::Tables => {
 			info!("Launching a full sync of tables");
@@ -45,13 +45,14 @@ pub async fn launch_online_repair(garage: Arc<Garage>, opt: RepairOpt) -> Result
 		}
 		RepairWhat::Scrub { tranquility } => {
 			info!("Verifying integrity of stored blocks");
-			garage.background.spawn_worker(
-				garage_block::repair::ScrubWorker::new(garage.block_manager.clone(), tranquility)
-					.await?,
-			);
+			garage
+				.background
+				.spawn_worker(garage_block::repair::ScrubWorker::new(
+					garage.block_manager.clone(),
+					tranquility,
+				));
 		}
 	}
-	Ok(())
 }
 
 // ----
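
Since ScrubWorker::new and BlockStoreIterator::new no longer perform any I/O, launch_online_repair can drop both its async qualifier and its Result: spawning a background worker is now fire-and-forget. A minimal stand-in for that pattern (assumed names; plain threads and a channel instead of Garage's actual BackgroundRunner):

// Simplified stand-in, not the Garage API: a background runner whose
// spawn_worker only enqueues the job, so callers stay synchronous.
use std::sync::mpsc;
use std::thread;

trait Worker: Send + 'static {
	fn name(&self) -> String;
	fn work(&mut self);
}

struct BackgroundRunner {
	tx: mpsc::Sender<Box<dyn Worker>>,
}

impl BackgroundRunner {
	fn new() -> (Self, thread::JoinHandle<()>) {
		let (tx, rx) = mpsc::channel::<Box<dyn Worker>>();
		// A single thread drains the queue and runs each worker in turn.
		let handle = thread::spawn(move || {
			for mut worker in rx {
				println!("starting background worker: {}", worker.name());
				worker.work();
			}
		});
		(Self { tx }, handle)
	}

	// Fire-and-forget: nothing to await and no Result to propagate.
	fn spawn_worker(&self, worker: impl Worker) {
		let _ = self.tx.send(Box::new(worker));
	}
}

struct ScrubWorker {
	tranquility: u32,
}

impl Worker for ScrubWorker {
	fn name(&self) -> String {
		"Block scrub worker".into()
	}
	fn work(&mut self) {
		println!("scrubbing with tranquility {}", self.tranquility);
	}
}

fn main() {
	let (background, runner) = BackgroundRunner::new();
	background.spawn_worker(ScrubWorker { tranquility: 4 });
	drop(background); // close the queue so the runner thread can exit
	runner.join().unwrap();
}

The caller hands the worker over and returns immediately; progress is then observed through the worker's info() string rather than through a return value.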