Report progress of scrub and block repair

commit 0e5175abee, parent fc50724256
4 changed files with 133 additions and 53 deletions

Makefile | 17
@@ -3,26 +3,25 @@
 all:
 	clear; cargo build --all-features
 
-doc:
-	cd doc/book; mdbook build
-
 release:
 	nix-build --arg release true
 
 shell:
 	nix-shell
 
+# ----
+
 run1:
-	RUST_LOG=garage=debug ./target/debug/garage -c tmp/config.1.toml server
+	RUST_LOG=garage=debug ./target/debug/garage -c tmp/config1.toml server
 run1rel:
-	RUST_LOG=garage=debug ./target/release/garage -c tmp/config.1.toml server
+	RUST_LOG=garage=debug ./target/release/garage -c tmp/config1.toml server
 
 run2:
-	RUST_LOG=garage=debug ./target/debug/garage -c tmp/config.2.toml server
+	RUST_LOG=garage=debug ./target/debug/garage -c tmp/config2.toml server
 run2rel:
-	RUST_LOG=garage=debug ./target/release/garage -c tmp/config.2.toml server
+	RUST_LOG=garage=debug ./target/release/garage -c tmp/config2.toml server
 
 run3:
-	RUST_LOG=garage=debug ./target/debug/garage -c tmp/config.3.toml server
+	RUST_LOG=garage=debug ./target/debug/garage -c tmp/config3.toml server
 run3rel:
-	RUST_LOG=garage=debug ./target/release/garage -c tmp/config.3.toml server
+	RUST_LOG=garage=debug ./target/release/garage -c tmp/config3.toml server

@@ -1,4 +1,5 @@
 use core::ops::Bound;
+use std::path::PathBuf;
 
 use std::sync::Arc;
 use std::time::Duration;
@@ -36,6 +37,25 @@ impl Worker for RepairWorker {
 		"Block repair worker".into()
 	}
 
+	fn info(&self) -> Option<String> {
+		match self.block_iter.as_ref() {
+			None => {
+				let idx_bytes = self
+					.next_start
+					.as_ref()
+					.map(|x| x.as_slice())
+					.unwrap_or(&[]);
+				let idx_bytes = if idx_bytes.len() > 4 {
+					&idx_bytes[..4]
+				} else {
+					idx_bytes
+				};
+				Some(format!("Phase 1: {}", hex::encode(idx_bytes)))
+			}
+			Some(bi) => Some(format!("Phase 2: {:.2}% done", bi.progress() * 100.)),
+		}
+	}
+
 	async fn work(
 		&mut self,
 		_must_exit: &mut watch::Receiver<bool>,
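The info() method added above is what the rest of this commit reports to operators: in phase 1 it shows the hex prefix (at most 4 bytes) of the next key the repair worker will scan, and in phase 2 it defers to the block-store iterator's percentage. As a hedged illustration, not code from this commit, the snippet below reproduces the two readouts with made-up values; only the hex crate is assumed:

// Hedged illustration, not part of the commit: what the two readouts look like.
fn main() {
	// Phase 1: position shown as the hex prefix (at most 4 bytes) of the next key.
	let next_start: &[u8] = &[0x3f, 0xa2, 0x10, 0x55, 0x99]; // made-up key bytes
	let idx_bytes = if next_start.len() > 4 {
		&next_start[..4]
	} else {
		next_start
	};
	println!("Phase 1: {}", hex::encode(idx_bytes)); // Phase 1: 3fa21055

	// Phase 2: fraction of the on-disk block store visited so far.
	let progress = 0.4237_f32; // made-up progress value
	println!("Phase 2: {:.2}% done", progress * 100.); // Phase 2: 42.37% done
}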
@@ -74,7 +94,7 @@ impl Worker for RepairWorker {
 			}
 			if batch_of_hashes.is_empty() {
 				// move on to phase 2
-				self.block_iter = Some(BlockStoreIterator::new(&self.manager).await?);
+				self.block_iter = Some(BlockStoreIterator::new(&self.manager));
 				return Ok(WorkerStatus::Busy);
 			}
 
@@ -115,14 +135,14 @@ pub struct ScrubWorker {
 }
 
 impl ScrubWorker {
-	pub async fn new(manager: Arc<BlockManager>, tranquility: u32) -> Result<Self, Error> {
-		let iterator = BlockStoreIterator::new(&manager).await?;
-		Ok(Self {
+	pub fn new(manager: Arc<BlockManager>, tranquility: u32) -> Self {
+		let iterator = BlockStoreIterator::new(&manager);
+		Self {
 			manager,
 			iterator,
 			tranquilizer: Tranquilizer::new(30),
 			tranquility,
-		})
+		}
 	}
 }
 
@@ -132,6 +152,10 @@ impl Worker for ScrubWorker {
 		"Block scrub worker".into()
 	}
 
+	fn info(&self) -> Option<String> {
+		Some(format!("{:.2}% done", self.iterator.progress() * 100.))
+	}
+
 	async fn work(
 		&mut self,
 		_must_exit: &mut watch::Receiver<bool>,
@@ -153,22 +177,85 @@ impl Worker for ScrubWorker {
 // ----
 
 struct BlockStoreIterator {
-	path: Vec<fs::ReadDir>,
+	path: Vec<ReadingDir>,
+}
+
+enum ReadingDir {
+	Pending(PathBuf),
+	Read {
+		subpaths: Vec<fs::DirEntry>,
+		pos: usize,
+	},
 }
 
 impl BlockStoreIterator {
-	async fn new(manager: &BlockManager) -> Result<Self, Error> {
+	fn new(manager: &BlockManager) -> Self {
 		let root_dir = manager.data_dir.clone();
-		let read_root_dir = fs::read_dir(&root_dir).await?;
-		Ok(Self {
-			path: vec![read_root_dir],
-		})
+		Self {
+			path: vec![ReadingDir::Pending(root_dir)],
+		}
+	}
+
+	/// Returns progress done, between 0% and 1%
+	fn progress(&self) -> f32 {
+		if self.path.is_empty() {
+			1.0
+		} else {
+			let mut ret = 0.0;
+			let mut next_div = 1;
+			for p in self.path.iter() {
+				match p {
+					ReadingDir::Pending(_) => break,
+					ReadingDir::Read { subpaths, pos } => {
+						next_div *= subpaths.len();
+						ret += ((*pos - 1) as f32) / (next_div as f32);
+					}
+				}
+			}
+			ret
+		}
 	}
 
 	async fn next(&mut self) -> Result<Option<Hash>, Error> {
 		loop {
-			if let Some(reader) = self.path.last_mut() {
-				if let Some(data_dir_ent) = reader.next_entry().await? {
+			let last_path = match self.path.last_mut() {
+				None => return Ok(None),
+				Some(lp) => lp,
+			};
+
+			if let ReadingDir::Pending(path) = last_path {
+				let mut reader = fs::read_dir(&path).await?;
+				let mut subpaths = vec![];
+				while let Some(ent) = reader.next_entry().await? {
+					subpaths.push(ent);
+				}
+				*last_path = ReadingDir::Read { subpaths, pos: 0 };
+			}
+
+			let (subpaths, pos) = match *last_path {
+				ReadingDir::Read {
+					ref subpaths,
+					ref mut pos,
+				} => (subpaths, pos),
+				ReadingDir::Pending(_) => unreachable!(),
+			};
+
+			if *pos >= subpaths.len() {
+				self.path.pop();
+				continue;
+			}
+
+			let data_dir_ent = match subpaths.get(*pos) {
+				None => {
+					self.path.pop();
+					continue;
+				}
+				Some(ent) => {
+					*pos += 1;
+					ent
+				}
+			};
+
 			let name = data_dir_ent.file_name();
 			let name = if let Ok(n) = name.into_string() {
 				n
@@ -179,8 +266,8 @@ impl BlockStoreIterator {
 
 			let name = name.strip_suffix(".zst").unwrap_or(&name);
 			if name.len() == 2 && hex::decode(&name).is_ok() && ent_type.is_dir() {
-				let read_child_dir = fs::read_dir(&data_dir_ent.path()).await?;
-				self.path.push(read_child_dir);
+				let path = data_dir_ent.path();
+				self.path.push(ReadingDir::Pending(path));
 				continue;
 			} else if name.len() == 64 {
 				let hash_bytes = if let Ok(h) = hex::decode(&name) {
@@ -192,13 +279,6 @@ impl BlockStoreIterator {
 				hash.copy_from_slice(&hash_bytes[..]);
 				return Ok(Some(hash.into()));
 			}
-			} else {
-				self.path.pop();
-				continue;
-			}
-		} else {
-			return Ok(None);
-		}
 		}
 	}
 }
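For intuition about the progress() estimate introduced in the iterator above: each directory level that has already been read contributes its position within that level, scaled down by the product of the entry counts of the levels above it, and the walk stops at the first still-pending level. The following self-contained sketch, not part of the commit, mirrors that arithmetic; the helper and its (pos, len) pairs are invented for the example:

// Hedged sketch, not part of the commit: mirrors the arithmetic of
// BlockStoreIterator::progress() for a stack of already-read directory levels.
// Each pair is (pos, subpaths.len()); pos has already been incremented past the
// entry currently being visited, hence the `pos - 1`.
fn progress(levels: &[(usize, usize)]) -> f32 {
	let mut ret = 0.0;
	let mut next_div = 1;
	for &(pos, len) in levels {
		next_div *= len;
		ret += ((pos - 1) as f32) / (next_div as f32);
	}
	ret
}

fn main() {
	// 128 of 256 top-level prefix directories fully done, and 64 of 256 entries
	// done inside the 129th one: roughly 50.10% of the store visited.
	let p = progress(&[(129, 256), (65, 256)]);
	println!("{:.2}% done", p * 100.);
}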
@@ -698,7 +698,7 @@ impl AdminRpcHandler {
 				)))
 			}
 		} else {
-			launch_online_repair(self.garage.clone(), opt).await?;
+			launch_online_repair(self.garage.clone(), opt);
 			Ok(AdminRpc::Ok(format!(
 				"Repair launched on {:?}",
 				self.garage.system.id
@@ -13,7 +13,7 @@ use garage_util::error::Error;
 
 use crate::*;
 
-pub async fn launch_online_repair(garage: Arc<Garage>, opt: RepairOpt) -> Result<(), Error> {
+pub fn launch_online_repair(garage: Arc<Garage>, opt: RepairOpt) {
 	match opt.what {
 		RepairWhat::Tables => {
 			info!("Launching a full sync of tables");
@@ -45,13 +45,14 @@ pub async fn launch_online_repair(garage: Arc<Garage>, opt: RepairOpt) -> Result
 		}
 		RepairWhat::Scrub { tranquility } => {
 			info!("Verifying integrity of stored blocks");
-			garage.background.spawn_worker(
-				garage_block::repair::ScrubWorker::new(garage.block_manager.clone(), tranquility)
-					.await?,
-			);
+			garage
+				.background
+				.spawn_worker(garage_block::repair::ScrubWorker::new(
+					garage.block_manager.clone(),
+					tranquility,
+				));
 		}
 	}
-	Ok(())
 }
 
 // ----
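The call-site changes in the last two hunks follow from BlockStoreIterator::new no longer touching the filesystem: the root directory is stored as ReadingDir::Pending and only listed on first use inside the async next(), so ScrubWorker::new and launch_online_repair can drop both async and their Result. Below is a minimal sketch of that deferral pattern, assuming tokio with the fs and macros features enabled; the LazyDir type and its methods are illustrative, not Garage's API:

// Hedged sketch, not part of the commit: synchronous construction, lazy listing.
use std::path::PathBuf;
use tokio::fs;

enum ReadingDir {
	Pending(PathBuf),
	Read { subpaths: Vec<fs::DirEntry>, pos: usize },
}

struct LazyDir {
	state: ReadingDir,
}

impl LazyDir {
	// No I/O here, so callers need neither async nor error handling.
	fn new(path: PathBuf) -> Self {
		LazyDir { state: ReadingDir::Pending(path) }
	}

	// The first call resolves the pending listing; later calls step through it.
	async fn next(&mut self) -> std::io::Result<Option<PathBuf>> {
		if let ReadingDir::Pending(path) = &self.state {
			let path = path.clone();
			let mut reader = fs::read_dir(&path).await?;
			let mut subpaths = vec![];
			while let Some(ent) = reader.next_entry().await? {
				subpaths.push(ent);
			}
			self.state = ReadingDir::Read { subpaths, pos: 0 };
		}
		match &mut self.state {
			ReadingDir::Read { subpaths, pos } => match subpaths.get(*pos) {
				Some(ent) => {
					*pos += 1;
					Ok(Some(ent.path()))
				}
				None => Ok(None),
			},
			ReadingDir::Pending(_) => unreachable!(),
		}
	}
}

#[tokio::main]
async fn main() -> std::io::Result<()> {
	let mut dir = LazyDir::new(PathBuf::from("."));
	while let Some(entry) = dir.next().await? {
		println!("{}", entry.display());
	}
	Ok(())
}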