Online repair new workers, except blocks and scrub
Some checks failed
continuous-integration/drone/pr Build is failing
continuous-integration/drone/push Build is failing

This commit is contained in:
Alex 2022-06-21 15:27:58 +02:00
parent b8338dea56
commit a855c54bdb
Signed by: lx
GPG key ID: 0E496D15096376BE
2 changed files with 142 additions and 96 deletions

View file

@ -24,7 +24,7 @@ use garage_model::migrate::Migrate;
use garage_model::permission::*; use garage_model::permission::*;
use crate::cli::*; use crate::cli::*;
use crate::repair::online::OnlineRepair; use crate::repair::online::launch_online_repair;
pub const ADMIN_RPC_PATH: &str = "garage/admin_rpc.rs/Rpc"; pub const ADMIN_RPC_PATH: &str = "garage/admin_rpc.rs/Rpc";
@ -693,15 +693,7 @@ impl AdminRpcHandler {
))) )))
} }
} else { } else {
let repair = OnlineRepair { launch_online_repair(self.garage.clone(), opt)?;
garage: self.garage.clone(),
};
self.garage
.system
.background
.spawn_worker("Repair worker".into(), move |must_exit| async move {
repair.repair_worker(opt, must_exit).await
});
Ok(AdminRpc::Ok(format!( Ok(AdminRpc::Ok(format!(
"Repair launched on {:?}", "Repair launched on {:?}",
self.garage.system.id self.garage.system.id

View file

@ -1,5 +1,6 @@
use std::sync::Arc; use std::sync::Arc;
use async_trait::async_trait;
use tokio::sync::watch; use tokio::sync::watch;
use garage_model::garage::Garage; use garage_model::garage::Garage;
@ -7,83 +8,103 @@ use garage_model::s3::block_ref_table::*;
use garage_model::s3::object_table::*; use garage_model::s3::object_table::*;
use garage_model::s3::version_table::*; use garage_model::s3::version_table::*;
use garage_table::*; use garage_table::*;
use garage_util::background::*;
use garage_util::error::Error; use garage_util::error::Error;
use crate::*; use crate::*;
pub struct OnlineRepair { pub fn launch_online_repair(garage: Arc<Garage>, opt: RepairOpt) -> Result<(), Error> {
pub garage: Arc<Garage>,
}
impl OnlineRepair {
pub async fn repair_worker(&self, opt: RepairOpt, must_exit: watch::Receiver<bool>) {
if let Err(e) = self.repair_worker_aux(opt, must_exit).await {
warn!("Repair worker failed with error: {}", e);
}
}
async fn repair_worker_aux(
&self,
opt: RepairOpt,
must_exit: watch::Receiver<bool>,
) -> Result<(), Error> {
match opt.what { match opt.what {
RepairWhat::Tables => { RepairWhat::Tables => {
info!("Launching a full sync of tables"); info!("Launching a full sync of tables");
self.garage.bucket_table.syncer.add_full_sync(); garage.bucket_table.syncer.add_full_sync();
self.garage.object_table.syncer.add_full_sync(); garage.object_table.syncer.add_full_sync();
self.garage.version_table.syncer.add_full_sync(); garage.version_table.syncer.add_full_sync();
self.garage.block_ref_table.syncer.add_full_sync(); garage.block_ref_table.syncer.add_full_sync();
self.garage.key_table.syncer.add_full_sync(); garage.key_table.syncer.add_full_sync();
} }
RepairWhat::Versions => { RepairWhat::Versions => {
info!("Repairing the versions table"); info!("Repairing the versions table");
self.repair_versions(&must_exit).await?; garage
.background
.spawn_worker(RepairVersionsWorker::new(garage.clone()));
} }
RepairWhat::BlockRefs => { RepairWhat::BlockRefs => {
info!("Repairing the block refs table"); info!("Repairing the block refs table");
self.repair_block_ref(&must_exit).await?; garage
.background
.spawn_worker(RepairBlockrefsWorker::new(garage.clone()));
} }
RepairWhat::Blocks => { RepairWhat::Blocks => {
unimplemented!()
/*
info!("Repairing the stored blocks"); info!("Repairing the stored blocks");
self.garage self.garage
.block_manager .block_manager
.repair_data_store(&must_exit) .repair_data_store(&must_exit)
.await?; .await?;
*/
} }
RepairWhat::Scrub { tranquility } => { RepairWhat::Scrub { tranquility } => {
unimplemented!()
/*
info!("Verifying integrity of stored blocks"); info!("Verifying integrity of stored blocks");
self.garage self.garage
.block_manager .block_manager
.scrub_data_store(&must_exit, tranquility) .scrub_data_store(&must_exit, tranquility)
.await?; .await?;
*/
} }
} }
Ok(()) Ok(())
} }
async fn repair_versions(&self, must_exit: &watch::Receiver<bool>) -> Result<(), Error> { // ----
let mut pos = vec![];
let mut i = 0;
while !*must_exit.borrow() { struct RepairVersionsWorker {
let item_bytes = match self.garage.version_table.data.store.get_gt(pos)? { garage: Arc<Garage>,
pos: Vec<u8>,
iter: usize,
}
impl RepairVersionsWorker {
fn new(garage: Arc<Garage>) -> Self {
Self {
garage,
pos: vec![],
iter: 0,
}
}
}
#[async_trait]
impl Worker for RepairVersionsWorker {
fn name(&self) -> String {
"Version repair worker".into()
}
async fn work(
&mut self,
_must_exit: &mut watch::Receiver<bool>,
) -> Result<WorkerStatus, Error> {
let item_bytes = match self.garage.version_table.data.store.get_gt(&self.pos)? {
Some((k, v)) => { Some((k, v)) => {
pos = k; self.pos = k;
v v
} }
None => break, None => {
info!("repair_versions: finished, done {}", self.iter);
return Ok(WorkerStatus::Done);
}
}; };
i += 1; self.iter += 1;
if i % 1000 == 0 { if self.iter % 1000 == 0 {
info!("repair_versions: {}", i); info!("repair_versions: {}", self.iter);
} }
let version = rmp_serde::decode::from_read_ref::<_, Version>(&item_bytes)?; let version = rmp_serde::decode::from_read_ref::<_, Version>(&item_bytes)?;
if version.deleted.get() { if !version.deleted.get() {
continue;
}
let object = self let object = self
.garage .garage
.object_table .object_table
@ -109,32 +130,61 @@ impl OnlineRepair {
.await?; .await?;
} }
} }
info!("repair_versions: finished, done {}", i);
Ok(()) Ok(WorkerStatus::Busy)
} }
async fn repair_block_ref(&self, must_exit: &watch::Receiver<bool>) -> Result<(), Error> { async fn wait_for_work(&mut self, _must_exit: &watch::Receiver<bool>) -> WorkerStatus {
let mut pos = vec![]; unreachable!()
let mut i = 0; }
}
while !*must_exit.borrow() { // ----
let item_bytes = match self.garage.block_ref_table.data.store.get_gt(pos)? {
struct RepairBlockrefsWorker {
garage: Arc<Garage>,
pos: Vec<u8>,
iter: usize,
}
impl RepairBlockrefsWorker {
fn new(garage: Arc<Garage>) -> Self {
Self {
garage,
pos: vec![],
iter: 0,
}
}
}
#[async_trait]
impl Worker for RepairBlockrefsWorker {
fn name(&self) -> String {
"Block refs repair worker".into()
}
async fn work(
&mut self,
_must_exit: &mut watch::Receiver<bool>,
) -> Result<WorkerStatus, Error> {
let item_bytes = match self.garage.block_ref_table.data.store.get_gt(&self.pos)? {
Some((k, v)) => { Some((k, v)) => {
pos = k; self.pos = k;
v v
} }
None => break, None => {
info!("repair_block_ref: finished, done {}", self.iter);
return Ok(WorkerStatus::Done);
}
}; };
i += 1; self.iter += 1;
if i % 1000 == 0 { if self.iter % 1000 == 0 {
info!("repair_block_ref: {}", i); info!("repair_block_ref: {}", self.iter);
} }
let block_ref = rmp_serde::decode::from_read_ref::<_, BlockRef>(&item_bytes)?; let block_ref = rmp_serde::decode::from_read_ref::<_, BlockRef>(&item_bytes)?;
if block_ref.deleted.get() { if !block_ref.deleted.get() {
continue;
}
let version = self let version = self
.garage .garage
.version_table .version_table
@ -157,7 +207,11 @@ impl OnlineRepair {
.await?; .await?;
} }
} }
info!("repair_block_ref: finished, done {}", i);
Ok(()) Ok(WorkerStatus::Busy)
}
async fn wait_for_work(&mut self, _must_exit: &watch::Receiver<bool>) -> WorkerStatus {
unreachable!()
} }
} }