Online repair new workers, except blocks and scrub
This commit is contained in:
parent
b8338dea56
commit
a855c54bdb
2 changed files with 142 additions and 96 deletions
|
@ -24,7 +24,7 @@ use garage_model::migrate::Migrate;
|
||||||
use garage_model::permission::*;
|
use garage_model::permission::*;
|
||||||
|
|
||||||
use crate::cli::*;
|
use crate::cli::*;
|
||||||
use crate::repair::online::OnlineRepair;
|
use crate::repair::online::launch_online_repair;
|
||||||
|
|
||||||
pub const ADMIN_RPC_PATH: &str = "garage/admin_rpc.rs/Rpc";
|
pub const ADMIN_RPC_PATH: &str = "garage/admin_rpc.rs/Rpc";
|
||||||
|
|
||||||
|
@ -693,15 +693,7 @@ impl AdminRpcHandler {
|
||||||
)))
|
)))
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
let repair = OnlineRepair {
|
launch_online_repair(self.garage.clone(), opt)?;
|
||||||
garage: self.garage.clone(),
|
|
||||||
};
|
|
||||||
self.garage
|
|
||||||
.system
|
|
||||||
.background
|
|
||||||
.spawn_worker("Repair worker".into(), move |must_exit| async move {
|
|
||||||
repair.repair_worker(opt, must_exit).await
|
|
||||||
});
|
|
||||||
Ok(AdminRpc::Ok(format!(
|
Ok(AdminRpc::Ok(format!(
|
||||||
"Repair launched on {:?}",
|
"Repair launched on {:?}",
|
||||||
self.garage.system.id
|
self.garage.system.id
|
||||||
|
|
|
@ -1,5 +1,6 @@
|
||||||
use std::sync::Arc;
|
use std::sync::Arc;
|
||||||
|
|
||||||
|
use async_trait::async_trait;
|
||||||
use tokio::sync::watch;
|
use tokio::sync::watch;
|
||||||
|
|
||||||
use garage_model::garage::Garage;
|
use garage_model::garage::Garage;
|
||||||
|
@ -7,83 +8,103 @@ use garage_model::s3::block_ref_table::*;
|
||||||
use garage_model::s3::object_table::*;
|
use garage_model::s3::object_table::*;
|
||||||
use garage_model::s3::version_table::*;
|
use garage_model::s3::version_table::*;
|
||||||
use garage_table::*;
|
use garage_table::*;
|
||||||
|
use garage_util::background::*;
|
||||||
use garage_util::error::Error;
|
use garage_util::error::Error;
|
||||||
|
|
||||||
use crate::*;
|
use crate::*;
|
||||||
|
|
||||||
pub struct OnlineRepair {
|
pub fn launch_online_repair(garage: Arc<Garage>, opt: RepairOpt) -> Result<(), Error> {
|
||||||
pub garage: Arc<Garage>,
|
match opt.what {
|
||||||
|
RepairWhat::Tables => {
|
||||||
|
info!("Launching a full sync of tables");
|
||||||
|
garage.bucket_table.syncer.add_full_sync();
|
||||||
|
garage.object_table.syncer.add_full_sync();
|
||||||
|
garage.version_table.syncer.add_full_sync();
|
||||||
|
garage.block_ref_table.syncer.add_full_sync();
|
||||||
|
garage.key_table.syncer.add_full_sync();
|
||||||
|
}
|
||||||
|
RepairWhat::Versions => {
|
||||||
|
info!("Repairing the versions table");
|
||||||
|
garage
|
||||||
|
.background
|
||||||
|
.spawn_worker(RepairVersionsWorker::new(garage.clone()));
|
||||||
|
}
|
||||||
|
RepairWhat::BlockRefs => {
|
||||||
|
info!("Repairing the block refs table");
|
||||||
|
garage
|
||||||
|
.background
|
||||||
|
.spawn_worker(RepairBlockrefsWorker::new(garage.clone()));
|
||||||
|
}
|
||||||
|
RepairWhat::Blocks => {
|
||||||
|
unimplemented!()
|
||||||
|
/*
|
||||||
|
info!("Repairing the stored blocks");
|
||||||
|
self.garage
|
||||||
|
.block_manager
|
||||||
|
.repair_data_store(&must_exit)
|
||||||
|
.await?;
|
||||||
|
*/
|
||||||
|
}
|
||||||
|
RepairWhat::Scrub { tranquility } => {
|
||||||
|
unimplemented!()
|
||||||
|
/*
|
||||||
|
info!("Verifying integrity of stored blocks");
|
||||||
|
self.garage
|
||||||
|
.block_manager
|
||||||
|
.scrub_data_store(&must_exit, tranquility)
|
||||||
|
.await?;
|
||||||
|
*/
|
||||||
|
}
|
||||||
|
}
|
||||||
|
Ok(())
|
||||||
}
|
}
|
||||||
|
|
||||||
impl OnlineRepair {
|
// ----
|
||||||
pub async fn repair_worker(&self, opt: RepairOpt, must_exit: watch::Receiver<bool>) {
|
|
||||||
if let Err(e) = self.repair_worker_aux(opt, must_exit).await {
|
struct RepairVersionsWorker {
|
||||||
warn!("Repair worker failed with error: {}", e);
|
garage: Arc<Garage>,
|
||||||
|
pos: Vec<u8>,
|
||||||
|
iter: usize,
|
||||||
|
}
|
||||||
|
|
||||||
|
impl RepairVersionsWorker {
|
||||||
|
fn new(garage: Arc<Garage>) -> Self {
|
||||||
|
Self {
|
||||||
|
garage,
|
||||||
|
pos: vec![],
|
||||||
|
iter: 0,
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
}
|
||||||
|
|
||||||
async fn repair_worker_aux(
|
#[async_trait]
|
||||||
&self,
|
impl Worker for RepairVersionsWorker {
|
||||||
opt: RepairOpt,
|
fn name(&self) -> String {
|
||||||
must_exit: watch::Receiver<bool>,
|
"Version repair worker".into()
|
||||||
) -> Result<(), Error> {
|
|
||||||
match opt.what {
|
|
||||||
RepairWhat::Tables => {
|
|
||||||
info!("Launching a full sync of tables");
|
|
||||||
self.garage.bucket_table.syncer.add_full_sync();
|
|
||||||
self.garage.object_table.syncer.add_full_sync();
|
|
||||||
self.garage.version_table.syncer.add_full_sync();
|
|
||||||
self.garage.block_ref_table.syncer.add_full_sync();
|
|
||||||
self.garage.key_table.syncer.add_full_sync();
|
|
||||||
}
|
|
||||||
RepairWhat::Versions => {
|
|
||||||
info!("Repairing the versions table");
|
|
||||||
self.repair_versions(&must_exit).await?;
|
|
||||||
}
|
|
||||||
RepairWhat::BlockRefs => {
|
|
||||||
info!("Repairing the block refs table");
|
|
||||||
self.repair_block_ref(&must_exit).await?;
|
|
||||||
}
|
|
||||||
RepairWhat::Blocks => {
|
|
||||||
info!("Repairing the stored blocks");
|
|
||||||
self.garage
|
|
||||||
.block_manager
|
|
||||||
.repair_data_store(&must_exit)
|
|
||||||
.await?;
|
|
||||||
}
|
|
||||||
RepairWhat::Scrub { tranquility } => {
|
|
||||||
info!("Verifying integrity of stored blocks");
|
|
||||||
self.garage
|
|
||||||
.block_manager
|
|
||||||
.scrub_data_store(&must_exit, tranquility)
|
|
||||||
.await?;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
Ok(())
|
|
||||||
}
|
}
|
||||||
|
|
||||||
async fn repair_versions(&self, must_exit: &watch::Receiver<bool>) -> Result<(), Error> {
|
async fn work(
|
||||||
let mut pos = vec![];
|
&mut self,
|
||||||
let mut i = 0;
|
_must_exit: &mut watch::Receiver<bool>,
|
||||||
|
) -> Result<WorkerStatus, Error> {
|
||||||
while !*must_exit.borrow() {
|
let item_bytes = match self.garage.version_table.data.store.get_gt(&self.pos)? {
|
||||||
let item_bytes = match self.garage.version_table.data.store.get_gt(pos)? {
|
Some((k, v)) => {
|
||||||
Some((k, v)) => {
|
self.pos = k;
|
||||||
pos = k;
|
v
|
||||||
v
|
|
||||||
}
|
|
||||||
None => break,
|
|
||||||
};
|
|
||||||
|
|
||||||
i += 1;
|
|
||||||
if i % 1000 == 0 {
|
|
||||||
info!("repair_versions: {}", i);
|
|
||||||
}
|
}
|
||||||
|
None => {
|
||||||
let version = rmp_serde::decode::from_read_ref::<_, Version>(&item_bytes)?;
|
info!("repair_versions: finished, done {}", self.iter);
|
||||||
if version.deleted.get() {
|
return Ok(WorkerStatus::Done);
|
||||||
continue;
|
|
||||||
}
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
self.iter += 1;
|
||||||
|
if self.iter % 1000 == 0 {
|
||||||
|
info!("repair_versions: {}", self.iter);
|
||||||
|
}
|
||||||
|
|
||||||
|
let version = rmp_serde::decode::from_read_ref::<_, Version>(&item_bytes)?;
|
||||||
|
if !version.deleted.get() {
|
||||||
let object = self
|
let object = self
|
||||||
.garage
|
.garage
|
||||||
.object_table
|
.object_table
|
||||||
|
@ -109,32 +130,61 @@ impl OnlineRepair {
|
||||||
.await?;
|
.await?;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
info!("repair_versions: finished, done {}", i);
|
|
||||||
Ok(())
|
Ok(WorkerStatus::Busy)
|
||||||
}
|
}
|
||||||
|
|
||||||
async fn repair_block_ref(&self, must_exit: &watch::Receiver<bool>) -> Result<(), Error> {
|
async fn wait_for_work(&mut self, _must_exit: &watch::Receiver<bool>) -> WorkerStatus {
|
||||||
let mut pos = vec![];
|
unreachable!()
|
||||||
let mut i = 0;
|
}
|
||||||
|
}
|
||||||
|
|
||||||
while !*must_exit.borrow() {
|
// ----
|
||||||
let item_bytes = match self.garage.block_ref_table.data.store.get_gt(pos)? {
|
|
||||||
Some((k, v)) => {
|
|
||||||
pos = k;
|
|
||||||
v
|
|
||||||
}
|
|
||||||
None => break,
|
|
||||||
};
|
|
||||||
|
|
||||||
i += 1;
|
struct RepairBlockrefsWorker {
|
||||||
if i % 1000 == 0 {
|
garage: Arc<Garage>,
|
||||||
info!("repair_block_ref: {}", i);
|
pos: Vec<u8>,
|
||||||
|
iter: usize,
|
||||||
|
}
|
||||||
|
|
||||||
|
impl RepairBlockrefsWorker {
|
||||||
|
fn new(garage: Arc<Garage>) -> Self {
|
||||||
|
Self {
|
||||||
|
garage,
|
||||||
|
pos: vec![],
|
||||||
|
iter: 0,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
#[async_trait]
|
||||||
|
impl Worker for RepairBlockrefsWorker {
|
||||||
|
fn name(&self) -> String {
|
||||||
|
"Block refs repair worker".into()
|
||||||
|
}
|
||||||
|
|
||||||
|
async fn work(
|
||||||
|
&mut self,
|
||||||
|
_must_exit: &mut watch::Receiver<bool>,
|
||||||
|
) -> Result<WorkerStatus, Error> {
|
||||||
|
let item_bytes = match self.garage.block_ref_table.data.store.get_gt(&self.pos)? {
|
||||||
|
Some((k, v)) => {
|
||||||
|
self.pos = k;
|
||||||
|
v
|
||||||
}
|
}
|
||||||
|
None => {
|
||||||
let block_ref = rmp_serde::decode::from_read_ref::<_, BlockRef>(&item_bytes)?;
|
info!("repair_block_ref: finished, done {}", self.iter);
|
||||||
if block_ref.deleted.get() {
|
return Ok(WorkerStatus::Done);
|
||||||
continue;
|
|
||||||
}
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
self.iter += 1;
|
||||||
|
if self.iter % 1000 == 0 {
|
||||||
|
info!("repair_block_ref: {}", self.iter);
|
||||||
|
}
|
||||||
|
|
||||||
|
let block_ref = rmp_serde::decode::from_read_ref::<_, BlockRef>(&item_bytes)?;
|
||||||
|
if !block_ref.deleted.get() {
|
||||||
let version = self
|
let version = self
|
||||||
.garage
|
.garage
|
||||||
.version_table
|
.version_table
|
||||||
|
@ -157,7 +207,11 @@ impl OnlineRepair {
|
||||||
.await?;
|
.await?;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
info!("repair_block_ref: finished, done {}", i);
|
|
||||||
Ok(())
|
Ok(WorkerStatus::Busy)
|
||||||
|
}
|
||||||
|
|
||||||
|
async fn wait_for_work(&mut self, _must_exit: &watch::Receiver<bool>) -> WorkerStatus {
|
||||||
|
unreachable!()
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
Loading…
Reference in a new issue