2020-04-23 18:36:12 +00:00
|
|
|
use std::sync::Arc;
|
2022-07-08 11:30:26 +00:00
|
|
|
use std::time::Duration;
|
2020-04-23 18:36:12 +00:00
|
|
|
|
2022-07-08 11:30:26 +00:00
|
|
|
use async_trait::async_trait;
|
2020-04-23 18:36:12 +00:00
|
|
|
use tokio::sync::watch;
|
|
|
|
|
2022-07-08 11:30:26 +00:00
|
|
|
use garage_block::repair::ScrubWorkerCommand;
|
2020-07-07 11:59:22 +00:00
|
|
|
use garage_model::garage::Garage;
|
2022-05-10 11:16:57 +00:00
|
|
|
use garage_model::s3::block_ref_table::*;
|
|
|
|
use garage_model::s3::object_table::*;
|
|
|
|
use garage_model::s3::version_table::*;
|
2020-04-24 10:10:01 +00:00
|
|
|
use garage_table::*;
|
2022-07-08 11:30:26 +00:00
|
|
|
use garage_util::background::*;
|
2020-04-24 10:10:01 +00:00
|
|
|
use garage_util::error::Error;
|
2020-04-23 18:36:12 +00:00
|
|
|
|
|
|
|
use crate::*;
|
|
|
|
|
2022-07-08 11:30:26 +00:00
|
|
|
pub async fn launch_online_repair(garage: Arc<Garage>, opt: RepairOpt) {
|
|
|
|
match opt.what {
|
|
|
|
RepairWhat::Tables => {
|
|
|
|
info!("Launching a full sync of tables");
|
|
|
|
garage.bucket_table.syncer.add_full_sync();
|
|
|
|
garage.object_table.syncer.add_full_sync();
|
|
|
|
garage.version_table.syncer.add_full_sync();
|
|
|
|
garage.block_ref_table.syncer.add_full_sync();
|
|
|
|
garage.key_table.syncer.add_full_sync();
|
|
|
|
}
|
|
|
|
RepairWhat::Versions => {
|
|
|
|
info!("Repairing the versions table");
|
|
|
|
garage
|
|
|
|
.background
|
|
|
|
.spawn_worker(RepairVersionsWorker::new(garage.clone()));
|
|
|
|
}
|
|
|
|
RepairWhat::BlockRefs => {
|
|
|
|
info!("Repairing the block refs table");
|
|
|
|
garage
|
|
|
|
.background
|
|
|
|
.spawn_worker(RepairBlockrefsWorker::new(garage.clone()));
|
|
|
|
}
|
|
|
|
RepairWhat::Blocks => {
|
|
|
|
info!("Repairing the stored blocks");
|
|
|
|
garage
|
|
|
|
.background
|
|
|
|
.spawn_worker(garage_block::repair::RepairWorker::new(
|
|
|
|
garage.block_manager.clone(),
|
|
|
|
));
|
|
|
|
}
|
|
|
|
RepairWhat::Scrub { cmd } => {
|
|
|
|
let cmd = match cmd {
|
|
|
|
ScrubCmd::Start => ScrubWorkerCommand::Start,
|
|
|
|
ScrubCmd::Pause => ScrubWorkerCommand::Pause(Duration::from_secs(3600 * 24)),
|
|
|
|
ScrubCmd::Resume => ScrubWorkerCommand::Resume,
|
|
|
|
ScrubCmd::Cancel => ScrubWorkerCommand::Cancel,
|
|
|
|
ScrubCmd::SetTranquility { tranquility } => {
|
|
|
|
ScrubWorkerCommand::SetTranquility(tranquility)
|
|
|
|
}
|
|
|
|
};
|
|
|
|
info!("Sending command to scrub worker: {:?}", cmd);
|
|
|
|
garage.block_manager.send_scrub_command(cmd).await;
|
2021-03-15 19:09:44 +00:00
|
|
|
}
|
|
|
|
}
|
2022-07-08 11:30:26 +00:00
|
|
|
}
|
2021-03-15 19:09:44 +00:00
|
|
|
|
2022-07-08 11:30:26 +00:00
|
|
|
// ----
|
|
|
|
|
|
|
|
struct RepairVersionsWorker {
|
|
|
|
garage: Arc<Garage>,
|
|
|
|
pos: Vec<u8>,
|
|
|
|
counter: usize,
|
|
|
|
}
|
|
|
|
|
|
|
|
impl RepairVersionsWorker {
|
|
|
|
fn new(garage: Arc<Garage>) -> Self {
|
|
|
|
Self {
|
|
|
|
garage,
|
|
|
|
pos: vec![],
|
|
|
|
counter: 0,
|
2021-06-23 23:34:28 +00:00
|
|
|
}
|
2020-04-23 18:36:12 +00:00
|
|
|
}
|
2022-07-08 11:30:26 +00:00
|
|
|
}
|
2020-04-23 18:36:12 +00:00
|
|
|
|
2022-07-08 11:30:26 +00:00
|
|
|
#[async_trait]
|
|
|
|
impl Worker for RepairVersionsWorker {
|
|
|
|
fn name(&self) -> String {
|
|
|
|
"Version repair worker".into()
|
|
|
|
}
|
2020-04-23 18:36:12 +00:00
|
|
|
|
2022-07-08 11:30:26 +00:00
|
|
|
fn info(&self) -> Option<String> {
|
|
|
|
Some(format!("{} items done", self.counter))
|
|
|
|
}
|
2022-06-08 08:01:44 +00:00
|
|
|
|
2022-07-08 11:30:26 +00:00
|
|
|
async fn work(&mut self, _must_exit: &mut watch::Receiver<bool>) -> Result<WorkerState, Error> {
|
|
|
|
let item_bytes = match self.garage.version_table.data.store.get_gt(&self.pos)? {
|
|
|
|
Some((k, v)) => {
|
|
|
|
self.pos = k;
|
|
|
|
v
|
2022-06-08 08:01:44 +00:00
|
|
|
}
|
2022-07-08 11:30:26 +00:00
|
|
|
None => {
|
|
|
|
info!("repair_versions: finished, done {}", self.counter);
|
|
|
|
return Ok(WorkerState::Done);
|
2020-04-23 18:36:12 +00:00
|
|
|
}
|
2022-07-08 11:30:26 +00:00
|
|
|
};
|
|
|
|
|
|
|
|
self.counter += 1;
|
|
|
|
|
|
|
|
let version = rmp_serde::decode::from_read_ref::<_, Version>(&item_bytes)?;
|
|
|
|
if !version.deleted.get() {
|
2020-04-23 18:36:12 +00:00
|
|
|
let object = self
|
|
|
|
.garage
|
|
|
|
.object_table
|
2021-12-14 12:55:11 +00:00
|
|
|
.get(&version.bucket_id, &version.key)
|
2020-04-23 18:36:12 +00:00
|
|
|
.await?;
|
|
|
|
let version_exists = match object {
|
2020-04-26 18:59:17 +00:00
|
|
|
Some(o) => o
|
|
|
|
.versions()
|
|
|
|
.iter()
|
|
|
|
.any(|x| x.uuid == version.uuid && x.state != ObjectVersionState::Aborted),
|
2021-03-15 14:26:29 +00:00
|
|
|
None => false,
|
2020-04-23 18:36:12 +00:00
|
|
|
};
|
|
|
|
if !version_exists {
|
|
|
|
info!("Repair versions: marking version as deleted: {:?}", version);
|
|
|
|
self.garage
|
|
|
|
.version_table
|
|
|
|
.insert(&Version::new(
|
|
|
|
version.uuid,
|
2021-12-14 12:55:11 +00:00
|
|
|
version.bucket_id,
|
2020-04-23 18:36:12 +00:00
|
|
|
version.key,
|
|
|
|
true,
|
|
|
|
))
|
|
|
|
.await?;
|
|
|
|
}
|
|
|
|
}
|
2022-07-08 11:30:26 +00:00
|
|
|
|
|
|
|
Ok(WorkerState::Busy)
|
2020-04-23 18:36:12 +00:00
|
|
|
}
|
|
|
|
|
2022-07-08 11:30:26 +00:00
|
|
|
async fn wait_for_work(&mut self, _must_exit: &watch::Receiver<bool>) -> WorkerState {
|
|
|
|
unreachable!()
|
|
|
|
}
|
|
|
|
}
|
2020-04-23 18:36:12 +00:00
|
|
|
|
2022-07-08 11:30:26 +00:00
|
|
|
// ----
|
2022-06-08 08:01:44 +00:00
|
|
|
|
2022-07-08 11:30:26 +00:00
|
|
|
struct RepairBlockrefsWorker {
|
|
|
|
garage: Arc<Garage>,
|
|
|
|
pos: Vec<u8>,
|
|
|
|
counter: usize,
|
|
|
|
}
|
2020-04-23 18:36:12 +00:00
|
|
|
|
2022-07-08 11:30:26 +00:00
|
|
|
impl RepairBlockrefsWorker {
|
|
|
|
fn new(garage: Arc<Garage>) -> Self {
|
|
|
|
Self {
|
|
|
|
garage,
|
|
|
|
pos: vec![],
|
|
|
|
counter: 0,
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
#[async_trait]
|
|
|
|
impl Worker for RepairBlockrefsWorker {
|
|
|
|
fn name(&self) -> String {
|
|
|
|
"Block refs repair worker".into()
|
|
|
|
}
|
|
|
|
|
|
|
|
fn info(&self) -> Option<String> {
|
|
|
|
Some(format!("{} items done", self.counter))
|
|
|
|
}
|
|
|
|
|
|
|
|
async fn work(&mut self, _must_exit: &mut watch::Receiver<bool>) -> Result<WorkerState, Error> {
|
|
|
|
let item_bytes = match self.garage.block_ref_table.data.store.get_gt(&self.pos)? {
|
|
|
|
Some((k, v)) => {
|
|
|
|
self.pos = k;
|
|
|
|
v
|
2020-04-23 18:36:12 +00:00
|
|
|
}
|
2022-07-08 11:30:26 +00:00
|
|
|
None => {
|
|
|
|
info!("repair_block_ref: finished, done {}", self.counter);
|
|
|
|
return Ok(WorkerState::Done);
|
|
|
|
}
|
|
|
|
};
|
|
|
|
|
|
|
|
self.counter += 1;
|
|
|
|
|
|
|
|
let block_ref = rmp_serde::decode::from_read_ref::<_, BlockRef>(&item_bytes)?;
|
|
|
|
if !block_ref.deleted.get() {
|
2020-04-23 18:36:12 +00:00
|
|
|
let version = self
|
|
|
|
.garage
|
|
|
|
.version_table
|
|
|
|
.get(&block_ref.version, &EmptyKey)
|
|
|
|
.await?;
|
2021-03-15 14:26:29 +00:00
|
|
|
// The version might not exist if it has been GC'ed
|
|
|
|
let ref_exists = version.map(|v| !v.deleted.get()).unwrap_or(false);
|
2020-04-23 18:36:12 +00:00
|
|
|
if !ref_exists {
|
|
|
|
info!(
|
|
|
|
"Repair block ref: marking block_ref as deleted: {:?}",
|
|
|
|
block_ref
|
|
|
|
);
|
|
|
|
self.garage
|
|
|
|
.block_ref_table
|
|
|
|
.insert(&BlockRef {
|
|
|
|
block: block_ref.block,
|
|
|
|
version: block_ref.version,
|
2021-03-10 15:21:56 +00:00
|
|
|
deleted: true.into(),
|
2020-04-23 18:36:12 +00:00
|
|
|
})
|
|
|
|
.await?;
|
|
|
|
}
|
|
|
|
}
|
2022-07-08 11:30:26 +00:00
|
|
|
|
|
|
|
Ok(WorkerState::Busy)
|
|
|
|
}
|
|
|
|
|
|
|
|
async fn wait_for_work(&mut self, _must_exit: &watch::Receiver<bool>) -> WorkerState {
|
|
|
|
unreachable!()
|
2020-04-23 18:36:12 +00:00
|
|
|
}
|
|
|
|
}
|