2020-04-23 18:36:12 +00:00
|
|
|
use std::sync::Arc;
|
2022-07-08 11:30:26 +00:00
|
|
|
use std::time::Duration;
|
2020-04-23 18:36:12 +00:00
|
|
|
|
2022-07-08 11:30:26 +00:00
|
|
|
use async_trait::async_trait;
|
2020-04-23 18:36:12 +00:00
|
|
|
use tokio::sync::watch;
|
|
|
|
|
2022-07-08 11:30:26 +00:00
|
|
|
use garage_block::repair::ScrubWorkerCommand;
|
2023-05-04 08:36:48 +00:00
|
|
|
|
2020-07-07 11:59:22 +00:00
|
|
|
use garage_model::garage::Garage;
|
2022-05-10 11:16:57 +00:00
|
|
|
use garage_model::s3::block_ref_table::*;
|
2023-05-04 08:45:44 +00:00
|
|
|
use garage_model::s3::mpu_table::*;
|
2022-05-10 11:16:57 +00:00
|
|
|
use garage_model::s3::object_table::*;
|
|
|
|
use garage_model::s3::version_table::*;
|
2023-05-04 08:36:48 +00:00
|
|
|
|
|
|
|
use garage_table::replication::*;
|
2020-04-24 10:10:01 +00:00
|
|
|
use garage_table::*;
|
2023-05-04 08:36:48 +00:00
|
|
|
|
2022-07-08 11:30:26 +00:00
|
|
|
use garage_util::background::*;
|
2020-04-24 10:10:01 +00:00
|
|
|
use garage_util::error::Error;
|
2023-01-03 14:27:36 +00:00
|
|
|
use garage_util::migrate::Migrate;
|
2020-04-23 18:36:12 +00:00
|
|
|
|
|
|
|
use crate::*;
|
|
|
|
|
2022-12-14 11:51:16 +00:00
|
|
|
pub async fn launch_online_repair(
|
|
|
|
garage: &Arc<Garage>,
|
|
|
|
bg: &BackgroundRunner,
|
|
|
|
opt: RepairOpt,
|
|
|
|
) -> Result<(), Error> {
|
2022-07-08 11:30:26 +00:00
|
|
|
match opt.what {
|
|
|
|
RepairWhat::Tables => {
|
|
|
|
info!("Launching a full sync of tables");
|
2022-12-14 11:28:07 +00:00
|
|
|
garage.bucket_table.syncer.add_full_sync()?;
|
|
|
|
garage.object_table.syncer.add_full_sync()?;
|
|
|
|
garage.version_table.syncer.add_full_sync()?;
|
|
|
|
garage.block_ref_table.syncer.add_full_sync()?;
|
|
|
|
garage.key_table.syncer.add_full_sync()?;
|
2022-07-08 11:30:26 +00:00
|
|
|
}
|
|
|
|
RepairWhat::Versions => {
|
|
|
|
info!("Repairing the versions table");
|
2023-05-04 08:36:48 +00:00
|
|
|
bg.spawn_worker(TableRepairWorker::new(garage.clone(), RepairVersions));
|
2022-07-08 11:30:26 +00:00
|
|
|
}
|
2023-05-04 08:45:44 +00:00
|
|
|
RepairWhat::MultipartUploads => {
|
|
|
|
info!("Repairing the multipart uploads table");
|
|
|
|
bg.spawn_worker(TableRepairWorker::new(garage.clone(), RepairMpu));
|
|
|
|
}
|
2022-07-08 11:30:26 +00:00
|
|
|
RepairWhat::BlockRefs => {
|
|
|
|
info!("Repairing the block refs table");
|
2023-05-04 08:36:48 +00:00
|
|
|
bg.spawn_worker(TableRepairWorker::new(garage.clone(), RepairBlockRefs));
|
2022-07-08 11:30:26 +00:00
|
|
|
}
|
|
|
|
RepairWhat::Blocks => {
|
|
|
|
info!("Repairing the stored blocks");
|
2022-12-14 11:51:16 +00:00
|
|
|
bg.spawn_worker(garage_block::repair::RepairWorker::new(
|
|
|
|
garage.block_manager.clone(),
|
|
|
|
));
|
2022-07-08 11:30:26 +00:00
|
|
|
}
|
|
|
|
RepairWhat::Scrub { cmd } => {
|
|
|
|
let cmd = match cmd {
|
|
|
|
ScrubCmd::Start => ScrubWorkerCommand::Start,
|
|
|
|
ScrubCmd::Pause => ScrubWorkerCommand::Pause(Duration::from_secs(3600 * 24)),
|
|
|
|
ScrubCmd::Resume => ScrubWorkerCommand::Resume,
|
|
|
|
ScrubCmd::Cancel => ScrubWorkerCommand::Cancel,
|
|
|
|
ScrubCmd::SetTranquility { tranquility } => {
|
2023-01-04 12:07:13 +00:00
|
|
|
garage
|
|
|
|
.block_manager
|
|
|
|
.scrub_persister
|
|
|
|
.set_with(|x| x.tranquility = tranquility)?;
|
|
|
|
return Ok(());
|
2022-07-08 11:30:26 +00:00
|
|
|
}
|
|
|
|
};
|
|
|
|
info!("Sending command to scrub worker: {:?}", cmd);
|
2022-12-14 11:28:07 +00:00
|
|
|
garage.block_manager.send_scrub_command(cmd).await?;
|
2021-03-15 19:09:44 +00:00
|
|
|
}
|
|
|
|
}
|
2022-12-14 11:28:07 +00:00
|
|
|
Ok(())
|
2022-07-08 11:30:26 +00:00
|
|
|
}
|
2021-03-15 19:09:44 +00:00
|
|
|
|
2022-07-08 11:30:26 +00:00
|
|
|
// ----
|
|
|
|
|
2023-05-04 08:36:48 +00:00
|
|
|
#[async_trait]
|
|
|
|
trait TableRepair: Send + Sync + 'static {
|
|
|
|
type T: TableSchema;
|
|
|
|
|
|
|
|
fn table(garage: &Garage) -> &Table<Self::T, TableShardedReplication>;
|
|
|
|
|
|
|
|
async fn process(
|
|
|
|
&mut self,
|
|
|
|
garage: &Garage,
|
|
|
|
entry: <<Self as TableRepair>::T as TableSchema>::E,
|
|
|
|
) -> Result<bool, Error>;
|
|
|
|
}
|
|
|
|
|
|
|
|
struct TableRepairWorker<T: TableRepair> {
|
2022-07-08 11:30:26 +00:00
|
|
|
garage: Arc<Garage>,
|
|
|
|
pos: Vec<u8>,
|
|
|
|
counter: usize,
|
2023-05-04 08:36:48 +00:00
|
|
|
repairs: usize,
|
|
|
|
inner: T,
|
2022-07-08 11:30:26 +00:00
|
|
|
}
|
|
|
|
|
2023-05-04 08:36:48 +00:00
|
|
|
impl<R: TableRepair> TableRepairWorker<R> {
|
|
|
|
fn new(garage: Arc<Garage>, inner: R) -> Self {
|
2022-07-08 11:30:26 +00:00
|
|
|
Self {
|
|
|
|
garage,
|
2023-05-04 08:36:48 +00:00
|
|
|
inner,
|
2022-07-08 11:30:26 +00:00
|
|
|
pos: vec![],
|
|
|
|
counter: 0,
|
2023-05-04 08:36:48 +00:00
|
|
|
repairs: 0,
|
2021-06-23 23:34:28 +00:00
|
|
|
}
|
2020-04-23 18:36:12 +00:00
|
|
|
}
|
2022-07-08 11:30:26 +00:00
|
|
|
}
|
2020-04-23 18:36:12 +00:00
|
|
|
|
2022-07-08 11:30:26 +00:00
|
|
|
#[async_trait]
|
2023-05-04 08:36:48 +00:00
|
|
|
impl<R: TableRepair> Worker for TableRepairWorker<R> {
|
2022-07-08 11:30:26 +00:00
|
|
|
fn name(&self) -> String {
|
2023-05-04 08:36:48 +00:00
|
|
|
format!("{} repair worker", R::T::TABLE_NAME)
|
2022-07-08 11:30:26 +00:00
|
|
|
}
|
2020-04-23 18:36:12 +00:00
|
|
|
|
2022-12-12 16:16:49 +00:00
|
|
|
fn status(&self) -> WorkerStatus {
|
|
|
|
WorkerStatus {
|
2023-05-04 08:36:48 +00:00
|
|
|
progress: Some(format!("{} ({})", self.counter, self.repairs)),
|
2022-12-12 16:16:49 +00:00
|
|
|
..Default::default()
|
|
|
|
}
|
2022-07-08 11:30:26 +00:00
|
|
|
}
|
2022-06-08 08:01:44 +00:00
|
|
|
|
2022-07-08 11:30:26 +00:00
|
|
|
async fn work(&mut self, _must_exit: &mut watch::Receiver<bool>) -> Result<WorkerState, Error> {
|
2023-05-04 08:36:48 +00:00
|
|
|
let (item_bytes, next_pos) = match R::table(&self.garage).data.store.get_gt(&self.pos)? {
|
2022-12-14 15:31:31 +00:00
|
|
|
Some((k, v)) => (v, k),
|
2022-07-08 11:30:26 +00:00
|
|
|
None => {
|
2023-05-04 08:36:48 +00:00
|
|
|
info!(
|
|
|
|
"{}: finished, done {}, fixed {}",
|
|
|
|
self.name(),
|
|
|
|
self.counter,
|
|
|
|
self.repairs
|
|
|
|
);
|
2022-07-08 11:30:26 +00:00
|
|
|
return Ok(WorkerState::Done);
|
2020-04-23 18:36:12 +00:00
|
|
|
}
|
2022-07-08 11:30:26 +00:00
|
|
|
};
|
|
|
|
|
2023-05-04 08:36:48 +00:00
|
|
|
let entry = <R::T as TableSchema>::E::decode(&item_bytes)
|
|
|
|
.ok_or_message("Cannot decode table entry")?;
|
|
|
|
if self.inner.process(&self.garage, entry).await? {
|
|
|
|
self.repairs += 1;
|
|
|
|
}
|
|
|
|
|
|
|
|
self.counter += 1;
|
|
|
|
self.pos = next_pos;
|
|
|
|
|
|
|
|
Ok(WorkerState::Busy)
|
|
|
|
}
|
|
|
|
|
|
|
|
async fn wait_for_work(&mut self) -> WorkerState {
|
|
|
|
unreachable!()
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
// ----
|
|
|
|
|
|
|
|
struct RepairVersions;
|
|
|
|
|
|
|
|
#[async_trait]
|
|
|
|
impl TableRepair for RepairVersions {
|
|
|
|
type T = VersionTable;
|
|
|
|
|
|
|
|
fn table(garage: &Garage) -> &Table<Self::T, TableShardedReplication> {
|
|
|
|
&garage.version_table
|
|
|
|
}
|
|
|
|
|
|
|
|
async fn process(&mut self, garage: &Garage, version: Version) -> Result<bool, Error> {
|
2022-07-08 11:30:26 +00:00
|
|
|
if !version.deleted.get() {
|
2023-05-04 08:45:44 +00:00
|
|
|
let ref_exists = match &version.backlink {
|
|
|
|
VersionBacklink::Object { bucket_id, key } => garage
|
|
|
|
.object_table
|
2023-05-04 17:44:01 +00:00
|
|
|
.get(bucket_id, key)
|
2023-05-04 08:45:44 +00:00
|
|
|
.await?
|
|
|
|
.map(|o| {
|
|
|
|
o.versions().iter().any(|x| {
|
2023-05-03 17:21:35 +00:00
|
|
|
x.uuid == version.uuid && x.state != ObjectVersionState::Aborted
|
2023-05-04 08:45:44 +00:00
|
|
|
})
|
|
|
|
})
|
|
|
|
.unwrap_or(false),
|
|
|
|
VersionBacklink::MultipartUpload { upload_id } => garage
|
|
|
|
.mpu_table
|
2023-05-04 17:44:01 +00:00
|
|
|
.get(upload_id, &EmptyKey)
|
2023-05-04 08:45:44 +00:00
|
|
|
.await?
|
|
|
|
.map(|u| !u.deleted.get())
|
|
|
|
.unwrap_or(false),
|
2020-04-23 18:36:12 +00:00
|
|
|
};
|
2023-05-04 08:45:44 +00:00
|
|
|
|
|
|
|
if !ref_exists {
|
2020-04-23 18:36:12 +00:00
|
|
|
info!("Repair versions: marking version as deleted: {:?}", version);
|
2023-05-04 08:36:48 +00:00
|
|
|
garage
|
2020-04-23 18:36:12 +00:00
|
|
|
.version_table
|
2023-05-03 17:21:35 +00:00
|
|
|
.insert(&Version::new(version.uuid, version.backlink, true))
|
2020-04-23 18:36:12 +00:00
|
|
|
.await?;
|
2023-05-04 08:36:48 +00:00
|
|
|
return Ok(true);
|
2020-04-23 18:36:12 +00:00
|
|
|
}
|
|
|
|
}
|
2022-07-08 11:30:26 +00:00
|
|
|
|
2023-05-04 08:36:48 +00:00
|
|
|
Ok(false)
|
2022-07-08 11:30:26 +00:00
|
|
|
}
|
|
|
|
}
|
2020-04-23 18:36:12 +00:00
|
|
|
|
2022-07-08 11:30:26 +00:00
|
|
|
// ----
|
2022-06-08 08:01:44 +00:00
|
|
|
|
2023-05-04 08:36:48 +00:00
|
|
|
struct RepairBlockRefs;
|
2022-07-08 11:30:26 +00:00
|
|
|
|
|
|
|
#[async_trait]
|
2023-05-04 08:36:48 +00:00
|
|
|
impl TableRepair for RepairBlockRefs {
|
|
|
|
type T = BlockRefTable;
|
2022-07-08 11:30:26 +00:00
|
|
|
|
2023-05-04 08:36:48 +00:00
|
|
|
fn table(garage: &Garage) -> &Table<Self::T, TableShardedReplication> {
|
|
|
|
&garage.block_ref_table
|
2022-07-08 11:30:26 +00:00
|
|
|
}
|
|
|
|
|
2023-05-04 08:45:44 +00:00
|
|
|
async fn process(&mut self, garage: &Garage, mut block_ref: BlockRef) -> Result<bool, Error> {
|
2022-07-08 11:30:26 +00:00
|
|
|
if !block_ref.deleted.get() {
|
2023-05-04 08:45:44 +00:00
|
|
|
let ref_exists = garage
|
2020-04-23 18:36:12 +00:00
|
|
|
.version_table
|
|
|
|
.get(&block_ref.version, &EmptyKey)
|
2023-05-04 08:45:44 +00:00
|
|
|
.await?
|
|
|
|
.map(|v| !v.deleted.get())
|
|
|
|
.unwrap_or(false);
|
|
|
|
|
2020-04-23 18:36:12 +00:00
|
|
|
if !ref_exists {
|
|
|
|
info!(
|
|
|
|
"Repair block ref: marking block_ref as deleted: {:?}",
|
|
|
|
block_ref
|
|
|
|
);
|
2023-05-04 08:45:44 +00:00
|
|
|
block_ref.deleted.set();
|
|
|
|
garage.block_ref_table.insert(&block_ref).await?;
|
|
|
|
return Ok(true);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
Ok(false)
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
// ----
|
|
|
|
|
|
|
|
struct RepairMpu;
|
|
|
|
|
|
|
|
#[async_trait]
|
|
|
|
impl TableRepair for RepairMpu {
|
|
|
|
type T = MultipartUploadTable;
|
|
|
|
|
|
|
|
fn table(garage: &Garage) -> &Table<Self::T, TableShardedReplication> {
|
|
|
|
&garage.mpu_table
|
|
|
|
}
|
|
|
|
|
|
|
|
async fn process(&mut self, garage: &Garage, mut mpu: MultipartUpload) -> Result<bool, Error> {
|
|
|
|
if !mpu.deleted.get() {
|
|
|
|
let ref_exists = garage
|
|
|
|
.object_table
|
|
|
|
.get(&mpu.bucket_id, &mpu.key)
|
|
|
|
.await?
|
|
|
|
.map(|o| {
|
|
|
|
o.versions()
|
|
|
|
.iter()
|
|
|
|
.any(|x| x.uuid == mpu.upload_id && x.is_uploading(Some(true)))
|
|
|
|
})
|
|
|
|
.unwrap_or(false);
|
|
|
|
|
|
|
|
if !ref_exists {
|
|
|
|
info!(
|
|
|
|
"Repair multipart uploads: marking mpu as deleted: {:?}",
|
|
|
|
mpu
|
|
|
|
);
|
|
|
|
mpu.parts.clear();
|
|
|
|
mpu.deleted.set();
|
|
|
|
garage.mpu_table.insert(&mpu).await?;
|
2023-05-04 08:36:48 +00:00
|
|
|
return Ok(true);
|
2020-04-23 18:36:12 +00:00
|
|
|
}
|
|
|
|
}
|
2022-07-08 11:30:26 +00:00
|
|
|
|
2023-05-04 08:36:48 +00:00
|
|
|
Ok(false)
|
2020-04-23 18:36:12 +00:00
|
|
|
}
|
|
|
|
}
|