garage/src/garage/repair/online.rs

279 lines
6.4 KiB
Rust
Raw Normal View History

2020-04-23 18:36:12 +00:00
use std::sync::Arc;
use std::time::Duration;
2020-04-23 18:36:12 +00:00
use async_trait::async_trait;
2020-04-23 18:36:12 +00:00
use tokio::sync::watch;
use garage_block::repair::ScrubWorkerCommand;
2023-05-04 08:36:48 +00:00
2020-07-07 11:59:22 +00:00
use garage_model::garage::Garage;
First implementation of K2V (#293) **Specification:** View spec at [this URL](https://git.deuxfleurs.fr/Deuxfleurs/garage/src/branch/k2v/doc/drafts/k2v-spec.md) - [x] Specify the structure of K2V triples - [x] Specify the DVVS format used for causality detection - [x] Specify the K2V index (just a counter of number of values per partition key) - [x] Specify single-item endpoints: ReadItem, InsertItem, DeleteItem - [x] Specify index endpoint: ReadIndex - [x] Specify multi-item endpoints: InsertBatch, ReadBatch, DeleteBatch - [x] Move to JSON objects instead of tuples - [x] Specify endpoints for polling for updates on single values (PollItem) **Implementation:** - [x] Table for K2V items, causal contexts - [x] Indexing mechanism and table for K2V index - [x] Make API handlers a bit more generic - [x] K2V API endpoint - [x] K2V API router - [x] ReadItem - [x] InsertItem - [x] DeleteItem - [x] PollItem - [x] ReadIndex - [x] InsertBatch - [x] ReadBatch - [x] DeleteBatch **Testing:** - [x] Just a simple Python script that does some requests to check visually that things are going right (does not contain parsing of results or assertions on returned values) - [x] Actual tests: - [x] Adapt testing framework - [x] Simple test with InsertItem + ReadItem - [x] Test with several Insert/Read/DeleteItem + ReadIndex - [x] Test all combinations of return formats for ReadItem - [x] Test with ReadBatch, InsertBatch, DeleteBatch - [x] Test with PollItem - [x] Test error codes - [ ] Fix most broken stuff - [x] test PollItem broken randomly - [x] when invalid causality tokens are given, errors should be 4xx not 5xx **Improvements:** - [x] Descending range queries - [x] Specify - [x] Implement - [x] Add test - [x] Batch updates to index counter - [x] Put K2V behind `k2v` feature flag Co-authored-by: Alex Auvolat <alex@adnab.me> Reviewed-on: https://git.deuxfleurs.fr/Deuxfleurs/garage/pulls/293 Co-authored-by: Alex <alex@adnab.me> Co-committed-by: Alex <alex@adnab.me>
2022-05-10 11:16:57 +00:00
use garage_model::s3::block_ref_table::*;
2023-05-04 08:45:44 +00:00
use garage_model::s3::mpu_table::*;
First implementation of K2V (#293) **Specification:** View spec at [this URL](https://git.deuxfleurs.fr/Deuxfleurs/garage/src/branch/k2v/doc/drafts/k2v-spec.md) - [x] Specify the structure of K2V triples - [x] Specify the DVVS format used for causality detection - [x] Specify the K2V index (just a counter of number of values per partition key) - [x] Specify single-item endpoints: ReadItem, InsertItem, DeleteItem - [x] Specify index endpoint: ReadIndex - [x] Specify multi-item endpoints: InsertBatch, ReadBatch, DeleteBatch - [x] Move to JSON objects instead of tuples - [x] Specify endpoints for polling for updates on single values (PollItem) **Implementation:** - [x] Table for K2V items, causal contexts - [x] Indexing mechanism and table for K2V index - [x] Make API handlers a bit more generic - [x] K2V API endpoint - [x] K2V API router - [x] ReadItem - [x] InsertItem - [x] DeleteItem - [x] PollItem - [x] ReadIndex - [x] InsertBatch - [x] ReadBatch - [x] DeleteBatch **Testing:** - [x] Just a simple Python script that does some requests to check visually that things are going right (does not contain parsing of results or assertions on returned values) - [x] Actual tests: - [x] Adapt testing framework - [x] Simple test with InsertItem + ReadItem - [x] Test with several Insert/Read/DeleteItem + ReadIndex - [x] Test all combinations of return formats for ReadItem - [x] Test with ReadBatch, InsertBatch, DeleteBatch - [x] Test with PollItem - [x] Test error codes - [ ] Fix most broken stuff - [x] test PollItem broken randomly - [x] when invalid causality tokens are given, errors should be 4xx not 5xx **Improvements:** - [x] Descending range queries - [x] Specify - [x] Implement - [x] Add test - [x] Batch updates to index counter - [x] Put K2V behind `k2v` feature flag Co-authored-by: Alex Auvolat <alex@adnab.me> Reviewed-on: https://git.deuxfleurs.fr/Deuxfleurs/garage/pulls/293 Co-authored-by: Alex <alex@adnab.me> Co-committed-by: Alex <alex@adnab.me>
2022-05-10 11:16:57 +00:00
use garage_model::s3::object_table::*;
use garage_model::s3::version_table::*;
2023-05-04 08:36:48 +00:00
use garage_table::replication::*;
2020-04-24 10:10:01 +00:00
use garage_table::*;
2023-05-04 08:36:48 +00:00
use garage_util::background::*;
2020-04-24 10:10:01 +00:00
use garage_util::error::Error;
use garage_util::migrate::Migrate;
2020-04-23 18:36:12 +00:00
use crate::*;
pub async fn launch_online_repair(
garage: &Arc<Garage>,
bg: &BackgroundRunner,
opt: RepairOpt,
) -> Result<(), Error> {
match opt.what {
RepairWhat::Tables => {
info!("Launching a full sync of tables");
garage.bucket_table.syncer.add_full_sync()?;
garage.object_table.syncer.add_full_sync()?;
garage.version_table.syncer.add_full_sync()?;
garage.block_ref_table.syncer.add_full_sync()?;
garage.key_table.syncer.add_full_sync()?;
}
RepairWhat::Versions => {
info!("Repairing the versions table");
2023-05-04 08:36:48 +00:00
bg.spawn_worker(TableRepairWorker::new(garage.clone(), RepairVersions));
}
2023-05-04 08:45:44 +00:00
RepairWhat::MultipartUploads => {
info!("Repairing the multipart uploads table");
bg.spawn_worker(TableRepairWorker::new(garage.clone(), RepairMpu));
}
RepairWhat::BlockRefs => {
info!("Repairing the block refs table");
2023-05-04 08:36:48 +00:00
bg.spawn_worker(TableRepairWorker::new(garage.clone(), RepairBlockRefs));
}
RepairWhat::Blocks => {
info!("Repairing the stored blocks");
bg.spawn_worker(garage_block::repair::RepairWorker::new(
garage.block_manager.clone(),
));
}
RepairWhat::Scrub { cmd } => {
let cmd = match cmd {
ScrubCmd::Start => ScrubWorkerCommand::Start,
ScrubCmd::Pause => ScrubWorkerCommand::Pause(Duration::from_secs(3600 * 24)),
ScrubCmd::Resume => ScrubWorkerCommand::Resume,
ScrubCmd::Cancel => ScrubWorkerCommand::Cancel,
ScrubCmd::SetTranquility { tranquility } => {
garage
.block_manager
.scrub_persister
.set_with(|x| x.tranquility = tranquility)?;
return Ok(());
}
};
info!("Sending command to scrub worker: {:?}", cmd);
garage.block_manager.send_scrub_command(cmd).await?;
}
}
Ok(())
}
// ----
2023-05-04 08:36:48 +00:00
/// A repair procedure that is applied, one entry at a time, to a single table.
///
/// Implementors are driven by `TableRepairWorker`, which decodes each stored
/// entry and hands it to `process`.
#[async_trait]
trait TableRepair: Send + Sync + 'static {
/// Schema of the table whose entries this repair handles.
type T: TableSchema;
/// Returns the table to repair, borrowed from `garage`.
fn table(garage: &Garage) -> &Table<Self::T, TableShardedReplication>;
/// Examines one decoded entry of the table.
///
/// `TableRepairWorker` counts every `Ok(true)` as one repair and propagates
/// any `Err` to its caller.
async fn process(
&mut self,
garage: &Garage,
entry: <<Self as TableRepair>::T as TableSchema>::E,
) -> Result<bool, Error>;
}
struct TableRepairWorker<T: TableRepair> {
garage: Arc<Garage>,
pos: Vec<u8>,
counter: usize,
2023-05-04 08:36:48 +00:00
repairs: usize,
inner: T,
}
2023-05-04 08:36:48 +00:00
impl<R: TableRepair> TableRepairWorker<R> {
fn new(garage: Arc<Garage>, inner: R) -> Self {
Self {
garage,
2023-05-04 08:36:48 +00:00
inner,
pos: vec![],
counter: 0,
2023-05-04 08:36:48 +00:00
repairs: 0,
}
2020-04-23 18:36:12 +00:00
}
}
2020-04-23 18:36:12 +00:00
#[async_trait]
2023-05-04 08:36:48 +00:00
impl<R: TableRepair> Worker for TableRepairWorker<R> {
fn name(&self) -> String {
2023-05-04 08:36:48 +00:00
format!("{} repair worker", R::T::TABLE_NAME)
}
2020-04-23 18:36:12 +00:00
fn status(&self) -> WorkerStatus {
WorkerStatus {
2023-05-04 08:36:48 +00:00
progress: Some(format!("{} ({})", self.counter, self.repairs)),
..Default::default()
}
}
Abstract database behind generic interface and implement alternative drivers (#322) - [x] Design interface - [x] Implement Sled backend - [x] Re-implement the SledCountedTree hack ~~on Sled backend~~ on all backends (i.e. over the abstraction) - [x] Convert Garage code to use generic interface - [x] Proof-read converted Garage code - [ ] Test everything well - [x] Implement sqlite backend - [x] Implement LMDB backend - [ ] (Implement Persy backend?) - [ ] (Implement other backends? (like RocksDB, ...)) - [x] Implement backend choice in config file and garage server module - [x] Add CLI for converting between DB formats - Exploit the new interface to put more things in transactions - [x] `.updated()` trigger on Garage tables Fix #284 **Bugs** - [x] When exporting sqlite, trees iterate empty?? - [x] LMDB doesn't work **Known issues for various back-ends** - Sled: - Eats all my RAM and also all my disk space - `.len()` has to traverse the whole table - Is actually quite slow on some operations - And is actually pretty bad code... - Sqlite: - Requires a lock to be taken on all operations. The lock is also taken when iterating on a table with `.iter()`, and the lock isn't released until the iterator is dropped. This means that we must be VERY careful to not do anything else inside a `.iter()` loop or else we will have a deadlock! Most such cases have been eliminated from the Garage codebase, but there might still be some that remain. If your Garage-over-Sqlite seems to hang/freeze, this is the reason. - (adapter uses a bunch of unsafe code) - Heed (LMDB): - Not suited for 32-bit machines as it has to map the whole DB in memory. - (adapter uses a tiny bit of unsafe code) **My recommendation:** avoid 32-bit machines and use LMDB as much as possible. **Converting databases** is actually quite easy. 
For example from Sled to LMDB: ```bash cd src/db cargo run --features cli --bin convert -- -i path/to/garage/meta/db -a sled -o path/to/garage/meta/db.lmdb -b lmdb ``` Then, just add this to your `config.toml`: ```toml db_engine = "lmdb" ``` Co-authored-by: Alex Auvolat <alex@adnab.me> Reviewed-on: https://git.deuxfleurs.fr/Deuxfleurs/garage/pulls/322 Co-authored-by: Alex <alex@adnab.me> Co-committed-by: Alex <alex@adnab.me>
2022-06-08 08:01:44 +00:00
async fn work(&mut self, _must_exit: &mut watch::Receiver<bool>) -> Result<WorkerState, Error> {
2023-05-04 08:36:48 +00:00
let (item_bytes, next_pos) = match R::table(&self.garage).data.store.get_gt(&self.pos)? {
2022-12-14 15:31:31 +00:00
Some((k, v)) => (v, k),
None => {
2023-05-04 08:36:48 +00:00
info!(
"{}: finished, done {}, fixed {}",
self.name(),
self.counter,
self.repairs
);
return Ok(WorkerState::Done);
2020-04-23 18:36:12 +00:00
}
};
2023-05-04 08:36:48 +00:00
let entry = <R::T as TableSchema>::E::decode(&item_bytes)
.ok_or_message("Cannot decode table entry")?;
if self.inner.process(&self.garage, entry).await? {
self.repairs += 1;
}
self.counter += 1;
self.pos = next_pos;
Ok(WorkerState::Busy)
}
// `work` in this impl only ever returns `Busy` or `Done`, so this worker
// never reports that it is waiting for work.
// NOTE(review): presumably the runner only calls this method after an idle
// state, which is why it is marked unreachable — confirm against the
// `Worker` trait contract.
async fn wait_for_work(&mut self) -> WorkerState {
unreachable!()
}
}
// ----
struct RepairVersions;
#[async_trait]
impl TableRepair for RepairVersions {
type T = VersionTable;
fn table(garage: &Garage) -> &Table<Self::T, TableShardedReplication> {
&garage.version_table
}
async fn process(&mut self, garage: &Garage, version: Version) -> Result<bool, Error> {
if !version.deleted.get() {
2023-05-04 08:45:44 +00:00
let ref_exists = match &version.backlink {
VersionBacklink::Object { bucket_id, key } => garage
.object_table
2023-05-04 17:44:01 +00:00
.get(bucket_id, key)
2023-05-04 08:45:44 +00:00
.await?
.map(|o| {
o.versions().iter().any(|x| {
2023-05-03 17:21:35 +00:00
x.uuid == version.uuid && x.state != ObjectVersionState::Aborted
2023-05-04 08:45:44 +00:00
})
})
.unwrap_or(false),
VersionBacklink::MultipartUpload { upload_id } => garage
.mpu_table
2023-05-04 17:44:01 +00:00
.get(upload_id, &EmptyKey)
2023-05-04 08:45:44 +00:00
.await?
.map(|u| !u.deleted.get())
.unwrap_or(false),
2020-04-23 18:36:12 +00:00
};
2023-05-04 08:45:44 +00:00
if !ref_exists {
2020-04-23 18:36:12 +00:00
info!("Repair versions: marking version as deleted: {:?}", version);
2023-05-04 08:36:48 +00:00
garage
2020-04-23 18:36:12 +00:00
.version_table
2023-05-03 17:21:35 +00:00
.insert(&Version::new(version.uuid, version.backlink, true))
2020-04-23 18:36:12 +00:00
.await?;
2023-05-04 08:36:48 +00:00
return Ok(true);
2020-04-23 18:36:12 +00:00
}
}
2023-05-04 08:36:48 +00:00
Ok(false)
}
}
2020-04-23 18:36:12 +00:00
// ----
Abstract database behind generic interface and implement alternative drivers (#322) - [x] Design interface - [x] Implement Sled backend - [x] Re-implement the SledCountedTree hack ~~on Sled backend~~ on all backends (i.e. over the abstraction) - [x] Convert Garage code to use generic interface - [x] Proof-read converted Garage code - [ ] Test everything well - [x] Implement sqlite backend - [x] Implement LMDB backend - [ ] (Implement Persy backend?) - [ ] (Implement other backends? (like RocksDB, ...)) - [x] Implement backend choice in config file and garage server module - [x] Add CLI for converting between DB formats - Exploit the new interface to put more things in transactions - [x] `.updated()` trigger on Garage tables Fix #284 **Bugs** - [x] When exporting sqlite, trees iterate empty?? - [x] LMDB doesn't work **Known issues for various back-ends** - Sled: - Eats all my RAM and also all my disk space - `.len()` has to traverse the whole table - Is actually quite slow on some operations - And is actually pretty bad code... - Sqlite: - Requires a lock to be taken on all operations. The lock is also taken when iterating on a table with `.iter()`, and the lock isn't released until the iterator is dropped. This means that we must be VERY careful to not do anything else inside a `.iter()` loop or else we will have a deadlock! Most such cases have been eliminated from the Garage codebase, but there might still be some that remain. If your Garage-over-Sqlite seems to hang/freeze, this is the reason. - (adapter uses a bunch of unsafe code) - Heed (LMDB): - Not suited for 32-bit machines as it has to map the whole DB in memory. - (adapter uses a tiny bit of unsafe code) **My recommendation:** avoid 32-bit machines and use LMDB as much as possible. **Converting databases** is actually quite easy. 
For example from Sled to LMDB: ```bash cd src/db cargo run --features cli --bin convert -- -i path/to/garage/meta/db -a sled -o path/to/garage/meta/db.lmdb -b lmdb ``` Then, just add this to your `config.toml`: ```toml db_engine = "lmdb" ``` Co-authored-by: Alex Auvolat <alex@adnab.me> Reviewed-on: https://git.deuxfleurs.fr/Deuxfleurs/garage/pulls/322 Co-authored-by: Alex <alex@adnab.me> Co-committed-by: Alex <alex@adnab.me>
2022-06-08 08:01:44 +00:00
2023-05-04 08:36:48 +00:00
struct RepairBlockRefs;
#[async_trait]
2023-05-04 08:36:48 +00:00
impl TableRepair for RepairBlockRefs {
type T = BlockRefTable;
2023-05-04 08:36:48 +00:00
fn table(garage: &Garage) -> &Table<Self::T, TableShardedReplication> {
&garage.block_ref_table
}
2023-05-04 08:45:44 +00:00
async fn process(&mut self, garage: &Garage, mut block_ref: BlockRef) -> Result<bool, Error> {
if !block_ref.deleted.get() {
2023-05-04 08:45:44 +00:00
let ref_exists = garage
2020-04-23 18:36:12 +00:00
.version_table
.get(&block_ref.version, &EmptyKey)
2023-05-04 08:45:44 +00:00
.await?
.map(|v| !v.deleted.get())
.unwrap_or(false);
2020-04-23 18:36:12 +00:00
if !ref_exists {
info!(
"Repair block ref: marking block_ref as deleted: {:?}",
block_ref
);
2023-05-04 08:45:44 +00:00
block_ref.deleted.set();
garage.block_ref_table.insert(&block_ref).await?;
return Ok(true);
}
}
Ok(false)
}
}
// ----
/// Repair procedure for the multipart upload table: marks as deleted every
/// upload that its object no longer lists as an uploading version.
struct RepairMpu;

#[async_trait]
impl TableRepair for RepairMpu {
    type T = MultipartUploadTable;

    fn table(garage: &Garage) -> &Table<Self::T, TableShardedReplication> {
        &garage.mpu_table
    }

    /// Checks that the object at (`mpu.bucket_id`, `mpu.key`) still has a
    /// version with this upload's id for which `is_uploading(Some(true))`
    /// holds.
    ///
    /// Returns `Ok(true)` when the upload had its parts cleared, was marked
    /// as deleted and written back; `Ok(false)` when nothing had to change.
    async fn process(&mut self, garage: &Garage, mut mpu: MultipartUpload) -> Result<bool, Error> {
        // Uploads already marked as deleted need no repair.
        if mpu.deleted.get() {
            return Ok(false);
        }

        let object = garage.object_table.get(&mpu.bucket_id, &mpu.key).await?;
        let ref_exists = object.map_or(false, |o| {
            o.versions()
                .iter()
                .any(|v| v.uuid == mpu.upload_id && v.is_uploading(Some(true)))
        });
        if ref_exists {
            return Ok(false);
        }

        info!(
            "Repair multipart uploads: marking mpu as deleted: {:?}",
            mpu
        );
        mpu.parts.clear();
        mpu.deleted.set();
        garage.mpu_table.insert(&mpu).await?;
        Ok(true)
    }
}