diff --git a/doc/book/operations/durability-repairs.md b/doc/book/operations/durability-repairs.md index 578899a8..f4450dae 100644 --- a/doc/book/operations/durability-repairs.md +++ b/doc/book/operations/durability-repairs.md @@ -19,7 +19,7 @@ connecting to. To run on all nodes, add the `-a` flag as follows: # Data block operations -## Data store scrub +## Data store scrub {#scrub} Scrubbing the data store means examining each individual data block to check that their content is correct, by verifying their hash. Any block found to be corrupted diff --git a/doc/book/reference-manual/configuration.md b/doc/book/reference-manual/configuration.md index 81af1de0..8e87b7d8 100644 --- a/doc/book/reference-manual/configuration.md +++ b/doc/book/reference-manual/configuration.md @@ -14,6 +14,7 @@ metadata_dir = "/var/lib/garage/meta" data_dir = "/var/lib/garage/data" metadata_fsync = true data_fsync = false +disable_scrub = false db_engine = "lmdb" @@ -87,6 +88,7 @@ Top-level configuration options: [`data_dir`](#data_dir), [`data_fsync`](#data_fsync), [`db_engine`](#db_engine), +[`disable_scrub`](#disable_scrub), [`lmdb_map_size`](#lmdb_map_size), [`metadata_dir`](#metadata_dir), [`metadata_fsync`](#metadata_fsync), @@ -344,6 +346,22 @@ at the cost of a moderate drop in write performance. Similarly to `metatada_fsync`, this is likely not necessary if geographical replication is used. +#### `disable_scrub` {#disable_scrub} + +By default, Garage runs a scrub of the data directory approximately once per +month, with a random delay to avoid all nodes running at the same time. When +it scrubs the data directory, Garage will read all of the data files stored on +disk to check their integrity, and will rebuild any data files that it finds +corrupted, using the remaining valid copies stored on other nodes. +See [this page](@/documentation/operations/durability-repair.md#scrub) for details. + +Set the `disable_scrub` configuration value to `true` if you don't need Garage +to scrub the data directory, for instance if you are already scrubbing at the +filesystem level. Note that in this case, if you find a corrupted data file, +you should delete it from the data directory and then call `garage repair +blocks` on the node to ensure that it re-obtains a copy from another node on +the network. + #### `block_size` {#block_size} Garage splits stored objects in consecutive chunks of size `block_size` diff --git a/src/block/manager.rs b/src/block/manager.rs index 890ea8b7..ef7279e9 100644 --- a/src/block/manager.rs +++ b/src/block/manager.rs @@ -22,7 +22,7 @@ use garage_net::stream::{read_stream_to_end, stream_asyncread, ByteStream}; use garage_db as db; use garage_util::background::{vars, BackgroundRunner}; -use garage_util::config::DataDirEnum; +use garage_util::config::Config; use garage_util::data::*; use garage_util::error::*; use garage_util::metrics::RecordDuration; @@ -84,6 +84,7 @@ pub struct BlockManager { data_fsync: bool, compression_level: Option, + disable_scrub: bool, mutation_lock: Vec>, @@ -119,9 +120,7 @@ struct BlockManagerLocked(); impl BlockManager { pub fn new( db: &db::Db, - data_dir: DataDirEnum, - data_fsync: bool, - compression_level: Option, + config: &Config, replication: TableShardedReplication, system: Arc, ) -> Result, Error> { @@ -131,11 +130,13 @@ impl BlockManager { let data_layout = match data_layout_persister.load() { Ok(mut layout) => { layout - .update(&data_dir) + .update(&config.data_dir) .ok_or_message("invalid data_dir config")?; layout } - Err(_) => DataLayout::initialize(&data_dir).ok_or_message("invalid data_dir config")?, + Err(_) => { + DataLayout::initialize(&config.data_dir).ok_or_message("invalid data_dir config")? + } }; data_layout_persister .save(&data_layout) @@ -154,7 +155,7 @@ impl BlockManager { .endpoint("garage_block/manager.rs/Rpc".to_string()); let metrics = BlockManagerMetrics::new( - compression_level, + config.compression_level, rc.rc.clone(), resync.queue.clone(), resync.errors.clone(), @@ -166,8 +167,9 @@ impl BlockManager { replication, data_layout: ArcSwap::new(Arc::new(data_layout)), data_layout_persister, - data_fsync, - compression_level, + data_fsync: config.data_fsync, + disable_scrub: config.disable_scrub, + compression_level: config.compression_level, mutation_lock: vec![(); MUTEX_COUNT] .iter() .map(|_| Mutex::new(BlockManagerLocked())) @@ -194,33 +196,37 @@ impl BlockManager { } // Spawn scrub worker - let (scrub_tx, scrub_rx) = mpsc::channel(1); - self.tx_scrub_command.store(Some(Arc::new(scrub_tx))); - bg.spawn_worker(ScrubWorker::new( - self.clone(), - scrub_rx, - self.scrub_persister.clone(), - )); + if !self.disable_scrub { + let (scrub_tx, scrub_rx) = mpsc::channel(1); + self.tx_scrub_command.store(Some(Arc::new(scrub_tx))); + bg.spawn_worker(ScrubWorker::new( + self.clone(), + scrub_rx, + self.scrub_persister.clone(), + )); + } } pub fn register_bg_vars(&self, vars: &mut vars::BgVars) { self.resync.register_bg_vars(vars); - vars.register_rw( - &self.scrub_persister, - "scrub-tranquility", - |p| p.get_with(|x| x.tranquility), - |p, tranquility| p.set_with(|x| x.tranquility = tranquility), - ); - vars.register_ro(&self.scrub_persister, "scrub-last-completed", |p| { - p.get_with(|x| msec_to_rfc3339(x.time_last_complete_scrub)) - }); - vars.register_ro(&self.scrub_persister, "scrub-next-run", |p| { - p.get_with(|x| msec_to_rfc3339(x.time_next_run_scrub)) - }); - vars.register_ro(&self.scrub_persister, "scrub-corruptions_detected", |p| { - p.get_with(|x| x.corruptions_detected) - }); + if !self.disable_scrub { + vars.register_rw( + &self.scrub_persister, + "scrub-tranquility", + |p| p.get_with(|x| x.tranquility), + |p, tranquility| p.set_with(|x| x.tranquility = tranquility), + ); + vars.register_ro(&self.scrub_persister, "scrub-last-completed", |p| { + p.get_with(|x| msec_to_rfc3339(x.time_last_complete_scrub)) + }); + vars.register_ro(&self.scrub_persister, "scrub-next-run", |p| { + p.get_with(|x| msec_to_rfc3339(x.time_next_run_scrub)) + }); + vars.register_ro(&self.scrub_persister, "scrub-corruptions_detected", |p| { + p.get_with(|x| x.corruptions_detected) + }); + } } /// Ask nodes that might have a (possibly compressed) block for it diff --git a/src/model/garage.rs b/src/model/garage.rs index 18421ca3..acf943f6 100644 --- a/src/model/garage.rs +++ b/src/model/garage.rs @@ -177,14 +177,7 @@ impl Garage { }; info!("Initialize block manager..."); - let block_manager = BlockManager::new( - &db, - config.data_dir.clone(), - config.data_fsync, - config.compression_level, - data_rep_param, - system.clone(), - )?; + let block_manager = BlockManager::new(&db, &config, data_rep_param, system.clone())?; block_manager.register_bg_vars(&mut bg_vars); // ---- admin tables ---- diff --git a/src/util/config.rs b/src/util/config.rs index 056c625d..7338a506 100644 --- a/src/util/config.rs +++ b/src/util/config.rs @@ -23,6 +23,10 @@ pub struct Config { #[serde(default)] pub data_fsync: bool, + /// Disable automatic scrubbing of the data directory + #[serde(default)] + pub disable_scrub: bool, + /// Size of data blocks to save to disk #[serde( deserialize_with = "deserialize_capacity",