Compare commits

...

2 commits

Author SHA1 Message Date
6a067e30ee doc: documentation of rebalance repair 2023-09-07 13:49:12 +02:00
6b008b5bd3 block manager: add rebalance operation to rebalance multi-hdd setups 2023-09-07 13:44:11 +02:00
6 changed files with 125 additions and 14 deletions

View file

@@ -91,6 +91,16 @@ is definitely lost, then there is no other choice than to declare your S3 object
 as unrecoverable, and to delete them properly from the data store. This can be done
 using the `garage block purge` command.
 
+## Rebalancing data directories
+
+In [multi-HDD setups](@/documentation/operations/multi-hdd.md), to ensure that
+data blocks are well balanced between storage locations, you may run a
+rebalance operation using `garage repair rebalance`. This is useful when
+adding storage locations or when the capacities of the storage locations
+have been changed. Once this is finished, Garage will know a single possible
+location for each block, which can increase access speed. This operation
+will also move out all data from locations marked as read-only.
+
 # Metadata operations
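A note on invocation: the documentation above names only the subcommand. Assuming the confirmation flag shared by Garage's other `garage repair` operations (an assumption; the flag itself is not part of this diff), the operation would be launched with `garage repair --yes rebalance`. Since the worker introduced below iterates only over the local block store, the command has to be run on each node whose data directories should be rebalanced.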

View file

@@ -252,6 +252,14 @@ impl DataLayout {
         path.push(hex::encode(&hash.as_slice()[1..2]));
         path
     }
+
+    pub(crate) fn without_secondary_locations(&self) -> Self {
+        Self {
+            data_dirs: self.data_dirs.clone(),
+            part_prim: self.part_prim.clone(),
+            part_sec: self.part_sec.iter().map(|_| vec![]).collect::<Vec<_>>(),
+        }
+    }
 }
 
 impl InitialFormat for DataLayout {
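For readers unfamiliar with the layout structure, here is a minimal sketch of what `without_secondary_locations` computes, using simplified stand-in types rather than the real `DataLayout` (whose fields carry more information): the primary assignment of each partition is kept, while every partition's list of secondary locations is replaced by an empty one, preserving the number of partitions.

```rust
// Simplified stand-in for DataLayout: one primary data-dir index per
// partition, plus a list of secondary data-dir indices per partition.
#[derive(Clone, Debug, PartialEq)]
struct Layout {
    part_prim: Vec<usize>,
    part_sec: Vec<Vec<usize>>,
}

impl Layout {
    fn without_secondary_locations(&self) -> Self {
        Self {
            part_prim: self.part_prim.clone(),
            // Same number of partitions as before, but none of them keeps
            // any secondary location.
            part_sec: self.part_sec.iter().map(|_| vec![]).collect(),
        }
    }
}

fn main() {
    let layout = Layout {
        part_prim: vec![0, 1, 0],
        part_sec: vec![vec![1], vec![], vec![1, 2]],
    };
    let cleaned = layout.without_secondary_locations();
    assert_eq!(cleaned.part_prim, layout.part_prim);
    assert!(cleaned.part_sec.iter().all(|s| s.is_empty()));
}
```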

View file

@@ -3,7 +3,7 @@ use std::pin::Pin;
 use std::sync::Arc;
 use std::time::Duration;
 
-use arc_swap::ArcSwapOption;
+use arc_swap::{ArcSwap, ArcSwapOption};
 use async_trait::async_trait;
 use bytes::Bytes;
 use rand::prelude::*;
@@ -83,7 +83,9 @@ pub struct BlockManager {
     /// Directory/ies in which block are stored
     pub data_dir: DataDirEnum,
     /// Data layout
-    pub(crate) data_layout: DataLayout,
+    pub(crate) data_layout: ArcSwap<DataLayout>,
+    /// Data layout persister
+    pub(crate) data_layout_persister: Persister<DataLayout>,
 
     data_fsync: bool,
     compression_level: Option<i32>,
@@ -129,9 +131,9 @@ impl BlockManager {
         system: Arc<System>,
     ) -> Result<Arc<Self>, Error> {
         // Load or compute layout, i.e. assignment of data blocks to the different data directories
-        let layout_persister: Persister<DataLayout> =
+        let data_layout_persister: Persister<DataLayout> =
             Persister::new(&system.metadata_dir, "data_layout");
-        let data_layout = match layout_persister.load() {
+        let data_layout = match data_layout_persister.load() {
             Ok(mut layout) => {
                 layout
                     .update(&data_dir)
@@ -140,7 +142,7 @@
             }
             Err(_) => DataLayout::initialize(&data_dir).ok_or_message("invalid data_dir config")?,
         };
-        layout_persister
+        data_layout_persister
             .save(&data_layout)
             .expect("cannot save data_layout");
@@ -168,7 +170,8 @@
         let block_manager = Arc::new(Self {
             replication,
             data_dir,
-            data_layout,
+            data_layout: ArcSwap::new(Arc::new(data_layout)),
+            data_layout_persister,
             data_fsync,
             compression_level,
             mutation_lock: vec![(); MUTEX_COUNT]
@@ -606,9 +609,10 @@ impl BlockManager {
     /// Find the path where a block is currently stored
     pub(crate) async fn find_block(&self, hash: &Hash) -> Option<DataBlockPath> {
-        let dirs = Some(self.data_layout.primary_block_dir(hash))
+        let data_layout = self.data_layout.load_full();
+        let dirs = Some(data_layout.primary_block_dir(hash))
             .into_iter()
-            .chain(self.data_layout.secondary_block_dirs(hash));
+            .chain(data_layout.secondary_block_dirs(hash));
         let filename = hex::encode(hash.as_ref());
 
         for dir in dirs {
@@ -682,7 +686,7 @@ impl BlockManagerLocked {
         let compressed = data.is_compressed();
         let data = data.inner_buffer();
 
-        let directory = mgr.data_layout.primary_block_dir(hash);
+        let directory = mgr.data_layout.load().primary_block_dir(hash);
 
         let mut tgt_path = directory.clone();
         tgt_path.push(hex::encode(hash));
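The recurring change in this file is the switch from a plain `DataLayout` field to `ArcSwap<DataLayout>`, so that the rebalance worker can publish a new layout at runtime without blocking concurrent readers. Here is a minimal sketch of that pattern with a stand-in type instead of Garage's `DataLayout` (it only assumes the `arc_swap` crate as a dependency):

```rust
use std::sync::Arc;

use arc_swap::ArcSwap;

// Stand-in for the real DataLayout.
#[derive(Debug)]
struct Layout {
    data_dirs: Vec<String>,
}

fn main() {
    let layout = ArcSwap::new(Arc::new(Layout {
        data_dirs: vec!["/mnt/hdd1".to_string()],
    }));

    // Readers take a cheap snapshot; load_full() returns an owned Arc that
    // stays valid even if the layout is swapped out afterwards.
    let snapshot: Arc<Layout> = layout.load_full();

    // A writer builds the replacement on the side, then swaps it in
    // atomically; in-flight readers keep using their old snapshot.
    layout.store(Arc::new(Layout {
        data_dirs: vec!["/mnt/hdd1".to_string(), "/mnt/hdd2".to_string()],
    }));

    println!("before: {:?}", snapshot);
    println!("after: {:?}", layout.load_full());
}
```

Note also the ordering in the rebalance worker further down: the new layout is persisted with `save_async` before it is `store()`d in memory, so a crash between the two steps leaves a persisted layout that is still correct, since by that point every block is already in its primary location.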

View file

@@ -518,6 +518,86 @@ impl Worker for ScrubWorker {
     }
 }
 
+// ---- ---- ----
+// THIRD KIND OF REPAIR: REBALANCING DATA BLOCKS
+// between multiple storage locations.
+// This is a one-shot repair operation that can be launched,
+// checks everything, and then exits.
+// ---- ---- ----
+
+pub struct RebalanceWorker {
+    manager: Arc<BlockManager>,
+    block_iter: BlockStoreIterator,
+    moved: usize,
+    moved_bytes: usize,
+}
+
+impl RebalanceWorker {
+    pub fn new(manager: Arc<BlockManager>) -> Self {
+        let block_iter = BlockStoreIterator::new(&manager);
+        Self {
+            manager,
+            block_iter,
+            moved: 0,
+            moved_bytes: 0,
+        }
+    }
+}
+
+#[async_trait]
+impl Worker for RebalanceWorker {
+    fn name(&self) -> String {
+        "Block rebalance worker".into()
+    }
+
+    fn status(&self) -> WorkerStatus {
+        WorkerStatus {
+            progress: Some(format!("{:.2}%", self.block_iter.progress() * 100.)),
+            freeform: vec![
+                format!("Blocks moved: {}", self.moved),
+                format!("Bytes moved: {}", self.moved_bytes),
+            ],
+            ..Default::default()
+        }
+    }
+
+    async fn work(&mut self, _must_exit: &mut watch::Receiver<bool>) -> Result<WorkerState, Error> {
+        if let Some((path, hash)) = self.block_iter.next().await? {
+            let prim_loc = self.manager.data_layout.load().primary_block_dir(&hash);
+            if path.parent().expect("no parent?") != prim_loc {
+                // block is not in its primary location,
+                // move it there (reading and re-writing does the trick)
+                let data = self.manager.read_block(&hash).await?;
+                self.manager.write_block(&hash, &data).await?;
+                self.moved += 1;
+                self.moved_bytes += data.inner_buffer().len();
+            }
+            Ok(WorkerState::Busy)
+        } else {
+            // all blocks are in their primary location:
+            // - the ones we moved now are
+            // - the ones written in the meantime always were, because we only
+            //   write to primary locations
+            // so we can safely remove all secondary locations from the data layout
+            let new_layout = self
+                .manager
+                .data_layout
+                .load_full()
+                .without_secondary_locations();
+            self.manager
+                .data_layout_persister
+                .save_async(&new_layout)
+                .await?;
+            self.manager.data_layout.store(Arc::new(new_layout));
+            Ok(WorkerState::Done)
+        }
+    }
+
+    async fn wait_for_work(&mut self) -> WorkerState {
+        unreachable!()
+    }
+}
+
 // ---- ---- ----
 // UTILITY FOR ENUMERATING THE BLOCK STORE
 // ---- ---- ----
@@ -526,16 +606,16 @@ const PROGRESS_FP: u64 = 1_000_000_000;
 impl BlockStoreIterator {
     fn new(manager: &BlockManager) -> Self {
-        let min_cap = manager
-            .data_layout
+        let data_layout = manager.data_layout.load_full();
+
+        let min_cap = data_layout
             .data_dirs
             .iter()
             .filter_map(|x| x.capacity())
             .min()
             .unwrap_or(0);
-        let sum_cap = manager
-            .data_layout
+        let sum_cap = data_layout
             .data_dirs
             .iter()
             .map(|x| x.capacity().unwrap_or(min_cap /* approximation */))
@@ -543,7 +623,7 @@ impl BlockStoreIterator {
         let mut cum_cap = 0;
         let mut todo = vec![];
-        for dir in manager.data_layout.data_dirs.iter() {
+        for dir in data_layout.data_dirs.iter() {
             let cap = match dir.state {
                 DataDirState::Active { capacity } => capacity,
                 _ => min_cap,
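The `min_cap`/`sum_cap` computation above weights each data directory by its configured capacity, so that `progress()` can report a meaningful percentage across unevenly sized disks. A short illustration with hypothetical capacities, mirroring the `unwrap_or(min_cap /* approximation */)` fallback for directories that declare no capacity:

```rust
fn main() {
    // Hypothetical capacities in bytes for three data dirs; None models a
    // dir without a declared capacity (e.g. one marked read-only).
    let caps: Vec<Option<u64>> = vec![Some(4_000), None, Some(12_000)];

    let min_cap = caps.iter().filter_map(|c| *c).min().unwrap_or(0);
    let sum_cap: u64 = caps.iter().map(|c| c.unwrap_or(min_cap)).sum();

    // Each dir's share of the overall progress estimate.
    for (i, c) in caps.iter().enumerate() {
        let share = c.unwrap_or(min_cap) as f64 / sum_cap as f64;
        println!("dir {}: {:.1}% of the block store", i, share * 100.0);
    }
}
```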

View file

@@ -471,6 +471,9 @@ pub enum RepairWhat {
         #[structopt(subcommand)]
         cmd: ScrubCmd,
     },
+    /// Rebalance data blocks among storage locations
+    #[structopt(name = "rebalance", version = garage_version())]
+    Rebalance,
 }
 
 #[derive(Serialize, Deserialize, StructOpt, Debug, Eq, PartialEq, Clone)]

View file

@@ -70,6 +70,12 @@ pub async fn launch_online_repair(
             info!("Sending command to scrub worker: {:?}", cmd);
             garage.block_manager.send_scrub_command(cmd).await?;
         }
+        RepairWhat::Rebalance => {
+            info!("Rebalancing the stored blocks among storage locations");
+            bg.spawn_worker(garage_block::repair::RebalanceWorker::new(
+                garage.block_manager.clone(),
+            ));
+        }
     }
     Ok(())
 }
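With this wiring in place, the new repair is launched like the existing ones: `garage repair --yes rebalance` (again assuming the shared `--yes` confirmation flag) makes `launch_online_repair` spawn the `RebalanceWorker` in the node's background runtime, and the `status()` implemented earlier (percentage progress, blocks moved, bytes moved) is what Garage's worker-monitoring CLI should show while the one-shot operation runs to completion.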