garage/src/table/replication/sharded.rs

84 lines
2.1 KiB
Rust
Raw Permalink Normal View History

2021-03-16 10:14:27 +00:00
use std::sync::Arc;
use garage_rpc::layout::*;
2021-10-14 09:50:12 +00:00
use garage_rpc::system::System;
2020-04-24 10:10:01 +00:00
use garage_util::data::*;
2021-03-11 15:54:15 +00:00
use crate::replication::*;
2021-03-26 18:41:46 +00:00
/// Sharded replication schema:
/// - based on the ring of nodes, a certain set of neighbors
/// store entries, given as a function of the position of the
/// entry's hash in the ring
/// - reads are done on all of the nodes that replicate the data
/// - writes as well
#[derive(Clone)]
pub struct TableShardedReplication {
2021-03-26 18:41:46 +00:00
/// The membership manager of this node
2021-03-16 10:14:27 +00:00
pub system: Arc<System>,
2021-03-26 18:41:46 +00:00
/// How many time each data should be replicated
pub replication_factor: usize,
2021-03-26 18:41:46 +00:00
/// How many nodes to contact for a read, should be at most `replication_factor`
pub read_quorum: usize,
2021-03-26 18:41:46 +00:00
/// How many nodes to contact for a write, should be at most `replication_factor`
pub write_quorum: usize,
}
impl TableReplication for TableShardedReplication {
2023-11-15 14:40:44 +00:00
type WriteSets = WriteLock<Vec<Vec<Uuid>>>;
2023-11-14 13:28:16 +00:00
fn storage_nodes(&self, hash: &Hash) -> Vec<Uuid> {
self.system.cluster_layout().storage_nodes_of(hash)
}
fn read_nodes(&self, hash: &Hash) -> Vec<Uuid> {
2023-11-14 13:28:16 +00:00
self.system.cluster_layout().read_nodes_of(hash)
}
fn read_quorum(&self) -> usize {
self.read_quorum
}
2023-11-15 14:40:44 +00:00
fn write_sets(&self, hash: &Hash) -> Self::WriteSets {
self.system.layout_manager.write_sets_of(hash)
}
2021-03-16 10:14:27 +00:00
fn write_quorum(&self) -> usize {
self.write_quorum
}
2021-03-16 11:18:03 +00:00
fn partition_of(&self, hash: &Hash) -> Partition {
self.system.cluster_layout().current().partition_of(hash)
2021-03-16 11:18:03 +00:00
}
fn sync_partitions(&self) -> SyncPartitions {
let layout = self.system.cluster_layout();
let layout_version = layout.ack_map_min();
let mut partitions = layout
.current()
.partitions()
.map(|(partition, first_hash)| {
let storage_sets = layout.storage_sets_of(&first_hash);
SyncPartition {
partition,
first_hash,
last_hash: [0u8; 32].into(), // filled in just after
storage_sets,
}
})
.collect::<Vec<_>>();
for i in 0..partitions.len() {
partitions[i].last_hash = if i + 1 < partitions.len() {
partitions[i + 1].first_hash
} else {
[0xFFu8; 32].into()
};
}
SyncPartitions {
layout_version,
partitions,
}
}
}