forked from Deuxfleurs/garage
83 lines
2.1 KiB
Rust
83 lines
2.1 KiB
Rust
use std::sync::Arc;
|
|
|
|
use garage_rpc::layout::*;
|
|
use garage_rpc::system::System;
|
|
use garage_util::data::*;
|
|
|
|
use crate::replication::*;
|
|
|
|
/// Sharded replication schema:
|
|
/// - based on the ring of nodes, a certain set of neighbors
|
|
/// store entries, given as a function of the position of the
|
|
/// entry's hash in the ring
|
|
/// - reads are done on all of the nodes that replicate the data
|
|
/// - writes as well
|
|
#[derive(Clone)]
|
|
pub struct TableShardedReplication {
|
|
/// The membership manager of this node
|
|
pub system: Arc<System>,
|
|
/// How many time each data should be replicated
|
|
pub replication_factor: usize,
|
|
/// How many nodes to contact for a read, should be at most `replication_factor`
|
|
pub read_quorum: usize,
|
|
/// How many nodes to contact for a write, should be at most `replication_factor`
|
|
pub write_quorum: usize,
|
|
}
|
|
|
|
impl TableReplication for TableShardedReplication {
|
|
type WriteSets = WriteLock<Vec<Vec<Uuid>>>;
|
|
|
|
fn storage_nodes(&self, hash: &Hash) -> Vec<Uuid> {
|
|
self.system.cluster_layout().storage_nodes_of(hash)
|
|
}
|
|
|
|
fn read_nodes(&self, hash: &Hash) -> Vec<Uuid> {
|
|
self.system.cluster_layout().read_nodes_of(hash)
|
|
}
|
|
fn read_quorum(&self) -> usize {
|
|
self.read_quorum
|
|
}
|
|
|
|
fn write_sets(&self, hash: &Hash) -> Self::WriteSets {
|
|
self.system.layout_manager.write_sets_of(hash)
|
|
}
|
|
fn write_quorum(&self) -> usize {
|
|
self.write_quorum
|
|
}
|
|
|
|
fn partition_of(&self, hash: &Hash) -> Partition {
|
|
self.system.cluster_layout().current().partition_of(hash)
|
|
}
|
|
|
|
fn sync_partitions(&self) -> SyncPartitions {
|
|
let layout = self.system.cluster_layout();
|
|
let layout_version = layout.ack_map_min();
|
|
|
|
let mut partitions = layout
|
|
.current()
|
|
.partitions()
|
|
.map(|(partition, first_hash)| {
|
|
let storage_sets = layout.storage_sets_of(&first_hash);
|
|
SyncPartition {
|
|
partition,
|
|
first_hash,
|
|
last_hash: [0u8; 32].into(), // filled in just after
|
|
storage_sets,
|
|
}
|
|
})
|
|
.collect::<Vec<_>>();
|
|
|
|
for i in 0..partitions.len() {
|
|
partitions[i].last_hash = if i + 1 < partitions.len() {
|
|
partitions[i + 1].first_hash
|
|
} else {
|
|
[0xFFu8; 32].into()
|
|
};
|
|
}
|
|
|
|
SyncPartitions {
|
|
layout_version,
|
|
partitions,
|
|
}
|
|
}
|
|
}
|