block manager: read_block using old layout versions if necessary
This commit is contained in:
parent
3ecd14b9f6
commit
d6d239fc79
5 changed files with 50 additions and 9 deletions
|
@ -264,8 +264,10 @@ impl BlockManager {
|
||||||
F: Fn(DataBlockHeader, ByteStream) -> Fut,
|
F: Fn(DataBlockHeader, ByteStream) -> Fut,
|
||||||
Fut: futures::Future<Output = Result<T, Error>>,
|
Fut: futures::Future<Output = Result<T, Error>>,
|
||||||
{
|
{
|
||||||
let who = self.replication.read_nodes(hash);
|
let who = self
|
||||||
let who = self.system.rpc_helper().request_order(&who);
|
.system
|
||||||
|
.cluster_layout()
|
||||||
|
.block_read_nodes_of(hash, self.system.rpc_helper());
|
||||||
|
|
||||||
for node in who.iter() {
|
for node in who.iter() {
|
||||||
let node_id = NodeID::from(*node);
|
let node_id = NodeID::from(*node);
|
||||||
|
|
|
@ -7,6 +7,7 @@ use serde::{Deserialize, Serialize};
|
||||||
use garage_util::data::*;
|
use garage_util::data::*;
|
||||||
|
|
||||||
use super::schema::*;
|
use super::schema::*;
|
||||||
|
use crate::rpc_helper::RpcHelper;
|
||||||
|
|
||||||
#[derive(Debug, Clone, Serialize, Deserialize, Default, PartialEq, Eq)]
|
#[derive(Debug, Clone, Serialize, Deserialize, Default, PartialEq, Eq)]
|
||||||
pub struct LayoutDigest {
|
pub struct LayoutDigest {
|
||||||
|
@ -140,6 +141,28 @@ impl LayoutHelper {
|
||||||
.collect()
|
.collect()
|
||||||
}
|
}
|
||||||
|
|
||||||
|
pub fn block_read_nodes_of(&self, position: &Hash, rpc_helper: &RpcHelper) -> Vec<Uuid> {
|
||||||
|
let mut ret = Vec::with_capacity(12);
|
||||||
|
let ver_iter = self
|
||||||
|
.layout()
|
||||||
|
.versions
|
||||||
|
.iter()
|
||||||
|
.rev()
|
||||||
|
.chain(self.layout().old_versions.iter().rev());
|
||||||
|
for ver in ver_iter {
|
||||||
|
if ver.version > self.sync_map_min {
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
let nodes = ver.nodes_of(position, ver.replication_factor);
|
||||||
|
for node in rpc_helper.request_order(nodes) {
|
||||||
|
if !ret.contains(&node) {
|
||||||
|
ret.push(node);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
ret
|
||||||
|
}
|
||||||
|
|
||||||
pub(crate) fn write_sets_of(&self, position: &Hash) -> Vec<Vec<Uuid>> {
|
pub(crate) fn write_sets_of(&self, position: &Hash) -> Vec<Vec<Uuid>> {
|
||||||
self.layout()
|
self.layout()
|
||||||
.versions
|
.versions
|
||||||
|
|
|
@ -18,6 +18,7 @@ impl LayoutHistory {
|
||||||
|
|
||||||
LayoutHistory {
|
LayoutHistory {
|
||||||
versions: vec![version],
|
versions: vec![version],
|
||||||
|
old_versions: vec![],
|
||||||
update_trackers: Default::default(),
|
update_trackers: Default::default(),
|
||||||
staging: Lww::raw(0, staging),
|
staging: Lww::raw(0, staging),
|
||||||
}
|
}
|
||||||
|
@ -86,11 +87,20 @@ impl LayoutHistory {
|
||||||
.min(&all_nongateway_nodes, min_version);
|
.min(&all_nongateway_nodes, min_version);
|
||||||
if self.min_stored() < sync_ack_map_min {
|
if self.min_stored() < sync_ack_map_min {
|
||||||
let removed = self.versions.remove(0);
|
let removed = self.versions.remove(0);
|
||||||
info!("Layout history: pruning old version {}", removed.version);
|
info!(
|
||||||
|
"Layout history: moving version {} to old_versions",
|
||||||
|
removed.version
|
||||||
|
);
|
||||||
|
self.old_versions.push(removed);
|
||||||
} else {
|
} else {
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
while self.old_versions.len() > OLD_VERSION_COUNT {
|
||||||
|
let removed = self.old_versions.remove(0);
|
||||||
|
info!("Layout history: removing old_version {}", removed.version);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
pub(crate) fn clamp_update_trackers(&mut self, nodes: &[Uuid]) {
|
pub(crate) fn clamp_update_trackers(&mut self, nodes: &[Uuid]) {
|
||||||
|
|
|
@ -193,12 +193,18 @@ mod v010 {
|
||||||
use std::collections::BTreeMap;
|
use std::collections::BTreeMap;
|
||||||
pub use v09::{LayoutParameters, NodeRole, NodeRoleV, ZoneRedundancy};
|
pub use v09::{LayoutParameters, NodeRole, NodeRoleV, ZoneRedundancy};
|
||||||
|
|
||||||
|
pub const OLD_VERSION_COUNT: usize = 5;
|
||||||
|
|
||||||
/// The history of cluster layouts, with trackers to keep a record
|
/// The history of cluster layouts, with trackers to keep a record
|
||||||
/// of which nodes are up-to-date to current cluster data
|
/// of which nodes are up-to-date to current cluster data
|
||||||
#[derive(Clone, Debug, Serialize, Deserialize, PartialEq)]
|
#[derive(Clone, Debug, Serialize, Deserialize, PartialEq)]
|
||||||
pub struct LayoutHistory {
|
pub struct LayoutHistory {
|
||||||
/// The versions currently in use in the cluster
|
/// The versions currently in use in the cluster
|
||||||
pub versions: Vec<LayoutVersion>,
|
pub versions: Vec<LayoutVersion>,
|
||||||
|
/// At most 5 of the previous versions, not used by the garage_table
|
||||||
|
/// module, but usefull for the garage_block module to find data blocks
|
||||||
|
/// that have not yet been moved
|
||||||
|
pub old_versions: Vec<LayoutVersion>,
|
||||||
|
|
||||||
/// Update trackers
|
/// Update trackers
|
||||||
pub update_trackers: UpdateTrackers,
|
pub update_trackers: UpdateTrackers,
|
||||||
|
@ -300,6 +306,7 @@ mod v010 {
|
||||||
};
|
};
|
||||||
Self {
|
Self {
|
||||||
versions: vec![version],
|
versions: vec![version],
|
||||||
|
old_versions: vec![],
|
||||||
update_trackers: UpdateTrackers {
|
update_trackers: UpdateTrackers {
|
||||||
ack_map: update_tracker.clone(),
|
ack_map: update_tracker.clone(),
|
||||||
sync_map: update_tracker.clone(),
|
sync_map: update_tracker.clone(),
|
||||||
|
|
|
@ -267,7 +267,7 @@ impl RpcHelper {
|
||||||
// When there are errors, we start new requests to compensate.
|
// When there are errors, we start new requests to compensate.
|
||||||
|
|
||||||
// Reorder requests to priorize closeness / low latency
|
// Reorder requests to priorize closeness / low latency
|
||||||
let request_order = self.request_order(to);
|
let request_order = self.request_order(to.iter().copied());
|
||||||
let send_all_at_once = strategy.rs_send_all_at_once.unwrap_or(false);
|
let send_all_at_once = strategy.rs_send_all_at_once.unwrap_or(false);
|
||||||
|
|
||||||
// Build future for each request
|
// Build future for each request
|
||||||
|
@ -335,7 +335,7 @@ impl RpcHelper {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
pub fn request_order(&self, nodes: &[Uuid]) -> Vec<Uuid> {
|
pub fn request_order(&self, nodes: impl Iterator<Item = Uuid>) -> Vec<Uuid> {
|
||||||
// Retrieve some status variables that we will use to sort requests
|
// Retrieve some status variables that we will use to sort requests
|
||||||
let peer_list = self.0.fullmesh.get_peer_list();
|
let peer_list = self.0.fullmesh.get_peer_list();
|
||||||
let layout = self.0.layout.read().unwrap();
|
let layout = self.0.layout.read().unwrap();
|
||||||
|
@ -351,9 +351,8 @@ impl RpcHelper {
|
||||||
// By sorting this vec, we priorize ourself, then nodes in the same zone,
|
// By sorting this vec, we priorize ourself, then nodes in the same zone,
|
||||||
// and within a same zone we priorize nodes with the lowest latency.
|
// and within a same zone we priorize nodes with the lowest latency.
|
||||||
let mut nodes = nodes
|
let mut nodes = nodes
|
||||||
.iter()
|
|
||||||
.map(|to| {
|
.map(|to| {
|
||||||
let peer_zone = match layout.current().node_role(to) {
|
let peer_zone = match layout.current().node_role(&to) {
|
||||||
Some(pc) => &pc.zone,
|
Some(pc) => &pc.zone,
|
||||||
None => "",
|
None => "",
|
||||||
};
|
};
|
||||||
|
@ -363,10 +362,10 @@ impl RpcHelper {
|
||||||
.and_then(|pi| pi.avg_ping)
|
.and_then(|pi| pi.avg_ping)
|
||||||
.unwrap_or_else(|| Duration::from_secs(10));
|
.unwrap_or_else(|| Duration::from_secs(10));
|
||||||
(
|
(
|
||||||
*to != self.0.our_node_id,
|
to != self.0.our_node_id,
|
||||||
peer_zone != our_zone,
|
peer_zone != our_zone,
|
||||||
peer_avg_ping,
|
peer_avg_ping,
|
||||||
*to,
|
to,
|
||||||
)
|
)
|
||||||
})
|
})
|
||||||
.collect::<Vec<_>>();
|
.collect::<Vec<_>>();
|
||||||
|
|
Loading…
Reference in a new issue