Try to solve persistence issues #259

Merged
lx merged 5 commits from fix-resync into main 2022-03-14 15:27:15 +00:00
15 changed files with 68 additions and 66 deletions

View file

@ -200,12 +200,7 @@ pub fn find_matching_cors_rule<'a>(
None => vec![],
};
return Ok(cors_config.iter().find(|rule| {
cors_rule_matches(
rule,
origin,
&req.method().to_string(),
request_headers.iter(),
)
cors_rule_matches(rule, origin, req.method().as_ref(), request_headers.iter())
}));
}
}

View file

@ -1042,12 +1042,12 @@ mod tests {
query.common.prefix = "a/".to_string();
assert_eq!(
common_prefix(&objs.get(0).unwrap(), &query.common),
common_prefix(objs.get(0).unwrap(), &query.common),
Some("a/b/")
);
query.common.prefix = "a/b/".to_string();
assert_eq!(common_prefix(&objs.get(0).unwrap(), &query.common), None);
assert_eq!(common_prefix(objs.get(0).unwrap(), &query.common), None);
}
#[test]
@ -1272,7 +1272,7 @@ mod tests {
Version {
bucket_id: uuid,
key: "a".to_string(),
uuid: uuid,
uuid,
deleted: false.into(),
blocks: crdt::Map::<VersionBlockKey, VersionBlock>::from_iter(blocks),
parts_etags: crdt::Map::<u64, String>::from_iter(etags),

View file

@ -259,8 +259,7 @@ impl RoutingRuleInner {
let has_prefix = self
.condition
.as_ref()
.map(|c| c.prefix.as_ref())
.flatten()
.and_then(|c| c.prefix.as_ref())
.is_some();
self.redirect.validate(has_prefix)
}

View file

@ -51,7 +51,7 @@ pub async fn check_payload_signature(
let canonical_request = canonical_request(
request.method(),
&request.uri().path().to_string(),
request.uri().path(),
&canonical_query_string(request.uri()),
&headers,
&authorization.signed_headers,

View file

@ -115,7 +115,7 @@ async fn cli_command(opt: Opt) -> Result<(), Error> {
} else {
let node_id = garage_rpc::system::read_node_id(&config.as_ref().unwrap().metadata_dir)
.err_context(READ_KEY_ERROR)?;
if let Some(a) = config.as_ref().map(|c| c.rpc_public_addr).flatten() {
if let Some(a) = config.as_ref().and_then(|c| c.rpc_public_addr) {
(node_id, a)
} else {
let default_addr = SocketAddr::new(

View file

@ -27,7 +27,7 @@ async fn test_bucket_all() {
.buckets
.as_ref()
.unwrap()
.into_iter()
.iter()
.filter(|x| x.name.as_ref().is_some())
.find(|x| x.name.as_ref().unwrap() == "hello")
.is_some());
@ -79,7 +79,7 @@ async fn test_bucket_all() {
.buckets
.as_ref()
.unwrap()
.into_iter()
.iter()
.filter(|x| x.name.as_ref().is_some())
.find(|x| x.name.as_ref().unwrap() == "hello")
.is_none());

View file

@ -527,8 +527,8 @@ async fn test_listmultipart() {
upnext = r.next_upload_id_marker;
loopcnt += 1;
upcnt += r.uploads.unwrap_or(vec![]).len();
pfxcnt += r.common_prefixes.unwrap_or(vec![]).len();
upcnt += r.uploads.unwrap_or_default().len();
pfxcnt += r.common_prefixes.unwrap_or_default().len();
if next.is_none() {
break;

View file

@ -124,7 +124,7 @@ async fn test_uploadlistpart() {
assert!(r.part_number_marker.is_none());
assert!(r.next_part_number_marker.is_some());
assert_eq!(r.max_parts, 1 as i32);
assert_eq!(r.max_parts, 1_i32);
assert!(r.is_truncated);
assert_eq!(r.key.unwrap(), "a");
assert_eq!(r.upload_id.unwrap().as_str(), uid.as_str());
@ -146,7 +146,7 @@ async fn test_uploadlistpart() {
r2.part_number_marker.as_ref().unwrap(),
r.next_part_number_marker.as_ref().unwrap()
);
assert_eq!(r2.max_parts, 1 as i32);
assert_eq!(r2.max_parts, 1_i32);
assert!(r2.is_truncated);
assert_eq!(r2.key.unwrap(), "a");
assert_eq!(r2.upload_id.unwrap().as_str(), uid.as_str());

View file

@ -38,7 +38,6 @@ use crate::garage::Garage;
/// Size under which data will be stored inlined in database instead of as files
pub const INLINE_THRESHOLD: usize = 3072;
pub const BACKGROUND_WORKERS: u64 = 1;
pub const BACKGROUND_TRANQUILITY: u32 = 2;
// Timeout for RPCs that read and write blocks to remote nodes
@ -512,17 +511,14 @@ impl BlockManager {
// ---- Resync loop ----
pub fn spawn_background_worker(self: Arc<Self>) {
// Launch n simultaneous workers for background resync loop preprocessing
for i in 0..BACKGROUND_WORKERS {
let bm2 = self.clone();
let background = self.system.background.clone();
tokio::spawn(async move {
tokio::time::sleep(Duration::from_secs(10 * (i + 1))).await;
background.spawn_worker(format!("block resync worker {}", i), move |must_exit| {
bm2.resync_loop(must_exit)
});
// Launch a background workers for background resync loop processing
let background = self.system.background.clone();
tokio::spawn(async move {
tokio::time::sleep(Duration::from_secs(10)).await;
background.spawn_worker("block resync worker".into(), move |must_exit| {
self.resync_loop(must_exit)
});
}
});
}
fn put_to_resync(&self, hash: &Hash, delay: Duration) -> Result<(), Error> {
@ -566,9 +562,12 @@ impl BlockManager {
}
async fn resync_iter(&self, must_exit: &mut watch::Receiver<bool>) -> Result<bool, Error> {
if let Some((time_bytes, hash_bytes)) = self.resync_queue.pop_min()? {
if let Some(first_pair_res) = self.resync_queue.iter().next() {
let (time_bytes, hash_bytes) = first_pair_res?;
let time_msec = u64::from_be_bytes(time_bytes[0..8].try_into().unwrap());
let now = now_msec();
if now >= time_msec {
let hash = Hash::try_from(&hash_bytes[..]).unwrap();
@ -579,6 +578,9 @@ impl BlockManager {
// don't do resync and return early, but still
// make sure the item is still in queue at expected time
self.put_to_resync_at(&hash, ec.next_try())?;
// ec.next_try() > now >= time_msec, so this remove

I don't understand the condition in which we are here, can you confirm my following reasoning ?

We get a block to repair in the queue
-> its scheduled resync time is before now, so we handle it
-> we get the block associated error counter
-> the error counter has a next_try method that implement exponential backoff
-> (the block can be added by another tool in the queue that do not consider exponential backoff?)
-> The exponential backoff says we should not reschedule now, as its exponential backoff value is greater than the previously scheduled one
-> We re-add the block at the value computed by the exponential backoff
-> We remove the block at the current time value

After this analysis, I have a question:

  • Why do we not have an helper/method to put a block in the resync queue that would directly check the value of the ErrorCounter (if any) and its associated exponential backoff time?
I don't understand the condition in which we are here, can you confirm my following reasoning ? We get a block to repair in the queue -> its scheduled resync time is before now, so we handle it -> we get the block associated error counter -> the error counter has a next_try method that implement exponential backoff -> (the block can be added by another tool in the queue that do not consider exponential backoff?) -> The exponential backoff says we should not reschedule now, as its exponential backoff value is greater than the previously scheduled one -> We re-add the block at the value computed by the exponential backoff -> We remove the block at the current time value After this analysis, I have a question: - Why do we not have an helper/method to put a block in the resync queue that would directly check the value of the ErrorCounter (if any) and its associated exponential backoff time?
Outdated
Review

True. I'll see if I can refactor this logic to make the handling of the resync queue more self-contained and more understandable. But I think that to implement what you are saying, we need to have a transaction that takes a lock on the two trees at once (resync_notify and resync_errors), which we cannot do with the SledCountedTree wrapper, so we probably need to have a mutex for all operations on the queue. I have to think about it.

True. I'll see if I can refactor this logic to make the handling of the resync queue more self-contained and more understandable. But I think that to implement what you are saying, we need to have a transaction that takes a lock on the two trees at once (resync_notify and resync_errors), which we cannot do with the `SledCountedTree` wrapper, so we probably need to have a mutex for all operations on the queue. I have to think about it.
// is not removing the one we added just above
self.resync_queue.remove(time_bytes)?;
return Ok(false);
}
}
@ -609,20 +611,25 @@ impl BlockManager {
warn!("Error when resyncing {:?}: {}", hash, e);
let err_counter = match self.resync_errors.get(hash.as_slice())? {
Some(ec) => ErrorCounter::decode(ec).add1(),
None => ErrorCounter::new(),
Some(ec) => ErrorCounter::decode(ec).add1(now + 1),
None => ErrorCounter::new(now + 1),
};
self.put_to_resync_at(&hash, err_counter.next_try())?;
self.resync_errors
.insert(hash.as_slice(), err_counter.encode())?;
self.put_to_resync_at(&hash, err_counter.next_try())?;
// err_counter.next_try() >= now + 1 > now,
// the entry we remove from the queue is not
// the entry we inserted with put_to_resync_at
self.resync_queue.remove(time_bytes)?;
} else {
self.resync_errors.remove(hash.as_slice())?;
self.resync_queue.remove(time_bytes)?;
}
Ok(true)
} else {
self.resync_queue.insert(time_bytes, hash_bytes)?;
let delay = tokio::time::sleep(Duration::from_millis(time_msec - now));
select! {
_ = delay.fuse() => {},
@ -862,9 +869,11 @@ impl BlockManagerLocked {
let data = data.inner_buffer();
let mut path = mgr.block_dir(hash);
fs::create_dir_all(&path).await?;
let directory = path.clone();
path.push(hex::encode(hash));
fs::create_dir_all(&directory).await?;
let to_delete = match (mgr.is_block_compressed(hash).await, compressed) {
(Ok(true), _) => return Ok(BlockRpc::Ok),
(Ok(false), false) => return Ok(BlockRpc::Ok),
@ -885,6 +894,7 @@ impl BlockManagerLocked {
path2.set_extension("tmp");
let mut f = fs::File::create(&path2).await?;
f.write_all(data).await?;
f.sync_all().await?;
drop(f);
fs::rename(path2, path).await?;
@ -892,6 +902,19 @@ impl BlockManagerLocked {
fs::remove_file(to_delete).await?;
}
// We want to ensure that when this function returns, data is properly persisted
lx marked this conversation as resolved Outdated

If I am correct, this code is used to fsync a move, as you mentionned on Matrix?
I suggest we add a comment to avoid removing it if someone else refactor this part of the code.

If I am correct, this code is used to fsync a move, as you mentionned on Matrix? I suggest we add a comment to avoid removing it if someone else refactor this part of the code.
// to disk. The first step is the sync_all above that does an fsync on the data file.
// Now, we do an fsync on the containing directory, to ensure that the rename
// is persisted properly. See:
// http://thedjbway.b0llix.net/qmail/syncdir.html
let dir = fs::OpenOptions::new()
.read(true)
.mode(0)
.open(directory)
.await?;
dir.sync_all().await?;
drop(dir);
Ok(BlockRpc::Ok)
}
@ -1037,19 +1060,13 @@ struct ErrorCounter {
last_try: u64,
}
impl Default for ErrorCounter {
fn default() -> Self {
impl ErrorCounter {
fn new(now: u64) -> Self {
Self {
errors: 1,
last_try: now_msec(),
last_try: now,
}
}
}
impl ErrorCounter {
fn new() -> Self {
Self::default()
}
fn decode(data: sled::IVec) -> Self {
Self {
@ -1065,10 +1082,10 @@ impl ErrorCounter {
.concat()
}
fn add1(self) -> Self {
fn add1(self, now: u64) -> Self {
Self {
errors: self.errors + 1,
last_try: now_msec(),
last_try: now,
}
}

View file

@ -30,8 +30,7 @@ impl<'a> BucketHelper<'a> {
// the AWS spec, and hex-encoded UUIDs are 64 chars long.
let hexbucket = hex::decode(bucket_name.as_str())
.ok()
.map(|by| Uuid::try_from(&by))
.flatten();
.and_then(|by| Uuid::try_from(&by));
if let Some(bucket_id) = hexbucket {
Ok(self
.0
@ -46,8 +45,7 @@ impl<'a> BucketHelper<'a> {
.bucket_alias_table
.get(&EmptyKey, bucket_name)
.await?
.map(|x| *x.state.get())
.flatten())
.and_then(|x| *x.state.get()))
}
}

View file

@ -106,8 +106,7 @@ impl Key {
/// Get permissions for a bucket
pub fn bucket_permissions(&self, bucket: &Uuid) -> BucketKeyPerm {
self.params()
.map(|params| params.authorized_buckets.get(bucket))
.flatten()
.and_then(|params| params.authorized_buckets.get(bucket))
.cloned()
.unwrap_or(BucketKeyPerm::NO_PERMISSIONS)
}

View file

@ -51,10 +51,8 @@ pub async fn get_consul_nodes(
let pubkey = ent
.node_meta
.get("pubkey")
.map(|k| hex::decode(&k).ok())
.flatten()
.map(|k| NodeID::from_slice(&k[..]))
.flatten();
.and_then(|k| hex::decode(&k).ok())
.and_then(|k| NodeID::from_slice(&k[..]));
if let (Some(ip), Some(pubkey)) = (ip, pubkey) {
ret.push((pubkey, SocketAddr::new(ip, ent.service_port)));
} else {

View file

@ -63,10 +63,8 @@ pub async fn get_kubernetes_nodes(
let pubkey = &node
.metadata
.name
.map(|k| hex::decode(&k).ok())
.flatten()
.map(|k| NodeID::from_slice(&k[..]))
.flatten();
.and_then(|k| hex::decode(&k).ok())
.and_then(|k| NodeID::from_slice(&k[..]));
if let Some(pubkey) = pubkey {
ret.push((*pubkey, SocketAddr::new(node.spec.address, node.spec.port)))

View file

@ -322,8 +322,7 @@ impl RpcHelper {
let peer_avg_ping = peer_list
.iter()
.find(|x| x.id.as_ref() == to.as_slice())
.map(|pi| pi.avg_ping)
.flatten()
.and_then(|pi| pi.avg_ping)
.unwrap_or_else(|| Duration::from_secs(1));
(
to != self.0.our_node_id,

View file

@ -175,8 +175,7 @@ async fn serve_file(garage: Arc<Garage>, req: &Request<Body>) -> Result<Response
.bucket_alias_table
.get(&EmptyKey, &bucket_name.to_string())
.await?
.map(|x| x.state.take())
.flatten()
.and_then(|x| x.state.take())
.ok_or(Error::NotFound)?;
// Check bucket isn't deleted and has website access enabled