Try to solve persistence issues #259
15 changed files with 68 additions and 66 deletions
|
@ -200,12 +200,7 @@ pub fn find_matching_cors_rule<'a>(
|
|||
None => vec![],
|
||||
};
|
||||
return Ok(cors_config.iter().find(|rule| {
|
||||
cors_rule_matches(
|
||||
rule,
|
||||
origin,
|
||||
&req.method().to_string(),
|
||||
request_headers.iter(),
|
||||
)
|
||||
cors_rule_matches(rule, origin, req.method().as_ref(), request_headers.iter())
|
||||
}));
|
||||
}
|
||||
}
|
||||
|
|
|
@ -1042,12 +1042,12 @@ mod tests {
|
|||
|
||||
query.common.prefix = "a/".to_string();
|
||||
assert_eq!(
|
||||
common_prefix(&objs.get(0).unwrap(), &query.common),
|
||||
common_prefix(objs.get(0).unwrap(), &query.common),
|
||||
Some("a/b/")
|
||||
);
|
||||
|
||||
query.common.prefix = "a/b/".to_string();
|
||||
assert_eq!(common_prefix(&objs.get(0).unwrap(), &query.common), None);
|
||||
assert_eq!(common_prefix(objs.get(0).unwrap(), &query.common), None);
|
||||
}
|
||||
|
||||
#[test]
|
||||
|
@ -1272,7 +1272,7 @@ mod tests {
|
|||
Version {
|
||||
bucket_id: uuid,
|
||||
key: "a".to_string(),
|
||||
uuid: uuid,
|
||||
uuid,
|
||||
deleted: false.into(),
|
||||
blocks: crdt::Map::<VersionBlockKey, VersionBlock>::from_iter(blocks),
|
||||
parts_etags: crdt::Map::<u64, String>::from_iter(etags),
|
||||
|
|
|
@ -259,8 +259,7 @@ impl RoutingRuleInner {
|
|||
let has_prefix = self
|
||||
.condition
|
||||
.as_ref()
|
||||
.map(|c| c.prefix.as_ref())
|
||||
.flatten()
|
||||
.and_then(|c| c.prefix.as_ref())
|
||||
.is_some();
|
||||
self.redirect.validate(has_prefix)
|
||||
}
|
||||
|
|
|
@ -51,7 +51,7 @@ pub async fn check_payload_signature(
|
|||
|
||||
let canonical_request = canonical_request(
|
||||
request.method(),
|
||||
&request.uri().path().to_string(),
|
||||
request.uri().path(),
|
||||
&canonical_query_string(request.uri()),
|
||||
&headers,
|
||||
&authorization.signed_headers,
|
||||
|
|
|
@ -115,7 +115,7 @@ async fn cli_command(opt: Opt) -> Result<(), Error> {
|
|||
} else {
|
||||
let node_id = garage_rpc::system::read_node_id(&config.as_ref().unwrap().metadata_dir)
|
||||
.err_context(READ_KEY_ERROR)?;
|
||||
if let Some(a) = config.as_ref().map(|c| c.rpc_public_addr).flatten() {
|
||||
if let Some(a) = config.as_ref().and_then(|c| c.rpc_public_addr) {
|
||||
(node_id, a)
|
||||
} else {
|
||||
let default_addr = SocketAddr::new(
|
||||
|
|
|
@ -27,7 +27,7 @@ async fn test_bucket_all() {
|
|||
.buckets
|
||||
.as_ref()
|
||||
.unwrap()
|
||||
.into_iter()
|
||||
.iter()
|
||||
.filter(|x| x.name.as_ref().is_some())
|
||||
.find(|x| x.name.as_ref().unwrap() == "hello")
|
||||
.is_some());
|
||||
|
@ -79,7 +79,7 @@ async fn test_bucket_all() {
|
|||
.buckets
|
||||
.as_ref()
|
||||
.unwrap()
|
||||
.into_iter()
|
||||
.iter()
|
||||
.filter(|x| x.name.as_ref().is_some())
|
||||
.find(|x| x.name.as_ref().unwrap() == "hello")
|
||||
.is_none());
|
||||
|
|
|
@ -527,8 +527,8 @@ async fn test_listmultipart() {
|
|||
upnext = r.next_upload_id_marker;
|
||||
|
||||
loopcnt += 1;
|
||||
upcnt += r.uploads.unwrap_or(vec![]).len();
|
||||
pfxcnt += r.common_prefixes.unwrap_or(vec![]).len();
|
||||
upcnt += r.uploads.unwrap_or_default().len();
|
||||
pfxcnt += r.common_prefixes.unwrap_or_default().len();
|
||||
|
||||
if next.is_none() {
|
||||
break;
|
||||
|
|
|
@ -124,7 +124,7 @@ async fn test_uploadlistpart() {
|
|||
|
||||
assert!(r.part_number_marker.is_none());
|
||||
assert!(r.next_part_number_marker.is_some());
|
||||
assert_eq!(r.max_parts, 1 as i32);
|
||||
assert_eq!(r.max_parts, 1_i32);
|
||||
assert!(r.is_truncated);
|
||||
assert_eq!(r.key.unwrap(), "a");
|
||||
assert_eq!(r.upload_id.unwrap().as_str(), uid.as_str());
|
||||
|
@ -146,7 +146,7 @@ async fn test_uploadlistpart() {
|
|||
r2.part_number_marker.as_ref().unwrap(),
|
||||
r.next_part_number_marker.as_ref().unwrap()
|
||||
);
|
||||
assert_eq!(r2.max_parts, 1 as i32);
|
||||
assert_eq!(r2.max_parts, 1_i32);
|
||||
assert!(r2.is_truncated);
|
||||
assert_eq!(r2.key.unwrap(), "a");
|
||||
assert_eq!(r2.upload_id.unwrap().as_str(), uid.as_str());
|
||||
|
|
|
@ -38,7 +38,6 @@ use crate::garage::Garage;
|
|||
/// Size under which data will be stored inlined in database instead of as files
|
||||
pub const INLINE_THRESHOLD: usize = 3072;
|
||||
|
||||
pub const BACKGROUND_WORKERS: u64 = 1;
|
||||
pub const BACKGROUND_TRANQUILITY: u32 = 2;
|
||||
|
||||
// Timeout for RPCs that read and write blocks to remote nodes
|
||||
|
@ -512,18 +511,15 @@ impl BlockManager {
|
|||
// ---- Resync loop ----
|
||||
|
||||
pub fn spawn_background_worker(self: Arc<Self>) {
|
||||
// Launch n simultaneous workers for background resync loop preprocessing
|
||||
for i in 0..BACKGROUND_WORKERS {
|
||||
let bm2 = self.clone();
|
||||
// Launch a background worker for background resync loop processing
|
||||
let background = self.system.background.clone();
|
||||
tokio::spawn(async move {
|
||||
tokio::time::sleep(Duration::from_secs(10 * (i + 1))).await;
|
||||
background.spawn_worker(format!("block resync worker {}", i), move |must_exit| {
|
||||
bm2.resync_loop(must_exit)
|
||||
tokio::time::sleep(Duration::from_secs(10)).await;
|
||||
background.spawn_worker("block resync worker".into(), move |must_exit| {
|
||||
self.resync_loop(must_exit)
|
||||
});
|
||||
});
|
||||
}
|
||||
}
|
||||
|
||||
fn put_to_resync(&self, hash: &Hash, delay: Duration) -> Result<(), Error> {
|
||||
let when = now_msec() + delay.as_millis() as u64;
|
||||
|
@ -566,9 +562,12 @@ impl BlockManager {
|
|||
}
|
||||
|
||||
async fn resync_iter(&self, must_exit: &mut watch::Receiver<bool>) -> Result<bool, Error> {
|
||||
if let Some((time_bytes, hash_bytes)) = self.resync_queue.pop_min()? {
|
||||
if let Some(first_pair_res) = self.resync_queue.iter().next() {
|
||||
let (time_bytes, hash_bytes) = first_pair_res?;
|
||||
|
||||
let time_msec = u64::from_be_bytes(time_bytes[0..8].try_into().unwrap());
|
||||
let now = now_msec();
|
||||
|
||||
if now >= time_msec {
|
||||
let hash = Hash::try_from(&hash_bytes[..]).unwrap();
|
||||
|
||||
|
@ -579,6 +578,9 @@ impl BlockManager {
|
|||
// don't do resync and return early, but still
|
||||
// make sure the item is still in queue at expected time
|
||||
self.put_to_resync_at(&hash, ec.next_try())?;
|
||||
// ec.next_try() > now >= time_msec, so this remove
|
||||
|
||||
// is not removing the one we added just above
|
||||
self.resync_queue.remove(time_bytes)?;
|
||||
return Ok(false);
|
||||
}
|
||||
}
|
||||
|
@ -609,20 +611,25 @@ impl BlockManager {
|
|||
warn!("Error when resyncing {:?}: {}", hash, e);
|
||||
|
||||
let err_counter = match self.resync_errors.get(hash.as_slice())? {
|
||||
Some(ec) => ErrorCounter::decode(ec).add1(),
|
||||
None => ErrorCounter::new(),
|
||||
Some(ec) => ErrorCounter::decode(ec).add1(now + 1),
|
||||
None => ErrorCounter::new(now + 1),
|
||||
};
|
||||
|
||||
self.put_to_resync_at(&hash, err_counter.next_try())?;
|
||||
self.resync_errors
|
||||
.insert(hash.as_slice(), err_counter.encode())?;
|
||||
|
||||
self.put_to_resync_at(&hash, err_counter.next_try())?;
|
||||
// err_counter.next_try() >= now + 1 > now,
|
||||
// the entry we remove from the queue is not
|
||||
// the entry we inserted with put_to_resync_at
|
||||
self.resync_queue.remove(time_bytes)?;
|
||||
} else {
|
||||
self.resync_errors.remove(hash.as_slice())?;
|
||||
self.resync_queue.remove(time_bytes)?;
|
||||
}
|
||||
|
||||
Ok(true)
|
||||
} else {
|
||||
self.resync_queue.insert(time_bytes, hash_bytes)?;
|
||||
let delay = tokio::time::sleep(Duration::from_millis(time_msec - now));
|
||||
select! {
|
||||
_ = delay.fuse() => {},
|
||||
|
@ -862,9 +869,11 @@ impl BlockManagerLocked {
|
|||
let data = data.inner_buffer();
|
||||
|
||||
let mut path = mgr.block_dir(hash);
|
||||
fs::create_dir_all(&path).await?;
|
||||
|
||||
let directory = path.clone();
|
||||
path.push(hex::encode(hash));
|
||||
|
||||
fs::create_dir_all(&directory).await?;
|
||||
|
||||
let to_delete = match (mgr.is_block_compressed(hash).await, compressed) {
|
||||
(Ok(true), _) => return Ok(BlockRpc::Ok),
|
||||
(Ok(false), false) => return Ok(BlockRpc::Ok),
|
||||
|
@ -885,6 +894,7 @@ impl BlockManagerLocked {
|
|||
path2.set_extension("tmp");
|
||||
let mut f = fs::File::create(&path2).await?;
|
||||
f.write_all(data).await?;
|
||||
f.sync_all().await?;
|
||||
drop(f);
|
||||
|
||||
fs::rename(path2, path).await?;
|
||||
|
@ -892,6 +902,19 @@ impl BlockManagerLocked {
|
|||
fs::remove_file(to_delete).await?;
|
||||
}
|
||||
|
||||
// We want to ensure that when this function returns, data is properly persisted
|
||||
lx marked this conversation as resolved
Outdated
quentin
commented
If I am correct, this code is used to fsync a move, as you mentioned on Matrix?
I suggest we add a comment so that it does not get removed if someone else refactors this part of the code.
|
||||
// to disk. The first step is the sync_all above that does an fsync on the data file.
|
||||
// Now, we do an fsync on the containing directory, to ensure that the rename
|
||||
// is persisted properly. See:
|
||||
// http://thedjbway.b0llix.net/qmail/syncdir.html
|
||||
let dir = fs::OpenOptions::new()
|
||||
.read(true)
|
||||
.mode(0)
|
||||
.open(directory)
|
||||
.await?;
|
||||
dir.sync_all().await?;
|
||||
drop(dir);
|
||||
|
||||
Ok(BlockRpc::Ok)
|
||||
}
|
||||
|
||||
|
@ -1037,19 +1060,13 @@ struct ErrorCounter {
|
|||
last_try: u64,
|
||||
}
|
||||
|
||||
impl Default for ErrorCounter {
|
||||
fn default() -> Self {
|
||||
impl ErrorCounter {
|
||||
fn new(now: u64) -> Self {
|
||||
Self {
|
||||
errors: 1,
|
||||
last_try: now_msec(),
|
||||
last_try: now,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl ErrorCounter {
|
||||
fn new() -> Self {
|
||||
Self::default()
|
||||
}
|
||||
|
||||
fn decode(data: sled::IVec) -> Self {
|
||||
Self {
|
||||
|
@ -1065,10 +1082,10 @@ impl ErrorCounter {
|
|||
.concat()
|
||||
}
|
||||
|
||||
fn add1(self) -> Self {
|
||||
fn add1(self, now: u64) -> Self {
|
||||
Self {
|
||||
errors: self.errors + 1,
|
||||
last_try: now_msec(),
|
||||
last_try: now,
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
@ -30,8 +30,7 @@ impl<'a> BucketHelper<'a> {
|
|||
// the AWS spec, and hex-encoded UUIDs are 64 chars long.
|
||||
let hexbucket = hex::decode(bucket_name.as_str())
|
||||
.ok()
|
||||
.map(|by| Uuid::try_from(&by))
|
||||
.flatten();
|
||||
.and_then(|by| Uuid::try_from(&by));
|
||||
if let Some(bucket_id) = hexbucket {
|
||||
Ok(self
|
||||
.0
|
||||
|
@ -46,8 +45,7 @@ impl<'a> BucketHelper<'a> {
|
|||
.bucket_alias_table
|
||||
.get(&EmptyKey, bucket_name)
|
||||
.await?
|
||||
.map(|x| *x.state.get())
|
||||
.flatten())
|
||||
.and_then(|x| *x.state.get()))
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
@ -106,8 +106,7 @@ impl Key {
|
|||
/// Get permissions for a bucket
|
||||
pub fn bucket_permissions(&self, bucket: &Uuid) -> BucketKeyPerm {
|
||||
self.params()
|
||||
.map(|params| params.authorized_buckets.get(bucket))
|
||||
.flatten()
|
||||
.and_then(|params| params.authorized_buckets.get(bucket))
|
||||
.cloned()
|
||||
.unwrap_or(BucketKeyPerm::NO_PERMISSIONS)
|
||||
}
|
||||
|
|
|
@ -51,10 +51,8 @@ pub async fn get_consul_nodes(
|
|||
let pubkey = ent
|
||||
.node_meta
|
||||
.get("pubkey")
|
||||
.map(|k| hex::decode(&k).ok())
|
||||
.flatten()
|
||||
.map(|k| NodeID::from_slice(&k[..]))
|
||||
.flatten();
|
||||
.and_then(|k| hex::decode(&k).ok())
|
||||
.and_then(|k| NodeID::from_slice(&k[..]));
|
||||
if let (Some(ip), Some(pubkey)) = (ip, pubkey) {
|
||||
ret.push((pubkey, SocketAddr::new(ip, ent.service_port)));
|
||||
} else {
|
||||
|
|
|
@ -63,10 +63,8 @@ pub async fn get_kubernetes_nodes(
|
|||
let pubkey = &node
|
||||
.metadata
|
||||
.name
|
||||
.map(|k| hex::decode(&k).ok())
|
||||
.flatten()
|
||||
.map(|k| NodeID::from_slice(&k[..]))
|
||||
.flatten();
|
||||
.and_then(|k| hex::decode(&k).ok())
|
||||
.and_then(|k| NodeID::from_slice(&k[..]));
|
||||
|
||||
if let Some(pubkey) = pubkey {
|
||||
ret.push((*pubkey, SocketAddr::new(node.spec.address, node.spec.port)))
|
||||
|
|
|
@ -322,8 +322,7 @@ impl RpcHelper {
|
|||
let peer_avg_ping = peer_list
|
||||
.iter()
|
||||
.find(|x| x.id.as_ref() == to.as_slice())
|
||||
.map(|pi| pi.avg_ping)
|
||||
.flatten()
|
||||
.and_then(|pi| pi.avg_ping)
|
||||
.unwrap_or_else(|| Duration::from_secs(1));
|
||||
(
|
||||
to != self.0.our_node_id,
|
||||
|
|
|
@ -175,8 +175,7 @@ async fn serve_file(garage: Arc<Garage>, req: &Request<Body>) -> Result<Response
|
|||
.bucket_alias_table
|
||||
.get(&EmptyKey, &bucket_name.to_string())
|
||||
.await?
|
||||
.map(|x| x.state.take())
|
||||
.flatten()
|
||||
.and_then(|x| x.state.take())
|
||||
.ok_or(Error::NotFound)?;
|
||||
|
||||
// Check bucket isn't deleted and has website access enabled
|
||||
|
|
Loading…
Reference in a new issue
I don't understand the conditions under which we end up here; can you confirm the following reasoning?
We get a block to repair in the queue
-> its scheduled resync time is before now, so we handle it
-> we get the block associated error counter
-> the error counter has a next_try method that implements exponential backoff
-> (the block can be added to the queue by another tool that does not consider exponential backoff?)
-> The exponential backoff says we should not reschedule now, as its exponential backoff value is greater than the previously scheduled one
-> We re-add the block at the value computed by the exponential backoff
-> We remove the block at the current time value
After this analysis, I have a question:
True. I'll see if I can refactor this logic to make the handling of the resync queue more self-contained and more understandable. But I think that to implement what you are saying, we need to have a transaction that takes a lock on the two trees at once (resync_notify and resync_errors), which we cannot do with the `SledCountedTree` wrapper, so we probably need to have a mutex for all operations on the queue. I have to think about it.