2022-05-31 22:06:26 +00:00
|
|
|
use std::str::FromStr;
|
2022-07-13 10:30:35 +00:00
|
|
|
use std::sync::{Arc, Weak};
|
2022-05-18 12:54:48 +00:00
|
|
|
use std::time::{Duration, Instant};
|
2022-05-18 10:24:37 +00:00
|
|
|
|
2022-05-18 12:54:48 +00:00
|
|
|
use anyhow::{anyhow, bail, Result};
|
2022-07-13 10:30:35 +00:00
|
|
|
use log::{debug, error, info};
|
2022-05-18 10:24:37 +00:00
|
|
|
use rand::prelude::*;
|
|
|
|
use serde::{Deserialize, Serialize};
|
2022-05-18 12:54:48 +00:00
|
|
|
use tokio::io::AsyncReadExt;
|
2022-07-13 10:30:35 +00:00
|
|
|
use tokio::sync::{watch, Notify};
|
2022-05-18 10:24:37 +00:00
|
|
|
|
2022-07-13 10:30:35 +00:00
|
|
|
use k2v_client::{BatchDeleteOp, BatchReadOp, CausalityToken, Filter, K2vClient, K2vValue};
|
2022-05-18 13:53:13 +00:00
|
|
|
use rusoto_s3::{
|
|
|
|
DeleteObjectRequest, GetObjectRequest, ListObjectsV2Request, PutObjectRequest, S3Client, S3,
|
|
|
|
};
|
2022-05-18 10:24:37 +00:00
|
|
|
|
2022-05-18 12:54:48 +00:00
|
|
|
use crate::cryptoblob::*;
|
2022-07-13 10:30:35 +00:00
|
|
|
use crate::k2v_util::k2v_wait_value_changed;
|
2022-05-19 11:54:38 +00:00
|
|
|
use crate::login::Credentials;
|
2022-05-18 10:24:37 +00:00
|
|
|
use crate::time::now_msec;
|
|
|
|
|
// Save a full copy of the state into the in-RAM history every
// KEEP_STATE_EVERY operations, so recomputing an intermediate state
// never replays more than KEEP_STATE_EVERY ops.
const KEEP_STATE_EVERY: usize = 64;

// Checkpointing interval constants: a checkpoint is not made earlier
// than CHECKPOINT_INTERVAL time after the last one, and is not made
// if there are less than CHECKPOINT_MIN_OPS new operations since last one.
const CHECKPOINT_INTERVAL: Duration = Duration::from_secs(6 * 3600);
const CHECKPOINT_MIN_OPS: usize = 16;
// HYPOTHESIS: processes are able to communicate in a synchronous
// fashion in times that are small compared to CHECKPOINT_INTERVAL.
// More precisely, if a process tried to save an operation within the last
// CHECKPOINT_INTERVAL, we are sure to read it from storage if it was
// successfully saved (and if we don't read it, it means it has been
// definitely discarded due to an error).

// Keep at least two checkpoints, here three, to avoid race conditions
// between processes doing .checkpoint() and those doing .sync()
const CHECKPOINTS_TO_KEEP: usize = 3;

// Sort key of the K2V entry used to signal changes. Also used as the
// exclusive upper bound when range-reading operation entries: operation
// sort keys are 32-char hex strings, which all sort before "watch".
const WATCH_SK: &str = "watch";
|
|
|
|
|
/// A piece of state that can be rebuilt deterministically by replaying a
/// log of operations (`Op`) on top of a `Default` initial value.
pub trait BayouState:
    Default + Clone + Serialize + for<'de> Deserialize<'de> + Send + Sync + 'static
{
    /// The type of operations that transform the state.
    type Op: Clone + Serialize + for<'de> Deserialize<'de> + std::fmt::Debug + Send + Sync + 'static;

    /// Pure transition function: returns the new state obtained by applying
    /// `op` to `self`, leaving `self` unmodified.
    fn apply(&self, op: &Self::Op) -> Self;
}
|
|
|
|
|
|
|
|
pub struct Bayou<S: BayouState> {
    // S3 bucket and K2V partition key under which this state lives
    bucket: String,
    path: String,
    // Symmetric key used to seal/open all stored blobs (ops and checkpoints)
    key: Key,

    // Storage clients: K2V holds the operation log, S3 the checkpoint blobs
    k2v: K2vClient,
    s3: S3Client,

    // Last loaded checkpoint, and the in-RAM operation log applied on top of
    // it. Each history entry is (timestamp, op, optional cached state); a
    // state is cached only every KEEP_STATE_EVERY entries and for the last
    // entry (see sync() and push()).
    checkpoint: (Timestamp, S),
    history: Vec<(Timestamp, S::Op, Option<S>)>,

    // Rate-limiting bookkeeping for opportunistic_sync() and checkpoint()
    last_sync: Option<Instant>,
    last_try_checkpoint: Option<Instant>,

    // Watch on the K2V WATCH_SK entry, used to detect remote changes;
    // last_sync_watch_ct is the causality token observed at last sync.
    watch: Arc<K2vWatch>,
    last_sync_watch_ct: Option<CausalityToken>,
}
|
|
|
|
|
|
|
|
impl<S: BayouState> Bayou<S> {
|
2022-05-19 13:14:36 +00:00
|
|
|
pub fn new(creds: &Credentials, path: String) -> Result<Self> {
|
2022-05-19 12:33:49 +00:00
|
|
|
let k2v_client = creds.k2v_client()?;
|
|
|
|
let s3_client = creds.s3_client()?;
|
2022-05-18 10:24:37 +00:00
|
|
|
|
2022-07-13 10:30:35 +00:00
|
|
|
let watch = K2vWatch::new(creds, path.clone(), WATCH_SK.to_string())?;
|
|
|
|
|
2022-05-18 10:24:37 +00:00
|
|
|
Ok(Self {
|
2022-05-19 12:33:49 +00:00
|
|
|
bucket: creds.bucket().to_string(),
|
2022-05-18 10:24:37 +00:00
|
|
|
path,
|
2022-05-19 12:33:49 +00:00
|
|
|
key: creds.keys.master.clone(),
|
2022-05-18 10:24:37 +00:00
|
|
|
k2v: k2v_client,
|
|
|
|
s3: s3_client,
|
|
|
|
checkpoint: (Timestamp::zero(), S::default()),
|
|
|
|
history: vec![],
|
2022-05-18 12:54:48 +00:00
|
|
|
last_sync: None,
|
2022-05-18 14:03:27 +00:00
|
|
|
last_try_checkpoint: None,
|
2022-07-13 10:30:35 +00:00
|
|
|
watch,
|
|
|
|
last_sync_watch_ct: None,
|
2022-05-18 10:24:37 +00:00
|
|
|
})
|
|
|
|
}
|
|
|
|
|
    /// Re-reads the state from persistent storage backend
    pub async fn sync(&mut self) -> Result<()> {
        // Record sync time and watch token *before* doing the work, so a
        // change arriving mid-sync is re-detected by opportunistic_sync().
        let new_last_sync = Some(Instant::now());
        let new_last_sync_watch_ct = self.watch.rx.borrow().clone();

        // 1. List checkpoints
        let checkpoints = self.list_checkpoints().await?;
        debug!("(sync) listed checkpoints: {:?}", checkpoints);

        // 2. Load last checkpoint if different from currently used one
        let checkpoint = if let Some((ts, key)) = checkpoints.last() {
            if *ts == self.checkpoint.0 {
                // Already have the latest checkpoint in RAM; skip the download.
                (*ts, None)
            } else {
                debug!("(sync) loading checkpoint: {}", key);

                let mut gor = GetObjectRequest::default();
                gor.bucket = self.bucket.clone();
                gor.key = key.to_string();
                let obj_res = self.s3.get_object(gor).await?;

                let obj_body = obj_res.body.ok_or(anyhow!("Missing object body"))?;
                let mut buf = Vec::with_capacity(obj_res.content_length.unwrap_or(128) as usize);
                obj_body.into_async_read().read_to_end(&mut buf).await?;

                debug!("(sync) checkpoint body length: {}", buf.len());

                // Decrypt and deserialize the checkpointed state
                let ck = open_deserialize::<S>(&buf, &self.key)?;
                (*ts, Some(ck))
            }
        } else {
            // No checkpoint stored yet: start from the zero timestamp.
            (Timestamp::zero(), None)
        };

        // Our in-RAM checkpoint must never be newer than what storage holds.
        if self.checkpoint.0 > checkpoint.0 {
            bail!("Loaded checkpoint is more recent than stored one");
        }

        if let Some(ck) = checkpoint.1 {
            debug!(
                "(sync) updating checkpoint to loaded state at {:?}",
                checkpoint.0
            );
            self.checkpoint = (checkpoint.0, ck);
        };

        // remove from history events before checkpoint
        self.history = std::mem::take(&mut self.history)
            .into_iter()
            .skip_while(|(ts, _, _)| *ts < self.checkpoint.0)
            .collect();

        // 3. List all operations starting from checkpoint
        // Range ends at WATCH_SK, which sorts after all hex timestamps.
        let ts_ser = self.checkpoint.0.to_string();
        debug!("(sync) looking up operations starting at {}", ts_ser);
        let ops_map = self
            .k2v
            .read_batch(&[BatchReadOp {
                partition_key: &self.path,
                filter: Filter {
                    start: Some(&ts_ser),
                    end: Some(WATCH_SK),
                    prefix: None,
                    limit: None,
                    reverse: false,
                },
                single_item: false,
                conflicts_only: false,
                tombstones: false,
            }])
            .await?
            .into_iter()
            .next()
            .ok_or(anyhow!("Missing K2V result"))?
            .items;

        let mut ops = vec![];
        for (tsstr, val) in ops_map {
            let ts = tsstr
                .parse::<Timestamp>()
                .map_err(|_| anyhow!("Invalid operation timestamp: {}", tsstr))?;
            // Operations are written once and never mutated, so a causal
            // conflict (multiple values) indicates corruption.
            if val.value.len() != 1 {
                bail!("Invalid operation, has {} values", val.value.len());
            }
            match &val.value[0] {
                K2vValue::Value(v) => {
                    let op = open_deserialize::<S::Op>(&v, &self.key)?;
                    debug!("(sync) operation {}: {} {:?}", tsstr, base64::encode(v), op);
                    ops.push((ts, op));
                }
                K2vValue::Tombstone => {
                    // tombstones: false was requested above, so this
                    // variant cannot appear in the result set.
                    unreachable!();
                }
            }
        }
        ops.sort_by_key(|(ts, _)| *ts);
        debug!("(sync) {} operations", ops.len());

        // Storage must contain at least everything we already applied.
        if ops.len() < self.history.len() {
            bail!("Some operations have disappeared from storage!");
        }

        // 4. Check that first operation has same timestamp as checkpoint (if not zero)
        // NOTE(review): if the checkpoint is non-zero but `ops` is empty,
        // `ops[0]` below panics; the length guard above only catches that
        // case when history is non-empty. Confirm whether an explicit
        // bail!() on empty `ops` would be preferable here.
        if self.checkpoint.0 != Timestamp::zero() && ops[0].0 != self.checkpoint.0 {
            bail!(
                "First operation in listing doesn't have timestamp that corresponds to checkpoint"
            );
        }

        // 5. Apply all operations in order
        // Hypothesis: before the loaded checkpoint, operations haven't changed
        // between what's on storage and what we used to calculate the state in RAM here.
        // i0 = length of the common prefix between our history and storage.
        let i0 = self
            .history
            .iter()
            .zip(ops.iter())
            .take_while(|((ts1, _, _), (ts2, _))| ts1 == ts2)
            .count();

        if ops.len() > i0 {
            // Remove operations from first position where histories differ
            self.history.truncate(i0);

            // Look up last calculated state which we have saved and start from there.
            let mut last_state = (0, &self.checkpoint.1);
            for (i, (_, _, state_opt)) in self.history.iter().enumerate().rev() {
                if let Some(state) = state_opt {
                    last_state = (i + 1, state);
                    break;
                }
            }

            // Calculate state at the end of this common part of the history
            let mut state = last_state.1.clone();
            for (_, op, _) in self.history[last_state.0..].iter() {
                state = state.apply(op);
            }

            // Now, apply all operations retrieved from storage after the common part
            for (ts, op) in ops.drain(i0..) {
                state = state.apply(&op);
                // Cache a full state snapshot every KEEP_STATE_EVERY entries
                // to bound future recomputation.
                if (self.history.len() + 1) % KEEP_STATE_EVERY == 0 {
                    self.history.push((ts, op, Some(state.clone())));
                } else {
                    self.history.push((ts, op, None));
                }
            }

            // Always save final state as result of last operation
            // (state() relies on this invariant).
            self.history.last_mut().unwrap().2 = Some(state);
        }

        // Save info that sync has been done
        self.last_sync = new_last_sync;
        self.last_sync_watch_ct = new_last_sync_watch_ct;
        Ok(())
    }
|
|
|
|
|
2022-07-13 12:21:14 +00:00
|
|
|
/// Does a sync() if either of the two conditions is met:
|
|
|
|
/// - last sync was more than CHECKPOINT_INTERVAL/5 ago
|
|
|
|
/// - a change was detected
|
|
|
|
pub async fn opportunistic_sync(&mut self) -> Result<()> {
|
|
|
|
let too_old = match self.last_sync {
|
|
|
|
Some(t) => Instant::now() > t + (CHECKPOINT_INTERVAL / 5),
|
|
|
|
_ => true,
|
|
|
|
};
|
|
|
|
let changed = self.last_sync_watch_ct != *self.watch.rx.borrow();
|
|
|
|
if too_old || changed {
|
|
|
|
self.sync().await?;
|
2022-05-18 12:54:48 +00:00
|
|
|
}
|
2022-07-13 12:21:14 +00:00
|
|
|
Ok(())
|
2022-05-18 10:24:37 +00:00
|
|
|
}
|
|
|
|
|
    /// Applies a new operation on the state. Once this function returns,
    /// the operation has been safely persisted to storage backend.
    /// Make sure to call `.opportunistic_sync()` before doing this,
    /// and even before calculating the `op` argument given here.
    pub async fn push(&mut self, op: S::Op) -> Result<()> {
        debug!("(push) add operation: {:?}", op);

        // Timestamp strictly after the last known event (last history
        // entry, or the checkpoint if history is empty).
        let ts = Timestamp::after(
            self.history
                .last()
                .map(|(ts, _, _)| ts)
                .unwrap_or(&self.checkpoint.0),
        );
        // Persist the sealed operation first: the in-RAM state is only
        // updated after storage has accepted the write.
        // The final None is presumably "no causality token" — a blind
        // insert; confirm against the K2vClient API.
        self.k2v
            .insert_item(
                &self.path,
                &ts.to_string(),
                seal_serialize(&op, &self.key)?,
                None,
            )
            .await?;

        // Wake the background watcher so it bumps the WATCH_SK entry and
        // other processes get notified of the change.
        self.watch.notify.notify_one();

        // Apply locally and record, always caching the state of the
        // newest entry (state() relies on this).
        let new_state = self.state().apply(&op);
        self.history.push((ts, op, Some(new_state)));

        // Clear previously saved state in history if not required
        let hlen = self.history.len();
        if hlen >= 2 && (hlen - 1) % KEEP_STATE_EVERY != 0 {
            self.history[hlen - 2].2 = None;
        }

        // Opportunistically try to checkpoint (rate-limited internally).
        self.checkpoint().await?;

        Ok(())
    }
|
|
|
|
|
|
|
|
/// Save a new checkpoint if previous checkpoint is too old
|
|
|
|
pub async fn checkpoint(&mut self) -> Result<()> {
|
2022-05-18 14:03:27 +00:00
|
|
|
match self.last_try_checkpoint {
|
2022-07-13 12:21:14 +00:00
|
|
|
Some(ts) if Instant::now() - ts < CHECKPOINT_INTERVAL / 5 => Ok(()),
|
2022-05-18 14:03:27 +00:00
|
|
|
_ => {
|
|
|
|
let res = self.checkpoint_internal().await;
|
|
|
|
if res.is_ok() {
|
|
|
|
self.last_try_checkpoint = Some(Instant::now());
|
|
|
|
}
|
|
|
|
res
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
    /// Actually computes and stores a checkpoint, then prunes old
    /// checkpoints and their covered operations. Called by checkpoint().
    async fn checkpoint_internal(&mut self) -> Result<()> {
        // Work from fresh storage contents.
        self.sync().await?;

        // Check what would be the possible time for a checkpoint in the history we have
        // i_cp = index of the newest history entry that is at least
        // CHECKPOINT_INTERVAL old (i128 arithmetic avoids underflow).
        let now = now_msec() as i128;
        let i_cp = match self
            .history
            .iter()
            .enumerate()
            .rev()
            .skip_while(|(_, (ts, _, _))| {
                (now - ts.msec as i128) < CHECKPOINT_INTERVAL.as_millis() as i128
            })
            .map(|(i, _)| i)
            .next()
        {
            Some(i) => i,
            None => {
                debug!("(cp) Oldest operation is too recent to trigger checkpoint");
                return Ok(());
            }
        };

        if i_cp < CHECKPOINT_MIN_OPS {
            debug!("(cp) Not enough old operations to trigger checkpoint");
            return Ok(());
        }

        let ts_cp = self.history[i_cp].0;
        debug!(
            "(cp) we could checkpoint at time {} (index {} in history)",
            ts_cp.to_string(),
            i_cp
        );

        // Check existing checkpoints: if last one is too recent, don't checkpoint again.
        let existing_checkpoints = self.list_checkpoints().await?;
        debug!("(cp) listed checkpoints: {:?}", existing_checkpoints);

        if let Some(last_cp) = existing_checkpoints.last() {
            if (ts_cp.msec as i128 - last_cp.0.msec as i128)
                < CHECKPOINT_INTERVAL.as_millis() as i128
            {
                debug!(
                    "(cp) last checkpoint is too recent: {}, not checkpointing",
                    last_cp.0.to_string()
                );
                return Ok(());
            }
        }

        debug!("(cp) saving checkpoint at {}", ts_cp.to_string());

        // Calculate state at time of checkpoint
        // Start from the newest cached state before i_cp (no early break:
        // the loop scans forward, keeping the last Some it sees)...
        let mut last_known_state = (0, &self.checkpoint.1);
        for (i, (_, _, st)) in self.history[..i_cp].iter().enumerate() {
            if let Some(s) = st {
                last_known_state = (i + 1, s);
            }
        }
        // ...then replay the remaining ops up to (excluding) i_cp.
        let mut state_cp = last_known_state.1.clone();
        for (_, op, _) in self.history[last_known_state.0..i_cp].iter() {
            state_cp = state_cp.apply(op);
        }

        // Serialize and save checkpoint
        let cryptoblob = seal_serialize(&state_cp, &self.key)?;
        debug!("(cp) checkpoint body length: {}", cryptoblob.len());

        let mut por = PutObjectRequest::default();
        por.bucket = self.bucket.clone();
        por.key = format!("{}/checkpoint/{}", self.path, ts_cp.to_string());
        por.body = Some(cryptoblob.into());
        self.s3.put_object(por).await?;

        // Drop old checkpoints (but keep at least CHECKPOINTS_TO_KEEP of them)
        let ecp_len = existing_checkpoints.len();
        if ecp_len + 1 > CHECKPOINTS_TO_KEEP {
            let last_to_keep = ecp_len + 1 - CHECKPOINTS_TO_KEEP;

            // Delete blobs
            for (_ts, key) in existing_checkpoints[..last_to_keep].iter() {
                debug!("(cp) drop old checkpoint {}", key);
                let mut dor = DeleteObjectRequest::default();
                dor.bucket = self.bucket.clone();
                dor.key = key.to_string();
                self.s3.delete_object(dor).await?;
            }

            // Delete corresponding range of operations
            // (everything strictly before the oldest kept checkpoint).
            let ts_ser = existing_checkpoints[last_to_keep].0.to_string();
            self.k2v
                .delete_batch(&[BatchDeleteOp {
                    partition_key: &self.path,
                    prefix: None,
                    start: None,
                    end: Some(&ts_ser),
                    single_item: false,
                }])
                .await?;
        }

        Ok(())
    }
|
2022-05-18 10:42:25 +00:00
|
|
|
|
|
|
|
pub fn state(&self) -> &S {
|
|
|
|
if let Some(last) = self.history.last() {
|
|
|
|
last.2.as_ref().unwrap()
|
|
|
|
} else {
|
|
|
|
&self.checkpoint.1
|
|
|
|
}
|
|
|
|
}
|
2022-05-18 13:53:13 +00:00
|
|
|
|
|
|
|
// ---- INTERNAL ----
|
|
|
|
|
|
|
|
async fn list_checkpoints(&self) -> Result<Vec<(Timestamp, String)>> {
|
|
|
|
let prefix = format!("{}/checkpoint/", self.path);
|
|
|
|
|
|
|
|
let mut lor = ListObjectsV2Request::default();
|
|
|
|
lor.bucket = self.bucket.clone();
|
|
|
|
lor.max_keys = Some(1000);
|
|
|
|
lor.prefix = Some(prefix.clone());
|
|
|
|
|
|
|
|
let checkpoints_res = self.s3.list_objects_v2(lor).await?;
|
|
|
|
|
|
|
|
let mut checkpoints = vec![];
|
|
|
|
for object in checkpoints_res.contents.unwrap_or_default() {
|
|
|
|
if let Some(key) = object.key {
|
|
|
|
if let Some(ckid) = key.strip_prefix(&prefix) {
|
2022-05-31 22:06:26 +00:00
|
|
|
if let Ok(ts) = ckid.parse::<Timestamp>() {
|
2022-05-18 13:53:13 +00:00
|
|
|
checkpoints.push((ts, key));
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
checkpoints.sort_by_key(|(ts, _)| *ts);
|
|
|
|
Ok(checkpoints)
|
|
|
|
}
|
2022-05-18 10:24:37 +00:00
|
|
|
}
|
|
|
|
|
// ---- Bayou watch in K2V ----

// Shared handle between a Bayou instance and its background task:
// the task publishes causality tokens of observed changes (read here
// through `rx`), and `notify` is signalled by Bayou::push() to make the
// task bump the watched K2V entry so other processes notice the change.
struct K2vWatch {
    // K2V partition key (the Bayou path) and sort key (WATCH_SK)
    pk: String,
    sk: String,
    // Causality token of the most recently observed change, if any
    rx: watch::Receiver<Option<CausalityToken>>,
    // Signalled to request an update of the watched entry
    notify: Notify,
}
|
|
|
|
|
|
|
|
impl K2vWatch {
    /// Creates a new watch and launches the subordinate background task.
    /// The task holds a Weak pointer to the struct;
    /// it exits when the last Arc is dropped.
    fn new(creds: &Credentials, pk: String, sk: String) -> Result<Arc<Self>> {
        let (tx, rx) = watch::channel::<Option<CausalityToken>>(None);
        let notify = Notify::new();

        let watch = Arc::new(K2vWatch { pk, sk, rx, notify });

        // The task gets its own K2V client and the sending half of the
        // channel; only a Weak reference to the shared struct is kept.
        tokio::spawn(Self::background_task(
            Arc::downgrade(&watch),
            creds.k2v_client()?,
            tx,
        ));

        Ok(watch)
    }

    /// Loop that watches the (pk, sk) K2V entry: publishes each observed
    /// causality token on `tx`, and rewrites the entry with a random value
    /// whenever `notify` is signalled. Terminates when the owning Arc is
    /// dropped (Weak upgrade fails) or the channel receiver is gone.
    async fn background_task(
        self_weak: Weak<Self>,
        k2v: K2vClient,
        tx: watch::Sender<Option<CausalityToken>>,
    ) {
        let mut ct = None;
        // Re-check liveness of the owner on every iteration.
        while let Some(this) = Weak::upgrade(&self_weak) {
            debug!(
                "bayou k2v watch bg loop iter ({}, {}): ct = {:?}",
                this.pk, this.sk, ct
            );
            tokio::select!(
                // Periodic wakeup so a dropped owner is noticed within 60s
                // even if nothing else happens.
                _ = tokio::time::sleep(Duration::from_secs(60)) => continue,
                update = k2v_wait_value_changed(&k2v, &this.pk, &this.sk, &ct) => {
                    match update {
                        Err(e) => {
                            // Back off before retrying on poll errors.
                            error!("Error in bayou k2v wait value changed: {}", e);
                            tokio::time::sleep(Duration::from_secs(30)).await;
                        }
                        Ok(cv) => {
                            // Receiver gone means the owner was dropped: exit.
                            if tx.send(Some(cv.causality.clone())).is_err() {
                                break;
                            }
                            ct = Some(cv.causality);
                        }
                    }
                }
                _ = this.notify.notified() => {
                    // Write a fresh random value under the current causality
                    // token so all watchers (including us) see a change.
                    let rand = u128::to_be_bytes(thread_rng().gen()).to_vec();
                    if let Err(e) = k2v
                        .insert_item(
                            &this.pk,
                            &this.sk,
                            rand,
                            ct.clone(),
                        )
                        .await
                    {
                        error!("Error in bayou k2v watch updater loop: {}", e);
                        tokio::time::sleep(Duration::from_secs(30)).await;
                    }
                }
            );
        }
        info!("bayou k2v watch bg loop exiting");
    }
}
|
|
|
|
|
|
|
|
// ---- TIMESTAMP CLASS ----

// Timestamp of an operation or checkpoint: a wall-clock time in
// milliseconds (as produced by now_msec()) plus a random component to
// break ties between concurrent writers. The derived Ord compares msec
// first, then rand, matching the order of the hex string encoding.
#[derive(Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Debug)]
pub struct Timestamp {
    // Milliseconds component (dominant in ordering)
    pub msec: u64,
    // Random tie-breaker
    pub rand: u64,
}
|
|
|
|
|
|
|
|
impl Timestamp {
|
|
|
|
pub fn now() -> Self {
|
|
|
|
let mut rng = thread_rng();
|
|
|
|
Self {
|
|
|
|
msec: now_msec(),
|
|
|
|
rand: rng.gen::<u64>(),
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
pub fn after(other: &Self) -> Self {
|
|
|
|
let mut rng = thread_rng();
|
|
|
|
Self {
|
|
|
|
msec: std::cmp::max(now_msec(), other.msec + 1),
|
|
|
|
rand: rng.gen::<u64>(),
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
pub fn zero() -> Self {
|
|
|
|
Self { msec: 0, rand: 0 }
|
|
|
|
}
|
2022-05-31 22:06:26 +00:00
|
|
|
}
|
2022-05-18 10:24:37 +00:00
|
|
|
|
2022-05-31 22:06:26 +00:00
|
|
|
impl ToString for Timestamp {
|
|
|
|
fn to_string(&self) -> String {
|
2022-05-18 10:24:37 +00:00
|
|
|
let mut bytes = [0u8; 16];
|
|
|
|
bytes[0..8].copy_from_slice(&u64::to_be_bytes(self.msec));
|
|
|
|
bytes[8..16].copy_from_slice(&u64::to_be_bytes(self.rand));
|
|
|
|
hex::encode(&bytes)
|
|
|
|
}
|
2022-05-31 22:06:26 +00:00
|
|
|
}
|
2022-05-18 10:24:37 +00:00
|
|
|
|
2022-05-31 22:06:26 +00:00
|
|
|
impl FromStr for Timestamp {
|
|
|
|
type Err = &'static str;
|
|
|
|
|
|
|
|
fn from_str(s: &str) -> Result<Timestamp, &'static str> {
|
|
|
|
let bytes = hex::decode(s).map_err(|_| "invalid hex")?;
|
2022-05-18 10:24:37 +00:00
|
|
|
if bytes.len() != 16 {
|
2022-05-31 22:06:26 +00:00
|
|
|
return Err("bad length");
|
2022-05-18 10:24:37 +00:00
|
|
|
}
|
2022-05-31 22:06:26 +00:00
|
|
|
Ok(Self {
|
2022-05-18 10:24:37 +00:00
|
|
|
msec: u64::from_be_bytes(bytes[0..8].try_into().unwrap()),
|
|
|
|
rand: u64::from_be_bytes(bytes[8..16].try_into().unwrap()),
|
|
|
|
})
|
|
|
|
}
|
|
|
|
}
|