use anyhow::{bail, Result};
use serde::{Deserialize, Deserializer, Serialize, Serializer};
use im::{OrdMap, OrdSet, ordset};

use aero_bayou::*;

use crate::unique_ident::{gen_ident, UniqueIdent};

/// Parents are only persisted in the event log,
/// not in the checkpoints.
pub type Token = UniqueIdent;
pub type Parents = Vec<Token>;
pub type SyncDesc = (Parents, Token);
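
// A `SyncDesc` is the DAG edge carried by every operation: the heads known
// when the operation was created (its parents) and a freshly generated token
// identifying the operation itself (see `DavDag::sync_desc`).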

pub type BlobId = UniqueIdent;
pub type Etag = String;
pub type FileName = String;
pub type IndexEntry = (BlobId, FileName, Etag);

#[derive(Clone, Default)]
pub struct DavDag {
    /// Source of truth
    pub table: OrdMap<BlobId, IndexEntry>,

    /// Indexes optimized for queries
    pub idx_by_filename: OrdMap<FileName, BlobId>,

    // ------------ Below this line, data is ephemeral, ie. not checkpointed

    /// Partial synchronization graph
    pub ancestors: OrdMap<Token, OrdSet<Token>>,

    /// All nodes
    pub all_nodes: OrdSet<Token>,
    /// Head nodes
    pub heads: OrdSet<Token>,
    /// Origin nodes
    pub origins: OrdSet<Token>,

    /// File change recorded by each sync token
    pub change: OrdMap<Token, SyncChange>,
}

#[derive(Clone, Debug)]
pub enum SyncChange {
    Ok(FileName),
    NotFound(FileName),
}

#[derive(Clone, Serialize, Deserialize, Debug)]
pub enum DavDagOp {
    /// Merge is a virtual operation run when multiple heads are discovered
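    /// (it carries no payload: only a `SyncDesc` whose parents are the heads
    /// being merged, so applying it collapses them into a single head)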
    Merge(SyncDesc),

    /// Add an item to the collection
    Put(SyncDesc, IndexEntry),

    /// Delete an item from the collection
    Delete(SyncDesc, BlobId),
}

impl DavDagOp {
    pub fn token(&self) -> Token {
        match self {
            Self::Merge((_, t)) => *t,
            Self::Put((_, t), _) => *t,
            Self::Delete((_, t), _) => *t,
        }
    }
}

impl DavDag {
    pub fn op_merge(&self) -> DavDagOp {
        DavDagOp::Merge(self.sync_desc())
    }

    pub fn op_put(&self, entry: IndexEntry) -> DavDagOp {
        DavDagOp::Put(self.sync_desc(), entry)
    }

    pub fn op_delete(&self, blob_id: BlobId) -> DavDagOp {
        DavDagOp::Delete(self.sync_desc(), blob_id)
    }

    // HELPER functions

    pub fn heads_vec(&self) -> Vec<Token> {
        self.heads.clone().into_iter().collect()
    }

    /// A sync descriptor
    pub fn sync_desc(&self) -> SyncDesc {
        (self.heads_vec(), gen_ident())
    }

    /// Resolve a sync token
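    /// Given the last sync token a peer knows, return the set of tokens it is
    /// missing, i.e. every node of the DAG that is not an ancestor of `known`.
    /// Fails when the local graph no longer reaches all of its origins, in
    /// which case only a full resync can produce a correct diff.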
    pub fn resolve(&self, known: Token) -> Result<OrdSet<Token>> {
        let already_known = self.all_ancestors(known);

        // We can't capture all missing events if we are not connected
        // to all sinks of the graph,
        // ie. if we don't already know all the sinks,
        // ie. if we are missing so much history that
        // the event log has been transformed into a checkpoint
        if !self.origins.is_subset(already_known.clone()) {
            bail!("Not enough history to produce a correct diff, a full resync is needed");
        }

        // Missing items are *all existing graph items* from which
        // we removed *all items known by the given node*.
        // In other words, all values in `all_nodes` that are not in `already_known`.
        Ok(self.all_nodes.clone().relative_complement(already_known))
    }

    /// Find all ancestors of a given node
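    /// (the node itself included). Iteratively follows the in-memory
    /// `ancestors` map; tokens absent from that map (origins, or history
    /// pruned into a checkpoint) simply end the walk.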
    fn all_ancestors(&self, known: Token) -> OrdSet<Token> {
        let mut all_known: OrdSet<UniqueIdent> = OrdSet::new();
        let mut to_collect = vec![known];
        loop {
            let cursor = match to_collect.pop() {
                // Loop stops here
                None => break,
                Some(v) => v,
            };

            if all_known.insert(cursor).is_some() {
                // Item already processed
                continue;
            }

            // Collect parents
            let parents = match self.ancestors.get(&cursor) {
                None => continue,
                Some(c) => c,
            };
            to_collect.extend(parents.iter());
        }
        all_known
    }

    // INTERNAL functions

    /// Register a WebDAV item (put, copy, move)
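    /// Overwrites any existing entry for the same blob id, refreshes the
    /// filename index, and (when a sync token is given) records the change.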
    fn register(&mut self, sync_token: Option<Token>, entry: IndexEntry) {
        let (blob_id, filename, _etag) = entry.clone();

        // Insert item in the source of truth
        self.table.insert(blob_id, entry);

        // Update the cache
        self.idx_by_filename.insert(filename.to_string(), blob_id);

        // Record the change in the ephemeral synchronization map
        if let Some(sync_token) = sync_token {
            self.change.insert(sync_token, SyncChange::Ok(filename));
        }
    }

    /// Unregister a WebDAV item (delete, move)
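    /// Unknown blob ids are ignored: nothing is removed and no change is recorded.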
    fn unregister(&mut self, sync_token: Token, blob_id: &BlobId) {
        // Query the source of truth to get the information we
        // need to clean the indexes
        let (_blob_id, filename, _etag) = match self.table.get(blob_id) {
            Some(v) => v,
            // Element does not exist, return early
            None => return,
        };
        self.idx_by_filename.remove(filename);

        // Record the change in the ephemeral synchronization map
        self.change
            .insert(sync_token, SyncChange::NotFound(filename.to_string()));

        // Finally clear item from the source of truth
        self.table.remove(blob_id);
    }

    /// When an event is processed, update the synchronization DAG
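    /// For example, applying an event `C` whose parents are the current heads
    /// `{A, B}` records `ancestors[C] = {A, B}`, removes `A` and `B` from
    /// `heads` and makes `C` the single new head; an event without parents is
    /// also recorded as an origin.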
    fn sync_dag(&mut self, sync_desc: &SyncDesc) {
        let (parents, child) = sync_desc;

        // --- Update ANCESTORS
        // We register ancestors as it is required for the sync algorithm
        self.ancestors.insert(
            *child,
            parents.iter().fold(ordset![], |mut acc, p| {
                acc.insert(*p);
                acc
            }),
        );

        // --- Update ORIGINS
        // If this event has no parents, it's an origin
        if parents.is_empty() {
            self.origins.insert(*child);
        }

        // --- Update HEADS
        // Remove this event's parents from HEADS
        parents.iter().for_each(|par| {
            self.heads.remove(par);
        });

        // This event becomes a new HEAD in turn
        self.heads.insert(*child);

        // --- Update ALL NODES
        self.all_nodes.insert(*child);
    }
}

impl BayouState for DavDag {
    type Op = DavDagOp;

    fn apply(&self, op: &Self::Op) -> Self {
        let mut new = self.clone();

        match op {
            DavDagOp::Put(sync_desc, entry) => {
                new.sync_dag(sync_desc);
                new.register(Some(sync_desc.1), entry.clone());
            }
            DavDagOp::Delete(sync_desc, blob_id) => {
                new.sync_dag(sync_desc);
                new.unregister(sync_desc.1, blob_id);
            }
            DavDagOp::Merge(sync_desc) => {
                new.sync_dag(sync_desc);
            }
        }

        new
    }
}

// CUSTOM SERIALIZATION & DESERIALIZATION
#[derive(Serialize, Deserialize)]
struct DavDagSerializedRepr {
    items: Vec<IndexEntry>,
    heads: Vec<UniqueIdent>,
}

impl<'de> Deserialize<'de> for DavDag {
    fn deserialize<D>(d: D) -> Result<Self, D::Error>
    where
        D: Deserializer<'de>,
    {
        let val: DavDagSerializedRepr = DavDagSerializedRepr::deserialize(d)?;
        let mut davdag = DavDag::default();

        // Build the table + index
        val.items
            .into_iter()
            .for_each(|entry| davdag.register(None, entry));

        // Initialize the synchronization DAG with its roots
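        // The checkpointed heads are re-seeded as the origins, heads and sole
        // nodes of a fresh DAG: a peer whose last known token predates this
        // checkpoint cannot reach the origins, so `resolve()` will ask it for
        // a full resync.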
        val.heads.into_iter().for_each(|ident| {
            davdag.heads.insert(ident);
            davdag.origins.insert(ident);
            davdag.all_nodes.insert(ident);
        });

        Ok(davdag)
    }
}

impl Serialize for DavDag {
    fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
    where
        S: Serializer,
    {
        // Indexes are rebuilt on the fly, we serialize only the core database
        let items = self.table.iter().map(|(_, entry)| entry.clone()).collect();

        // We keep only the head entries from the sync graph,
        // these entries will be used to initialize it back when deserializing
        let heads = self.heads_vec();

        // Final serialization object
        let val = DavDagSerializedRepr { items, heads };
        val.serialize(serializer)
    }
}

// ---- TESTS ----

#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn base() {
        let mut state = DavDag::default();

        // Add item 1
        {
            let m = UniqueIdent([0x01; 24]);
            let ev = state.op_put((m, "cal.ics".into(), "321-321".into()));
            state = state.apply(&ev);

            assert_eq!(state.table.len(), 1);
            assert_eq!(state.resolve(ev.token()).unwrap().len(), 0);
        }

        // Add 2 concurrent items
        let (t1, t2) = {
            let blob1 = UniqueIdent([0x02; 24]);
            let ev1 = state.op_put((blob1, "cal2.ics".into(), "321-321".into()));

            let blob2 = UniqueIdent([0x01; 24]);
            let ev2 = state.op_delete(blob2);

            state = state.apply(&ev1);
            state = state.apply(&ev2);

            assert_eq!(state.table.len(), 1);
            assert_eq!(state.resolve(ev1.token()).unwrap(), ordset![ev2.token()]);

            (ev1.token(), ev2.token())
        };

        // Add later a new item
        {
            let blob3 = UniqueIdent([0x03; 24]);
            let ev = state.op_put((blob3, "cal3.ics".into(), "321-321".into()));

            state = state.apply(&ev);
            assert_eq!(state.table.len(), 2);
            assert_eq!(state.resolve(ev.token()).unwrap().len(), 0);
            assert_eq!(state.resolve(t1).unwrap(), ordset![t2, ev.token()]);
        }
    }
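
    // Illustrative sketches of the merge and full-resync paths described above;
    // the blob ids, file names and etags are made up for the example.

    #[test]
    fn merge_collapses_heads() {
        let mut state = DavDag::default();

        // Two operations built from the same state are concurrent:
        // applying both leaves the DAG with two heads.
        let ev1 = state.op_put((UniqueIdent([0x11; 24]), "a.ics".into(), "e1".into()));
        let ev2 = state.op_put((UniqueIdent([0x12; 24]), "b.ics".into(), "e2".into()));
        state = state.apply(&ev1);
        state = state.apply(&ev2);
        assert_eq!(state.heads.len(), 2);

        // A merge operation references both heads and becomes the single new head.
        let merge = state.op_merge();
        state = state.apply(&merge);
        assert_eq!(state.heads_vec(), vec![merge.token()]);
    }

    #[test]
    fn resolve_needs_full_history() {
        let mut state = DavDag::default();
        let ev = state.op_put((UniqueIdent([0x0a; 24]), "todo.ics".into(), "e1".into()));
        state = state.apply(&ev);

        // A token the DAG has never seen cannot reach the origins,
        // so no correct diff can be produced and a full resync is required.
        let unknown = UniqueIdent([0xff; 24]);
        assert!(state.resolve(unknown).is_err());
    }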
}