2020-12-12 16:06:40 +00:00
|
|
|
//! This package provides a simple implementation of conflict-free replicated data types (CRDTs)
|
|
|
|
//!
|
|
|
|
//! CRDTs are a type of data structures that do not require coordination. In other words, we can
|
|
|
|
//! edit them in parallel and still always find a way to merge them.
|
|
|
|
//!
|
|
|
|
//! A general example is a counter. Its initial value is 0. Alice and Bob get a copy of the
|
|
|
|
//! counter. Alice does +1 on her copy, she reads 1. Bob does +3 on his copy, he reads 3. Now,
|
|
|
|
//! it is easy to merge their counters; the order does not matter: we always get 4.
|
|
|
|
//!
|
|
|
|
//! Learn more about CRDT [on Wikipedia](https://en.wikipedia.org/wiki/Conflict-free_replicated_data_type)
|
|
|
|
|
2020-11-20 20:15:24 +00:00
|
|
|
use serde::{Deserialize, Serialize};
|
|
|
|
|
|
|
|
use garage_util::data::*;
|
|
|
|
|
2020-12-12 16:06:40 +00:00
|
|
|
/// Common interface of every conflict-free replicated data type (CRDT) in this module.
///
/// A CRDT is characterised by its merge operator, written `⊔` below, which has to be
/// commutative, associative, idempotent and monotonic. For any CRDT values `a`, `b`
/// and `c`, the following axioms must hold:
///
/// ```text
/// a ⊔ b = b ⊔ a                (commutativity)
/// (a ⊔ b) ⊔ c = a ⊔ (b ⊔ c)    (associativity)
/// (a ⊔ b) ⊔ b = a ⊔ b          (idempotence)
/// ```
///
/// In addition, the relation `≥` given by `a ≥ b ⇔ ∃c. a = b ⊔ c` must be a partial order.
/// One consequence: if `a ⊔ b ≠ a`, then no `c` exists such that `(a ⊔ b) ⊔ c = a`,
/// because that would create a cycle in the partial order.
pub trait CRDT {
    /// Merges `other` into `self` following the CRDT rules.
    ///
    /// After the call, `self` holds the merged value; `other` is left untouched.
    ///
    /// # Arguments
    ///
    /// * `other` - the CRDT value to merge into `self`
    fn merge(&mut self, other: &Self);
}
|
|
|
|
|
2020-12-12 16:06:40 +00:00
|
|
|
/// All types that implement `Ord` (a total order) also implement a trivial CRDT
|
|
|
|
/// defined by the merge rule: `a ⊔ b = max(a, b)`.
|
2020-11-20 20:15:24 +00:00
|
|
|
impl<T> CRDT for T
|
2020-11-20 22:01:12 +00:00
|
|
|
where
|
|
|
|
T: Ord + Clone,
|
|
|
|
{
|
2020-11-20 20:15:24 +00:00
|
|
|
fn merge(&mut self, other: &Self) {
|
|
|
|
if other > self {
|
|
|
|
*self = other.clone();
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
// ---- LWW Register ----
|
|
|
|
|
2020-11-23 16:49:21 +00:00
|
|
|
/// Last Write Win (LWW)
|
|
|
|
///
|
2020-12-12 16:06:40 +00:00
|
|
|
/// An LWW CRDT associates a timestamp with a value, in order to implement a
|
|
|
|
/// time-based reconciliation rule: the most recent write wins.
|
|
|
|
/// For completeness, the LWW reconciliation rule must also be defined for two LWW CRDTs
|
|
|
|
/// with the same timestamp but different values.
|
|
|
|
///
|
|
|
|
/// In our case, we add the constraint that the value that is wrapped inside the LWW CRDT must
|
|
|
|
/// itself be a CRDT: in the case when the timestamp does not allow us to decide on which value to
|
|
|
|
/// keep, the merge rule of the inner CRDT is applied on the wrapped values. (Note that all types
|
|
|
|
/// that implement the `Ord` trait get a default CRDT implemetnation that keeps the maximum value.
|
|
|
|
/// This enables us to use LWW directly with primitive data types such as numbers or strings. It is
|
|
|
|
/// generally desirable in this case to never explicitly produce LWW values with the same timestamp
|
|
|
|
/// but different inner values, as the rule to keep the maximum value isn't generally the desired
|
|
|
|
/// semantics.)
|
|
|
|
///
|
2020-11-23 17:17:48 +00:00
|
|
|
/// As multiple computers clocks are always desynchronized,
|
|
|
|
/// when operations are close enough, it is equivalent to
|
|
|
|
/// take one copy and drop the other one.
|
2020-12-12 16:06:40 +00:00
|
|
|
///
|
2020-11-23 17:17:48 +00:00
|
|
|
/// Given that clocks are not too desynchronized, this assumption
|
|
|
|
/// is enough for most cases, as there is few chance that two humans
|
|
|
|
/// coordonate themself faster than the time difference between two NTP servers.
|
|
|
|
///
|
|
|
|
/// As a more concret example, let's suppose you want to upload a file
|
|
|
|
/// with the same key (path) in the same bucket at the very same time.
|
|
|
|
/// For each request, the file will be timestamped by the receiving server
|
|
|
|
/// and may differ from what you observed with your atomic clock!
|
|
|
|
///
|
|
|
|
/// This scheme is used by AWS S3 or Soundcloud and often without knowing
|
|
|
|
/// in entreprise when reconciliating databases with ad-hoc scripts.
|
2020-11-20 20:15:24 +00:00
|
|
|
#[derive(Clone, Debug, Serialize, Deserialize, PartialEq)]
|
2020-11-20 22:01:12 +00:00
|
|
|
pub struct LWW<T> {
|
2020-11-20 20:15:24 +00:00
|
|
|
ts: u64,
|
|
|
|
v: T,
|
|
|
|
}
|
|
|
|
|
|
|
|
impl<T> LWW<T>
|
2020-11-20 22:01:12 +00:00
|
|
|
where
|
2020-11-20 22:23:55 +00:00
|
|
|
T: CRDT,
|
2020-11-20 20:15:24 +00:00
|
|
|
{
|
2020-11-23 17:17:48 +00:00
|
|
|
/// Creates a new CRDT
|
|
|
|
///
|
|
|
|
/// CRDT's internal timestamp is set with current node's clock.
|
2020-11-20 20:15:24 +00:00
|
|
|
pub fn new(value: T) -> Self {
|
|
|
|
Self {
|
|
|
|
ts: now_msec(),
|
|
|
|
v: value,
|
|
|
|
}
|
|
|
|
}
|
2020-11-23 17:17:48 +00:00
|
|
|
|
|
|
|
/// Build a new CRDT from a previous non-compatible one
|
|
|
|
///
|
|
|
|
/// Compared to new, the CRDT's timestamp is not set to now
|
|
|
|
/// but must be set to the previous, non-compatible, CRDT's timestamp.
|
2020-11-20 20:15:24 +00:00
|
|
|
pub fn migrate_from_raw(ts: u64, value: T) -> Self {
|
2020-11-20 22:01:12 +00:00
|
|
|
Self { ts, v: value }
|
2020-11-20 20:15:24 +00:00
|
|
|
}
|
2020-11-23 17:17:48 +00:00
|
|
|
|
|
|
|
/// Update the LWW CRDT while keeping some causal ordering.
|
2020-12-12 16:06:40 +00:00
|
|
|
///
|
|
|
|
/// The timestamp of the LWW CRDT is updated to be the current node's clock
|
|
|
|
/// at time of update, or the previous timestamp + 1 if that's bigger,
|
|
|
|
/// so that the new timestamp is always strictly larger than the previous one.
|
|
|
|
/// This ensures that merging the update with the old value will result in keeping
|
|
|
|
/// the updated value.
|
2020-11-20 20:15:24 +00:00
|
|
|
pub fn update(&mut self, new_value: T) {
|
|
|
|
self.ts = std::cmp::max(self.ts + 1, now_msec());
|
|
|
|
self.v = new_value;
|
|
|
|
}
|
2020-11-23 17:17:48 +00:00
|
|
|
|
|
|
|
/// Get the CRDT value
|
2020-11-20 20:15:24 +00:00
|
|
|
pub fn get(&self) -> &T {
|
|
|
|
&self.v
|
|
|
|
}
|
2020-11-23 17:17:48 +00:00
|
|
|
|
2020-12-12 16:06:40 +00:00
|
|
|
/// Get a mutable reference to the CRDT's value
|
|
|
|
///
|
|
|
|
/// This is usefull to mutate the inside value without changing the LWW timestamp.
|
|
|
|
/// When such mutation is done, the merge between two LWW values is done using the inner
|
|
|
|
/// CRDT's merge operation. This is usefull in the case where the inner CRDT is a large
|
|
|
|
/// data type, such as a map, and we only want to change a single item in the map.
|
|
|
|
/// To do this, we can produce a "CRDT delta", i.e. a LWW that contains only the modification.
|
|
|
|
/// This delta consists in a LWW with the same timestamp, and the map
|
|
|
|
/// inside only contains the updated value.
|
|
|
|
/// The advantage of such a delta is that it is much smaller than the whole map.
|
|
|
|
///
|
|
|
|
/// Avoid using this if the inner data type is a primitive type such as a number or a string,
|
|
|
|
/// as you will then rely on the merge function defined on `Ord` types by keeping the maximum
|
|
|
|
/// of both values.
|
2020-11-20 22:01:12 +00:00
|
|
|
pub fn get_mut(&mut self) -> &mut T {
|
|
|
|
&mut self.v
|
|
|
|
}
|
2020-11-20 20:15:24 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
impl<T> CRDT for LWW<T>
|
2020-11-20 22:01:12 +00:00
|
|
|
where
|
2020-11-20 22:23:55 +00:00
|
|
|
T: Clone + CRDT,
|
2020-11-20 20:15:24 +00:00
|
|
|
{
|
|
|
|
fn merge(&mut self, other: &Self) {
|
|
|
|
if other.ts > self.ts {
|
|
|
|
self.ts = other.ts;
|
|
|
|
self.v = other.v.clone();
|
|
|
|
} else if other.ts == self.ts {
|
|
|
|
self.v.merge(&other.v);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2020-12-12 16:06:40 +00:00
|
|
|
/// Boolean, where `true` is an absorbing state
|
2020-11-20 20:15:24 +00:00
|
|
|
#[derive(Clone, Copy, Debug, Serialize, Deserialize, PartialEq)]
|
|
|
|
pub struct Bool(bool);
|
|
|
|
|
|
|
|
impl Bool {
|
2020-12-12 16:06:40 +00:00
|
|
|
/// Create a new boolean with the specified value
|
2020-11-20 20:15:24 +00:00
|
|
|
pub fn new(b: bool) -> Self {
|
|
|
|
Self(b)
|
|
|
|
}
|
2020-12-12 16:06:40 +00:00
|
|
|
/// Set the boolean to true
|
2020-11-20 20:15:24 +00:00
|
|
|
pub fn set(&mut self) {
|
|
|
|
self.0 = true;
|
|
|
|
}
|
2020-12-12 16:06:40 +00:00
|
|
|
/// Get the boolean value
|
2020-11-20 20:15:24 +00:00
|
|
|
pub fn get(&self) -> bool {
|
|
|
|
self.0
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
impl CRDT for Bool {
    /// Logical OR of both values: the result is `true` as soon as one side is `true`.
    fn merge(&mut self, other: &Self) {
        self.0 |= other.0;
    }
}
|
|
|
|
|
2020-11-23 16:49:21 +00:00
|
|
|
/// Last Write Win Map
|
|
|
|
///
|
2020-12-12 16:06:40 +00:00
|
|
|
/// This types defines a CRDT for a map from keys to values.
|
|
|
|
/// The values have an associated timestamp, such that the last written value
|
|
|
|
/// takes precedence over previous ones. As for the simpler `LWW` type, the value
|
|
|
|
/// type `V` is also required to implement the CRDT trait.
|
|
|
|
/// We do not encourage mutating the values associated with a given key
|
|
|
|
/// without updating the timestamp, in fact at the moment we do not provide a `.get_mut()`
|
|
|
|
/// method that would allow that.
|
2020-11-23 16:49:21 +00:00
|
|
|
///
|
2020-12-12 16:06:40 +00:00
|
|
|
/// Internally, the map is stored as a vector of keys and values, sorted by ascending key order.
|
|
|
|
/// This is why the key type `K` must implement `Ord` (and also to ensure a unique serialization,
|
|
|
|
/// such that two values can be compared for equality based on their hashes). As a consequence,
|
|
|
|
/// insertions take `O(n)` time. This means that LWWMap should be used for reasonably small maps.
|
|
|
|
/// However, note that even if we were using a more efficient data structure such as a `BTreeMap`,
|
|
|
|
/// the serialization cost `O(n)` would still have to be paid at each modification, so we are
|
|
|
|
/// actually not losing anything here.
|
2020-11-20 20:15:24 +00:00
|
|
|
#[derive(Clone, Debug, Serialize, Deserialize, PartialEq)]
|
2020-11-20 22:01:12 +00:00
|
|
|
pub struct LWWMap<K, V> {
|
2020-11-20 20:15:24 +00:00
|
|
|
vals: Vec<(K, u64, V)>,
|
|
|
|
}
|
|
|
|
|
|
|
|
impl<K, V> LWWMap<K, V>
|
2020-11-20 22:01:12 +00:00
|
|
|
where
|
2020-11-20 22:23:55 +00:00
|
|
|
K: Ord,
|
|
|
|
V: CRDT,
|
2020-11-20 20:15:24 +00:00
|
|
|
{
|
2020-12-12 16:06:40 +00:00
|
|
|
/// Create a new empty map CRDT
|
2020-11-20 20:15:24 +00:00
|
|
|
pub fn new() -> Self {
|
2020-11-20 22:01:12 +00:00
|
|
|
Self { vals: vec![] }
|
2020-11-20 20:15:24 +00:00
|
|
|
}
|
2020-12-12 16:06:40 +00:00
|
|
|
/// Used to migrate from a map defined in an incompatible format. This produces
|
|
|
|
/// a map that contains a single item with the specified timestamp (copied from
|
|
|
|
/// the incompatible format). Do this as many times as you have items to migrate,
|
|
|
|
/// and put them all together using the CRDT merge operator.
|
2020-11-20 20:15:24 +00:00
|
|
|
pub fn migrate_from_raw_item(k: K, ts: u64, v: V) -> Self {
|
2020-11-20 22:01:12 +00:00
|
|
|
Self {
|
2020-11-20 20:15:24 +00:00
|
|
|
vals: vec![(k, ts, v)],
|
|
|
|
}
|
|
|
|
}
|
2020-12-12 16:06:40 +00:00
|
|
|
/// Returns a map that contains a single mapping from the specified key to the specified value.
|
|
|
|
/// This map is a mutator, or a delta-CRDT, such that when it is merged with the original map,
|
|
|
|
/// the previous value will be replaced with the one specified here.
|
|
|
|
/// The timestamp in the provided mutator is set to the maximum of the current system's clock
|
|
|
|
/// and 1 + the previous value's timestamp (if there is one), so that the new value will always
|
|
|
|
/// take precedence (LWW rule).
|
|
|
|
///
|
|
|
|
/// Typically, to update the value associated to a key in the map, you would do the following:
|
|
|
|
///
|
2021-01-15 16:03:38 +00:00
|
|
|
/// ```ignore
|
2020-12-12 16:06:40 +00:00
|
|
|
/// let my_update = my_crdt.update_mutator(key_to_modify, new_value);
|
|
|
|
/// my_crdt.merge(&my_update);
|
|
|
|
/// ```
|
|
|
|
///
|
|
|
|
/// However extracting the mutator on its own and only sending that on the network is very
|
|
|
|
/// interesting as it is much smaller than the whole map.
|
2020-11-20 20:15:24 +00:00
|
|
|
pub fn update_mutator(&self, k: K, new_v: V) -> Self {
|
2020-11-20 22:01:12 +00:00
|
|
|
let new_vals = match self.vals.binary_search_by(|(k2, _, _)| k2.cmp(&k)) {
|
2020-11-20 20:15:24 +00:00
|
|
|
Ok(i) => {
|
|
|
|
let (_, old_ts, _) = self.vals[i];
|
2020-11-20 22:01:12 +00:00
|
|
|
let new_ts = std::cmp::max(old_ts + 1, now_msec());
|
2020-11-20 20:15:24 +00:00
|
|
|
vec![(k, new_ts, new_v)]
|
|
|
|
}
|
2020-11-20 22:01:12 +00:00
|
|
|
Err(_) => vec![(k, now_msec(), new_v)],
|
2020-11-20 20:15:24 +00:00
|
|
|
};
|
2020-11-20 22:01:12 +00:00
|
|
|
Self { vals: new_vals }
|
2020-11-20 20:15:24 +00:00
|
|
|
}
|
2020-12-12 16:06:40 +00:00
|
|
|
/// Takes all of the values of the map and returns them. The current map is reset to the
|
|
|
|
/// empty map. This is very usefull to produce in-place a new map that contains only a delta
|
|
|
|
/// that modifies a certain value:
|
|
|
|
///
|
2021-01-15 16:03:38 +00:00
|
|
|
/// ```ignore
|
2020-12-12 16:06:40 +00:00
|
|
|
/// let mut a = get_my_crdt_value();
|
|
|
|
/// let old_a = a.take_and_clear();
|
|
|
|
/// a.merge(&old_a.update_mutator(key_to_modify, new_value));
|
|
|
|
/// put_my_crdt_value(a);
|
|
|
|
/// ```
|
|
|
|
///
|
|
|
|
/// Of course in this simple example we could have written simply
|
|
|
|
/// `pyt_my_crdt_value(a.update_mutator(key_to_modify, new_value))`,
|
|
|
|
/// but in the case where the map is a field in a struct for instance (as is always the case),
|
|
|
|
/// this becomes very handy:
|
|
|
|
///
|
2021-01-15 16:03:38 +00:00
|
|
|
/// ```ignore
|
2020-12-12 16:06:40 +00:00
|
|
|
/// let mut a = get_my_crdt_value();
|
|
|
|
/// let old_a_map = a.map_field.take_and_clear();
|
|
|
|
/// a.map_field.merge(&old_a_map.update_mutator(key_to_modify, new_value));
|
|
|
|
/// put_my_crdt_value(a);
|
|
|
|
/// ```
|
|
|
|
pub fn take_and_clear(&mut self) -> Self {
|
|
|
|
let vals = std::mem::replace(&mut self.vals, vec![]);
|
|
|
|
Self { vals }
|
|
|
|
}
|
|
|
|
/// Removes all values from the map
|
|
|
|
pub fn clear(&mut self) {
|
|
|
|
self.vals.clear();
|
|
|
|
}
|
|
|
|
/// Get a reference to the value assigned to a key
|
2020-11-20 20:15:24 +00:00
|
|
|
pub fn get(&self, k: &K) -> Option<&V> {
|
2020-11-20 22:01:12 +00:00
|
|
|
match self.vals.binary_search_by(|(k2, _, _)| k2.cmp(&k)) {
|
2020-11-20 20:15:24 +00:00
|
|
|
Ok(i) => Some(&self.vals[i].2),
|
2020-11-20 22:01:12 +00:00
|
|
|
Err(_) => None,
|
2020-11-20 20:15:24 +00:00
|
|
|
}
|
|
|
|
}
|
2020-12-12 16:06:40 +00:00
|
|
|
/// Gets a reference to all of the items, as a slice. Usefull to iterate on all map values.
|
|
|
|
/// In most case you will want to ignore the timestamp (second item of the tuple).
|
2020-11-20 20:15:24 +00:00
|
|
|
pub fn items(&self) -> &[(K, u64, V)] {
|
|
|
|
&self.vals[..]
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
impl<K, V> CRDT for LWWMap<K, V>
|
2020-11-20 22:01:12 +00:00
|
|
|
where
|
2020-11-20 22:23:55 +00:00
|
|
|
K: Clone + Ord,
|
|
|
|
V: Clone + CRDT,
|
2020-11-20 20:15:24 +00:00
|
|
|
{
|
|
|
|
fn merge(&mut self, other: &Self) {
|
|
|
|
for (k, ts2, v2) in other.vals.iter() {
|
2020-11-20 22:01:12 +00:00
|
|
|
match self.vals.binary_search_by(|(k2, _, _)| k2.cmp(&k)) {
|
2020-11-20 20:15:24 +00:00
|
|
|
Ok(i) => {
|
2020-11-20 22:01:12 +00:00
|
|
|
let (_, ts1, _v1) = &self.vals[i];
|
2020-11-20 20:15:24 +00:00
|
|
|
if ts2 > ts1 {
|
|
|
|
self.vals[i].1 = *ts2;
|
|
|
|
self.vals[i].2 = v2.clone();
|
|
|
|
} else if ts1 == ts2 {
|
|
|
|
self.vals[i].2.merge(&v2);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
Err(i) => {
|
|
|
|
self.vals.insert(i, (k.clone(), *ts2, v2.clone()));
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|