More documentation on CRDTs (we should probably extract this to a

standalone crate!)
2020-12-12 17:06:40 +01:00 · 2020-12-12 17:06:40 +01:00 · 5c6c067b0c
commit 5c6c067b0c
parent 0b3084ca5f
1 changed files with 138 additions and 26 deletions
--- a/src/table/crdt.rs
+++ b/src/table/crdt.rs
@ -1,30 +1,48 @@
 //! This package provides a simple implementation of conflict-free replicated data types (CRDTs)
 //!
 //! CRDTs are a type of data structures that do not require coordination.  In other words, we can
 //! edit them in parallel, and we will always find a way to merge them.
 //!
 //! A general example is a counter. Its initial value is 0.  Alice and Bob get a copy of the
 //! counter.  Alice does +1 on her copy, she reads 1.  Bob does +3 on his copy, he reads 3.  Now,
 //! it is easy to merge their counters, order does not count: we always get 4.
 //!
 //! Learn more about CRDT [on Wikipedia](https://en.wikipedia.org/wiki/Conflict-free_replicated_data_type)
 use serde::{Deserialize, Serialize};
 use garage_util::data::*;
-/// Conflict-free replicated data type (CRDT)
+/// Definition of a CRDT - all CRDT Rust types implement this.
 ///
-/// CRDT are a type of data structures that do not require coordination.
+/// A CRDT is defined as a merge operator that respects a certain set of axioms.
 /// In other words, we can edit them in parallel, we will always
 /// find a way to merge it.
 ///
-/// A general example is a counter. Its initial value is 0.
+/// In particular, the merge operator must be commutative, associative,
-/// Alice and Bob get a copy of the counter.
+/// idempotent, and monotonic.
-/// Alice does +1 on her copy, she reads 1.
+/// In other words, if `a`, `b` and `c` are CRDTs, and `⊔` denotes the merge operator,
-/// Bob does +3 on his copy, he reads 3.
+/// the following axioms must apply:
 /// Now, it is easy to merge their counters, order does not count:
 /// we always get 4.
 ///
-/// Learn more about CRDT [on Wikipedia](https://en.wikipedia.org/wiki/Conflict-free_replicated_data_type)
+/// ```text
 /// a ⊔ b = b ⊔ a                   (commutativity)
 /// (a ⊔ b) ⊔ c = a ⊔ (b ⊔ c)       (associativity)
 /// (a ⊔ b) ⊔ b = a ⊔ b             (idempotence)
 /// ```
 ///
 /// Moreover, the relationship `≥` defined by `a ≥ b ⇔ ∃c. a = b ⊔ c` must be a partial order.
 /// This implies a few properties such as: if `a ⊔ b ≠ a`, then there is no `c` such that `(a ⊔ b) ⊔ c = a`,
 /// as this would imply a cycle in the partial order.
 pub trait CRDT {
-	/// Merge the two datastructures according to the CRDT rules
+	/// Merge the two datastructures according to the CRDT rules.
 	/// `self` is modified to contain the merged CRDT value. `other` is not modified.
 	///
 	/// # Arguments
 	///
-	/// * `other` - the other copy of the CRDT
+	/// * `other` - the other CRDT we wish to merge with
 	fn merge(&mut self, other: &Self);
 }
 /// All types that implement `Ord` (a total order) also implement a trivial CRDT
 /// defined by the merge rule: `a ⊔ b = max(a, b)`.
 impl<T> CRDT for T
 where
 	T: Ord + Clone,
@ -40,11 +58,24 @@ where
 /// Last Write Win (LWW)
 ///
-/// LWW is based on time, the most recent write wins.
+/// An LWW CRDT associates a timestamp with a value, in order to implement a
 /// time-based reconciliation rule: the most recent write wins.
 /// For completeness, the LWW reconciliation rule must also be defined for two LWW CRDTs
 /// with the same timestamp but different values.
 ///
 /// In our case, we add the constraint that the value that is wrapped inside the LWW CRDT must
 /// itself be a CRDT: in the case when the timestamp does not allow us to decide on which value to
 /// keep, the merge rule of the inner CRDT is applied on the wrapped values.  (Note that all types
 /// that implement the `Ord` trait get a default CRDT implementation that keeps the maximum value.
 /// This enables us to use LWW directly with primitive data types such as numbers or strings. It is
 /// generally desirable in this case to never explicitly produce LWW values with the same timestamp
 /// but different inner values, as the rule to keep the maximum value isn't generally the desired
 /// semantics.)
 ///
 /// As the clocks of multiple computers are always desynchronized,
 /// when operations are close enough, it is equivalent to
 /// take one copy and drop the other one.
-/// 
+///
 /// Given that clocks are not too desynchronized, this assumption
 /// is enough for most cases, as there is little chance that two humans
 /// coordinate themselves faster than the time difference between two NTP servers.
@ -85,6 +116,12 @@ where
 	}
 	/// Update the LWW CRDT while keeping some causal ordering.
 	///
 	/// The timestamp of the LWW CRDT is updated to be the current node's clock
 	/// at time of update, or the previous timestamp + 1 if that's bigger,
 	/// so that the new timestamp is always strictly larger than the previous one.
 	/// This ensures that merging the update with the old value will result in keeping
 	/// the updated value.
 	pub fn update(&mut self, new_value: T) {
 		self.ts = std::cmp::max(self.ts + 1, now_msec());
 		self.v = new_value;
@ -95,7 +132,20 @@ where
 		&self.v
 	}
-	/// Get a mutable value for the CRDT
+	/// Get a mutable reference to the CRDT's value
 	///
 	/// This is useful to mutate the inside value without changing the LWW timestamp.
 	/// When such mutation is done, the merge between two LWW values is done using the inner
 	/// CRDT's merge operation. This is useful in the case where the inner CRDT is a large
 	/// data type, such as a map, and we only want to change a single item in the map.
 	/// To do this, we can produce a "CRDT delta", i.e. a LWW that contains only the modification.
 	/// This delta consists in a LWW with the same timestamp, and the map
 	/// inside only contains the updated value.
 	/// The advantage of such a delta is that it is much smaller than the whole map.
 	///
 	/// Avoid using this if the inner data type is a primitive type such as a number or a string,
 	/// as you will then rely on the merge function defined on `Ord` types by keeping the maximum
 	/// of both values.
 	pub fn get_mut(&mut self) -> &mut T {
 		&mut self.v
 	}
@ -115,19 +165,20 @@ where
 	}
 }
-/// Boolean
+/// Boolean, where `true` is an absorbing state
 ///
 /// with True as absorbing state
 #[derive(Clone, Copy, Debug, Serialize, Deserialize, PartialEq)]
 pub struct Bool(bool);
 impl Bool {
 	/// Create a new boolean with the specified value
 	pub fn new(b: bool) -> Self {
 		Self(b)
 	}
 	/// Set the boolean to true
 	pub fn set(&mut self) {
 		self.0 = true;
 	}
 	/// Get the boolean value
 	pub fn get(&self) -> bool {
 		self.0
 	}
@ -141,7 +192,21 @@ impl CRDT for Bool {
 /// Last Write Win Map
 ///
 /// This type defines a CRDT for a map from keys to values.
 /// The values have an associated timestamp, such that the last written value
 /// takes precedence over previous ones. As for the simpler `LWW` type, the value
 /// type `V` is also required to implement the CRDT trait.
 /// We do not encourage mutating the values associated with a given key
 /// without updating the timestamp, in fact at the moment we do not provide a `.get_mut()`
 /// method that would allow that.
 ///
 /// Internally, the map is stored as a vector of keys and values, sorted by ascending key order.
 /// This is why the key type `K` must implement `Ord` (and also to ensure a unique serialization,
 /// such that two values can be compared for equality based on their hashes). As a consequence,
 /// insertions take `O(n)` time. This means that LWWMap should be used for reasonably small maps.
 /// However, note that even if we were using a more efficient data structure such as a `BTreeMap`,
 /// the serialization cost `O(n)` would still have to be paid at each modification, so we are
 /// actually not losing anything here.
 #[derive(Clone, Debug, Serialize, Deserialize, PartialEq)]
 pub struct LWWMap<K, V> {
 	vals: Vec<(K, u64, V)>,
@ -152,21 +217,35 @@ where
 	K: Ord,
 	V: CRDT,
 {
 	/// Create a new empty map CRDT
 	pub fn new() -> Self {
 		Self { vals: vec![] }
 	}
 	/// Used to migrate from a map defined in an incompatible format. This produces
 	/// a map that contains a single item with the specified timestamp (copied from
 	/// the incompatible format). Do this as many times as you have items to migrate,
 	/// and put them all together using the CRDT merge operator.
 	pub fn migrate_from_raw_item(k: K, ts: u64, v: V) -> Self {
 		Self {
 			vals: vec![(k, ts, v)],
 		}
 	}
-	pub fn take_and_clear(&mut self) -> Self {
+	/// Returns a map that contains a single mapping from the specified key to the specified value.
-		let vals = std::mem::replace(&mut self.vals, vec![]);
+	/// This map is a mutator, or a delta-CRDT, such that when it is merged with the original map,
-		Self { vals }
+	/// the previous value will be replaced with the one specified here.
-	}
+	/// The timestamp in the provided mutator is set to the maximum of the current system's clock
-	pub fn clear(&mut self) {
+	/// and 1 + the previous value's timestamp (if there is one), so that the new value will always
-		self.vals.clear();
+	/// take precedence (LWW rule).
-	}
+	///
 	/// Typically, to update the value associated to a key in the map, you would do the following:
 	///
 	/// ```ignore
 	/// let my_update = my_crdt.update_mutator(key_to_modify, new_value);
 	/// my_crdt.merge(&my_update);
 	/// ```
 	///
 	/// However extracting the mutator on its own and only sending that on the network is very
 	/// interesting as it is much smaller than the whole map.
 	pub fn update_mutator(&self, k: K, new_v: V) -> Self {
 		let new_vals = match self.vals.binary_search_by(|(k2, _, _)| k2.cmp(&k)) {
 			Ok(i) => {
@ -178,12 +257,45 @@ where
 		};
 		Self { vals: new_vals }
 	}
 	/// Takes all of the values of the map and returns them. The current map is reset to the
 	/// empty map. This is very useful to produce in-place a new map that contains only a delta
 	/// that modifies a certain value:
 	///
 	/// ```ignore
 	/// let mut a = get_my_crdt_value();
 	/// let old_a = a.take_and_clear();
 	/// a.merge(&old_a.update_mutator(key_to_modify, new_value));
 	/// put_my_crdt_value(a);
 	/// ```
 	///
 	/// Of course in this simple example we could have written simply
 	/// `put_my_crdt_value(a.update_mutator(key_to_modify, new_value))`,
 	/// but in the case where the map is a field in a struct for instance (as is always the case),
 	/// this becomes very handy:
 	///
 	/// ```ignore
 	/// let mut a = get_my_crdt_value();
 	/// let old_a_map = a.map_field.take_and_clear();
 	/// a.map_field.merge(&old_a_map.update_mutator(key_to_modify, new_value));
 	/// put_my_crdt_value(a);
 	/// ```
 	pub fn take_and_clear(&mut self) -> Self {
 		let vals = std::mem::replace(&mut self.vals, vec![]);
 		Self { vals }
 	}
 	/// Removes all values from the map
 	pub fn clear(&mut self) {
 		self.vals.clear();
 	}
 	/// Get a reference to the value assigned to a key
 	pub fn get(&self, k: &K) -> Option<&V> {
 		match self.vals.binary_search_by(|(k2, _, _)| k2.cmp(&k)) {
 			Ok(i) => Some(&self.vals[i].2),
 			Err(_) => None,
 		}
 	}
 	/// Gets a reference to all of the items, as a slice. Useful to iterate on all map values.
 	/// In most cases you will want to ignore the timestamp (second item of the tuple).
 	pub fn items(&self) -> &[(K, u64, V)] {
 		&self.vals[..]
 	}