forked from Deuxfleurs/garage
Work & TODO
This commit is contained in:
parent
cc580da0ae
commit
a450103ed0
5 changed files with 78 additions and 40 deletions
29
TODO
Normal file
29
TODO
Normal file
|
@ -0,0 +1,29 @@
|
||||||
|
Object table
|
||||||
|
------------
|
||||||
|
|
||||||
|
|
||||||
|
Rename version table to object table
|
||||||
|
In the value, handle the different versions
|
||||||
|
|
||||||
|
So that the table becomes (bucket, key) -> CRDT(list of versions)
|
||||||
|
|
||||||
|
CRDT merge rule:
|
||||||
|
- keep one complete version (the one with the highest timestamp)
|
||||||
|
- keep all incomplete versions with timestamps higher than the complete version
|
||||||
|
|
||||||
|
Cleanup rule: remove incomplete versions after a given delay (say 24h)
|
||||||
|
|
||||||
|
|
||||||
|
Block table
|
||||||
|
-----------
|
||||||
|
|
||||||
|
Table is version_UUID -> BTreeMap<(offset, block hash)> OR Deleted (= CRDT top)
|
||||||
|
|
||||||
|
|
||||||
|
Block reference table
|
||||||
|
---------------------
|
||||||
|
|
||||||
|
Table is hash key: block_hash + sort key: version_UUID -> boolean (true when deleted)
|
||||||
|
|
||||||
|
Since the hash key is the same as for the blocks themselves,
|
||||||
|
we can simply consider the updates to this table as events that increase/decrease a reference counter.
|
|
@ -67,6 +67,9 @@ impl FixedBytes32 {
|
||||||
pub fn as_slice_mut(&mut self) -> &mut [u8] {
|
pub fn as_slice_mut(&mut self) -> &mut [u8] {
|
||||||
&mut self.0[..]
|
&mut self.0[..]
|
||||||
}
|
}
|
||||||
|
pub fn to_vec(&self) -> Vec<u8> {
|
||||||
|
self.0.to_vec()
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
pub type UUID = FixedBytes32;
|
pub type UUID = FixedBytes32;
|
||||||
|
|
|
@ -35,9 +35,6 @@ pub enum Error {
|
||||||
#[error(display = "{}", _0)]
|
#[error(display = "{}", _0)]
|
||||||
BadRequest(String),
|
BadRequest(String),
|
||||||
|
|
||||||
#[error(display = "Entry not found")]
|
|
||||||
NotFound,
|
|
||||||
|
|
||||||
#[error(display = "{}", _0)]
|
#[error(display = "{}", _0)]
|
||||||
Message(String),
|
Message(String),
|
||||||
}
|
}
|
||||||
|
|
74
src/table.rs
74
src/table.rs
|
@ -2,7 +2,6 @@ use std::time::Duration;
|
||||||
use std::sync::Arc;
|
use std::sync::Arc;
|
||||||
use serde::{Serialize, Deserialize};
|
use serde::{Serialize, Deserialize};
|
||||||
use async_trait::async_trait;
|
use async_trait::async_trait;
|
||||||
use reduce::Reduce;
|
|
||||||
|
|
||||||
use crate::error::Error;
|
use crate::error::Error;
|
||||||
use crate::proto::*;
|
use crate::proto::*;
|
||||||
|
@ -52,7 +51,10 @@ impl<F: TableFormat + 'static> TableRpcHandler for TableRpcHandlerAdapter<F> {
|
||||||
#[derive(Serialize, Deserialize)]
|
#[derive(Serialize, Deserialize)]
|
||||||
pub enum TableRPC<F: TableFormat> {
|
pub enum TableRPC<F: TableFormat> {
|
||||||
Ok,
|
Ok,
|
||||||
Read(Vec<F::K>),
|
|
||||||
|
ReadEntry(F::K, Vec<u8>),
|
||||||
|
ReadEntryResponse(Option<F::V>),
|
||||||
|
|
||||||
Update(Vec<(F::K, F::V)>),
|
Update(Vec<(F::K, F::V)>),
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -62,18 +64,19 @@ pub struct Partition {
|
||||||
pub other_nodes: Vec<UUID>,
|
pub other_nodes: Vec<UUID>,
|
||||||
}
|
}
|
||||||
|
|
||||||
pub trait KeyHash {
|
pub trait TableKey {
|
||||||
fn hash(&self) -> Hash;
|
fn hash(&self) -> Hash;
|
||||||
}
|
}
|
||||||
|
|
||||||
pub trait ValueMerge {
|
pub trait TableValue {
|
||||||
|
fn sort_key(&self) -> Vec<u8>;
|
||||||
fn merge(&mut self, other: &Self);
|
fn merge(&mut self, other: &Self);
|
||||||
}
|
}
|
||||||
|
|
||||||
#[async_trait]
|
#[async_trait]
|
||||||
pub trait TableFormat: Send + Sync {
|
pub trait TableFormat: Send + Sync {
|
||||||
type K: Clone + PartialEq + Serialize + for<'de> Deserialize<'de> + KeyHash + Send + Sync;
|
type K: Clone + PartialEq + Serialize + for<'de> Deserialize<'de> + TableKey + Send + Sync;
|
||||||
type V: Clone + Serialize + for<'de> Deserialize<'de> + ValueMerge + Send + Sync;
|
type V: Clone + Serialize + for<'de> Deserialize<'de> + TableValue + Send + Sync;
|
||||||
|
|
||||||
async fn updated(&self, key: &Self::K, old: Option<&Self::V>, new: &Self::V);
|
async fn updated(&self, key: &Self::K, old: Option<&Self::V>, new: &Self::V);
|
||||||
}
|
}
|
||||||
|
@ -109,33 +112,34 @@ impl<F: TableFormat + 'static> Table<F> {
|
||||||
Ok(())
|
Ok(())
|
||||||
}
|
}
|
||||||
|
|
||||||
pub async fn get(&self, k: &F::K) -> Result<F::V, Error> {
|
pub async fn get(&self, k: &F::K, sort_key: &[u8]) -> Result<Option<F::V>, Error> {
|
||||||
let hash = k.hash();
|
let hash = k.hash();
|
||||||
let who = self.system.members.read().await
|
let who = self.system.members.read().await
|
||||||
.walk_ring(&hash, self.param.replication_factor);
|
.walk_ring(&hash, self.param.replication_factor);
|
||||||
|
|
||||||
let rpc = &TableRPC::<F>::Read(vec![k.clone()]);
|
let rpc = &TableRPC::<F>::ReadEntry(k.clone(), sort_key.to_vec());
|
||||||
let resps = self.rpc_try_call_many(&who[..],
|
let resps = self.rpc_try_call_many(&who[..],
|
||||||
&rpc,
|
&rpc,
|
||||||
self.param.read_quorum)
|
self.param.read_quorum)
|
||||||
.await?;
|
.await?;
|
||||||
|
|
||||||
let mut values = vec![];
|
let mut ret = None;
|
||||||
for resp in resps {
|
for resp in resps {
|
||||||
if let TableRPC::Update(mut pairs) = resp {
|
if let TableRPC::ReadEntryResponse(value) = resp {
|
||||||
if pairs.len() == 0 {
|
if let Some(v) = value {
|
||||||
continue;
|
ret = match ret {
|
||||||
} else if pairs.len() == 1 && pairs[0].0 == *k {
|
None => Some(v),
|
||||||
values.push(pairs.drain(..).next().unwrap().1);
|
Some(mut x) => {
|
||||||
continue;
|
x.merge(&v);
|
||||||
|
Some(x)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
}
|
||||||
|
} else {
|
||||||
return Err(Error::Message(format!("Invalid return value to read")));
|
return Err(Error::Message(format!("Invalid return value to read")));
|
||||||
}
|
}
|
||||||
values.drain(..)
|
}
|
||||||
.reduce(|mut x, y| { x.merge(&y); x })
|
Ok(ret)
|
||||||
.map(Ok)
|
|
||||||
.unwrap_or(Err(Error::NotFound))
|
|
||||||
}
|
}
|
||||||
|
|
||||||
async fn rpc_try_call_many(&self, who: &[UUID], rpc: &TableRPC<F>, quorum: usize) -> Result<Vec<TableRPC<F>>, Error> {
|
async fn rpc_try_call_many(&self, who: &[UUID], rpc: &TableRPC<F>, quorum: usize) -> Result<Vec<TableRPC<F>>, Error> {
|
||||||
|
@ -163,33 +167,35 @@ impl<F: TableFormat + 'static> Table<F> {
|
||||||
|
|
||||||
async fn handle(&self, msg: TableRPC<F>) -> Result<TableRPC<F>, Error> {
|
async fn handle(&self, msg: TableRPC<F>) -> Result<TableRPC<F>, Error> {
|
||||||
match msg {
|
match msg {
|
||||||
TableRPC::Read(keys) => {
|
TableRPC::ReadEntry(key, sort_key) => {
|
||||||
Ok(TableRPC::Update(self.handle_read(&keys)?))
|
let value = self.handle_read_entry(&key, &sort_key)?;
|
||||||
|
Ok(TableRPC::ReadEntryResponse(value))
|
||||||
}
|
}
|
||||||
TableRPC::Update(pairs) => {
|
TableRPC::Update(pairs) => {
|
||||||
self.handle_write(pairs).await?;
|
self.handle_update(pairs).await?;
|
||||||
Ok(TableRPC::Ok)
|
Ok(TableRPC::Ok)
|
||||||
}
|
}
|
||||||
_ => Err(Error::RPCError(format!("Unexpected table RPC")))
|
_ => Err(Error::RPCError(format!("Unexpected table RPC")))
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
fn handle_read(&self, keys: &[F::K]) -> Result<Vec<(F::K, F::V)>, Error> {
|
fn handle_read_entry(&self, key: &F::K, sort_key: &[u8]) -> Result<Option<F::V>, Error> {
|
||||||
let mut results = vec![];
|
let mut tree_key = key.hash().to_vec();
|
||||||
for key in keys.iter() {
|
tree_key.extend(sort_key);
|
||||||
if let Some(bytes) = self.store.get(&key.hash())? {
|
if let Some(bytes) = self.store.get(&tree_key)? {
|
||||||
let pair = rmp_serde::decode::from_read_ref::<_, (F::K, F::V)>(bytes.as_ref())?;
|
let (_, v) = rmp_serde::decode::from_read_ref::<_, (F::K, F::V)>(&bytes)?;
|
||||||
results.push(pair);
|
Ok(Some(v))
|
||||||
|
} else {
|
||||||
|
Ok(None)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
Ok(results)
|
|
||||||
}
|
|
||||||
|
|
||||||
async fn handle_write(&self, mut pairs: Vec<(F::K, F::V)>) -> Result<(), Error> {
|
async fn handle_update(&self, mut pairs: Vec<(F::K, F::V)>) -> Result<(), Error> {
|
||||||
for mut pair in pairs.drain(..) {
|
for mut pair in pairs.drain(..) {
|
||||||
let hash = pair.0.hash();
|
let mut tree_key = pair.0.hash().to_vec();
|
||||||
|
tree_key.extend(pair.1.sort_key());
|
||||||
|
|
||||||
let old_val = match self.store.get(&hash)? {
|
let old_val = match self.store.get(&tree_key)? {
|
||||||
Some(prev_bytes) => {
|
Some(prev_bytes) => {
|
||||||
let (_, old_val) = rmp_serde::decode::from_read_ref::<_, (F::K, F::V)>(&prev_bytes)?;
|
let (_, old_val) = rmp_serde::decode::from_read_ref::<_, (F::K, F::V)>(&prev_bytes)?;
|
||||||
pair.1.merge(&old_val);
|
pair.1.merge(&old_val);
|
||||||
|
@ -199,7 +205,7 @@ impl<F: TableFormat + 'static> Table<F> {
|
||||||
};
|
};
|
||||||
|
|
||||||
let new_bytes = rmp_serde::encode::to_vec_named(&pair)?;
|
let new_bytes = rmp_serde::encode::to_vec_named(&pair)?;
|
||||||
self.store.insert(&hash, new_bytes)?;
|
self.store.insert(&tree_key, new_bytes)?;
|
||||||
|
|
||||||
self.instance.updated(&pair.0, old_val.as_ref(), &pair.1).await;
|
self.instance.updated(&pair.0, old_val.as_ref(), &pair.1).await;
|
||||||
}
|
}
|
||||||
|
|
|
@ -37,13 +37,16 @@ pub struct VersionTable {
|
||||||
pub garage: RwLock<Option<Arc<Garage>>>,
|
pub garage: RwLock<Option<Arc<Garage>>>,
|
||||||
}
|
}
|
||||||
|
|
||||||
impl KeyHash for VersionMetaKey {
|
impl TableKey for VersionMetaKey {
|
||||||
fn hash(&self) -> Hash {
|
fn hash(&self) -> Hash {
|
||||||
hash(self.bucket.as_bytes())
|
hash(self.bucket.as_bytes())
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
impl ValueMerge for VersionMetaValue {
|
impl TableValue for VersionMetaValue {
|
||||||
|
fn sort_key(&self) -> Vec<u8> {
|
||||||
|
vec![]
|
||||||
|
}
|
||||||
fn merge(&mut self, other: &Self) {
|
fn merge(&mut self, other: &Self) {
|
||||||
unimplemented!()
|
unimplemented!()
|
||||||
}
|
}
|
||||||
|
|
Loading…
Reference in a new issue