Reorganize table API
This commit is contained in:
parent
a450103ed0
commit
4c1aee42d5
4 changed files with 96 additions and 60 deletions
2
TODO
2
TODO
|
@ -5,7 +5,7 @@ Object table
|
|||
Rename version table to object table
|
||||
In value handle the different versions
|
||||
|
||||
So that the table becomes (bucket, key) -> CRDT(list of versions)
|
||||
So that the table becomes bucket + Sort key = object key -> CRDT(list of versions)
|
||||
|
||||
CRDT merge rule:
|
||||
- keep one complete version (the one with the highest timestamp)
|
||||
|
|
|
@ -97,11 +97,9 @@ async fn handle_put(garage: Arc<Garage>,
|
|||
None => return Err(Error::BadRequest(format!("Empty body"))),
|
||||
};
|
||||
|
||||
let version_key = VersionMetaKey{
|
||||
bucket: bucket.to_string(),
|
||||
key: key.to_string(),
|
||||
};
|
||||
let mut version_value = VersionMetaValue {
|
||||
let mut version = VersionMeta{
|
||||
bucket: bucket.into(),
|
||||
key: key.into(),
|
||||
timestamp: now_msec(),
|
||||
uuid: version_uuid.clone(),
|
||||
mime_type: mime_type.to_string(),
|
||||
|
@ -111,15 +109,15 @@ async fn handle_put(garage: Arc<Garage>,
|
|||
};
|
||||
|
||||
if first_block.len() < INLINE_THRESHOLD {
|
||||
version_value.data = VersionData::Inline(first_block);
|
||||
version_value.is_complete = true;
|
||||
garage.version_table.insert(&version_key, &version_value).await?;
|
||||
version.data = VersionData::Inline(first_block);
|
||||
version.is_complete = true;
|
||||
garage.version_table.insert(&version).await?;
|
||||
return Ok(version_uuid)
|
||||
}
|
||||
|
||||
let first_block_hash = hash(&first_block[..]);
|
||||
version_value.data = VersionData::FirstBlock(first_block_hash);
|
||||
garage.version_table.insert(&version_key, &version_value).await?;
|
||||
version.data = VersionData::FirstBlock(first_block_hash);
|
||||
garage.version_table.insert(&version).await?;
|
||||
|
||||
let block_meta = BlockMeta{
|
||||
version_uuid: version_uuid.clone(),
|
||||
|
@ -145,8 +143,8 @@ async fn handle_put(garage: Arc<Garage>,
|
|||
|
||||
// TODO: if at any step we have an error, we should undo everything we did
|
||||
|
||||
version_value.is_complete = true;
|
||||
garage.version_table.insert(&version_key, &version_value).await?;
|
||||
version.is_complete = true;
|
||||
garage.version_table.insert(&version).await?;
|
||||
Ok(version_uuid)
|
||||
}
|
||||
|
||||
|
|
100
src/table.rs
100
src/table.rs
|
@ -52,10 +52,10 @@ impl<F: TableFormat + 'static> TableRpcHandler for TableRpcHandlerAdapter<F> {
|
|||
pub enum TableRPC<F: TableFormat> {
|
||||
Ok,
|
||||
|
||||
ReadEntry(F::K, Vec<u8>),
|
||||
ReadEntryResponse(Option<F::V>),
|
||||
ReadEntry(F::P, F::S),
|
||||
ReadEntryResponse(Option<F::E>),
|
||||
|
||||
Update(Vec<(F::K, F::V)>),
|
||||
Update(Vec<F::E>),
|
||||
}
|
||||
|
||||
pub struct Partition {
|
||||
|
@ -64,21 +64,59 @@ pub struct Partition {
|
|||
pub other_nodes: Vec<UUID>,
|
||||
}
|
||||
|
||||
pub trait TableKey {
|
||||
pub trait PartitionKey: Clone + PartialEq + Serialize + for<'de> Deserialize<'de> + Send + Sync {
|
||||
fn hash(&self) -> Hash;
|
||||
}
|
||||
|
||||
pub trait TableValue {
|
||||
fn sort_key(&self) -> Vec<u8>;
|
||||
pub trait SortKey: Clone + Serialize + for<'de> Deserialize<'de> + Send + Sync {
|
||||
fn sort_key(&self) -> &[u8];
|
||||
}
|
||||
|
||||
pub trait Entry<P: PartitionKey, S: SortKey>: Clone + Serialize + for<'de> Deserialize<'de> + Send + Sync {
|
||||
fn partition_key(&self) -> &P;
|
||||
fn sort_key(&self) -> &S;
|
||||
|
||||
fn merge(&mut self, other: &Self);
|
||||
}
|
||||
|
||||
#[derive(Clone, Serialize, Deserialize)]
|
||||
pub struct EmptySortKey;
|
||||
impl SortKey for EmptySortKey {
|
||||
fn sort_key(&self) -> &[u8] {
|
||||
&[]
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
|
||||
pub struct StringKey(String);
|
||||
impl PartitionKey for StringKey {
|
||||
fn hash(&self) -> Hash {
|
||||
hash(self.0.as_bytes())
|
||||
}
|
||||
}
|
||||
impl SortKey for StringKey {
|
||||
fn sort_key(&self) -> &[u8] {
|
||||
self.0.as_bytes()
|
||||
}
|
||||
}
|
||||
impl AsRef<str> for StringKey {
|
||||
fn as_ref(&self) -> &str {
|
||||
&self.0
|
||||
}
|
||||
}
|
||||
impl From<&str> for StringKey {
|
||||
fn from(s: &str) -> StringKey {
|
||||
StringKey(s.to_string())
|
||||
}
|
||||
}
|
||||
|
||||
#[async_trait]
|
||||
pub trait TableFormat: Send + Sync {
|
||||
type K: Clone + PartialEq + Serialize + for<'de> Deserialize<'de> + TableKey + Send + Sync;
|
||||
type V: Clone + Serialize + for<'de> Deserialize<'de> + TableValue + Send + Sync;
|
||||
type P: PartitionKey;
|
||||
type S: SortKey;
|
||||
type E: Entry<Self::P, Self::S>;
|
||||
|
||||
async fn updated(&self, key: &Self::K, old: Option<&Self::V>, new: &Self::V);
|
||||
async fn updated(&self, old: Option<&Self::E>, new: &Self::E);
|
||||
}
|
||||
|
||||
impl<F: TableFormat + 'static> Table<F> {
|
||||
|
@ -99,12 +137,12 @@ impl<F: TableFormat + 'static> Table<F> {
|
|||
Box::new(TableRpcHandlerAdapter::<F>{ table: self })
|
||||
}
|
||||
|
||||
pub async fn insert(&self, k: &F::K, v: &F::V) -> Result<(), Error> {
|
||||
let hash = k.hash();
|
||||
pub async fn insert(&self, e: &F::E) -> Result<(), Error> {
|
||||
let hash = e.partition_key().hash();
|
||||
let who = self.system.members.read().await
|
||||
.walk_ring(&hash, self.param.replication_factor);
|
||||
|
||||
let rpc = &TableRPC::<F>::Update(vec![(k.clone(), v.clone())]);
|
||||
let rpc = &TableRPC::<F>::Update(vec![e.clone()]);
|
||||
|
||||
self.rpc_try_call_many(&who[..],
|
||||
&rpc,
|
||||
|
@ -112,12 +150,12 @@ impl<F: TableFormat + 'static> Table<F> {
|
|||
Ok(())
|
||||
}
|
||||
|
||||
pub async fn get(&self, k: &F::K, sort_key: &[u8]) -> Result<Option<F::V>, Error> {
|
||||
let hash = k.hash();
|
||||
pub async fn get(&self, partition_key: &F::P, sort_key: &F::S) -> Result<Option<F::E>, Error> {
|
||||
let hash = partition_key.hash();
|
||||
let who = self.system.members.read().await
|
||||
.walk_ring(&hash, self.param.replication_factor);
|
||||
|
||||
let rpc = &TableRPC::<F>::ReadEntry(k.clone(), sort_key.to_vec());
|
||||
let rpc = &TableRPC::<F>::ReadEntry(partition_key.clone(), sort_key.clone());
|
||||
let resps = self.rpc_try_call_many(&who[..],
|
||||
&rpc,
|
||||
self.param.read_quorum)
|
||||
|
@ -179,36 +217,40 @@ impl<F: TableFormat + 'static> Table<F> {
|
|||
}
|
||||
}
|
||||
|
||||
fn handle_read_entry(&self, key: &F::K, sort_key: &[u8]) -> Result<Option<F::V>, Error> {
|
||||
let mut tree_key = key.hash().to_vec();
|
||||
tree_key.extend(sort_key);
|
||||
fn handle_read_entry(&self, p: &F::P, s: &F::S) -> Result<Option<F::E>, Error> {
|
||||
let tree_key = self.tree_key(p, s);
|
||||
if let Some(bytes) = self.store.get(&tree_key)? {
|
||||
let (_, v) = rmp_serde::decode::from_read_ref::<_, (F::K, F::V)>(&bytes)?;
|
||||
Ok(Some(v))
|
||||
let e = rmp_serde::decode::from_read_ref::<_, F::E>(&bytes)?;
|
||||
Ok(Some(e))
|
||||
} else {
|
||||
Ok(None)
|
||||
}
|
||||
}
|
||||
|
||||
async fn handle_update(&self, mut pairs: Vec<(F::K, F::V)>) -> Result<(), Error> {
|
||||
for mut pair in pairs.drain(..) {
|
||||
let mut tree_key = pair.0.hash().to_vec();
|
||||
tree_key.extend(pair.1.sort_key());
|
||||
async fn handle_update(&self, mut entries: Vec<F::E>) -> Result<(), Error> {
|
||||
for mut entry in entries.drain(..) {
|
||||
let tree_key = self.tree_key(entry.partition_key(), entry.sort_key());
|
||||
|
||||
let old_val = match self.store.get(&tree_key)? {
|
||||
Some(prev_bytes) => {
|
||||
let (_, old_val) = rmp_serde::decode::from_read_ref::<_, (F::K, F::V)>(&prev_bytes)?;
|
||||
pair.1.merge(&old_val);
|
||||
Some(old_val)
|
||||
let old_entry = rmp_serde::decode::from_read_ref::<_, F::E>(&prev_bytes)?;
|
||||
entry.merge(&old_entry);
|
||||
Some(old_entry)
|
||||
}
|
||||
None => None
|
||||
};
|
||||
|
||||
let new_bytes = rmp_serde::encode::to_vec_named(&pair)?;
|
||||
let new_bytes = rmp_serde::encode::to_vec_named(&entry)?;
|
||||
self.store.insert(&tree_key, new_bytes)?;
|
||||
|
||||
self.instance.updated(&pair.0, old_val.as_ref(), &pair.1).await;
|
||||
self.instance.updated(old_val.as_ref(), &entry).await;
|
||||
}
|
||||
Ok(())
|
||||
}
|
||||
|
||||
fn tree_key(&self, p: &F::P, s: &F::S) -> Vec<u8> {
|
||||
let mut ret = p.hash().to_vec();
|
||||
ret.extend(s.sort_key());
|
||||
ret
|
||||
}
|
||||
}
|
||||
|
|
|
@ -8,14 +8,11 @@ use crate::table::*;
|
|||
use crate::server::Garage;
|
||||
|
||||
|
||||
#[derive(Clone, Debug, PartialEq, Serialize, Deserialize)]
|
||||
pub struct VersionMetaKey {
|
||||
pub bucket: String,
|
||||
pub key: String,
|
||||
}
|
||||
|
||||
#[derive(Clone, Debug, Serialize, Deserialize)]
|
||||
pub struct VersionMetaValue {
|
||||
pub struct VersionMeta {
|
||||
pub bucket: StringKey,
|
||||
pub key: StringKey,
|
||||
|
||||
pub timestamp: u64,
|
||||
pub uuid: UUID,
|
||||
|
||||
|
@ -37,16 +34,14 @@ pub struct VersionTable {
|
|||
pub garage: RwLock<Option<Arc<Garage>>>,
|
||||
}
|
||||
|
||||
impl TableKey for VersionMetaKey {
|
||||
fn hash(&self) -> Hash {
|
||||
hash(self.bucket.as_bytes())
|
||||
impl Entry<StringKey, StringKey> for VersionMeta {
|
||||
fn partition_key(&self) -> &StringKey {
|
||||
&self.bucket
|
||||
}
|
||||
fn sort_key(&self) -> &StringKey {
|
||||
&self.key
|
||||
}
|
||||
}
|
||||
|
||||
impl TableValue for VersionMetaValue {
|
||||
fn sort_key(&self) -> Vec<u8> {
|
||||
vec![]
|
||||
}
|
||||
fn merge(&mut self, other: &Self) {
|
||||
unimplemented!()
|
||||
}
|
||||
|
@ -54,10 +49,11 @@ impl TableValue for VersionMetaValue {
|
|||
|
||||
#[async_trait]
|
||||
impl TableFormat for VersionTable {
|
||||
type K = VersionMetaKey;
|
||||
type V = VersionMetaValue;
|
||||
type P = StringKey;
|
||||
type S = StringKey;
|
||||
type E = VersionMeta;
|
||||
|
||||
async fn updated(&self, key: &Self::K, old: Option<&Self::V>, new: &Self::V) {
|
||||
async fn updated(&self, old: Option<&Self::E>, new: &Self::E) {
|
||||
unimplemented!()
|
||||
}
|
||||
}
|
||||
|
|
Loading…
Reference in a new issue