2021-03-12 20:52:19 +00:00
|
|
|
use std::collections::HashMap;
|
2021-03-12 18:57:37 +00:00
|
|
|
use std::sync::Arc;
|
|
|
|
use std::time::Duration;
|
|
|
|
|
2021-10-14 09:50:12 +00:00
|
|
|
use async_trait::async_trait;
|
2021-03-12 18:57:37 +00:00
|
|
|
use serde::{Deserialize, Serialize};
|
|
|
|
use serde_bytes::ByteBuf;
|
|
|
|
|
|
|
|
use futures::future::join_all;
|
|
|
|
use futures::select;
|
|
|
|
use futures_util::future::*;
|
|
|
|
use tokio::sync::watch;
|
|
|
|
|
|
|
|
use garage_util::data::*;
|
|
|
|
use garage_util::error::Error;
|
|
|
|
|
2021-10-14 09:50:12 +00:00
|
|
|
use garage_rpc::system::System;
|
|
|
|
use garage_rpc::*;
|
2021-03-12 18:57:37 +00:00
|
|
|
|
|
|
|
use crate::data::*;
|
|
|
|
use crate::replication::*;
|
2021-03-12 20:52:19 +00:00
|
|
|
use crate::schema::*;
|
2021-03-12 18:57:37 +00:00
|
|
|
|
|
|
|
/// Upper bound on the number of entries gathered from the GC todo list in a
/// single `gc_loop_iter` pass; the scan stops once this many are collected.
const TABLE_GC_BATCH_SIZE: usize = 1024;
|
|
|
|
/// Timeout passed to the request strategy of each GC RPC round issued by
/// `try_send_and_delete` (the update push and the conditional delete).
const TABLE_GC_RPC_TIMEOUT: Duration = Duration::from_secs(30);
|
|
|
|
|
2021-10-14 09:50:12 +00:00
|
|
|
pub struct TableGc<F: TableSchema + 'static, R: TableReplication + 'static> {
|
2021-03-16 10:43:58 +00:00
|
|
|
system: Arc<System>,
|
2021-03-16 10:47:39 +00:00
|
|
|
data: Arc<TableData<F, R>>,
|
2021-03-12 18:57:37 +00:00
|
|
|
|
2021-10-14 09:50:12 +00:00
|
|
|
endpoint: Arc<Endpoint<GcRpc, Self>>,
|
2021-03-12 18:57:37 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
#[derive(Serialize, Deserialize)]
|
2021-05-02 21:13:08 +00:00
|
|
|
enum GcRpc {
|
2021-03-12 18:57:37 +00:00
|
|
|
Update(Vec<ByteBuf>),
|
|
|
|
DeleteIfEqualHash(Vec<(ByteBuf, Hash)>),
|
|
|
|
Ok,
|
2021-10-14 09:50:12 +00:00
|
|
|
Error(String),
|
2021-03-12 18:57:37 +00:00
|
|
|
}
|
|
|
|
|
2021-10-14 09:50:12 +00:00
|
|
|
/// `GcRpc` is its own response type: requests and replies share one enum.
impl Message for GcRpc {
    type Response = Self;
}
|
2021-03-12 18:57:37 +00:00
|
|
|
|
2021-05-02 21:13:08 +00:00
|
|
|
impl<F, R> TableGc<F, R>
|
2021-03-12 18:57:37 +00:00
|
|
|
where
|
|
|
|
F: TableSchema + 'static,
|
|
|
|
R: TableReplication + 'static,
|
|
|
|
{
|
2021-10-14 09:50:12 +00:00
|
|
|
pub(crate) fn launch(system: Arc<System>, data: Arc<TableData<F, R>>) -> Arc<Self> {
|
|
|
|
let endpoint = system
|
|
|
|
.netapp
|
|
|
|
.endpoint(format!("garage_table/gc.rs/Rpc:{}", data.name));
|
2021-03-12 18:57:37 +00:00
|
|
|
|
|
|
|
let gc = Arc::new(Self {
|
2021-03-16 10:43:58 +00:00
|
|
|
system: system.clone(),
|
2021-03-16 10:47:39 +00:00
|
|
|
data: data.clone(),
|
2021-10-14 09:50:12 +00:00
|
|
|
endpoint,
|
2021-03-12 18:57:37 +00:00
|
|
|
});
|
|
|
|
|
2021-10-14 09:50:12 +00:00
|
|
|
gc.endpoint.set_handler(gc.clone());
|
2021-03-12 18:57:37 +00:00
|
|
|
|
|
|
|
let gc1 = gc.clone();
|
2021-03-16 10:43:58 +00:00
|
|
|
system.background.spawn_worker(
|
2021-03-12 18:57:37 +00:00
|
|
|
format!("GC loop for {}", data.name),
|
|
|
|
move |must_exit: watch::Receiver<bool>| gc1.gc_loop(must_exit),
|
|
|
|
);
|
|
|
|
|
|
|
|
gc
|
|
|
|
}
|
|
|
|
|
2021-03-15 19:09:44 +00:00
|
|
|
async fn gc_loop(self: Arc<Self>, mut must_exit: watch::Receiver<bool>) {
|
2021-03-12 18:57:37 +00:00
|
|
|
while !*must_exit.borrow() {
|
|
|
|
match self.gc_loop_iter().await {
|
|
|
|
Ok(true) => {
|
2021-03-26 18:41:46 +00:00
|
|
|
// Stuff was done, loop immediately
|
2021-03-15 15:21:41 +00:00
|
|
|
continue;
|
2021-03-12 18:57:37 +00:00
|
|
|
}
|
|
|
|
Ok(false) => {
|
2021-03-15 15:21:41 +00:00
|
|
|
// Nothing was done, sleep for some time (below)
|
2021-03-12 18:57:37 +00:00
|
|
|
}
|
|
|
|
Err(e) => {
|
|
|
|
warn!("({}) Error doing GC: {}", self.data.name, e);
|
|
|
|
}
|
|
|
|
}
|
2021-03-15 15:21:41 +00:00
|
|
|
select! {
|
2021-04-23 19:42:52 +00:00
|
|
|
_ = tokio::time::sleep(Duration::from_secs(10)).fuse() => {},
|
|
|
|
_ = must_exit.changed().fuse() => {},
|
2021-03-15 15:21:41 +00:00
|
|
|
}
|
2021-03-12 18:57:37 +00:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
async fn gc_loop_iter(&self) -> Result<bool, Error> {
|
|
|
|
let mut entries = vec![];
|
|
|
|
let mut excluded = vec![];
|
|
|
|
|
|
|
|
for item in self.data.gc_todo.iter() {
|
|
|
|
let (k, vhash) = item?;
|
|
|
|
|
|
|
|
let vhash = Hash::try_from(&vhash[..]).unwrap();
|
|
|
|
|
2021-03-12 20:52:19 +00:00
|
|
|
let v_opt = self
|
|
|
|
.data
|
|
|
|
.store
|
|
|
|
.get(&k[..])?
|
2021-03-12 18:57:37 +00:00
|
|
|
.filter(|v| blake2sum(&v[..]) == vhash);
|
|
|
|
|
|
|
|
if let Some(v) = v_opt {
|
|
|
|
entries.push((ByteBuf::from(k.to_vec()), vhash, ByteBuf::from(v.to_vec())));
|
|
|
|
if entries.len() >= TABLE_GC_BATCH_SIZE {
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
} else {
|
|
|
|
excluded.push((k, vhash));
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
for (k, vhash) in excluded {
|
2021-03-12 21:06:56 +00:00
|
|
|
self.todo_remove_if_equal(&k[..], vhash)?;
|
2021-03-12 18:57:37 +00:00
|
|
|
}
|
|
|
|
|
2021-04-23 19:42:52 +00:00
|
|
|
if entries.is_empty() {
|
2021-03-12 18:57:37 +00:00
|
|
|
// Nothing to do in this iteration
|
|
|
|
return Ok(false);
|
|
|
|
}
|
|
|
|
|
|
|
|
debug!("({}) GC: doing {} items", self.data.name, entries.len());
|
|
|
|
|
|
|
|
let mut partitions = HashMap::new();
|
|
|
|
for (k, vhash, v) in entries {
|
|
|
|
let pkh = Hash::try_from(&k[..32]).unwrap();
|
2021-03-16 10:43:58 +00:00
|
|
|
let mut nodes = self.data.replication.write_nodes(&pkh);
|
|
|
|
nodes.retain(|x| *x != self.system.id);
|
2021-03-12 18:57:37 +00:00
|
|
|
nodes.sort();
|
|
|
|
|
|
|
|
if !partitions.contains_key(&nodes) {
|
|
|
|
partitions.insert(nodes.clone(), vec![]);
|
|
|
|
}
|
|
|
|
partitions.get_mut(&nodes).unwrap().push((k, vhash, v));
|
|
|
|
}
|
|
|
|
|
2021-03-12 20:52:19 +00:00
|
|
|
let resps = join_all(
|
|
|
|
partitions
|
|
|
|
.into_iter()
|
|
|
|
.map(|(nodes, items)| self.try_send_and_delete(nodes, items)),
|
|
|
|
)
|
|
|
|
.await;
|
2021-03-16 15:35:10 +00:00
|
|
|
|
|
|
|
let mut errs = vec![];
|
2021-03-12 18:57:37 +00:00
|
|
|
for resp in resps {
|
|
|
|
if let Err(e) = resp {
|
2021-03-16 15:35:10 +00:00
|
|
|
errs.push(e);
|
2021-03-12 18:57:37 +00:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2021-03-16 15:35:10 +00:00
|
|
|
if errs.is_empty() {
|
|
|
|
Ok(true)
|
|
|
|
} else {
|
2021-04-05 17:55:53 +00:00
|
|
|
Err(Error::Message(
|
|
|
|
errs.into_iter()
|
|
|
|
.map(|x| format!("{}", x))
|
|
|
|
.collect::<Vec<_>>()
|
|
|
|
.join(", "),
|
|
|
|
))
|
2021-03-16 15:35:10 +00:00
|
|
|
}
|
2021-03-12 18:57:37 +00:00
|
|
|
}
|
|
|
|
|
2021-03-12 20:52:19 +00:00
|
|
|
async fn try_send_and_delete(
|
|
|
|
&self,
|
2021-10-14 09:50:12 +00:00
|
|
|
nodes: Vec<NodeID>,
|
2021-03-12 20:52:19 +00:00
|
|
|
items: Vec<(ByteBuf, Hash, ByteBuf)>,
|
|
|
|
) -> Result<(), Error> {
|
2021-03-12 18:57:37 +00:00
|
|
|
let n_items = items.len();
|
|
|
|
|
|
|
|
let mut updates = vec![];
|
|
|
|
let mut deletes = vec![];
|
|
|
|
for (k, vhash, v) in items {
|
|
|
|
updates.push(v);
|
|
|
|
deletes.push((k, vhash));
|
|
|
|
}
|
|
|
|
|
2021-10-14 09:50:12 +00:00
|
|
|
self.system
|
|
|
|
.rpc
|
2021-03-12 20:52:19 +00:00
|
|
|
.try_call_many(
|
2021-10-14 09:50:12 +00:00
|
|
|
&self.endpoint,
|
2021-03-12 20:52:19 +00:00
|
|
|
&nodes[..],
|
2021-05-02 21:13:08 +00:00
|
|
|
GcRpc::Update(updates),
|
2021-10-14 09:50:12 +00:00
|
|
|
RequestStrategy::with_priority(PRIO_BACKGROUND)
|
|
|
|
.with_quorum(nodes.len())
|
|
|
|
.with_timeout(TABLE_GC_RPC_TIMEOUT),
|
2021-03-12 20:52:19 +00:00
|
|
|
)
|
|
|
|
.await?;
|
|
|
|
|
|
|
|
info!(
|
|
|
|
"({}) GC: {} items successfully pushed, will try to delete.",
|
|
|
|
self.data.name, n_items
|
|
|
|
);
|
2021-03-12 18:57:37 +00:00
|
|
|
|
2021-10-14 09:50:12 +00:00
|
|
|
self.system
|
|
|
|
.rpc
|
2021-03-12 20:52:19 +00:00
|
|
|
.try_call_many(
|
2021-10-14 09:50:12 +00:00
|
|
|
&self.endpoint,
|
2021-03-12 20:52:19 +00:00
|
|
|
&nodes[..],
|
2021-05-02 21:13:08 +00:00
|
|
|
GcRpc::DeleteIfEqualHash(deletes.clone()),
|
2021-10-14 09:50:12 +00:00
|
|
|
RequestStrategy::with_priority(PRIO_BACKGROUND)
|
|
|
|
.with_quorum(nodes.len())
|
|
|
|
.with_timeout(TABLE_GC_RPC_TIMEOUT),
|
2021-03-12 20:52:19 +00:00
|
|
|
)
|
|
|
|
.await?;
|
2021-03-12 18:57:37 +00:00
|
|
|
|
|
|
|
for (k, vhash) in deletes {
|
|
|
|
self.data.delete_if_equal_hash(&k[..], vhash)?;
|
2021-03-12 21:06:56 +00:00
|
|
|
self.todo_remove_if_equal(&k[..], vhash)?;
|
2021-03-12 18:57:37 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
Ok(())
|
|
|
|
}
|
|
|
|
|
2021-03-12 21:06:56 +00:00
|
|
|
/// Removes `key` from the GC todo list if its recorded hash is `vhash`.
///
/// Implemented as a compare-and-swap from `Some(vhash)` to `None`. Only the
/// outer error of that call is propagated; the value it yields on success is
/// discarded.
/// NOTE(review): presumably that discarded value reports a failed
/// comparison, so a mismatch is silently treated as "nothing to remove";
/// confirm against the store API.
fn todo_remove_if_equal(&self, key: &[u8], vhash: Hash) -> Result<(), Error> {
    let todo = &self.data.gc_todo;
    let _outcome = todo.compare_and_swap::<_, _, Vec<u8>>(key, Some(vhash), None)?;
    Ok(())
}
|
|
|
|
|
2021-10-14 09:50:12 +00:00
|
|
|
async fn handle_rpc(&self, message: &GcRpc) -> Result<GcRpc, Error> {
|
2021-03-12 18:57:37 +00:00
|
|
|
match message {
|
2021-05-02 21:13:08 +00:00
|
|
|
GcRpc::Update(items) => {
|
2021-03-12 18:57:37 +00:00
|
|
|
self.data.update_many(items)?;
|
2021-05-02 21:13:08 +00:00
|
|
|
Ok(GcRpc::Ok)
|
2021-03-12 18:57:37 +00:00
|
|
|
}
|
2021-05-02 21:13:08 +00:00
|
|
|
GcRpc::DeleteIfEqualHash(items) => {
|
2021-03-12 18:57:37 +00:00
|
|
|
for (key, vhash) in items.iter() {
|
|
|
|
self.data.delete_if_equal_hash(&key[..], *vhash)?;
|
2021-03-12 21:06:56 +00:00
|
|
|
self.todo_remove_if_equal(&key[..], *vhash)?;
|
2021-03-12 18:57:37 +00:00
|
|
|
}
|
2021-05-02 21:13:08 +00:00
|
|
|
Ok(GcRpc::Ok)
|
2021-03-12 18:57:37 +00:00
|
|
|
}
|
2021-04-23 19:42:52 +00:00
|
|
|
_ => Err(Error::Message("Unexpected GC RPC".to_string())),
|
2021-03-12 18:57:37 +00:00
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
2021-10-14 09:50:12 +00:00
|
|
|
|
|
|
|
#[async_trait]
|
|
|
|
impl<F, R> EndpointHandler<GcRpc> for TableGc<F, R>
|
|
|
|
where
|
|
|
|
F: TableSchema + 'static,
|
|
|
|
R: TableReplication + 'static,
|
|
|
|
{
|
|
|
|
async fn handle(self: &Arc<Self>, message: &GcRpc, _from: NodeID) -> GcRpc {
|
|
|
|
self.handle_rpc(message)
|
|
|
|
.await
|
|
|
|
.unwrap_or_else(|e| GcRpc::Error(format!("{}", e)))
|
|
|
|
}
|
|
|
|
}
|