garage/src/table/gc.rs

251 lines
5.4 KiB
Rust
Raw Normal View History

2021-03-12 20:52:19 +00:00
use std::collections::HashMap;
use std::sync::Arc;
use std::time::Duration;
2021-10-14 09:50:12 +00:00
use async_trait::async_trait;
use serde::{Deserialize, Serialize};
use serde_bytes::ByteBuf;
use futures::future::join_all;
use futures::select;
use futures_util::future::*;
use tokio::sync::watch;
use garage_util::data::*;
use garage_util::error::Error;
2021-10-14 09:50:12 +00:00
use garage_rpc::system::System;
use garage_rpc::*;
use crate::data::*;
use crate::replication::*;
2021-03-12 20:52:19 +00:00
use crate::schema::*;
/// Timeout applied to each RPC round issued by the GC (pushing updates, then
/// requesting the conditional deletes).
const TABLE_GC_RPC_TIMEOUT: Duration = Duration::from_secs(30);

/// Maximum number of entries gathered from the GC todo list for one GC iteration.
const TABLE_GC_BATCH_SIZE: usize = 1_024;
2021-10-14 09:50:12 +00:00
pub struct TableGc<F: TableSchema + 'static, R: TableReplication + 'static> {
2021-03-16 10:43:58 +00:00
system: Arc<System>,
2021-03-16 10:47:39 +00:00
data: Arc<TableData<F, R>>,
2021-10-14 09:50:12 +00:00
endpoint: Arc<Endpoint<GcRpc, Self>>,
}
/// Messages exchanged between nodes by the table garbage collector.
///
/// NOTE(review): the enum is serialized with serde, so variant names/order are
/// presumably part of the wire format — confirm before reordering or renaming.
#[derive(Serialize, Deserialize)]
2021-05-02 21:13:08 +00:00
enum GcRpc {
/// Serialized values that the receiver applies through `update_many`.
Update(Vec<ByteBuf>),
/// `(key, value hash)` pairs; for each pair the receiver calls
/// `delete_if_equal_hash` and then clears the matching GC todo entry.
DeleteIfEqualHash(Vec<(ByteBuf, Hash)>),
/// Reply sent by the handler when a request was processed without error.
Ok,
}
// Makes `GcRpc` usable as an RPC message type.
impl Rpc for GcRpc {
// A GC request is answered with another `GcRpc` value, or with an error.
type Response = Result<GcRpc, Error>;
2021-10-14 09:50:12 +00:00
}
2021-05-02 21:13:08 +00:00
impl<F, R> TableGc<F, R>
where
F: TableSchema + 'static,
R: TableReplication + 'static,
{
2021-10-14 09:50:12 +00:00
pub(crate) fn launch(system: Arc<System>, data: Arc<TableData<F, R>>) -> Arc<Self> {
let endpoint = system
.netapp
.endpoint(format!("garage_table/gc.rs/Rpc:{}", data.name));
let gc = Arc::new(Self {
2021-03-16 10:43:58 +00:00
system: system.clone(),
2021-03-16 10:47:39 +00:00
data: data.clone(),
2021-10-14 09:50:12 +00:00
endpoint,
});
2021-10-14 09:50:12 +00:00
gc.endpoint.set_handler(gc.clone());
let gc1 = gc.clone();
2021-03-16 10:43:58 +00:00
system.background.spawn_worker(
format!("GC loop for {}", data.name),
move |must_exit: watch::Receiver<bool>| gc1.gc_loop(must_exit),
);
gc
}
async fn gc_loop(self: Arc<Self>, mut must_exit: watch::Receiver<bool>) {
while !*must_exit.borrow() {
match self.gc_loop_iter().await {
Ok(true) => {
2021-03-26 18:41:46 +00:00
// Stuff was done, loop immediately
2021-03-15 15:21:41 +00:00
continue;
}
Ok(false) => {
2021-03-15 15:21:41 +00:00
// Nothing was done, sleep for some time (below)
}
Err(e) => {
warn!("({}) Error doing GC: {}", self.data.name, e);
}
}
2021-03-15 15:21:41 +00:00
select! {
2021-04-23 19:42:52 +00:00
_ = tokio::time::sleep(Duration::from_secs(10)).fuse() => {},
_ = must_exit.changed().fuse() => {},
2021-03-15 15:21:41 +00:00
}
}
}
async fn gc_loop_iter(&self) -> Result<bool, Error> {
let mut entries = vec![];
let mut excluded = vec![];
for item in self.data.gc_todo.iter() {
let (k, vhash) = item?;
let vhash = Hash::try_from(&vhash[..]).unwrap();
2021-03-12 20:52:19 +00:00
let v_opt = self
.data
.store
.get(&k[..])?
.filter(|v| blake2sum(&v[..]) == vhash);
if let Some(v) = v_opt {
entries.push((ByteBuf::from(k.to_vec()), vhash, ByteBuf::from(v.to_vec())));
if entries.len() >= TABLE_GC_BATCH_SIZE {
break;
}
} else {
excluded.push((k, vhash));
}
}
for (k, vhash) in excluded {
2021-03-12 21:06:56 +00:00
self.todo_remove_if_equal(&k[..], vhash)?;
}
2021-04-23 19:42:52 +00:00
if entries.is_empty() {
// Nothing to do in this iteration
return Ok(false);
}
debug!("({}) GC: doing {} items", self.data.name, entries.len());
let mut partitions = HashMap::new();
for (k, vhash, v) in entries {
let pkh = Hash::try_from(&k[..32]).unwrap();
2021-03-16 10:43:58 +00:00
let mut nodes = self.data.replication.write_nodes(&pkh);
nodes.retain(|x| *x != self.system.id);
nodes.sort();
if !partitions.contains_key(&nodes) {
partitions.insert(nodes.clone(), vec![]);
}
partitions.get_mut(&nodes).unwrap().push((k, vhash, v));
}
2021-03-12 20:52:19 +00:00
let resps = join_all(
partitions
.into_iter()
.map(|(nodes, items)| self.try_send_and_delete(nodes, items)),
)
.await;
2021-03-16 15:35:10 +00:00
let mut errs = vec![];
for resp in resps {
if let Err(e) = resp {
2021-03-16 15:35:10 +00:00
errs.push(e);
}
}
2021-03-16 15:35:10 +00:00
if errs.is_empty() {
Ok(true)
} else {
Err(Error::Message(
errs.into_iter()
.map(|x| format!("{}", x))
.collect::<Vec<_>>()
.join(", "),
))
2021-03-16 15:35:10 +00:00
}
}
2021-03-12 20:52:19 +00:00
async fn try_send_and_delete(
&self,
nodes: Vec<Uuid>,
2021-03-12 20:52:19 +00:00
items: Vec<(ByteBuf, Hash, ByteBuf)>,
) -> Result<(), Error> {
let n_items = items.len();
let mut updates = vec![];
let mut deletes = vec![];
for (k, vhash, v) in items {
updates.push(v);
deletes.push((k, vhash));
}
2021-10-14 09:50:12 +00:00
self.system
.rpc
2021-03-12 20:52:19 +00:00
.try_call_many(
2021-10-14 09:50:12 +00:00
&self.endpoint,
2021-03-12 20:52:19 +00:00
&nodes[..],
2021-05-02 21:13:08 +00:00
GcRpc::Update(updates),
2021-10-14 09:50:12 +00:00
RequestStrategy::with_priority(PRIO_BACKGROUND)
.with_quorum(nodes.len())
.with_timeout(TABLE_GC_RPC_TIMEOUT),
2021-03-12 20:52:19 +00:00
)
.await?;
info!(
"({}) GC: {} items successfully pushed, will try to delete.",
self.data.name, n_items
);
2021-10-14 09:50:12 +00:00
self.system
.rpc
2021-03-12 20:52:19 +00:00
.try_call_many(
2021-10-14 09:50:12 +00:00
&self.endpoint,
2021-03-12 20:52:19 +00:00
&nodes[..],
2021-05-02 21:13:08 +00:00
GcRpc::DeleteIfEqualHash(deletes.clone()),
2021-10-14 09:50:12 +00:00
RequestStrategy::with_priority(PRIO_BACKGROUND)
.with_quorum(nodes.len())
.with_timeout(TABLE_GC_RPC_TIMEOUT),
2021-03-12 20:52:19 +00:00
)
.await?;
for (k, vhash) in deletes {
self.data.delete_if_equal_hash(&k[..], vhash)?;
2021-03-12 21:06:56 +00:00
self.todo_remove_if_equal(&k[..], vhash)?;
}
Ok(())
}
2021-03-12 21:06:56 +00:00
/// Clears the GC todo entry for `key`, provided it still records `vhash`.
///
/// This is a compare-and-swap from `Some(vhash)` to `None` on the todo list.
/// The outcome of the swap itself is discarded; only the error propagated by
/// `?` is reported to the caller.
fn todo_remove_if_equal(&self, key: &[u8], vhash: Hash) -> Result<(), Error> {
    let gc_todo = &self.data.gc_todo;
    let _swap_outcome = gc_todo.compare_and_swap::<_, _, Vec<u8>>(key, Some(vhash), None)?;
    Ok(())
}
}
2021-03-12 21:06:56 +00:00
#[async_trait]
impl<F, R> EndpointHandler<GcRpc> for TableGc<F, R>
where
F: TableSchema + 'static,
R: TableReplication + 'static,
{
async fn handle(self: &Arc<Self>, message: &GcRpc, _from: NodeID) -> Result<GcRpc, Error> {
match message {
2021-05-02 21:13:08 +00:00
GcRpc::Update(items) => {
self.data.update_many(items)?;
2021-05-02 21:13:08 +00:00
Ok(GcRpc::Ok)
}
2021-05-02 21:13:08 +00:00
GcRpc::DeleteIfEqualHash(items) => {
for (key, vhash) in items.iter() {
self.data.delete_if_equal_hash(&key[..], *vhash)?;
2021-03-12 21:06:56 +00:00
self.todo_remove_if_equal(&key[..], *vhash)?;
}
2021-05-02 21:13:08 +00:00
Ok(GcRpc::Ok)
}
2021-04-23 19:42:52 +00:00
_ => Err(Error::Message("Unexpected GC RPC".to_string())),
}
}
}