use std::sync::Arc;

use async_trait::async_trait;
use chrono::prelude::*;
use std::time::{Duration, Instant};
use tokio::sync::watch;

use garage_util::background::*;
use garage_util::data::*;
use garage_util::error::Error;
use garage_util::persister::PersisterShared;
use garage_util::time::*;

use garage_table::EmptyKey;

use crate::bucket_table::*;
use crate::s3::object_table::*;

use crate::garage::Garage;

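/// Persisted state of the lifecycle worker (format introduced with the v0.9.0
/// on-disk schema): only the date of the last fully completed pass is stored,
/// as a string.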
mod v090 {
	use serde::{Deserialize, Serialize};

	#[derive(Serialize, Deserialize, Default, Clone)]
	pub struct LifecycleWorkerPersisted {
		pub last_completed: Option<String>,
	}

	impl garage_util::migrate::InitialFormat for LifecycleWorkerPersisted {
		const VERSION_MARKER: &'static [u8] = b"G09lwp";
	}
}

pub use v090::*;

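/// Background worker that applies bucket lifecycle rules: expiring objects and
/// aborting incomplete multipart uploads, in one full scan of the object table
/// per day.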
pub struct LifecycleWorker {
	garage: Arc<Garage>,

	state: State,

	persister: PersisterShared<LifecycleWorkerPersisted>,
}

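/// Progress of the daily pass: either already completed for a given date, or
/// running with a scan cursor (`pos`) into the object table and running counters.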
enum State {
	Completed(NaiveDate),
	Running {
		date: NaiveDate,
		pos: Vec<u8>,
		counter: usize,
		objects_expired: usize,
		mpu_aborted: usize,
		last_bucket: Option<Bucket>,
	},
}

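/// Outcome of processing one object: continue with the next object, or skip the
/// remainder of the current bucket (bucket not found or no enabled lifecycle rule).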
#[derive(Clone, Copy, Eq, PartialEq)]
enum Skip {
	SkipBucket,
	NextObject,
}

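/// Expose the date of the last completed pass as the read-only background
/// variable `lifecycle-last-completed`.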
pub fn register_bg_vars(
	persister: &PersisterShared<LifecycleWorkerPersisted>,
	vars: &mut vars::BgVars,
) {
	vars.register_ro(persister, "lifecycle-last-completed", |p| {
		p.get_with(|x| x.last_completed.clone().unwrap_or("never".to_string()))
	});
}

impl LifecycleWorker {
	pub fn new(garage: Arc<Garage>, persister: PersisterShared<LifecycleWorkerPersisted>) -> Self {
		let today = today();
		let last_completed = persister.get_with(|x| {
			x.last_completed
				.as_deref()
				.and_then(|x| x.parse::<NaiveDate>().ok())
		});
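		// Resume from the persisted state: if a pass already completed for today
		// (or a later date), stay idle; otherwise start a new pass for today.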
		let state = match last_completed {
			Some(d) if d >= today => State::Completed(d),
			_ => State::start(today),
		};
		Self {
			garage,
			state,
			persister,
		}
	}
}

impl State {
	fn start(date: NaiveDate) -> Self {
		info!("Starting lifecycle worker for {}", date);
		State::Running {
			date,
			pos: vec![],
			counter: 0,
			objects_expired: 0,
			mpu_aborted: 0,
			last_bucket: None,
		}
	}
}

#[async_trait]
impl Worker for LifecycleWorker {
	fn name(&self) -> String {
		"object lifecycle worker".to_string()
	}

	fn status(&self) -> WorkerStatus {
		match &self.state {
			State::Completed(d) => WorkerStatus {
				freeform: vec![format!("Last completed: {}", d)],
				..Default::default()
			},
			State::Running {
				date,
				counter,
				objects_expired,
				mpu_aborted,
				..
			} => {
				let n_objects = self
					.garage
					.object_table
					.data
					.store
					.fast_len()
					.unwrap_or(None);
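				// Approximate progress: entries scanned so far, relative to the
				// total number of entries in the object table (if available).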
				let progress = match n_objects {
					None => "...".to_string(),
					Some(total) => format!(
						"~{:.2}%",
						100. * std::cmp::min(*counter, total) as f32 / total as f32
					),
				};
				WorkerStatus {
					progress: Some(progress),
					freeform: vec![
						format!("Started: {}", date),
						format!("Objects expired: {}", objects_expired),
format!("Multipart uploads aborted: { }", mpu_aborted),
					],
					..Default::default()
				}
			}
		}
	}

	async fn work(&mut self, _must_exit: &mut watch::Receiver<bool>) -> Result<WorkerState, Error> {
		match &mut self.state {
			State::Completed(_) => Ok(WorkerState::Idle),
			State::Running {
				date,
				counter,
				objects_expired,
				mpu_aborted,
				pos,
				last_bucket,
			} => {
				// Process a batch of 100 items before yielding to bg task scheduler
				for _ in 0..100 {
					let (object_bytes, next_pos) = match self
						.garage
						.object_table
						.data
						.store
						.get_gt(&pos)?
					{
						None => {
							info!("Lifecycle worker finished for {}, objects expired: {}, mpu aborted: {}", date, *objects_expired, *mpu_aborted);
							self.persister
								.set_with(|x| x.last_completed = Some(date.to_string()))?;
							self.state = State::Completed(*date);
							return Ok(WorkerState::Idle);
						}
						Some((k, v)) => (v, k),
					};

					let object = self.garage.object_table.data.decode_entry(&object_bytes)?;
					let skip = process_object(
						&self.garage,
						*date,
						&object,
						objects_expired,
						mpu_aborted,
						last_bucket,
					)
					.await?;

					*counter += 1;
					if skip == Skip::SkipBucket {
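						// Skip the rest of this bucket: advance the cursor to the
						// bucket id followed by a 0xFF byte, so that the next get_gt()
						// continues with the following bucket's keys.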
						let bucket_id_len = object.bucket_id.as_slice().len();
						assert_eq!(
							next_pos.get(..bucket_id_len),
							Some(object.bucket_id.as_slice())
						);
						let last_bucket_pos = [&next_pos[..bucket_id_len], &[0xFFu8][..]].concat();
						*pos = std::cmp::max(next_pos, last_bucket_pos);
					} else {
						*pos = next_pos;
					}
				}

				Ok(WorkerState::Busy)
			}
		}
	}

	async fn wait_for_work(&mut self) -> WorkerState {
		match &self.state {
			State::Completed(d) => {
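				// The pass for day `d` is done: sleep until UTC midnight at the
				// start of the next day, then begin a new pass.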
				let next_day = d.succ_opt().expect("no next day");
				let next_start = midnight_ts(next_day);
				loop {
					let now = now_msec();
					if now < next_start {
						tokio::time::sleep_until(
							(Instant::now() + Duration::from_millis(next_start - now)).into(),
						)
						.await;
					} else {
						break;
					}
				}
				self.state = State::start(std::cmp::max(next_day, today()));
			}
			State::Running { .. } => (),
		}
		WorkerState::Busy
	}
}

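/// Apply the enabled lifecycle rules of the object's bucket to a single object:
/// expire the current version and/or abort incomplete multipart uploads, as
/// configured. Returns whether the scan should continue with the next object or
/// skip the rest of this bucket.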
async fn process_object(
	garage: &Arc<Garage>,
	now_date: NaiveDate,
	object: &Object,
	objects_expired: &mut usize,
	mpu_aborted: &mut usize,
	last_bucket: &mut Option<Bucket>,
) -> Result<Skip, Error> {
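	// Nothing to do for objects that have neither live data nor an upload in progress.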
	if !object
		.versions()
		.iter()
		.any(|x| x.is_data() || x.is_uploading(None))
	{
		return Ok(Skip::NextObject);
	}

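	// Reuse the bucket fetched for the previous object when it is the same one.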
	let bucket = match last_bucket.take() {
		Some(b) if b.id == object.bucket_id => b,
		_ => {
			match garage
				.bucket_table
				.get(&EmptyKey, &object.bucket_id)
				.await?
			{
				Some(b) => b,
				None => {
					warn!(
						"Lifecycle worker: object in non-existent bucket {:?}",
						object.bucket_id
					);
					return Ok(Skip::SkipBucket);
				}
			}
		}
	};

	let lifecycle_policy: &[LifecycleRule] = bucket
		.state
		.as_option()
		.and_then(|s| s.lifecycle_config.get().as_deref())
		.unwrap_or_default();

	if lifecycle_policy.iter().all(|x| !x.enabled) {
		return Ok(Skip::SkipBucket);
	}

	let db = garage.object_table.data.store.db();

	for rule in lifecycle_policy.iter() {
		if !rule.enabled {
			continue;
		}

		if let Some(pfx) = &rule.filter.prefix {
			if !object.key.starts_with(pfx) {
				continue;
			}
		}

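		// Expiration rule: if the current version is old enough (AfterDays) or the
		// configured date has passed (AtDate), and the size filter matches, expire
		// the object by inserting a delete marker.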
		if let Some(expire) = &rule.expiration {
			if let Some(current_version) = object.versions().iter().rev().find(|v| v.is_data()) {
				let version_date = next_date(current_version.timestamp);

				let current_version_data = match &current_version.state {
					ObjectVersionState::Complete(c) => c,
					_ => unreachable!(),
				};

				let size_match = check_size_filter(current_version_data, &rule.filter);
				let date_match = match expire {
					LifecycleExpiration::AfterDays(n_days) => {
						(now_date - version_date) >= chrono::Duration::days(*n_days as i64)
					}
					LifecycleExpiration::AtDate(exp_date) => {
						if let Ok(exp_date) = parse_lifecycle_date(exp_date) {
							now_date >= exp_date
						} else {
							warn!("Invalid expiration date stored in bucket {:?} lifecycle config: {}", bucket.id, exp_date);
							false
						}
					}
				};

				if size_match && date_match {
					// Delete expired version
					let deleted_object = Object::new(
						object.bucket_id,
						object.key.clone(),
						vec![ObjectVersion {
							uuid: gen_uuid(),
							timestamp: std::cmp::max(now_msec(), current_version.timestamp + 1),
							state: ObjectVersionState::Complete(ObjectVersionData::DeleteMarker),
						}],
					);
					info!(
						"Lifecycle: expiring 1 object in bucket {:?}",
						object.bucket_id
					);
					db.transaction(|tx| garage.object_table.queue_insert(tx, &deleted_object))?;
					*objects_expired += 1;
				}
			}
		}

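		// AbortIncompleteMultipartUpload rule: mark uploads that have been in
		// progress for more than the configured number of days as aborted.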
		if let Some(abort_mpu_days) = &rule.abort_incomplete_mpu_days {
			let aborted_versions = object
				.versions()
				.iter()
				.filter_map(|v| {
					let version_date = next_date(v.timestamp);
					if (now_date - version_date) >= chrono::Duration::days(*abort_mpu_days as i64)
						&& matches!(&v.state, ObjectVersionState::Uploading { .. })
					{
						Some(ObjectVersion {
							state: ObjectVersionState::Aborted,
							..*v
						})
					} else {
						None
					}
				})
				.collect::<Vec<_>>();
			if !aborted_versions.is_empty() {
				// Insert aborted mpu info
				let n_aborted = aborted_versions.len();
				info!(
					"Lifecycle: aborting {} incomplete upload(s) in bucket {:?}",
					n_aborted, object.bucket_id
				);
				let aborted_object =
					Object::new(object.bucket_id, object.key.clone(), aborted_versions);
				db.transaction(|tx| garage.object_table.queue_insert(tx, &aborted_object))?;
				*mpu_aborted += n_aborted;
			}
		}
	}

	*last_bucket = Some(bucket);
	Ok(Skip::NextObject)
}

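/// Check whether the size of the object version matches the optional `size_gt`
/// and `size_lt` bounds of the lifecycle rule's filter.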
fn check_size_filter(version_data: &ObjectVersionData, filter: &LifecycleFilter) -> bool {
	let size = match version_data {
		ObjectVersionData::Inline(meta, _) | ObjectVersionData::FirstBlock(meta, _) => meta.size,
		_ => unreachable!(),
	};
	if let Some(size_gt) = filter.size_gt {
		if !(size > size_gt) {
			return false;
		}
	}
	if let Some(size_lt) = filter.size_lt {
		if !(size < size_lt) {
			return false;
		}
	}
	true
}

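/// Timestamp (milliseconds since the Unix epoch) of midnight UTC at the beginning
/// of the given day.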
fn midnight_ts(date: NaiveDate) -> u64 {
	date.and_hms_opt(0, 0, 0)
		.expect("midnight does not exist")
		.timestamp_millis() as u64
}

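/// Date from which lifecycle durations are counted for a version written at
/// timestamp `ts`: the day after the one during which it was written.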
fn next_date(ts: u64) -> NaiveDate {
	NaiveDateTime::from_timestamp_millis(ts as i64)
		.expect("bad timestamp")
		.date()
		.succ_opt()
		.expect("no next day")
}

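/// Current date in UTC.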
fn today() -> NaiveDate {
	Utc::now().naive_utc().date()
}