aerogramme/aero-user/src/storage/garage.rs

539 lines
19 KiB
Rust
Raw Normal View History

2023-12-27 13:58:28 +00:00
use aws_sdk_s3::{self as s3, error::SdkError, operation::get_object::GetObjectError};
2024-02-23 16:01:51 +00:00
use aws_smithy_runtime::client::http::hyper_014::HyperClientBuilder;
use aws_smithy_runtime_api::client::http::SharedHttpClient;
2024-02-23 16:31:29 +00:00
use hyper_rustls::HttpsConnector;
use hyper_util::client::legacy::{connect::HttpConnector, Client as HttpClient};
2024-02-23 17:28:04 +00:00
use hyper_util::rt::TokioExecutor;
2023-12-18 16:09:44 +00:00
use serde::Serialize;
2023-11-01 14:15:57 +00:00
2024-03-08 07:17:03 +00:00
use super::*;
2024-02-23 16:31:29 +00:00
2024-02-23 16:01:51 +00:00
pub struct GarageRoot {
2024-02-23 16:31:29 +00:00
k2v_http: HttpClient<HttpsConnector<HttpConnector>, k2v_client::Body>,
2024-02-23 16:01:51 +00:00
aws_http: SharedHttpClient,
}
impl GarageRoot {
2024-02-23 16:31:29 +00:00
pub fn new() -> anyhow::Result<Self> {
2024-02-23 17:28:04 +00:00
let connector = hyper_rustls::HttpsConnectorBuilder::new()
.with_native_roots()?
.https_or_http()
.enable_http1()
.enable_http2()
.build();
let k2v_http = HttpClient::builder(TokioExecutor::new()).build(connector);
2024-02-23 16:01:51 +00:00
let aws_http = HyperClientBuilder::new().build_https();
2024-02-23 16:31:29 +00:00
Ok(Self { k2v_http, aws_http })
2024-02-23 16:01:51 +00:00
}
pub fn user(&self, conf: GarageConf) -> anyhow::Result<Arc<GarageUser>> {
let mut unicity: Vec<u8> = vec![];
unicity.extend_from_slice(file!().as_bytes());
unicity.append(&mut rmp_serde::to_vec(&conf)?);
2024-02-23 17:28:04 +00:00
Ok(Arc::new(GarageUser {
conf,
aws_http: self.aws_http.clone(),
2024-02-23 16:31:29 +00:00
k2v_http: self.k2v_http.clone(),
2024-02-23 17:28:04 +00:00
unicity,
2024-02-23 16:31:29 +00:00
}))
2024-02-23 16:01:51 +00:00
}
}
2023-12-18 16:09:44 +00:00
#[derive(Clone, Debug, Serialize)]
pub struct GarageConf {
2023-11-23 16:19:35 +00:00
pub region: String,
pub s3_endpoint: String,
pub k2v_endpoint: String,
pub aws_access_key_id: String,
pub aws_secret_access_key: String,
pub bucket: String,
}
2023-11-01 14:15:57 +00:00
2024-02-23 16:01:51 +00:00
//@FIXME we should get rid of this builder
//and allocate a S3 + K2V client only once per user
//(and using a shared HTTP client)
2023-12-18 16:09:44 +00:00
#[derive(Clone, Debug)]
2024-02-23 16:01:51 +00:00
pub struct GarageUser {
2023-12-18 16:09:44 +00:00
conf: GarageConf,
2024-02-23 16:01:51 +00:00
aws_http: SharedHttpClient,
2024-02-23 16:31:29 +00:00
k2v_http: HttpClient<HttpsConnector<HttpConnector>, k2v_client::Body>,
2023-12-18 16:09:44 +00:00
unicity: Vec<u8>,
}
2023-12-21 20:54:36 +00:00
#[async_trait]
2024-02-23 16:01:51 +00:00
impl IBuilder for GarageUser {
2023-12-21 20:54:36 +00:00
async fn build(&self) -> Result<Store, StorageError> {
2023-12-22 20:52:20 +00:00
let s3_creds = s3::config::Credentials::new(
2023-12-27 13:58:28 +00:00
self.conf.aws_access_key_id.clone(),
self.conf.aws_secret_access_key.clone(),
None,
None,
"aerogramme",
2023-12-21 20:54:36 +00:00
);
2023-12-27 15:38:27 +00:00
let sdk_config = aws_config::from_env()
2023-12-21 20:54:36 +00:00
.region(aws_config::Region::new(self.conf.region.clone()))
2023-12-22 20:52:20 +00:00
.credentials_provider(s3_creds)
2024-02-23 16:01:51 +00:00
.http_client(self.aws_http.clone())
2023-12-21 20:54:36 +00:00
.endpoint_url(self.conf.s3_endpoint.clone())
.load()
.await;
2023-12-27 15:38:27 +00:00
let s3_config = aws_sdk_s3::config::Builder::from(&sdk_config)
.force_path_style(true)
.build();
let s3_client = aws_sdk_s3::Client::from_conf(s3_config);
2023-12-22 20:52:20 +00:00
let k2v_config = k2v_client::K2vClientConfig {
2023-12-27 13:58:28 +00:00
endpoint: self.conf.k2v_endpoint.clone(),
region: self.conf.region.clone(),
aws_access_key_id: self.conf.aws_access_key_id.clone(),
aws_secret_access_key: self.conf.aws_secret_access_key.clone(),
bucket: self.conf.bucket.clone(),
user_agent: None,
2023-12-22 20:52:20 +00:00
};
2024-02-23 17:28:04 +00:00
let k2v_client =
match k2v_client::K2vClient::new_with_client(k2v_config, self.k2v_http.clone()) {
Err(e) => {
tracing::error!("unable to build k2v client: {}", e);
return Err(StorageError::Internal);
}
Ok(v) => v,
};
2023-12-21 20:54:36 +00:00
2023-12-27 13:58:28 +00:00
Ok(Box::new(GarageStore {
2023-12-22 20:52:20 +00:00
bucket: self.conf.bucket.clone(),
s3: s3_client,
k2v: k2v_client,
2023-12-21 21:30:17 +00:00
}))
2023-11-01 14:15:57 +00:00
}
2023-12-18 16:09:44 +00:00
fn unique(&self) -> UnicityBuffer {
UnicityBuffer(self.unicity.clone())
}
2023-12-16 10:13:32 +00:00
}
2023-12-16 10:13:32 +00:00
pub struct GarageStore {
2023-12-22 20:52:20 +00:00
bucket: String,
2023-12-21 20:54:36 +00:00
s3: s3::Client,
2023-12-22 20:52:20 +00:00
k2v: k2v_client::K2vClient,
}
fn causal_to_row_val(row_ref: RowRef, causal_value: k2v_client::CausalValue) -> RowVal {
let new_row_ref = row_ref.with_causality(causal_value.causality.into());
2023-12-27 13:58:28 +00:00
let row_values = causal_value
.value
.into_iter()
.map(|k2v_value| match k2v_value {
k2v_client::K2vValue::Tombstone => Alternative::Tombstone,
k2v_client::K2vValue::Value(v) => Alternative::Value(v),
})
.collect::<Vec<_>>();
2023-12-22 20:52:20 +00:00
2023-12-27 13:58:28 +00:00
RowVal {
row_ref: new_row_ref,
value: row_values,
}
2023-11-01 14:15:57 +00:00
}
2023-12-16 10:13:32 +00:00
#[async_trait]
impl IStore for GarageStore {
async fn row_fetch<'a>(&self, select: &Selector<'a>) -> Result<Vec<RowVal>, StorageError> {
2024-01-31 10:01:18 +00:00
tracing::trace!(select=%select, command="row_fetch");
2023-12-26 17:33:56 +00:00
let (pk_list, batch_op) = match select {
2023-12-27 13:58:28 +00:00
Selector::Range {
shard,
sort_begin,
sort_end,
} => (
2023-12-26 17:33:56 +00:00
vec![shard.to_string()],
vec![k2v_client::BatchReadOp {
partition_key: shard,
filter: k2v_client::Filter {
start: Some(sort_begin),
end: Some(sort_end),
..k2v_client::Filter::default()
},
..k2v_client::BatchReadOp::default()
2023-12-27 13:58:28 +00:00
}],
2023-12-26 17:33:56 +00:00
),
Selector::List(row_ref_list) => (
2023-12-27 13:58:28 +00:00
row_ref_list
.iter()
.map(|row_ref| row_ref.uid.shard.to_string())
.collect::<Vec<_>>(),
row_ref_list
.iter()
.map(|row_ref| k2v_client::BatchReadOp {
partition_key: &row_ref.uid.shard,
filter: k2v_client::Filter {
start: Some(&row_ref.uid.sort),
..k2v_client::Filter::default()
},
single_item: true,
..k2v_client::BatchReadOp::default()
})
.collect::<Vec<_>>(),
),
Selector::Prefix { shard, sort_prefix } => (
vec![shard.to_string()],
vec![k2v_client::BatchReadOp {
partition_key: shard,
2023-12-26 17:33:56 +00:00
filter: k2v_client::Filter {
2023-12-27 13:58:28 +00:00
prefix: Some(sort_prefix),
2023-12-26 17:33:56 +00:00
..k2v_client::Filter::default()
},
..k2v_client::BatchReadOp::default()
2023-12-27 13:58:28 +00:00
}],
2023-12-26 17:33:56 +00:00
),
2023-12-22 20:52:20 +00:00
Selector::Single(row_ref) => {
2023-12-27 13:58:28 +00:00
let causal_value = match self
.k2v
.read_item(&row_ref.uid.shard, &row_ref.uid.sort)
.await
{
2023-12-27 15:38:27 +00:00
Err(k2v_client::Error::NotFound) => {
tracing::debug!(
"K2V item not found shard={}, sort={}, bucket={}",
row_ref.uid.shard,
row_ref.uid.sort,
self.bucket,
);
return Err(StorageError::NotFound);
}
2023-12-22 20:52:20 +00:00
Err(e) => {
2023-12-27 13:58:28 +00:00
tracing::error!(
"K2V read item shard={}, sort={}, bucket={} failed: {}",
row_ref.uid.shard,
row_ref.uid.sort,
self.bucket,
e
);
2023-12-22 20:52:20 +00:00
return Err(StorageError::Internal);
2023-12-27 13:58:28 +00:00
}
2023-12-22 20:52:20 +00:00
Ok(v) => v,
};
let row_val = causal_to_row_val((*row_ref).clone(), causal_value);
2023-12-27 13:58:28 +00:00
return Ok(vec![row_val]);
}
2023-12-22 20:52:20 +00:00
};
2023-12-26 17:33:56 +00:00
let all_raw_res = match self.k2v.read_batch(&batch_op).await {
2023-12-22 20:52:20 +00:00
Err(e) => {
2023-12-27 13:58:28 +00:00
tracing::error!(
"k2v read batch failed for {:?}, bucket {} with err: {}",
select,
self.bucket,
e
);
2023-12-22 20:52:20 +00:00
return Err(StorageError::Internal);
2023-12-27 13:58:28 +00:00
}
2023-12-22 20:52:20 +00:00
Ok(v) => v,
};
2024-01-31 10:01:18 +00:00
//println!("fetch res -> {:?}", all_raw_res);
2023-12-22 20:52:20 +00:00
2024-01-31 10:01:18 +00:00
let row_vals =
all_raw_res
.into_iter()
.zip(pk_list.into_iter())
.fold(vec![], |mut acc, (page, pk)| {
page.items
.into_iter()
.map(|(sk, cv)| causal_to_row_val(RowRef::new(&pk, &sk), cv))
.for_each(|rr| acc.push(rr));
acc
});
tracing::debug!(fetch_count = row_vals.len(), command = "row_fetch");
2023-12-26 17:33:56 +00:00
Ok(row_vals)
2023-11-01 14:15:57 +00:00
}
2023-12-16 10:13:32 +00:00
async fn row_rm<'a>(&self, select: &Selector<'a>) -> Result<(), StorageError> {
2024-01-31 10:01:18 +00:00
tracing::trace!(select=%select, command="row_rm");
2023-12-27 13:58:09 +00:00
let del_op = match select {
2023-12-27 13:58:28 +00:00
Selector::Range {
shard,
sort_begin,
sort_end,
} => vec![k2v_client::BatchDeleteOp {
2023-12-27 13:58:09 +00:00
partition_key: shard,
prefix: None,
start: Some(sort_begin),
end: Some(sort_end),
single_item: false,
}],
Selector::List(row_ref_list) => {
// Insert null values with causality token = delete
2023-12-27 13:58:28 +00:00
let batch_op = row_ref_list
.iter()
.map(|v| k2v_client::BatchInsertOp {
partition_key: &v.uid.shard,
sort_key: &v.uid.sort,
causality: v.causality.clone().map(|ct| ct.into()),
value: k2v_client::K2vValue::Tombstone,
})
.collect::<Vec<_>>();
2023-12-27 13:58:09 +00:00
return match self.k2v.insert_batch(&batch_op).await {
Err(e) => {
tracing::error!("Unable to delete the list of values: {}", e);
Err(StorageError::Internal)
2023-12-27 13:58:28 +00:00
}
2023-12-27 13:58:09 +00:00
Ok(_) => Ok(()),
};
2023-12-27 13:58:28 +00:00
}
2023-12-27 13:58:09 +00:00
Selector::Prefix { shard, sort_prefix } => vec![k2v_client::BatchDeleteOp {
partition_key: shard,
prefix: Some(sort_prefix),
start: None,
end: None,
single_item: false,
}],
Selector::Single(row_ref) => {
// Insert null values with causality token = delete
let batch_op = vec![k2v_client::BatchInsertOp {
partition_key: &row_ref.uid.shard,
sort_key: &row_ref.uid.sort,
causality: row_ref.causality.clone().map(|ct| ct.into()),
value: k2v_client::K2vValue::Tombstone,
}];
2023-12-27 13:58:28 +00:00
2023-12-27 13:58:09 +00:00
return match self.k2v.insert_batch(&batch_op).await {
Err(e) => {
tracing::error!("Unable to delete the list of values: {}", e);
Err(StorageError::Internal)
2023-12-27 13:58:28 +00:00
}
2023-12-27 13:58:09 +00:00
Ok(_) => Ok(()),
};
2023-12-27 13:58:28 +00:00
}
2023-12-27 13:58:09 +00:00
};
// Finally here we only have prefix & range
match self.k2v.delete_batch(&del_op).await {
Err(e) => {
tracing::error!("delete batch error: {}", e);
Err(StorageError::Internal)
2023-12-27 13:58:28 +00:00
}
2023-12-27 13:58:09 +00:00
Ok(_) => Ok(()),
}
}
2023-12-16 10:13:32 +00:00
async fn row_insert(&self, values: Vec<RowVal>) -> Result<(), StorageError> {
2024-01-31 10:01:18 +00:00
tracing::trace!(entries=%values.iter().map(|v| v.row_ref.to_string()).collect::<Vec<_>>().join(","), command="row_insert");
2023-12-27 13:58:28 +00:00
let batch_ops = values
.iter()
.map(|v| k2v_client::BatchInsertOp {
partition_key: &v.row_ref.uid.shard,
sort_key: &v.row_ref.uid.sort,
causality: v.row_ref.causality.clone().map(|ct| ct.into()),
value: v
.value
.iter()
.next()
.map(|cv| match cv {
Alternative::Value(buff) => k2v_client::K2vValue::Value(buff.clone()),
Alternative::Tombstone => k2v_client::K2vValue::Tombstone,
})
.unwrap_or(k2v_client::K2vValue::Tombstone),
})
.collect::<Vec<_>>();
2023-12-26 17:33:56 +00:00
match self.k2v.insert_batch(&batch_ops).await {
Err(e) => {
tracing::error!("k2v can't insert some value: {}", e);
Err(StorageError::Internal)
2023-12-27 13:58:28 +00:00
}
2023-12-26 17:33:56 +00:00
Ok(v) => Ok(v),
}
2023-11-17 09:46:13 +00:00
}
2023-12-18 16:09:44 +00:00
async fn row_poll(&self, value: &RowRef) -> Result<RowVal, StorageError> {
2024-01-31 10:01:18 +00:00
tracing::trace!(entry=%value, command="row_poll");
2023-12-26 19:02:13 +00:00
loop {
if let Some(ct) = &value.causality {
2023-12-27 13:58:28 +00:00
match self
.k2v
.poll_item(&value.uid.shard, &value.uid.sort, ct.clone().into(), None)
.await
{
2023-12-26 19:02:13 +00:00
Err(e) => {
tracing::error!("Unable to poll item: {}", e);
return Err(StorageError::Internal);
}
Ok(None) => continue,
Ok(Some(cv)) => return Ok(causal_to_row_val(value.clone(), cv)),
}
} else {
match self.k2v.read_item(&value.uid.shard, &value.uid.sort).await {
Err(k2v_client::Error::NotFound) => {
2023-12-27 13:58:28 +00:00
self.k2v
2023-12-26 19:02:13 +00:00
.insert_item(&value.uid.shard, &value.uid.sort, vec![0u8], None)
.await
.map_err(|e| {
tracing::error!("Unable to insert item in polling logic: {}", e);
StorageError::Internal
})?;
}
Err(e) => {
tracing::error!("Unable to read item in polling logic: {}", e);
2023-12-27 13:58:28 +00:00
return Err(StorageError::Internal);
}
2023-12-26 19:02:13 +00:00
Ok(cv) => return Ok(causal_to_row_val(value.clone(), cv)),
}
}
}
2023-11-02 14:28:19 +00:00
}
2023-12-16 10:13:32 +00:00
async fn blob_fetch(&self, blob_ref: &BlobRef) -> Result<BlobVal, StorageError> {
2024-01-31 10:01:18 +00:00
tracing::trace!(entry=%blob_ref, command="blob_fetch");
2023-12-27 13:58:28 +00:00
let maybe_out = self
.s3
2023-12-21 21:30:17 +00:00
.get_object()
2023-12-22 20:52:20 +00:00
.bucket(self.bucket.to_string())
2023-12-21 21:30:17 +00:00
.key(blob_ref.0.to_string())
.send()
.await;
let object_output = match maybe_out {
Ok(output) => output,
Err(SdkError::ServiceError(x)) => match x.err() {
GetObjectError::NoSuchKey(_) => return Err(StorageError::NotFound),
e => {
tracing::warn!("Blob Fetch Error, Service Error: {}", e);
return Err(StorageError::Internal);
2023-12-27 13:58:28 +00:00
}
2023-12-21 21:30:17 +00:00
},
Err(e) => {
tracing::warn!("Blob Fetch Error, {}", e);
return Err(StorageError::Internal);
2023-12-27 13:58:28 +00:00
}
2023-12-21 21:30:17 +00:00
};
let buffer = match object_output.body.collect().await {
Ok(aggreg) => aggreg.to_vec(),
Err(e) => {
tracing::warn!("Fetching body failed with {}", e);
return Err(StorageError::Internal);
}
};
2023-12-27 16:34:49 +00:00
let mut bv = BlobVal::new(blob_ref.clone(), buffer);
if let Some(meta) = object_output.metadata {
bv.meta = meta;
}
2023-12-22 20:52:20 +00:00
tracing::debug!("Fetched {}/{}", self.bucket, blob_ref.0);
2023-12-27 16:34:49 +00:00
Ok(bv)
2023-11-01 14:15:57 +00:00
}
2023-12-22 18:32:07 +00:00
async fn blob_insert(&self, blob_val: BlobVal) -> Result<(), StorageError> {
2024-01-31 10:01:18 +00:00
tracing::trace!(entry=%blob_val.blob_ref, command="blob_insert");
2023-12-27 13:58:28 +00:00
let streamable_value = s3::primitives::ByteStream::from(blob_val.value);
2023-12-22 18:32:07 +00:00
2023-12-27 13:58:28 +00:00
let maybe_send = self
.s3
2023-12-22 18:32:07 +00:00
.put_object()
2023-12-22 20:52:20 +00:00
.bucket(self.bucket.to_string())
2023-12-22 18:32:07 +00:00
.key(blob_val.blob_ref.0.to_string())
2023-12-27 16:34:49 +00:00
.set_metadata(Some(blob_val.meta))
2023-12-22 18:32:07 +00:00
.body(streamable_value)
.send()
.await;
match maybe_send {
Err(e) => {
tracing::error!("unable to send object: {}", e);
Err(StorageError::Internal)
}
Ok(_) => {
2023-12-22 20:52:20 +00:00
tracing::debug!("Inserted {}/{}", self.bucket, blob_val.blob_ref.0);
2023-12-22 18:32:07 +00:00
Ok(())
}
}
2023-12-18 16:09:44 +00:00
}
2023-12-19 18:02:22 +00:00
async fn blob_copy(&self, src: &BlobRef, dst: &BlobRef) -> Result<(), StorageError> {
2024-01-31 10:01:18 +00:00
tracing::trace!(src=%src, dst=%dst, command="blob_copy");
2023-12-27 13:58:28 +00:00
let maybe_copy = self
.s3
2023-12-22 18:32:07 +00:00
.copy_object()
2023-12-22 20:52:20 +00:00
.bucket(self.bucket.to_string())
2023-12-22 18:32:07 +00:00
.key(dst.0.clone())
2023-12-22 20:52:20 +00:00
.copy_source(format!("/{}/{}", self.bucket.to_string(), src.0.clone()))
2023-12-22 18:32:07 +00:00
.send()
.await;
match maybe_copy {
Err(e) => {
2023-12-27 13:58:28 +00:00
tracing::error!(
"unable to copy object {} to {} (bucket: {}), error: {}",
src.0,
dst.0,
self.bucket,
e
);
2023-12-22 18:32:07 +00:00
Err(StorageError::Internal)
2023-12-27 13:58:28 +00:00
}
2023-12-22 18:32:07 +00:00
Ok(_) => {
2023-12-22 20:52:20 +00:00
tracing::debug!("copied {} to {} (bucket: {})", src.0, dst.0, self.bucket);
2023-12-22 18:32:07 +00:00
Ok(())
}
}
2023-11-17 11:15:44 +00:00
}
2023-12-16 10:13:32 +00:00
async fn blob_list(&self, prefix: &str) -> Result<Vec<BlobRef>, StorageError> {
2024-01-31 10:01:18 +00:00
tracing::trace!(prefix = prefix, command = "blob_list");
2023-12-27 13:58:28 +00:00
let maybe_list = self
.s3
2023-12-22 18:32:07 +00:00
.list_objects_v2()
2023-12-22 20:52:20 +00:00
.bucket(self.bucket.to_string())
2023-12-22 18:32:07 +00:00
.prefix(prefix)
.into_paginator()
.send()
.try_collect()
.await;
match maybe_list {
Err(e) => {
2023-12-27 13:58:28 +00:00
tracing::error!(
"listing prefix {} on bucket {} failed: {}",
prefix,
self.bucket,
e
);
2023-12-22 18:32:07 +00:00
Err(StorageError::Internal)
}
Ok(pagin_list_out) => Ok(pagin_list_out
.into_iter()
.map(|list_out| list_out.contents.unwrap_or(vec![]))
.flatten()
.map(|obj| BlobRef(obj.key.unwrap_or(String::new())))
.collect::<Vec<_>>()),
}
2023-11-01 14:15:57 +00:00
}
2023-12-16 10:13:32 +00:00
async fn blob_rm(&self, blob_ref: &BlobRef) -> Result<(), StorageError> {
2024-01-31 10:01:18 +00:00
tracing::trace!(entry=%blob_ref, command="blob_rm");
2023-12-27 13:58:28 +00:00
let maybe_delete = self
.s3
2023-12-22 18:32:07 +00:00
.delete_object()
2023-12-22 20:52:20 +00:00
.bucket(self.bucket.to_string())
2023-12-22 18:32:07 +00:00
.key(blob_ref.0.clone())
.send()
.await;
match maybe_delete {
Err(e) => {
2023-12-27 13:58:28 +00:00
tracing::error!(
"unable to delete {} (bucket: {}), error {}",
blob_ref.0,
self.bucket,
e
);
2023-12-22 18:32:07 +00:00
Err(StorageError::Internal)
2023-12-27 13:58:28 +00:00
}
2023-12-22 18:32:07 +00:00
Ok(_) => {
2023-12-22 20:52:20 +00:00
tracing::debug!("deleted {} (bucket: {})", blob_ref.0, self.bucket);
2023-12-22 18:32:07 +00:00
Ok(())
}
}
2023-11-01 14:15:57 +00:00
}
}