use zstd checksumming

This commit is contained in:
Trinity Pointard 2021-04-07 02:13:26 +02:00
parent aeadb071c6
commit d4fd074000
2 changed files with 38 additions and 30 deletions

View file

@ -9,7 +9,7 @@ use serde::{Deserialize, Serialize};
use tokio::fs; use tokio::fs;
use tokio::io::{AsyncReadExt, AsyncWriteExt}; use tokio::io::{AsyncReadExt, AsyncWriteExt};
use tokio::sync::{watch, Mutex, Notify}; use tokio::sync::{watch, Mutex, Notify};
use zstd::stream::{decode_all as zstd_decode, encode_all as zstd_encode}; use zstd::stream::{decode_all as zstd_decode, Encoder};
use garage_util::data::*; use garage_util::data::*;
use garage_util::error::Error; use garage_util::error::Error;
@ -43,7 +43,10 @@ pub enum Message {
GetBlock(Hash), GetBlock(Hash),
/// Message to send a block of data, either because requested, of for first delivery of new /// Message to send a block of data, either because requested, of for first delivery of new
/// block /// block
PutBlock { hash: Hash, data: BlockData }, PutBlock {
hash: Hash,
data: BlockData,
},
/// Ask other node if they should have this block, but don't actually have it /// Ask other node if they should have this block, but don't actually have it
NeedBlockQuery(Hash), NeedBlockQuery(Hash),
/// Response : whether the node do require that block /// Response : whether the node do require that block
@ -64,6 +67,13 @@ impl BlockData {
BlockData::Compressed(_) => true, BlockData::Compressed(_) => true,
} }
} }
pub fn buffer(&self) -> &Vec<u8> {
match self {
BlockData::Plain(b) => b,
BlockData::Compressed(b) => b,
}
}
} }
impl RpcMessage for Message {} impl RpcMessage for Message {}
@ -164,6 +174,10 @@ impl BlockManager {
/// Write a block to disk /// Write a block to disk
pub async fn write_block(&self, hash: &Hash, data: &BlockData) -> Result<Message, Error> { pub async fn write_block(&self, hash: &Hash, data: &BlockData) -> Result<Message, Error> {
let mut path = self.block_dir(hash);
let _lock = self.data_dir_lock.lock().await;
let clean_plain = match self.is_block_compressed(hash).await { let clean_plain = match self.is_block_compressed(hash).await {
Ok(true) => return Ok(Message::Ok), Ok(true) => return Ok(Message::Ok),
Ok(false) if !data.is_compressed() => return Ok(Message::Ok), // we have a plain block, and the provided block is not compressed either Ok(false) if !data.is_compressed() => return Ok(Message::Ok), // we have a plain block, and the provided block is not compressed either
@ -171,29 +185,17 @@ impl BlockManager {
Err(_) => false, Err(_) => false,
}; };
let mut path = self.block_dir(hash);
let (buffer, checksum) = match data {
BlockData::Plain(b) => (b, None),
BlockData::Compressed(b) => {
let checksum = blake2sum(&b);
(b, Some(checksum))
}
};
let _lock = self.data_dir_lock.lock().await;
fs::create_dir_all(&path).await?; fs::create_dir_all(&path).await?;
path.push(hex::encode(hash)); path.push(hex::encode(hash));
if checksum.is_some() {
path.set_extension("zst_b2"); if data.is_compressed() {
path.set_extension("zst");
} }
let buffer = data.buffer();
let mut f = fs::File::create(path.clone()).await?; let mut f = fs::File::create(path.clone()).await?;
f.write_all(&buffer).await?; f.write_all(&buffer).await?;
if let Some(checksum) = checksum {
f.write_all(checksum.as_slice()).await?;
}
if clean_plain { if clean_plain {
path.set_extension(""); path.set_extension("");
@ -215,7 +217,7 @@ impl BlockManager {
f.map(|f| (f, false)).map_err(Into::into) f.map(|f| (f, false)).map_err(Into::into)
} }
Ok(true) => { Ok(true) => {
path.set_extension("zst_b2"); path.set_extension("zst");
let f = fs::File::open(&path).await; let f = fs::File::open(&path).await;
f.map(|f| (f, true)).map_err(Into::into) f.map(|f| (f, true)).map_err(Into::into)
} }
@ -233,14 +235,7 @@ impl BlockManager {
drop(f); drop(f);
let sum_ok = if compressed { let sum_ok = if compressed {
if data.len() >= 32 { zstd_check_checksum(&data[..])
let data_len = data.len() - 32;
let checksum = data.split_off(data_len);
blake2sum(&data[..]).as_slice() == &checksum
} else {
// the file is too short to be valid
false
}
} else { } else {
blake2sum(&data[..]) == *hash blake2sum(&data[..]) == *hash
}; };
@ -287,7 +282,7 @@ impl BlockManager {
async fn is_block_compressed(&self, hash: &Hash) -> Result<bool, Error> { async fn is_block_compressed(&self, hash: &Hash) -> Result<bool, Error> {
let mut path = self.block_path(hash); let mut path = self.block_path(hash);
path.set_extension("zst_b2"); path.set_extension("zst");
if fs::metadata(&path).await.is_ok() { if fs::metadata(&path).await.is_ok() {
return Ok(true); return Ok(true);
} }
@ -634,3 +629,16 @@ fn u64_from_be_bytes<T: AsRef<[u8]>>(bytes: T) -> u64 {
x8.copy_from_slice(bytes.as_ref()); x8.copy_from_slice(bytes.as_ref());
u64::from_be_bytes(x8) u64::from_be_bytes(x8)
} }
fn zstd_check_checksum<R: std::io::Read>(source: R) -> bool {
zstd::stream::copy_decode(source, std::io::sink()).is_ok()
}
fn zstd_encode<R: std::io::Read>(mut source: R, level: i32) -> std::io::Result<Vec<u8>> {
let mut result = Vec::<u8>::new();
let mut encoder = Encoder::new(&mut result, level)?;
encoder.include_checksum(true)?;
std::io::copy(&mut source, &mut encoder)?;
encoder.finish()?;
Ok(result)
}

View file

@ -45,7 +45,7 @@ pub struct Config {
#[serde(default = "default_replication_factor")] #[serde(default = "default_replication_factor")]
pub data_replication_factor: usize, pub data_replication_factor: usize,
/// Zstd compression level used on data blocks /// Zstd compression level used on data blocks
#[serde(default)] #[serde(default)]
pub compression_level: i32, pub compression_level: i32,