add content-defined chunking

This commit is contained in:
Trinity Pointard 2021-03-16 21:08:39 +01:00 committed by Alex Auvolat
parent 119217f9f6
commit 71a13f366e
No known key found for this signature in database
GPG key ID: EDABF9711E244EB1
3 changed files with 101 additions and 18 deletions

97
Cargo.lock generated
View file

@ -221,6 +221,12 @@ dependencies = [
"synstructure", "synstructure",
] ]
[[package]]
name = "fmt-extra"
version = "0.2.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "07f11f71b1f9be830047fbb1899d90601c3b21a471dc99fe1057303eee37f2b9"
[[package]] [[package]]
name = "fnv" name = "fnv"
version = "1.0.7" version = "1.0.7"
@ -365,7 +371,7 @@ dependencies = [
"hex", "hex",
"log", "log",
"pretty_env_logger", "pretty_env_logger",
"rand", "rand 0.8.3",
"rmp-serde", "rmp-serde",
"serde", "serde",
"sled", "sled",
@ -388,6 +394,7 @@ dependencies = [
"garage_model", "garage_model",
"garage_table", "garage_table",
"garage_util", "garage_util",
"hash-roll",
"hex", "hex",
"hmac", "hmac",
"http", "http",
@ -397,6 +404,7 @@ dependencies = [
"log", "log",
"md-5", "md-5",
"percent-encoding", "percent-encoding",
"rand 0.7.3",
"roxmltree", "roxmltree",
"sha2", "sha2",
"tokio", "tokio",
@ -415,7 +423,7 @@ dependencies = [
"garage_util", "garage_util",
"hex", "hex",
"log", "log",
"rand", "rand 0.8.3",
"rmp-serde", "rmp-serde",
"serde", "serde",
"serde_bytes", "serde_bytes",
@ -459,7 +467,7 @@ dependencies = [
"garage_util", "garage_util",
"hexdump", "hexdump",
"log", "log",
"rand", "rand 0.8.3",
"rmp-serde", "rmp-serde",
"serde", "serde",
"serde_bytes", "serde_bytes",
@ -479,7 +487,7 @@ dependencies = [
"http", "http",
"hyper", "hyper",
"log", "log",
"rand", "rand 0.8.3",
"rmp-serde", "rmp-serde",
"rustls", "rustls",
"serde", "serde",
@ -529,6 +537,17 @@ dependencies = [
"winapi", "winapi",
] ]
[[package]]
name = "getrandom"
version = "0.1.16"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "8fc3cb4d91f53b50155bdcfd23f6a4c39ae1969c2ae85982b135750cccaf5fce"
dependencies = [
"cfg-if",
"libc",
"wasi 0.9.0+wasi-snapshot-preview1",
]
[[package]] [[package]]
name = "getrandom" name = "getrandom"
version = "0.2.2" version = "0.2.2"
@ -537,7 +556,7 @@ checksum = "c9495705279e7140bf035dde1f6e750c162df8b625267cd52cc44e0b156732c8"
dependencies = [ dependencies = [
"cfg-if", "cfg-if",
"libc", "libc",
"wasi", "wasi 0.10.2+wasi-snapshot-preview1",
] ]
[[package]] [[package]]
@ -581,6 +600,15 @@ dependencies = [
"tracing", "tracing",
] ]
[[package]]
name = "hash-roll"
version = "0.3.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "a9e27803a4b526df90ed2a3f60523eeec6b5ace6ba7530f9920fbee82027fa11"
dependencies = [
"fmt-extra",
]
[[package]] [[package]]
name = "hashbrown" name = "hashbrown"
version = "0.9.1" version = "0.9.1"
@ -1043,6 +1071,19 @@ dependencies = [
"proc-macro2", "proc-macro2",
] ]
[[package]]
name = "rand"
version = "0.7.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "6a6b1679d49b24bbfe0c803429aa1874472f50d9b363131f0e89fc356b544d03"
dependencies = [
"getrandom 0.1.16",
"libc",
"rand_chacha 0.2.2",
"rand_core 0.5.1",
"rand_hc 0.2.0",
]
[[package]] [[package]]
name = "rand" name = "rand"
version = "0.8.3" version = "0.8.3"
@ -1050,9 +1091,19 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "0ef9e7e66b4468674bfcb0c81af8b7fa0bb154fa9f28eb840da5c447baeb8d7e" checksum = "0ef9e7e66b4468674bfcb0c81af8b7fa0bb154fa9f28eb840da5c447baeb8d7e"
dependencies = [ dependencies = [
"libc", "libc",
"rand_chacha", "rand_chacha 0.3.0",
"rand_core", "rand_core 0.6.2",
"rand_hc", "rand_hc 0.3.0",
]
[[package]]
name = "rand_chacha"
version = "0.2.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "f4c8ed856279c9737206bf725bf36935d8666ead7aa69b52be55af369d193402"
dependencies = [
"ppv-lite86",
"rand_core 0.5.1",
] ]
[[package]] [[package]]
@ -1062,7 +1113,16 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "e12735cf05c9e10bf21534da50a147b924d555dc7a547c42e6bb2d5b6017ae0d" checksum = "e12735cf05c9e10bf21534da50a147b924d555dc7a547c42e6bb2d5b6017ae0d"
dependencies = [ dependencies = [
"ppv-lite86", "ppv-lite86",
"rand_core", "rand_core 0.6.2",
]
[[package]]
name = "rand_core"
version = "0.5.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "90bde5296fc891b0cef12a6d03ddccc162ce7b2aff54160af9338f8d40df6d19"
dependencies = [
"getrandom 0.1.16",
] ]
[[package]] [[package]]
@ -1071,7 +1131,16 @@ version = "0.6.2"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "34cf66eb183df1c5876e2dcf6b13d57340741e8dc255b48e40a26de954d06ae7" checksum = "34cf66eb183df1c5876e2dcf6b13d57340741e8dc255b48e40a26de954d06ae7"
dependencies = [ dependencies = [
"getrandom", "getrandom 0.2.2",
]
[[package]]
name = "rand_hc"
version = "0.2.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "ca3129af7b92a17112d59ad498c6f81eaf463253766b90396d39ea7a39d6613c"
dependencies = [
"rand_core 0.5.1",
] ]
[[package]] [[package]]
@ -1080,7 +1149,7 @@ version = "0.3.0"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "3190ef7066a446f2e7f42e239d161e905420ccab01eb967c9eb27d21b2322a73" checksum = "3190ef7066a446f2e7f42e239d161e905420ccab01eb967c9eb27d21b2322a73"
dependencies = [ dependencies = [
"rand_core", "rand_core 0.6.2",
] ]
[[package]] [[package]]
@ -1581,6 +1650,12 @@ dependencies = [
"try-lock", "try-lock",
] ]
[[package]]
name = "wasi"
version = "0.9.0+wasi-snapshot-preview1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "cccddf32554fecc6acb585f82a32a72e28b48f8c4c1883ddfeeeaa96f7d8e519"
[[package]] [[package]]
name = "wasi" name = "wasi"
version = "0.10.2+wasi-snapshot-preview1" version = "0.10.2+wasi-snapshot-preview1"

View file

@ -22,10 +22,12 @@ bytes = "1.0"
chrono = "0.4" chrono = "0.4"
crypto-mac = "0.10" crypto-mac = "0.10"
err-derive = "0.3" err-derive = "0.3"
hash-roll = "0.3.0"
hex = "0.4" hex = "0.4"
hmac = "0.10" hmac = "0.10"
log = "0.4" log = "0.4"
md-5 = "0.9" md-5 = "0.9"
rand = "0.7"
sha2 = "0.9" sha2 = "0.9"
futures = "0.3" futures = "0.3"

View file

@ -3,6 +3,7 @@ use std::fmt::Write;
use std::sync::Arc; use std::sync::Arc;
use futures::stream::*; use futures::stream::*;
use hash_roll::{ChunkIncr, fastcdc::{FastCdc, FastCdcIncr}, gear_table::GEAR_64};
use hyper::{Body, Request, Response}; use hyper::{Body, Request, Response};
use md5::{digest::generic_array::*, Digest as Md5Digest, Md5}; use md5::{digest::generic_array::*, Digest as Md5Digest, Md5};
use sha2::Sha256; use sha2::Sha256;
@ -268,21 +269,26 @@ async fn put_block_meta(
struct BodyChunker { struct BodyChunker {
body: Body, body: Body,
read_all: bool, read_all: bool,
block_size: usize, max_block_size: usize,
buf: VecDeque<u8>, buf: VecDeque<u8>,
chunker: FastCdcIncr<'static>,
} }
impl BodyChunker { impl BodyChunker {
fn new(body: Body, block_size: usize) -> Self { fn new(body: Body, block_size: usize) -> Self {
let max_block_size = block_size * 2;
let chunker = FastCdc::new(&GEAR_64, block_size as u64 / 2, block_size as u64, max_block_size as u64);
let chunker = (&chunker).into();
Self { Self {
body, body,
read_all: false, read_all: false,
block_size, max_block_size,
buf: VecDeque::with_capacity(2 * block_size), buf: VecDeque::with_capacity(2 * max_block_size),
chunker,
} }
} }
async fn next(&mut self) -> Result<Option<Vec<u8>>, GarageError> { async fn next(&mut self) -> Result<Option<Vec<u8>>, GarageError> {
while !self.read_all && self.buf.len() < self.block_size { while !self.read_all && self.buf.len() < self.max_block_size {
if let Some(block) = self.body.next().await { if let Some(block) = self.body.next().await {
let bytes = block?; let bytes = block?;
trace!("Body next: {} bytes", bytes.len()); trace!("Body next: {} bytes", bytes.len());
@ -293,11 +299,11 @@ impl BodyChunker {
} }
if self.buf.len() == 0 { if self.buf.len() == 0 {
Ok(None) Ok(None)
} else if self.buf.len() <= self.block_size { } else if let Some(index) = self.chunker.push(self.buf.make_contiguous()) {
let block = self.buf.drain(..).collect::<Vec<u8>>(); let block = self.buf.drain(..index).collect::<Vec<u8>>();
Ok(Some(block)) Ok(Some(block))
} else { } else {
let block = self.buf.drain(..self.block_size).collect::<Vec<u8>>(); let block = self.buf.drain(..).collect::<Vec<u8>>();
Ok(Some(block)) Ok(Some(block))
} }
} }