From 6e0cb2dfb6db182a07debe4f7b89f7780bcdf5e4 Mon Sep 17 00:00:00 2001 From: Trinity Pointard Date: Tue, 16 Mar 2021 21:08:39 +0100 Subject: [PATCH 1/4] add content defined chunking --- Cargo.lock | 97 ++++++++++++++++++++++++++++++++++++++++------ src/api/Cargo.toml | 2 + src/api/s3_put.rs | 20 ++++++---- 3 files changed, 101 insertions(+), 18 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 5dc83dfa..e71152e4 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -221,6 +221,12 @@ dependencies = [ "synstructure", ] +[[package]] +name = "fmt-extra" +version = "0.2.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "07f11f71b1f9be830047fbb1899d90601c3b21a471dc99fe1057303eee37f2b9" + [[package]] name = "fnv" version = "1.0.7" @@ -365,7 +371,7 @@ dependencies = [ "hex", "log", "pretty_env_logger", - "rand", + "rand 0.8.3", "rmp-serde", "serde", "sled", @@ -388,6 +394,7 @@ dependencies = [ "garage_model", "garage_table", "garage_util", + "hash-roll", "hex", "hmac", "http", @@ -397,6 +404,7 @@ dependencies = [ "log", "md-5", "percent-encoding", + "rand 0.7.3", "roxmltree", "sha2", "tokio", @@ -415,7 +423,7 @@ dependencies = [ "garage_util", "hex", "log", - "rand", + "rand 0.8.3", "rmp-serde", "serde", "serde_bytes", @@ -459,7 +467,7 @@ dependencies = [ "garage_util", "hexdump", "log", - "rand", + "rand 0.8.3", "rmp-serde", "serde", "serde_bytes", @@ -479,7 +487,7 @@ dependencies = [ "http", "hyper", "log", - "rand", + "rand 0.8.3", "rmp-serde", "rustls", "serde", @@ -529,6 +537,17 @@ dependencies = [ "winapi", ] +[[package]] +name = "getrandom" +version = "0.1.16" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8fc3cb4d91f53b50155bdcfd23f6a4c39ae1969c2ae85982b135750cccaf5fce" +dependencies = [ + "cfg-if", + "libc", + "wasi 0.9.0+wasi-snapshot-preview1", +] + [[package]] name = "getrandom" version = "0.2.2" @@ -537,7 +556,7 @@ checksum = "c9495705279e7140bf035dde1f6e750c162df8b625267cd52cc44e0b156732c8" 
dependencies = [ "cfg-if", "libc", - "wasi", + "wasi 0.10.2+wasi-snapshot-preview1", ] [[package]] @@ -581,6 +600,15 @@ dependencies = [ "tracing", ] +[[package]] +name = "hash-roll" +version = "0.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a9e27803a4b526df90ed2a3f60523eeec6b5ace6ba7530f9920fbee82027fa11" +dependencies = [ + "fmt-extra", +] + [[package]] name = "hashbrown" version = "0.9.1" @@ -1043,6 +1071,19 @@ dependencies = [ "proc-macro2", ] +[[package]] +name = "rand" +version = "0.7.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6a6b1679d49b24bbfe0c803429aa1874472f50d9b363131f0e89fc356b544d03" +dependencies = [ + "getrandom 0.1.16", + "libc", + "rand_chacha 0.2.2", + "rand_core 0.5.1", + "rand_hc 0.2.0", +] + [[package]] name = "rand" version = "0.8.3" @@ -1050,9 +1091,19 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "0ef9e7e66b4468674bfcb0c81af8b7fa0bb154fa9f28eb840da5c447baeb8d7e" dependencies = [ "libc", - "rand_chacha", - "rand_core", - "rand_hc", + "rand_chacha 0.3.0", + "rand_core 0.6.2", + "rand_hc 0.3.0", +] + +[[package]] +name = "rand_chacha" +version = "0.2.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f4c8ed856279c9737206bf725bf36935d8666ead7aa69b52be55af369d193402" +dependencies = [ + "ppv-lite86", + "rand_core 0.5.1", ] [[package]] @@ -1062,7 +1113,16 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "e12735cf05c9e10bf21534da50a147b924d555dc7a547c42e6bb2d5b6017ae0d" dependencies = [ "ppv-lite86", - "rand_core", + "rand_core 0.6.2", +] + +[[package]] +name = "rand_core" +version = "0.5.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "90bde5296fc891b0cef12a6d03ddccc162ce7b2aff54160af9338f8d40df6d19" +dependencies = [ + "getrandom 0.1.16", ] [[package]] @@ -1071,7 +1131,16 @@ version = "0.6.2" source = 
"registry+https://github.com/rust-lang/crates.io-index" checksum = "34cf66eb183df1c5876e2dcf6b13d57340741e8dc255b48e40a26de954d06ae7" dependencies = [ - "getrandom", + "getrandom 0.2.2", +] + +[[package]] +name = "rand_hc" +version = "0.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ca3129af7b92a17112d59ad498c6f81eaf463253766b90396d39ea7a39d6613c" +dependencies = [ + "rand_core 0.5.1", ] [[package]] @@ -1080,7 +1149,7 @@ version = "0.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "3190ef7066a446f2e7f42e239d161e905420ccab01eb967c9eb27d21b2322a73" dependencies = [ - "rand_core", + "rand_core 0.6.2", ] [[package]] @@ -1581,6 +1650,12 @@ dependencies = [ "try-lock", ] +[[package]] +name = "wasi" +version = "0.9.0+wasi-snapshot-preview1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "cccddf32554fecc6acb585f82a32a72e28b48f8c4c1883ddfeeeaa96f7d8e519" + [[package]] name = "wasi" version = "0.10.2+wasi-snapshot-preview1" diff --git a/src/api/Cargo.toml b/src/api/Cargo.toml index 0b824ca3..5bc170a3 100644 --- a/src/api/Cargo.toml +++ b/src/api/Cargo.toml @@ -22,10 +22,12 @@ bytes = "1.0" chrono = "0.4" crypto-mac = "0.10" err-derive = "0.3" +hash-roll = "0.3.0" hex = "0.4" hmac = "0.10" log = "0.4" md-5 = "0.9" +rand = "0.7" sha2 = "0.9" futures = "0.3" diff --git a/src/api/s3_put.rs b/src/api/s3_put.rs index c4e3b818..e2a1b54d 100644 --- a/src/api/s3_put.rs +++ b/src/api/s3_put.rs @@ -3,6 +3,7 @@ use std::fmt::Write; use std::sync::Arc; use futures::stream::*; +use hash_roll::{ChunkIncr, fastcdc::{FastCdc, FastCdcIncr}, gear_table::GEAR_64}; use hyper::{Body, Request, Response}; use md5::{digest::generic_array::*, Digest as Md5Digest, Md5}; use sha2::Sha256; @@ -268,21 +269,26 @@ async fn put_block_meta( struct BodyChunker { body: Body, read_all: bool, - block_size: usize, + max_block_size: usize, buf: VecDeque, + chunker: FastCdcIncr<'static>, } impl BodyChunker { fn 
new(body: Body, block_size: usize) -> Self { + let max_block_size = block_size * 2; + let chunker = FastCdc::new(&GEAR_64, block_size as u64 / 2, block_size as u64, max_block_size as u64); + let chunker = (&chunker).into(); Self { body, read_all: false, - block_size, - buf: VecDeque::with_capacity(2 * block_size), + max_block_size, + buf: VecDeque::with_capacity(2 * max_block_size), + chunker, } } async fn next(&mut self) -> Result>, GarageError> { - while !self.read_all && self.buf.len() < self.block_size { + while !self.read_all && self.buf.len() < self.max_block_size { if let Some(block) = self.body.next().await { let bytes = block?; trace!("Body next: {} bytes", bytes.len()); @@ -293,11 +299,11 @@ impl BodyChunker { } if self.buf.len() == 0 { Ok(None) - } else if self.buf.len() <= self.block_size { - let block = self.buf.drain(..).collect::>(); + } else if let Some(index) = self.chunker.push(self.buf.make_contiguous()) { + let block = self.buf.drain(..index).collect::>(); Ok(Some(block)) } else { - let block = self.buf.drain(..self.block_size).collect::>(); + let block = self.buf.drain(..).collect::>(); Ok(Some(block)) } } From 47d0aee9f807e0ded10a01d045a8930ff112224b Mon Sep 17 00:00:00 2001 From: Trinity Pointard Date: Wed, 17 Mar 2021 01:31:35 +0100 Subject: [PATCH 2/4] change crate used for cdc previous one seemed to output incorrect results --- Cargo.lock | 17 ++++------------- src/api/Cargo.toml | 2 +- src/api/s3_put.rs | 24 ++++++++++++++---------- 3 files changed, 19 insertions(+), 24 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index e71152e4..c97968ef 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -222,10 +222,10 @@ dependencies = [ ] [[package]] -name = "fmt-extra" -version = "0.2.1" +name = "fastcdc" +version = "1.0.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "07f11f71b1f9be830047fbb1899d90601c3b21a471dc99fe1057303eee37f2b9" +checksum = "5afa29be46b12c8c380b997def8d1ac77c2665da93eb0a768fab0bf4db79333f" 
[[package]] name = "fnv" @@ -389,12 +389,12 @@ dependencies = [ "chrono", "crypto-mac 0.10.0", "err-derive", + "fastcdc", "futures", "futures-util", "garage_model", "garage_table", "garage_util", - "hash-roll", "hex", "hmac", "http", @@ -600,15 +600,6 @@ dependencies = [ "tracing", ] -[[package]] -name = "hash-roll" -version = "0.3.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a9e27803a4b526df90ed2a3f60523eeec6b5ace6ba7530f9920fbee82027fa11" -dependencies = [ - "fmt-extra", -] - [[package]] name = "hashbrown" version = "0.9.1" diff --git a/src/api/Cargo.toml b/src/api/Cargo.toml index 5bc170a3..b328f671 100644 --- a/src/api/Cargo.toml +++ b/src/api/Cargo.toml @@ -22,7 +22,7 @@ bytes = "1.0" chrono = "0.4" crypto-mac = "0.10" err-derive = "0.3" -hash-roll = "0.3.0" +fastcdc = "1.0.5" hex = "0.4" hmac = "0.10" log = "0.4" diff --git a/src/api/s3_put.rs b/src/api/s3_put.rs index e2a1b54d..f5607c9f 100644 --- a/src/api/s3_put.rs +++ b/src/api/s3_put.rs @@ -2,8 +2,8 @@ use std::collections::{BTreeMap, VecDeque}; use std::fmt::Write; use std::sync::Arc; +use fastcdc::{Chunk, FastCDC}; use futures::stream::*; -use hash_roll::{ChunkIncr, fastcdc::{FastCdc, FastCdcIncr}, gear_table::GEAR_64}; use hyper::{Body, Request, Response}; use md5::{digest::generic_array::*, Digest as Md5Digest, Md5}; use sha2::Sha256; @@ -269,22 +269,24 @@ async fn put_block_meta( struct BodyChunker { body: Body, read_all: bool, + min_block_size: usize, + avg_block_size: usize, max_block_size: usize, buf: VecDeque, - chunker: FastCdcIncr<'static>, } impl BodyChunker { fn new(body: Body, block_size: usize) -> Self { + let min_block_size = block_size / 4 * 3; + let avg_block_size = block_size; let max_block_size = block_size * 2; - let chunker = FastCdc::new(&GEAR_64, block_size as u64 / 2, block_size as u64, max_block_size as u64); - let chunker = (&chunker).into(); Self { body, read_all: false, + min_block_size, + avg_block_size, max_block_size, buf: 
VecDeque::with_capacity(2 * max_block_size), - chunker, } } async fn next(&mut self) -> Result>, GarageError> { @@ -299,12 +301,14 @@ impl BodyChunker { } if self.buf.len() == 0 { Ok(None) - } else if let Some(index) = self.chunker.push(self.buf.make_contiguous()) { - let block = self.buf.drain(..index).collect::>(); - Ok(Some(block)) } else { - let block = self.buf.drain(..).collect::>(); - Ok(Some(block)) + let mut iter = FastCDC::with_eof(self.buf.make_contiguous(), self.min_block_size, self.avg_block_size, self.max_block_size, self.read_all); + if let Some(Chunk {length, ..}) = iter.next() { + let block = self.buf.drain(..length).collect::>(); + Ok(Some(block)) + } else { + Ok(None) + } } } } From b3b0b20d722bd001ef971b029a2165a2407fec83 Mon Sep 17 00:00:00 2001 From: Trinity Pointard Date: Tue, 6 Apr 2021 02:54:00 +0200 Subject: [PATCH 3/4] run fmt --- src/api/s3_put.rs | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/src/api/s3_put.rs b/src/api/s3_put.rs index f5607c9f..d2702940 100644 --- a/src/api/s3_put.rs +++ b/src/api/s3_put.rs @@ -302,8 +302,14 @@ impl BodyChunker { if self.buf.len() == 0 { Ok(None) } else { - let mut iter = FastCDC::with_eof(self.buf.make_contiguous(), self.min_block_size, self.avg_block_size, self.max_block_size, self.read_all); - if let Some(Chunk {length, ..}) = iter.next() { + let mut iter = FastCDC::with_eof( + self.buf.make_contiguous(), + self.min_block_size, + self.avg_block_size, + self.max_block_size, + self.read_all, + ); + if let Some(Chunk { length, .. 
}) = iter.next() { let block = self.buf.drain(..length).collect::>(); Ok(Some(block)) } else { From 6cbc8d6ec93b832a301a5402f1b1ae70b07a2be3 Mon Sep 17 00:00:00 2001 From: Trinity Pointard Date: Tue, 6 Apr 2021 16:53:39 +0200 Subject: [PATCH 4/4] mark branch as unreachable --- src/api/s3_put.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/api/s3_put.rs b/src/api/s3_put.rs index d2702940..d023bcef 100644 --- a/src/api/s3_put.rs +++ b/src/api/s3_put.rs @@ -313,7 +313,7 @@ impl BodyChunker { let block = self.buf.drain(..length).collect::>(); Ok(Some(block)) } else { - Ok(None) + unreachable!("FastCDC returned not chunk") } } }