forked from Deuxfleurs/garage
Merge pull request 'Use content defined chunking' (#43) from trinity-1686a/garage:content-defined-chunking into main
Reviewed-on: Deuxfleurs/garage#43
This commit is contained in:
commit
7380f3855c
3 changed files with 104 additions and 20 deletions
88
Cargo.lock
generated
88
Cargo.lock
generated
|
@ -221,6 +221,12 @@ dependencies = [
|
|||
"synstructure",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "fastcdc"
|
||||
version = "1.0.5"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "5afa29be46b12c8c380b997def8d1ac77c2665da93eb0a768fab0bf4db79333f"
|
||||
|
||||
[[package]]
|
||||
name = "fnv"
|
||||
version = "1.0.7"
|
||||
|
@ -365,7 +371,7 @@ dependencies = [
|
|||
"hex",
|
||||
"log",
|
||||
"pretty_env_logger",
|
||||
"rand",
|
||||
"rand 0.8.3",
|
||||
"rmp-serde",
|
||||
"serde",
|
||||
"sled",
|
||||
|
@ -383,6 +389,7 @@ dependencies = [
|
|||
"chrono",
|
||||
"crypto-mac 0.10.0",
|
||||
"err-derive",
|
||||
"fastcdc",
|
||||
"futures",
|
||||
"futures-util",
|
||||
"garage_model",
|
||||
|
@ -397,6 +404,7 @@ dependencies = [
|
|||
"log",
|
||||
"md-5",
|
||||
"percent-encoding",
|
||||
"rand 0.7.3",
|
||||
"roxmltree",
|
||||
"sha2",
|
||||
"tokio",
|
||||
|
@ -415,7 +423,7 @@ dependencies = [
|
|||
"garage_util",
|
||||
"hex",
|
||||
"log",
|
||||
"rand",
|
||||
"rand 0.8.3",
|
||||
"rmp-serde",
|
||||
"serde",
|
||||
"serde_bytes",
|
||||
|
@ -459,7 +467,7 @@ dependencies = [
|
|||
"garage_util",
|
||||
"hexdump",
|
||||
"log",
|
||||
"rand",
|
||||
"rand 0.8.3",
|
||||
"rmp-serde",
|
||||
"serde",
|
||||
"serde_bytes",
|
||||
|
@ -479,7 +487,7 @@ dependencies = [
|
|||
"http",
|
||||
"hyper",
|
||||
"log",
|
||||
"rand",
|
||||
"rand 0.8.3",
|
||||
"rmp-serde",
|
||||
"rustls",
|
||||
"serde",
|
||||
|
@ -529,6 +537,17 @@ dependencies = [
|
|||
"winapi",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "getrandom"
|
||||
version = "0.1.16"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "8fc3cb4d91f53b50155bdcfd23f6a4c39ae1969c2ae85982b135750cccaf5fce"
|
||||
dependencies = [
|
||||
"cfg-if",
|
||||
"libc",
|
||||
"wasi 0.9.0+wasi-snapshot-preview1",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "getrandom"
|
||||
version = "0.2.2"
|
||||
|
@ -537,7 +556,7 @@ checksum = "c9495705279e7140bf035dde1f6e750c162df8b625267cd52cc44e0b156732c8"
|
|||
dependencies = [
|
||||
"cfg-if",
|
||||
"libc",
|
||||
"wasi",
|
||||
"wasi 0.10.2+wasi-snapshot-preview1",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
|
@ -1043,6 +1062,19 @@ dependencies = [
|
|||
"proc-macro2",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "rand"
|
||||
version = "0.7.3"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "6a6b1679d49b24bbfe0c803429aa1874472f50d9b363131f0e89fc356b544d03"
|
||||
dependencies = [
|
||||
"getrandom 0.1.16",
|
||||
"libc",
|
||||
"rand_chacha 0.2.2",
|
||||
"rand_core 0.5.1",
|
||||
"rand_hc 0.2.0",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "rand"
|
||||
version = "0.8.3"
|
||||
|
@ -1050,9 +1082,19 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
|
|||
checksum = "0ef9e7e66b4468674bfcb0c81af8b7fa0bb154fa9f28eb840da5c447baeb8d7e"
|
||||
dependencies = [
|
||||
"libc",
|
||||
"rand_chacha",
|
||||
"rand_core",
|
||||
"rand_hc",
|
||||
"rand_chacha 0.3.0",
|
||||
"rand_core 0.6.2",
|
||||
"rand_hc 0.3.0",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "rand_chacha"
|
||||
version = "0.2.2"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "f4c8ed856279c9737206bf725bf36935d8666ead7aa69b52be55af369d193402"
|
||||
dependencies = [
|
||||
"ppv-lite86",
|
||||
"rand_core 0.5.1",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
|
@ -1062,7 +1104,16 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
|
|||
checksum = "e12735cf05c9e10bf21534da50a147b924d555dc7a547c42e6bb2d5b6017ae0d"
|
||||
dependencies = [
|
||||
"ppv-lite86",
|
||||
"rand_core",
|
||||
"rand_core 0.6.2",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "rand_core"
|
||||
version = "0.5.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "90bde5296fc891b0cef12a6d03ddccc162ce7b2aff54160af9338f8d40df6d19"
|
||||
dependencies = [
|
||||
"getrandom 0.1.16",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
|
@ -1071,7 +1122,16 @@ version = "0.6.2"
|
|||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "34cf66eb183df1c5876e2dcf6b13d57340741e8dc255b48e40a26de954d06ae7"
|
||||
dependencies = [
|
||||
"getrandom",
|
||||
"getrandom 0.2.2",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "rand_hc"
|
||||
version = "0.2.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "ca3129af7b92a17112d59ad498c6f81eaf463253766b90396d39ea7a39d6613c"
|
||||
dependencies = [
|
||||
"rand_core 0.5.1",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
|
@ -1080,7 +1140,7 @@ version = "0.3.0"
|
|||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "3190ef7066a446f2e7f42e239d161e905420ccab01eb967c9eb27d21b2322a73"
|
||||
dependencies = [
|
||||
"rand_core",
|
||||
"rand_core 0.6.2",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
|
@ -1581,6 +1641,12 @@ dependencies = [
|
|||
"try-lock",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "wasi"
|
||||
version = "0.9.0+wasi-snapshot-preview1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "cccddf32554fecc6acb585f82a32a72e28b48f8c4c1883ddfeeeaa96f7d8e519"
|
||||
|
||||
[[package]]
|
||||
name = "wasi"
|
||||
version = "0.10.2+wasi-snapshot-preview1"
|
||||
|
|
|
@ -22,10 +22,12 @@ bytes = "1.0"
|
|||
chrono = "0.4"
|
||||
crypto-mac = "0.10"
|
||||
err-derive = "0.3"
|
||||
fastcdc = "1.0.5"
|
||||
hex = "0.4"
|
||||
hmac = "0.10"
|
||||
log = "0.4"
|
||||
md-5 = "0.9"
|
||||
rand = "0.7"
|
||||
sha2 = "0.9"
|
||||
|
||||
futures = "0.3"
|
||||
|
|
|
@ -2,6 +2,7 @@ use std::collections::{BTreeMap, VecDeque};
|
|||
use std::fmt::Write;
|
||||
use std::sync::Arc;
|
||||
|
||||
use fastcdc::{Chunk, FastCDC};
|
||||
use futures::stream::*;
|
||||
use hyper::{Body, Request, Response};
|
||||
use md5::{digest::generic_array::*, Digest as Md5Digest, Md5};
|
||||
|
@ -268,21 +269,28 @@ async fn put_block_meta(
|
|||
struct BodyChunker {
|
||||
body: Body,
|
||||
read_all: bool,
|
||||
block_size: usize,
|
||||
min_block_size: usize,
|
||||
avg_block_size: usize,
|
||||
max_block_size: usize,
|
||||
buf: VecDeque<u8>,
|
||||
}
|
||||
|
||||
impl BodyChunker {
|
||||
fn new(body: Body, block_size: usize) -> Self {
|
||||
let min_block_size = block_size / 4 * 3;
|
||||
let avg_block_size = block_size;
|
||||
let max_block_size = block_size * 2;
|
||||
Self {
|
||||
body,
|
||||
read_all: false,
|
||||
block_size,
|
||||
buf: VecDeque::with_capacity(2 * block_size),
|
||||
min_block_size,
|
||||
avg_block_size,
|
||||
max_block_size,
|
||||
buf: VecDeque::with_capacity(2 * max_block_size),
|
||||
}
|
||||
}
|
||||
async fn next(&mut self) -> Result<Option<Vec<u8>>, GarageError> {
|
||||
while !self.read_all && self.buf.len() < self.block_size {
|
||||
while !self.read_all && self.buf.len() < self.max_block_size {
|
||||
if let Some(block) = self.body.next().await {
|
||||
let bytes = block?;
|
||||
trace!("Body next: {} bytes", bytes.len());
|
||||
|
@ -293,12 +301,20 @@ impl BodyChunker {
|
|||
}
|
||||
if self.buf.len() == 0 {
|
||||
Ok(None)
|
||||
} else if self.buf.len() <= self.block_size {
|
||||
let block = self.buf.drain(..).collect::<Vec<u8>>();
|
||||
Ok(Some(block))
|
||||
} else {
|
||||
let block = self.buf.drain(..self.block_size).collect::<Vec<u8>>();
|
||||
Ok(Some(block))
|
||||
let mut iter = FastCDC::with_eof(
|
||||
self.buf.make_contiguous(),
|
||||
self.min_block_size,
|
||||
self.avg_block_size,
|
||||
self.max_block_size,
|
||||
self.read_all,
|
||||
);
|
||||
if let Some(Chunk { length, .. }) = iter.next() {
|
||||
let block = self.buf.drain(..length).collect::<Vec<u8>>();
|
||||
Ok(Some(block))
|
||||
} else {
|
||||
unreachable!("FastCDC returned not chunk")
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
Loading…
Reference in a new issue