From 7cc7ae9491ebc5fc47779efdeb5d3a657463034e Mon Sep 17 00:00:00 2001 From: Julien Kritter Date: Fri, 13 Sep 2024 10:40:46 +0200 Subject: [PATCH] Add support for an LSM-tree-based backend with Fjall --- .woodpecker/debug.yaml | 2 + Cargo.lock | 227 ++++++++++++++++++---- Cargo.toml | 1 + src/db/Cargo.toml | 2 + src/db/fjall_adapter.rs | 413 ++++++++++++++++++++++++++++++++++++++++ src/db/lib.rs | 2 + src/db/open.rs | 12 ++ src/garage/Cargo.toml | 1 + src/model/Cargo.toml | 1 + src/model/garage.rs | 3 + 10 files changed, 628 insertions(+), 36 deletions(-) create mode 100644 src/db/fjall_adapter.rs diff --git a/.woodpecker/debug.yaml b/.woodpecker/debug.yaml index 0076feda..46da1d8d 100644 --- a/.woodpecker/debug.yaml +++ b/.woodpecker/debug.yaml @@ -37,6 +37,8 @@ steps: - GARAGE_TEST_INTEGRATION_DB_ENGINE=lmdb ./result/bin/integration-* || (cat tmp-garage-integration/stderr.log; false) - nix-shell --attr ci --run "killall -9 garage" || true - GARAGE_TEST_INTEGRATION_DB_ENGINE=sqlite ./result/bin/integration-* || (cat tmp-garage-integration/stderr.log; false) + - nix-shell --attr ci --run "killall -9 garage" || true + - GARAGE_TEST_INTEGRATION_DB_ENGINE=fjall ./result/bin/integration-* || (cat tmp-garage-integration/stderr.log; false) - rm result - rm -rv tmp-garage-integration diff --git a/Cargo.lock b/Cargo.lock index fa313874..164ba03d 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -54,9 +54,9 @@ dependencies = [ [[package]] name = "ahash" -version = "0.8.7" +version = "0.8.11" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "77c3a9648d43b9cd48db467b3f87fdd6e146bcc88ab0180006cef2179fe11d01" +checksum = "e89da841a80418a9b391ebaea17f5c112ffaaa96f621d2c285b5174da76b9011" dependencies = [ "cfg-if", "getrandom", @@ -793,7 +793,7 @@ dependencies = [ "num-traits", "serde", "wasm-bindgen", - "windows-targets 0.52.0", + "windows-targets 0.52.6", ] [[package]] @@ -905,9 +905,9 @@ dependencies = [ [[package]] name = "crc32fast" -version = "1.4.0" +version = "1.4.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b3855a8a784b474f333699ef2bbca9db2c4a1f6d9088a90a2d25b1eb53111eaa" +checksum = "a97769d94ddab943e4510d138150169a2758b5ef3eb191a9ee688de3e23ef7b3" dependencies = [ "cfg-if", ] @@ -921,6 +921,15 @@ dependencies = [ "crossbeam-utils", ] +[[package]] +name = "crossbeam-epoch" +version = "0.9.18" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5b82ac4a3c2ca9c3460964f020e1402edd5753411d7737aa39c3714ad1b5420e" +dependencies = [ + "crossbeam-utils", +] + [[package]] name = "crossbeam-queue" version = "0.3.11" @@ -930,6 +939,16 @@ dependencies = [ "crossbeam-utils", ] +[[package]] +name = "crossbeam-skiplist" +version = "0.1.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "df29de440c58ca2cc6e587ec3d22347551a32435fbde9d2bff64e78a9ffa151b" +dependencies = [ + "crossbeam-epoch", + "crossbeam-utils", +] + [[package]] name = "crossbeam-utils" version = "0.8.19" @@ -1064,6 +1083,12 @@ dependencies = [ "subtle", ] +[[package]] +name = "double-ended-peekable" +version = "0.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c0d05e1c0dbad51b52c38bda7adceef61b9efc2baf04acfe8726a8c4630a6f57" + [[package]] name = "dyn-clone" version = "1.0.16" @@ -1194,6 +1219,22 @@ version = "0.4.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "0ce7134b9999ecaf8bcd65542e436736ef32ddca1b3e06094cb6ec5755203b80" +[[package]] +name = "fjall" +version = "1.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e2fd11be247941253b9861c7ca94526d092d8311ac0a1740415242e3d9efd7a5" +dependencies = [ + "byteorder", + "crc32fast", + "fs_extra", + "log", + "lsm-tree", + "path-absolutize", + "std-semaphore", + "tempfile", +] + [[package]] name = "fnv" version = "1.0.7" @@ -1213,6 +1254,12 @@ dependencies = [ name = "format_table" version = "0.1.1" +[[package]] +name = "fs_extra" +version = "1.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "42703706b716c37f96a77aea830392ad231f44c9e9a67872fa5548707e11b11c" + [[package]] name = "futures" version = "0.3.30" @@ -1445,6 +1492,7 @@ name = "garage_db" version = "1.0.1" dependencies = [ "err-derive", + "fjall", "heed", "hexdump", "mktemp", @@ -1700,6 +1748,12 @@ dependencies = [ "subtle", ] +[[package]] +name = "guardian" +version = "1.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "493913a18c0d7bebb75127a26a432162c59edbe06f6cf712001e3e769345e8b5" + [[package]] name = "h2" version = "0.3.24" @@ -2462,9 +2516,37 @@ dependencies = [ [[package]] name = "log" -version = "0.4.20" +version = "0.4.22" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b5e6163cb8c49088c2c36f57875e58ccd8c87c7427f7fbd50ea6710b2f3f2e8f" +checksum = "a7a70ba024b9dc04c27ea2f0c0548feb474ec5c54bba33a7f72f873a39d07b24" + +[[package]] +name = "lsm-tree" +version = "1.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e6b7522d314955643781c4050b48477985e61f892434e14e9e6148d27c60435d" +dependencies = [ + "byteorder", + "crc32fast", + "crossbeam-skiplist", + "double-ended-peekable", + "guardian", + "log", + "lz4_flex", + "path-absolutize", + "quick_cache", + "self_cell", + "tempfile", +] + +[[package]] +name = "lz4_flex" +version = "0.11.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "75761162ae2b0e580d7e7c390558127e5f01b4194debd6221fd8c207fc80e3f5" +dependencies = [ + "twox-hash", +] [[package]] name = "matchers" @@ -2893,6 +2975,24 @@ version = "1.0.14" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "de3145af08024dea9fa9914f381a17b8fc6034dfb00f3a84013f7ff43f29ed4c" +[[package]] +name = "path-absolutize" +version = "3.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e4af381fe79fa195b4909485d99f73a80792331df0625188e707854f0b3383f5" +dependencies = [ + "path-dedot", +] + +[[package]] +name = "path-dedot" +version = "3.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "07ba0ad7e047712414213ff67533e6dd477af0a4e1d14fb52343e53d30ea9397" +dependencies = [ + "once_cell", +] + [[package]] name = "pem" version = "3.0.3" @@ -3195,6 +3295,17 @@ dependencies = [ "serde", ] +[[package]] +name = "quick_cache" +version = "0.6.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2ec0b6fed0a0ff01fa82d0c8982389375dd59c72dae84d4f8a15b1a894c273f7" +dependencies = [ + "ahash", + "equivalent", + "hashbrown 0.14.3", +] + [[package]] name = "quote" version = "1.0.35" @@ -3682,6 +3793,12 @@ dependencies = [ "libc", ] +[[package]] +name = "self_cell" +version = "1.0.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d369a96f978623eb3dc28807c4852d6cc617fed53da5d3c400feff1ef34a714a" + [[package]] name = "semver" version = "1.0.21" @@ -3874,6 +3991,12 @@ dependencies = [ "der", ] +[[package]] +name = "static_assertions" +version = "1.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a2eb9349b6444b326872e140eb1cf5e7c522154d69e7a0ffb0fb81c06b37543f" + [[package]] name = "static_init" version = "1.0.3" @@ -3902,6 +4025,12 @@ dependencies = [ "syn 1.0.109", ] +[[package]] +name = "std-semaphore" +version = "0.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "33ae9eec00137a8eed469fb4148acd9fc6ac8c3f9b110f52cd34698c8b5bfa0e" + [[package]] name = "strsim" version = "0.10.0" @@ -4021,15 +4150,15 @@ dependencies = [ [[package]] name = "tempfile" -version = "3.9.0" +version = "3.12.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "01ce4141aa927a6d1bd34a041795abd0db1cccba5d5f24b009f694bdf3a1f3fa" +checksum = "04cbcdd0c794ebb0d4cf35e88edd2f7d2c4c3e9a5a6dab322839b321c6a87a64" dependencies = [ "cfg-if", "fastrand", - "redox_syscall 0.4.1", + "once_cell", "rustix", - "windows-sys 0.52.0", + "windows-sys 0.59.0", ] [[package]] @@ -4450,6 +4579,16 @@ version = "0.2.5" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "e421abadd41a4225275504ea4d6566923418b7f05506fbc9c0fe86ba7396114b" +[[package]] +name = "twox-hash" +version = "1.6.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "97fee6b57c6a41524a810daee9286c02d7752c4253064d0b05472833a438f675" +dependencies = [ + "cfg-if", + "static_assertions", +] + [[package]] name = "typenum" version = "1.17.0" @@ -4730,7 +4869,7 @@ version = "0.52.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "33ab640c8d7e35bf8ba19b884ba838ceb4fba93a4e8c65a9059d08afcfc683d9" dependencies = [ - "windows-targets 0.52.0", + "windows-targets 0.52.6", ] [[package]] @@ -4748,7 +4887,16 @@ version = "0.52.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "282be5f36a8ce781fad8c8ae18fa3f9beff57ec1b52cb3de0789201425d9a33d" dependencies = [ - "windows-targets 0.52.0", + "windows-targets 0.52.6", +] + +[[package]] +name = "windows-sys" +version = "0.59.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1e38bc4d79ed67fd075bcc251a1c39b32a1776bbe92e5bef1f0bf1f8c531853b" +dependencies = [ + "windows-targets 0.52.6", ] [[package]] @@ -4768,17 +4916,18 @@ dependencies = [ [[package]] name = "windows-targets" -version = "0.52.0" +version = "0.52.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8a18201040b24831fbb9e4eb208f8892e1f50a37feb53cc7ff887feb8f50e7cd" +checksum = "9b724f72796e036ab90c1021d4780d4d3d648aca59e491e6b98e725b84e99973" dependencies = [ - "windows_aarch64_gnullvm 0.52.0", - "windows_aarch64_msvc 0.52.0", - "windows_i686_gnu 0.52.0", - "windows_i686_msvc 0.52.0", - "windows_x86_64_gnu 0.52.0", - "windows_x86_64_gnullvm 0.52.0", - "windows_x86_64_msvc 0.52.0", + "windows_aarch64_gnullvm 0.52.6", + "windows_aarch64_msvc 0.52.6", + "windows_i686_gnu 0.52.6", + "windows_i686_gnullvm", + "windows_i686_msvc 0.52.6", + "windows_x86_64_gnu 0.52.6", + "windows_x86_64_gnullvm 0.52.6", + "windows_x86_64_msvc 0.52.6", ] [[package]] @@ -4789,9 +4938,9 @@ checksum = "2b38e32f0abccf9987a4e3079dfb67dcd799fb61361e53e2882c3cbaf0d905d8" [[package]] name = "windows_aarch64_gnullvm" -version = "0.52.0" +version = "0.52.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "cb7764e35d4db8a7921e09562a0304bf2f93e0a51bfccee0bd0bb0b666b015ea" +checksum = "32a4622180e7a0ec044bb555404c800bc9fd9ec262ec147edd5989ccd0c02cd3" [[package]] name = "windows_aarch64_msvc" @@ -4801,9 +4950,9 @@ checksum = "dc35310971f3b2dbbf3f0690a219f40e2d9afcf64f9ab7cc1be722937c26b4bc" [[package]] name = "windows_aarch64_msvc" -version = "0.52.0" +version = "0.52.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "bbaa0368d4f1d2aaefc55b6fcfee13f41544ddf36801e793edbbfd7d7df075ef" +checksum = "09ec2a7bb152e2252b53fa7803150007879548bc709c039df7627cabbd05d469" [[package]] name = "windows_i686_gnu" @@ -4813,9 +4962,15 @@ checksum = "a75915e7def60c94dcef72200b9a8e58e5091744960da64ec734a6c6e9b3743e" [[package]] name = "windows_i686_gnu" -version = "0.52.0" +version = "0.52.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a28637cb1fa3560a16915793afb20081aba2c92ee8af57b4d5f28e4b3e7df313" +checksum = "8e9b5ad5ab802e97eb8e295ac6720e509ee4c243f69d781394014ebfe8bbfa0b" + +[[package]] +name = "windows_i686_gnullvm" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0eee52d38c090b3caa76c563b86c3a4bd71ef1a819287c19d586d7334ae8ed66" [[package]] name = "windows_i686_msvc" @@ -4825,9 +4980,9 @@ checksum = "8f55c233f70c4b27f66c523580f78f1004e8b5a8b659e05a4eb49d4166cca406" [[package]] name = "windows_i686_msvc" -version = "0.52.0" +version = "0.52.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ffe5e8e31046ce6230cc7215707b816e339ff4d4d67c65dffa206fd0f7aa7b9a" +checksum = "240948bc05c5e7c6dabba28bf89d89ffce3e303022809e73deaefe4f6ec56c66" [[package]] name = "windows_x86_64_gnu" @@ -4837,9 +4992,9 @@ checksum = "53d40abd2583d23e4718fddf1ebec84dbff8381c07cae67ff7768bbf19c6718e" [[package]] name = "windows_x86_64_gnu" -version = "0.52.0" +version = "0.52.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3d6fa32db2bc4a2f5abeacf2b69f7992cd09dca97498da74a151a3132c26befd" +checksum = "147a5c80aabfbf0c7d901cb5895d1de30ef2907eb21fbbab29ca94c5b08b1a78" [[package]] name = "windows_x86_64_gnullvm" @@ -4849,9 +5004,9 @@ checksum = "0b7b52767868a23d5bab768e390dc5f5c55825b6d30b86c844ff2dc7414044cc" [[package]] name = "windows_x86_64_gnullvm" -version = "0.52.0" +version = "0.52.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1a657e1e9d3f514745a572a6846d3c7aa7dbe1658c056ed9c3344c4109a6949e" +checksum = "24d5b23dc417412679681396f2b49f3de8c1473deb516bd34410872eff51ed0d" [[package]] name = "windows_x86_64_msvc" @@ -4861,9 +5016,9 @@ checksum = "ed94fce61571a4006852b7389a063ab983c02eb1bb37b47f8272ce92d06d9538" [[package]] name = "windows_x86_64_msvc" -version = "0.52.0" +version = "0.52.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "dff9641d1cd4be8d1a070daf9e3773c5f67e78b4d9d42263020c057706765c04" +checksum = "589f6da84c646204747d1270a2a5661ea66ed1cced2631d546fdfb155959f9ec" [[package]] name = "winnow" diff --git a/Cargo.toml b/Cargo.toml index f327763e..fe3b32ea 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -85,6 +85,7 @@ heed = { version = "0.11", default-features = false, features = ["lmdb"] } rusqlite = "0.31.0" r2d2 = "0.8" r2d2_sqlite = "0.24" +fjall = "1.5" async-compression = { version = "0.4", features = ["tokio", "zstd"] } zstd = { version = "0.13", default-features = false } diff --git a/src/db/Cargo.toml b/src/db/Cargo.toml index 0a278bc0..8305f504 100644 --- a/src/db/Cargo.toml +++ b/src/db/Cargo.toml @@ -20,6 +20,7 @@ heed = { workspace = true, optional = true } rusqlite = { workspace = true, optional = true, features = ["backup"] } r2d2 = { workspace = true, optional = true } r2d2_sqlite = { workspace = true, optional = true } +fjall = { workspace = true, optional = true } [dev-dependencies] mktemp.workspace = true @@ -28,4 +29,5 @@ mktemp.workspace = true default = [ "lmdb", "sqlite" ] bundled-libs = [ "rusqlite?/bundled" ] lmdb = [ "heed" ] +fjall = [ "dep:fjall" ] sqlite = [ "rusqlite", "r2d2", "r2d2_sqlite" ] diff --git a/src/db/fjall_adapter.rs b/src/db/fjall_adapter.rs new file mode 100644 index 00000000..91703ddb --- /dev/null +++ b/src/db/fjall_adapter.rs @@ -0,0 +1,413 @@ +use core::ops::Bound; +use core::pin::Pin; +use core::ptr::NonNull; + +use std::collections::HashMap; +use std::path::PathBuf; +use std::sync::{Arc, RwLock}; + +use fjall::{ + PartitionCreateOptions, PersistMode, ReadTransaction, TransactionalKeyspace, + TransactionalPartitionHandle, WriteTransaction, +}; + +use crate::{ + Db, Error, IDb, ITx, ITxFn, OnCommit, Result, TxError, TxFnResult, TxOpError, TxOpResult, + TxResult, TxValueIter, Value, ValueIter, +}; + +pub use fjall; + +// -- err + +impl From for Error { + fn from(e: fjall::Error) -> Error { + Error(format!("fjall: {}", e).into()) + } +} + +impl From for Error { + fn from(e: fjall::LsmError) -> Error { + Error(format!("fjall lsm_tree: {}", e).into()) + } +} + +impl From for TxOpError { + fn from(e: fjall::Error) -> TxOpError { + TxOpError(e.into()) + } +} + +// -- db + +pub struct FjallDb { + path: PathBuf, + keyspace: TransactionalKeyspace, + trees: RwLock<(Vec, HashMap)>, +} + +type ByteRefRangeBound<'r> = (Bound<&'r [u8]>, Bound<&'r [u8]>); + +impl FjallDb { + pub fn init(path: &PathBuf, keyspace: TransactionalKeyspace) -> Db { + let s = Self { + path: path.clone(), + keyspace, + trees: RwLock::new((Vec::new(), HashMap::new())), + }; + Db(Arc::new(s)) + } + + fn get_tree(&self, i: usize) -> Result { + self.trees + .read() + .unwrap() + .0 + .get(i) + .cloned() + .ok_or_else(|| Error("invalid tree id".into())) + } + + fn canonicalize(name: &str) -> String { + name.chars() + .map(|c| { + if c.is_alphanumeric() || c == '-' || c == '_' { + c + } else { + '_' + } + }) + .collect::() + } +} + +impl IDb for FjallDb { + fn engine(&self) -> String { + "LSM trees (using Fjall crate)".into() + } + + fn open_tree(&self, name: &str) -> Result { + let mut trees = self.trees.write().unwrap(); + let canonical_name = FjallDb::canonicalize(name); + if let Some(i) = trees.1.get(&canonical_name) { + Ok(*i) + } else { + let tree = self + .keyspace + .open_partition(&canonical_name, PartitionCreateOptions::default())?; + let i = trees.0.len(); + trees.0.push(tree); + trees.1.insert(canonical_name, i); + Ok(i) + } + } + + fn list_trees(&self) -> Result> { + Ok(self + .keyspace + .list_partitions() + .iter() + .map(|n| n.to_string()) + .collect()) + } + + fn snapshot(&self, to: &PathBuf) -> Result<()> { + std::fs::create_dir_all(to)?; + let mut path = to.clone(); + path.push("data.fjall"); + + let source_keyspace = fjall::Config::new(&self.path).open()?; + let copy_keyspace = fjall::Config::new(path).open()?; + + for partition_name in source_keyspace.list_partitions() { + let source_partition = source_keyspace + .open_partition(&partition_name, PartitionCreateOptions::default())?; + let snapshot = source_partition.snapshot(); + let copy_partition = + copy_keyspace.open_partition(&partition_name, PartitionCreateOptions::default())?; + + for entry in snapshot.iter() { + let (key, value) = entry?; + copy_partition.insert(key, value)?; + } + } + + copy_keyspace.persist(PersistMode::SyncAll)?; + Ok(()) + } + + // ---- + + fn get(&self, tree_idx: usize, key: &[u8]) -> Result> { + let tree = self.get_tree(tree_idx)?; + let tx = self.keyspace.read_tx(); + let val = tx.get(&tree, key)?; + match val { + None => Ok(None), + Some(v) => Ok(Some(v.to_vec())), + } + } + + fn len(&self, tree_idx: usize) -> Result { + let tree = self.get_tree(tree_idx)?; + let tx = self.keyspace.read_tx(); + Ok(tx.len(&tree)?) + } + + fn insert(&self, tree_idx: usize, key: &[u8], value: &[u8]) -> Result<()> { + let tree = self.get_tree(tree_idx)?; + let mut tx = self.keyspace.write_tx(); + tx.insert(&tree, key, value); + tx.commit()?; + Ok(()) + } + + fn remove(&self, tree_idx: usize, key: &[u8]) -> Result<()> { + let tree = self.get_tree(tree_idx)?; + let mut tx = self.keyspace.write_tx(); + tx.remove(&tree, key); + tx.commit()?; + Ok(()) + } + + fn clear(&self, tree_idx: usize) -> Result<()> { + let tree = self.get_tree(tree_idx)?; + let tree_name = tree.inner().name.clone(); + self.keyspace.delete_partition(tree)?; + let tree = self + .keyspace + .open_partition(&tree_name, PartitionCreateOptions::default())?; + let mut trees = self.trees.write().unwrap(); + trees.0[tree_idx] = tree; + Ok(()) + } + + fn iter(&self, tree_idx: usize) -> Result> { + let tree = self.get_tree(tree_idx)?; + let tx = self.keyspace.read_tx(); + TxAndIterator::make(tx, |tx| Ok(tx.iter(&tree))) + } + + fn iter_rev(&self, tree_idx: usize) -> Result> { + let tree = self.get_tree(tree_idx)?; + let tx = self.keyspace.read_tx(); + TxAndIterator::make(tx, |tx| Ok(tx.iter(&tree).rev())) + } + + fn range<'r>( + &self, + tree_idx: usize, + low: Bound<&'r [u8]>, + high: Bound<&'r [u8]>, + ) -> Result> { + let tree = self.get_tree(tree_idx)?; + let tx = self.keyspace.read_tx(); + TxAndIterator::make(tx, |tx| { + Ok(tx.range::<&'r [u8], ByteRefRangeBound>(&tree, (low, high))) + }) + } + fn range_rev<'r>( + &self, + tree_idx: usize, + low: Bound<&'r [u8]>, + high: Bound<&'r [u8]>, + ) -> Result> { + let tree = self.get_tree(tree_idx)?; + let tx = self.keyspace.read_tx(); + TxAndIterator::make(tx, |tx| { + Ok(tx + .range::<&'r [u8], ByteRefRangeBound>(&tree, (low, high)) + .rev()) + }) + } + + // ---- + + fn transaction(&self, f: &dyn ITxFn) -> TxResult { + let trees = self.trees.read().unwrap(); + let mut tx = FjallTx { + trees: &trees.0[..], + tx: self.keyspace.write_tx(), + }; + + let res = f.try_on(&mut tx); + match res { + TxFnResult::Ok(on_commit) => { + tx.tx.commit().map_err(Error::from).map_err(TxError::Db)?; + Ok(on_commit) + } + TxFnResult::Abort => { + tx.tx.rollback(); + Err(TxError::Abort(())) + } + TxFnResult::DbErr => { + tx.tx.rollback(); + Err(TxError::Db(Error( + "(this message will be discarded)".into(), + ))) + } + } + } +} + +// ---- + +struct FjallTx<'a> { + trees: &'a [TransactionalPartitionHandle], + tx: WriteTransaction<'a>, +} + +impl<'a> FjallTx<'a> { + fn get_tree(&self, i: usize) -> TxOpResult<&TransactionalPartitionHandle> { + self.trees.get(i).ok_or_else(|| { + TxOpError(Error( + "invalid tree id (it might have been openned after the transaction started)".into(), + )) + }) + } +} + +impl<'a> ITx for FjallTx<'a> { + fn get(&self, tree_idx: usize, key: &[u8]) -> TxOpResult> { + let tree = self.get_tree(tree_idx)?; + match self.tx.get(tree, key)? { + Some(v) => Ok(Some(v.to_vec())), + None => Ok(None), + } + } + fn len(&self, tree_idx: usize) -> TxOpResult { + let tree = self.get_tree(tree_idx)?; + Ok(self.tx.len(tree)? as usize) + } + + fn insert(&mut self, tree_idx: usize, key: &[u8], value: &[u8]) -> TxOpResult<()> { + let tree = self.get_tree(tree_idx)?.clone(); + self.tx.insert(&tree, key, value); + Ok(()) + } + fn remove(&mut self, tree_idx: usize, key: &[u8]) -> TxOpResult<()> { + let tree = self.get_tree(tree_idx)?.clone(); + self.tx.remove(&tree, key); + Ok(()) + } + fn clear(&mut self, _tree_idx: usize) -> TxOpResult<()> { + unimplemented!("LSM tree clearing in cross-partition transaction is not supported") + } + + fn iter(&self, tree_idx: usize) -> TxOpResult> { + let tree = self.get_tree(tree_idx)?.clone(); + Ok(Box::new(self.tx.iter(&tree).map(tx_iter_item))) + } + fn iter_rev(&self, tree_idx: usize) -> TxOpResult> { + let tree = self.get_tree(tree_idx)?.clone(); + Ok(Box::new(self.tx.iter(&tree).rev().map(tx_iter_item))) + } + + fn range<'r>( + &self, + tree_idx: usize, + low: Bound<&'r [u8]>, + high: Bound<&'r [u8]>, + ) -> TxOpResult> { + let tree = self.get_tree(tree_idx)?.clone(); + Ok(Box::new( + self.tx + .range::<&'r [u8], ByteRefRangeBound>(&tree, (low, high)) + .map(tx_iter_item), + )) + } + fn range_rev<'r>( + &self, + tree_idx: usize, + low: Bound<&'r [u8]>, + high: Bound<&'r [u8]>, + ) -> TxOpResult> { + let tree = self.get_tree(tree_idx)?.clone(); + Ok(Box::new( + self.tx + .range::<&'r [u8], ByteRefRangeBound>(&tree, (low, high)) + .rev() + .map(tx_iter_item), + )) + } +} + +// ---- iterators outside transactions ---- +// complicated, they must hold the transaction object +// therefore a bit of unsafe code (it is a self-referential struct) + +type IteratorItem = fjall::Result<(fjall::UserKey, fjall::UserValue)>; + +struct TxAndIterator +where + I: Iterator, +{ + tx: ReadTransaction, + iter: Option, +} + +impl<'a, I> TxAndIterator +where + I: Iterator + 'a, +{ + fn make(tx: ReadTransaction, iterfun: F) -> Result> + where + F: FnOnce(&ReadTransaction) -> Result, + { + let res = TxAndIterator { tx, iter: None }; + let mut boxed = Box::pin(res); + + // This unsafe allows us to bypass lifetime checks + let tx = unsafe { NonNull::from(&boxed.tx).as_ref() }; + let iter = iterfun(tx)?; + + let mut_ref = Pin::as_mut(&mut boxed); + // This unsafe allows us to write in a field of the pinned struct + unsafe { + Pin::get_unchecked_mut(mut_ref).iter = Some(iter); + } + + Ok(Box::new(TxAndIteratorPin(boxed))) + } +} + +impl<'a, I> Drop for TxAndIterator +where + I: Iterator, +{ + fn drop(&mut self) { + // ensure the iterator is dropped before the RoTxn it references + drop(self.iter.take()); + } +} + +struct TxAndIteratorPin(Pin>>) +where + I: Iterator; + +impl Iterator for TxAndIteratorPin +where + I: Iterator, +{ + type Item = Result<(Value, Value)>; + + fn next(&mut self) -> Option { + let mut_ref = Pin::as_mut(&mut self.0); + // This unsafe allows us to mutably access the iterator field + let next = unsafe { Pin::get_unchecked_mut(mut_ref).iter.as_mut()?.next() }; + match next { + None => None, + Some(Err(e)) => Some(Err(e.into())), + Some(Ok((k, v))) => Some(Ok((k.to_vec(), v.to_vec()))), + } + } +} + +// ---- iterators within transactions ---- + +fn tx_iter_item<'a>( + item: std::result::Result<(Arc<[u8]>, Arc<[u8]>), fjall::Error>, +) -> TxOpResult<(Vec, Vec)> { + item.map(|(k, v)| (k.to_vec(), v.to_vec())) + .map_err(|e| TxOpError(Error::from(e))) +} diff --git a/src/db/lib.rs b/src/db/lib.rs index 3485745a..d5c3fba4 100644 --- a/src/db/lib.rs +++ b/src/db/lib.rs @@ -1,6 +1,8 @@ #[macro_use] extern crate tracing; +#[cfg(feature = "fjall")] +pub mod fjall_adapter; #[cfg(feature = "lmdb")] pub mod lmdb_adapter; #[cfg(feature = "sqlite")] diff --git a/src/db/open.rs b/src/db/open.rs index ff3bc830..19cb4e87 100644 --- a/src/db/open.rs +++ b/src/db/open.rs @@ -11,6 +11,7 @@ use crate::{Db, Error, Result}; pub enum Engine { Lmdb, Sqlite, + Fjall, } impl Engine { @@ -19,6 +20,7 @@ impl Engine { match self { Self::Lmdb => "lmdb", Self::Sqlite => "sqlite", + Self::Fjall => "fjall", } } } @@ -36,6 +38,7 @@ impl std::str::FromStr for Engine { match text { "lmdb" | "heed" => Ok(Self::Lmdb), "sqlite" | "sqlite3" | "rusqlite" => Ok(Self::Sqlite), + "fjall" => Ok(Self::Fjall), "sled" => Err(Error("Sled is no longer supported as a database engine. Converting your old metadata db can be done using an older Garage binary (e.g. v0.9.4).".into())), kind => Err(Error( format!( @@ -114,6 +117,15 @@ pub fn open_db(path: &PathBuf, engine: Engine, opt: &OpenOpt) -> Result { } } + // ---- Fjall DB ---- + #[cfg(feature = "fjall")] + Engine::Fjall => { + info!("Opening Fjall database at: {}", path.display()); + let fsync_ms = opt.fsync.then(|| 1000 as u16); + let keyspace = fjall::Config::new(path).fsync_ms(fsync_ms).open_transactional()?; + Ok(crate::fjall_adapter::FjallDb::init(path, keyspace)) + } + // Pattern is unreachable when all supported DB engines are compiled into binary. The allow // attribute is added so that we won't have to change this match in case stop building // support for one or more engines by default. diff --git a/src/garage/Cargo.toml b/src/garage/Cargo.toml index 483e33c0..cef83f1d 100644 --- a/src/garage/Cargo.toml +++ b/src/garage/Cargo.toml @@ -89,6 +89,7 @@ k2v = [ "garage_util/k2v", "garage_api/k2v" ] # Database engines lmdb = [ "garage_model/lmdb" ] sqlite = [ "garage_model/sqlite" ] +fjall = [ "garage_model/fjall" ] # Automatic registration and discovery via Consul API consul-discovery = [ "garage_rpc/consul-discovery" ] diff --git a/src/model/Cargo.toml b/src/model/Cargo.toml index 12931a4c..86312a77 100644 --- a/src/model/Cargo.toml +++ b/src/model/Cargo.toml @@ -47,3 +47,4 @@ default = [ "lmdb", "sqlite" ] k2v = [ "garage_util/k2v" ] lmdb = [ "garage_db/lmdb" ] sqlite = [ "garage_db/sqlite" ] +fjall = [ "garage_db/fjall" ] \ No newline at end of file diff --git a/src/model/garage.rs b/src/model/garage.rs index 363b02dd..36836fb4 100644 --- a/src/model/garage.rs +++ b/src/model/garage.rs @@ -124,6 +124,9 @@ impl Garage { db::Engine::Lmdb => { db_path.push("db.lmdb"); } + db::Engine::Fjall => { + db_path.push("db.fjall"); + } } let db_opt = db::OpenOpt { fsync: config.metadata_fsync,