From 44454aac012cbef9158110f2352301ffcfaf31c7 Mon Sep 17 00:00:00 2001 From: Alex Auvolat Date: Fri, 8 Mar 2024 14:11:02 +0100 Subject: [PATCH 1/6] [rm-sled] Remove the Sled database engine --- Cargo.lock | 45 ---- Cargo.nix | 65 +---- Cargo.toml | 1 - default.nix | 1 - doc/api/garage-admin-v1.yml | 1 - doc/book/connect/apps/index.md | 2 +- doc/book/cookbook/from-source.md | 3 +- doc/book/cookbook/real-world.md | 5 +- doc/book/design/internals.md | 2 +- doc/book/reference-manual/configuration.md | 38 +-- doc/drafts/admin-api.md | 1 - doc/drafts/k2v-spec.md | 2 +- nix/compile.nix | 2 +- script/helm/garage/values.yaml | 12 +- src/block/resync.rs | 6 +- src/db/Cargo.toml | 3 +- src/db/lib.rs | 2 - src/db/open.rs | 27 +- src/db/sled_adapter.rs | 274 --------------------- src/db/test.rs | 11 - src/garage/Cargo.toml | 5 +- src/garage/cli/convert_db.rs | 2 +- src/garage/main.rs | 6 +- src/model/Cargo.toml | 3 +- src/model/garage.rs | 5 - src/table/gc.rs | 6 +- src/table/merkle.rs | 4 +- src/util/config.rs | 19 +- 28 files changed, 34 insertions(+), 519 deletions(-) delete mode 100644 src/db/sled_adapter.rs diff --git a/Cargo.lock b/Cargo.lock index f1aa93bf..284e2276 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -921,15 +921,6 @@ dependencies = [ "crossbeam-utils", ] -[[package]] -name = "crossbeam-epoch" -version = "0.9.18" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5b82ac4a3c2ca9c3460964f020e1402edd5753411d7737aa39c3714ad1b5420e" -dependencies = [ - "crossbeam-utils", -] - [[package]] name = "crossbeam-queue" version = "0.3.11" @@ -1222,16 +1213,6 @@ dependencies = [ name = "format_table" version = "0.1.1" -[[package]] -name = "fs2" -version = "0.4.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9564fc758e15025b46aa6643b1b77d047d1a56a1aea6e01002ac0c7026876213" -dependencies = [ - "libc", - "winapi", -] - [[package]] name = "futures" version = "0.3.30" @@ -1321,15 +1302,6 @@ dependencies = [ "slab", ] -[[package]] -name = "fxhash" -version = "0.2.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c31b6d751ae2c7f11320402d34e41349dd1016f8d5d45e48c4312bc8625af50c" -dependencies = [ - "byteorder", -] - [[package]] name = "garage" version = "0.10.0" @@ -1472,7 +1444,6 @@ dependencies = [ "hexdump", "mktemp", "rusqlite", - "sled", "tracing", ] @@ -3831,22 +3802,6 @@ dependencies = [ "autocfg", ] -[[package]] -name = "sled" -version = "0.34.7" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7f96b4737c2ce5987354855aed3797279def4ebf734436c6aa4552cf8e169935" -dependencies = [ - "crc32fast", - "crossbeam-epoch", - "crossbeam-utils", - "fs2", - "fxhash", - "libc", - "log", - "parking_lot 0.11.2", -] - [[package]] name = "smallvec" version = "1.13.1" diff --git a/Cargo.nix b/Cargo.nix index cd485416..0bc28ecd 100644 --- a/Cargo.nix +++ b/Cargo.nix @@ -34,7 +34,7 @@ args@{ ignoreLockHash, }: let - nixifiedLockHash = "c3296a54f1c6f385e0d4a4a937734f1fe0fee4405b44d7462249d72675f7ac40"; + nixifiedLockHash = "23e1504df44ec18cfc5c872c858154304c16da2a6c1f7c9f06608ef833815f30"; workspaceSrc = if args.workspaceSrc == null then ./. else args.workspaceSrc; currentLockHash = builtins.hashFile "sha256" (workspaceSrc + /Cargo.lock); lockHashIgnored = if ignoreLockHash @@ -1315,21 +1315,6 @@ in }; }); - "registry+https://github.com/rust-lang/crates.io-index".crossbeam-epoch."0.9.18" = overridableMkRustCrate (profileName: rec { - name = "crossbeam-epoch"; - version = "0.9.18"; - registry = "registry+https://github.com/rust-lang/crates.io-index"; - src = fetchCratesIo { inherit name version; sha256 = "5b82ac4a3c2ca9c3460964f020e1402edd5753411d7737aa39c3714ad1b5420e"; }; - features = builtins.concatLists [ - (lib.optional (rootFeatures' ? "garage/default" || rootFeatures' ? "garage/sled" || rootFeatures' ? "garage_db/default" || rootFeatures' ? "garage_db/sled" || rootFeatures' ? "garage_model/default" || rootFeatures' ? "garage_model/sled") "alloc") - (lib.optional (rootFeatures' ? "garage/default" || rootFeatures' ? "garage/sled" || rootFeatures' ? "garage_db/default" || rootFeatures' ? "garage_db/sled" || rootFeatures' ? "garage_model/default" || rootFeatures' ? "garage_model/sled") "default") - (lib.optional (rootFeatures' ? "garage/default" || rootFeatures' ? "garage/sled" || rootFeatures' ? "garage_db/default" || rootFeatures' ? "garage_db/sled" || rootFeatures' ? "garage_model/default" || rootFeatures' ? "garage_model/sled") "std") - ]; - dependencies = { - ${ if rootFeatures' ? "garage/default" || rootFeatures' ? "garage/sled" || rootFeatures' ? "garage_db/default" || rootFeatures' ? "garage_db/sled" || rootFeatures' ? "garage_model/default" || rootFeatures' ? "garage_model/sled" then "crossbeam_utils" else null } = (rustPackages."registry+https://github.com/rust-lang/crates.io-index".crossbeam-utils."0.8.19" { inherit profileName; }).out; - }; - }); - "registry+https://github.com/rust-lang/crates.io-index".crossbeam-queue."0.3.11" = overridableMkRustCrate (profileName: rec { name = "crossbeam-queue"; version = "0.3.11"; @@ -1351,7 +1336,6 @@ in registry = "registry+https://github.com/rust-lang/crates.io-index"; src = fetchCratesIo { inherit name version; sha256 = "248e3bacc7dc6baa3b21e405ee045c3047101a49145e7e9eca583ab4c2ca5345"; }; features = builtins.concatLists [ - (lib.optional (rootFeatures' ? "garage/default" || rootFeatures' ? "garage/sled" || rootFeatures' ? "garage_db/default" || rootFeatures' ? "garage_db/sled" || rootFeatures' ? "garage_model/default" || rootFeatures' ? "garage_model/sled") "default") [ "std" ] ]; }); @@ -1775,17 +1759,6 @@ in src = fetchCrateLocal (workspaceSrc + "/src/format-table"); }); - "registry+https://github.com/rust-lang/crates.io-index".fs2."0.4.3" = overridableMkRustCrate (profileName: rec { - name = "fs2"; - version = "0.4.3"; - registry = "registry+https://github.com/rust-lang/crates.io-index"; - src = fetchCratesIo { inherit name version; sha256 = "9564fc758e15025b46aa6643b1b77d047d1a56a1aea6e01002ac0c7026876213"; }; - dependencies = { - ${ if (rootFeatures' ? "garage/default" || rootFeatures' ? "garage/sled" || rootFeatures' ? "garage_db/default" || rootFeatures' ? "garage_db/sled" || rootFeatures' ? "garage_model/default" || rootFeatures' ? "garage_model/sled") && hostPlatform.isUnix then "libc" else null } = (rustPackages."registry+https://github.com/rust-lang/crates.io-index".libc."0.2.153" { inherit profileName; }).out; - ${ if (rootFeatures' ? "garage/default" || rootFeatures' ? "garage/sled" || rootFeatures' ? "garage_db/default" || rootFeatures' ? "garage_db/sled" || rootFeatures' ? "garage_model/default" || rootFeatures' ? "garage_model/sled") && hostPlatform.isWindows then "winapi" else null } = (rustPackages."registry+https://github.com/rust-lang/crates.io-index".winapi."0.3.9" { inherit profileName; }).out; - }; - }); - "registry+https://github.com/rust-lang/crates.io-index".futures."0.3.30" = overridableMkRustCrate (profileName: rec { name = "futures"; version = "0.3.30"; @@ -1937,16 +1910,6 @@ in }; }); - "registry+https://github.com/rust-lang/crates.io-index".fxhash."0.2.1" = overridableMkRustCrate (profileName: rec { - name = "fxhash"; - version = "0.2.1"; - registry = "registry+https://github.com/rust-lang/crates.io-index"; - src = fetchCratesIo { inherit name version; sha256 = "c31b6d751ae2c7f11320402d34e41349dd1016f8d5d45e48c4312bc8625af50c"; }; - dependencies = { - ${ if rootFeatures' ? "garage/default" || rootFeatures' ? "garage/sled" || rootFeatures' ? "garage_db/default" || rootFeatures' ? "garage_db/sled" || rootFeatures' ? "garage_model/default" || rootFeatures' ? "garage_model/sled" then "byteorder" else null } = (rustPackages."registry+https://github.com/rust-lang/crates.io-index".byteorder."1.5.0" { inherit profileName; }).out; - }; - }); - "unknown".garage."0.10.0" = overridableMkRustCrate (profileName: rec { name = "garage"; version = "0.10.0"; @@ -1963,7 +1926,6 @@ in (lib.optional (rootFeatures' ? "garage/opentelemetry-otlp" || rootFeatures' ? "garage/telemetry-otlp") "opentelemetry-otlp") (lib.optional (rootFeatures' ? "garage/default" || rootFeatures' ? "garage/metrics" || rootFeatures' ? "garage/opentelemetry-prometheus") "opentelemetry-prometheus") (lib.optional (rootFeatures' ? "garage/default" || rootFeatures' ? "garage/metrics" || rootFeatures' ? "garage/prometheus") "prometheus") - (lib.optional (rootFeatures' ? "garage/default" || rootFeatures' ? "garage/sled") "sled") (lib.optional (rootFeatures' ? "garage/default" || rootFeatures' ? "garage/sqlite") "sqlite") (lib.optional (rootFeatures' ? "garage/system-libs") "system-libs") (lib.optional (rootFeatures' ? "garage/telemetry-otlp") "telemetry-otlp") @@ -2127,7 +2089,6 @@ in (lib.optional (rootFeatures' ? "garage/default" || rootFeatures' ? "garage/lmdb" || rootFeatures' ? "garage_db/default" || rootFeatures' ? "garage_db/heed" || rootFeatures' ? "garage_db/lmdb" || rootFeatures' ? "garage_model/default" || rootFeatures' ? "garage_model/lmdb") "heed") (lib.optional (rootFeatures' ? "garage/default" || rootFeatures' ? "garage/lmdb" || rootFeatures' ? "garage_db/default" || rootFeatures' ? "garage_db/lmdb" || rootFeatures' ? "garage_model/default" || rootFeatures' ? "garage_model/lmdb") "lmdb") (lib.optional (rootFeatures' ? "garage/default" || rootFeatures' ? "garage/sqlite" || rootFeatures' ? "garage_db/default" || rootFeatures' ? "garage_db/rusqlite" || rootFeatures' ? "garage_db/sqlite" || rootFeatures' ? "garage_model/default" || rootFeatures' ? "garage_model/sqlite") "rusqlite") - (lib.optional (rootFeatures' ? "garage/default" || rootFeatures' ? "garage/sled" || rootFeatures' ? "garage_db/default" || rootFeatures' ? "garage_db/sled" || rootFeatures' ? "garage_model/default" || rootFeatures' ? "garage_model/sled") "sled") (lib.optional (rootFeatures' ? "garage/default" || rootFeatures' ? "garage/sqlite" || rootFeatures' ? "garage_db/default" || rootFeatures' ? "garage_db/sqlite" || rootFeatures' ? "garage_model/default" || rootFeatures' ? "garage_model/sqlite") "sqlite") ]; dependencies = { @@ -2135,7 +2096,6 @@ in ${ if rootFeatures' ? "garage/default" || rootFeatures' ? "garage/lmdb" || rootFeatures' ? "garage_db/default" || rootFeatures' ? "garage_db/heed" || rootFeatures' ? "garage_db/lmdb" || rootFeatures' ? "garage_model/default" || rootFeatures' ? "garage_model/lmdb" then "heed" else null } = (rustPackages."registry+https://github.com/rust-lang/crates.io-index".heed."0.11.0" { inherit profileName; }).out; hexdump = (rustPackages."registry+https://github.com/rust-lang/crates.io-index".hexdump."0.1.1" { inherit profileName; }).out; ${ if rootFeatures' ? "garage/bundled-libs" || rootFeatures' ? "garage/default" || rootFeatures' ? "garage/sqlite" || rootFeatures' ? "garage_db/bundled-libs" || rootFeatures' ? "garage_db/default" || rootFeatures' ? "garage_db/rusqlite" || rootFeatures' ? "garage_db/sqlite" || rootFeatures' ? "garage_model/default" || rootFeatures' ? "garage_model/sqlite" then "rusqlite" else null } = (rustPackages."registry+https://github.com/rust-lang/crates.io-index".rusqlite."0.30.0" { inherit profileName; }).out; - ${ if rootFeatures' ? "garage/default" || rootFeatures' ? "garage/sled" || rootFeatures' ? "garage_db/default" || rootFeatures' ? "garage_db/sled" || rootFeatures' ? "garage_model/default" || rootFeatures' ? "garage_model/sled" then "sled" else null } = (rustPackages."registry+https://github.com/rust-lang/crates.io-index".sled."0.34.7" { inherit profileName; }).out; tracing = (rustPackages."registry+https://github.com/rust-lang/crates.io-index".tracing."0.1.40" { inherit profileName; }).out; }; devDependencies = { @@ -2152,7 +2112,6 @@ in (lib.optional (rootFeatures' ? "garage_model/default") "default") (lib.optional (rootFeatures' ? "garage/default" || rootFeatures' ? "garage/k2v" || rootFeatures' ? "garage_api/k2v" || rootFeatures' ? "garage_model/k2v") "k2v") (lib.optional (rootFeatures' ? "garage/default" || rootFeatures' ? "garage/lmdb" || rootFeatures' ? "garage_model/default" || rootFeatures' ? "garage_model/lmdb") "lmdb") - (lib.optional (rootFeatures' ? "garage/default" || rootFeatures' ? "garage/sled" || rootFeatures' ? "garage_model/default" || rootFeatures' ? "garage_model/sled") "sled") (lib.optional (rootFeatures' ? "garage/default" || rootFeatures' ? "garage/sqlite" || rootFeatures' ? "garage_model/default" || rootFeatures' ? "garage_model/sqlite") "sqlite") ]; dependencies = { @@ -5459,27 +5418,6 @@ in }; }); - "registry+https://github.com/rust-lang/crates.io-index".sled."0.34.7" = overridableMkRustCrate (profileName: rec { - name = "sled"; - version = "0.34.7"; - registry = "registry+https://github.com/rust-lang/crates.io-index"; - src = fetchCratesIo { inherit name version; sha256 = "7f96b4737c2ce5987354855aed3797279def4ebf734436c6aa4552cf8e169935"; }; - features = builtins.concatLists [ - (lib.optional (rootFeatures' ? "garage/default" || rootFeatures' ? "garage/sled" || rootFeatures' ? "garage_db/default" || rootFeatures' ? "garage_db/sled" || rootFeatures' ? "garage_model/default" || rootFeatures' ? "garage_model/sled") "default") - (lib.optional (rootFeatures' ? "garage/default" || rootFeatures' ? "garage/sled" || rootFeatures' ? "garage_db/default" || rootFeatures' ? "garage_db/sled" || rootFeatures' ? "garage_model/default" || rootFeatures' ? "garage_model/sled") "no_metrics") - ]; - dependencies = { - ${ if rootFeatures' ? "garage/default" || rootFeatures' ? "garage/sled" || rootFeatures' ? "garage_db/default" || rootFeatures' ? "garage_db/sled" || rootFeatures' ? "garage_model/default" || rootFeatures' ? "garage_model/sled" then "crc32fast" else null } = (rustPackages."registry+https://github.com/rust-lang/crates.io-index".crc32fast."1.3.2" { inherit profileName; }).out; - ${ if rootFeatures' ? "garage/default" || rootFeatures' ? "garage/sled" || rootFeatures' ? "garage_db/default" || rootFeatures' ? "garage_db/sled" || rootFeatures' ? "garage_model/default" || rootFeatures' ? "garage_model/sled" then "crossbeam_epoch" else null } = (rustPackages."registry+https://github.com/rust-lang/crates.io-index".crossbeam-epoch."0.9.18" { inherit profileName; }).out; - ${ if rootFeatures' ? "garage/default" || rootFeatures' ? "garage/sled" || rootFeatures' ? "garage_db/default" || rootFeatures' ? "garage_db/sled" || rootFeatures' ? "garage_model/default" || rootFeatures' ? "garage_model/sled" then "crossbeam_utils" else null } = (rustPackages."registry+https://github.com/rust-lang/crates.io-index".crossbeam-utils."0.8.19" { inherit profileName; }).out; - ${ if (rootFeatures' ? "garage/default" || rootFeatures' ? "garage/sled" || rootFeatures' ? "garage_db/default" || rootFeatures' ? "garage_db/sled" || rootFeatures' ? "garage_model/default" || rootFeatures' ? "garage_model/sled") && (hostPlatform.parsed.kernel.name == "linux" || hostPlatform.parsed.kernel.name == "darwin" || hostPlatform.parsed.kernel.name == "windows") then "fs2" else null } = (rustPackages."registry+https://github.com/rust-lang/crates.io-index".fs2."0.4.3" { inherit profileName; }).out; - ${ if rootFeatures' ? "garage/default" || rootFeatures' ? "garage/sled" || rootFeatures' ? "garage_db/default" || rootFeatures' ? "garage_db/sled" || rootFeatures' ? "garage_model/default" || rootFeatures' ? "garage_model/sled" then "fxhash" else null } = (rustPackages."registry+https://github.com/rust-lang/crates.io-index".fxhash."0.2.1" { inherit profileName; }).out; - ${ if rootFeatures' ? "garage/default" || rootFeatures' ? "garage/sled" || rootFeatures' ? "garage_db/default" || rootFeatures' ? "garage_db/sled" || rootFeatures' ? "garage_model/default" || rootFeatures' ? "garage_model/sled" then "libc" else null } = (rustPackages."registry+https://github.com/rust-lang/crates.io-index".libc."0.2.153" { inherit profileName; }).out; - ${ if rootFeatures' ? "garage/default" || rootFeatures' ? "garage/sled" || rootFeatures' ? "garage_db/default" || rootFeatures' ? "garage_db/sled" || rootFeatures' ? "garage_model/default" || rootFeatures' ? "garage_model/sled" then "log" else null } = (rustPackages."registry+https://github.com/rust-lang/crates.io-index".log."0.4.20" { inherit profileName; }).out; - ${ if rootFeatures' ? "garage/default" || rootFeatures' ? "garage/sled" || rootFeatures' ? "garage_db/default" || rootFeatures' ? "garage_db/sled" || rootFeatures' ? "garage_model/default" || rootFeatures' ? "garage_model/sled" then "parking_lot" else null } = (rustPackages."registry+https://github.com/rust-lang/crates.io-index".parking_lot."0.11.2" { inherit profileName; }).out; - }; - }); - "registry+https://github.com/rust-lang/crates.io-index".smallvec."1.13.1" = overridableMkRustCrate (profileName: rec { name = "smallvec"; version = "1.13.1"; @@ -6723,7 +6661,6 @@ in [ "minwindef" ] [ "ntstatus" ] [ "processenv" ] - (lib.optional (rootFeatures' ? "garage/default" || rootFeatures' ? "garage/sled" || rootFeatures' ? "garage_db/default" || rootFeatures' ? "garage_db/sled" || rootFeatures' ? "garage_model/default" || rootFeatures' ? "garage_model/sled") "processthreadsapi") [ "std" ] [ "synchapi" ] [ "sysinfoapi" ] diff --git a/Cargo.toml b/Cargo.toml index c259c7f2..f40e8738 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -78,7 +78,6 @@ tracing-subscriber = { version = "0.3", features = ["env-filter"] } heed = { version = "0.11", default-features = false, features = ["lmdb"] } rusqlite = "0.30.0" -sled = "0.34" async-compression = { version = "0.4", features = ["tokio", "zstd"] } zstd = { version = "0.13", default-features = false } diff --git a/default.nix b/default.nix index ecdf6436..57bc24a5 100644 --- a/default.nix +++ b/default.nix @@ -40,7 +40,6 @@ in { features = [ "garage/bundled-libs" "garage/k2v" - "garage/sled" "garage/lmdb" "garage/sqlite" ]; diff --git a/doc/api/garage-admin-v1.yml b/doc/api/garage-admin-v1.yml index fd78feb1..1ea77b2e 100644 --- a/doc/api/garage-admin-v1.yml +++ b/doc/api/garage-admin-v1.yml @@ -98,7 +98,6 @@ paths: type: string example: - "k2v" - - "sled" - "lmdb" - "sqlite" - "consul-discovery" diff --git a/doc/book/connect/apps/index.md b/doc/book/connect/apps/index.md index 5def3851..9a678275 100644 --- a/doc/book/connect/apps/index.md +++ b/doc/book/connect/apps/index.md @@ -292,7 +292,7 @@ with average object size ranging from 50 KB to 150 KB. As such, your Garage cluster should be configured appropriately for good performance: - use Garage v0.8.0 or higher with the [LMDB database engine](@documentation/reference-manual/configuration.md#db-engine-since-v0-8-0). - With the default Sled database engine, your database could quickly end up taking tens of GB of disk space. + Older versions of Garage used the Sled database engine which had issues, such as databases quickly ending up taking tens of GB of disk space. - the Garage database should be stored on a SSD ### Creating your bucket diff --git a/doc/book/cookbook/from-source.md b/doc/book/cookbook/from-source.md index bacf93ab..f7fd17ce 100644 --- a/doc/book/cookbook/from-source.md +++ b/doc/book/cookbook/from-source.md @@ -90,6 +90,5 @@ The following feature flags are available in v0.8.0: | `kubernetes-discovery` | optional | Enable automatic registration and discovery
of cluster nodes through the Kubernetes API | | `metrics` | *by default* | Enable collection of metrics in Prometheus format on the admin API | | `telemetry-otlp` | optional | Enable collection of execution traces using OpenTelemetry | -| `sled` | *by default* | Enable using Sled to store Garage's metadata | -| `lmdb` | optional | Enable using LMDB to store Garage's metadata | +| `lmdb` | *by default* | Enable using LMDB to store Garage's metadata | | `sqlite` | optional | Enable using Sqlite3 to store Garage's metadata | diff --git a/doc/book/cookbook/real-world.md b/doc/book/cookbook/real-world.md index cb10b550..30be4907 100644 --- a/doc/book/cookbook/real-world.md +++ b/doc/book/cookbook/real-world.md @@ -70,9 +70,8 @@ to store 2 TB of data in total. - If you only have an HDD and no SSD, it's fine to put your metadata alongside the data on the same drive. Having lots of RAM for your kernel to cache the metadata will - help a lot with performance. Make sure to use the LMDB database engine, - instead of Sled, which suffers from quite bad performance degradation on HDDs. - Sled is still the default for legacy reasons, but is not recommended anymore. + help a lot with performance. The default LMDB database engine is the most tested + and has good performance. - For the metadata storage, Garage does not do checksumming and integrity verification on its own. If you are afraid of bitrot/data corruption, diff --git a/doc/book/design/internals.md b/doc/book/design/internals.md index cefb7acc..8e3c214e 100644 --- a/doc/book/design/internals.md +++ b/doc/book/design/internals.md @@ -97,7 +97,7 @@ delete a tombstone, the following condition has to be met: superseeded by the tombstone. This ensures that deleting the tombstone is safe and that no deleted value will come back in the system. -Garage makes use of Sled's atomic operations (such as compare-and-swap and +Garage uses atomic database operations (such as compare-and-swap and transactions) to ensure that only tombstones that have been correctly propagated to other nodes are ever deleted from the local entry tree. diff --git a/doc/book/reference-manual/configuration.md b/doc/book/reference-manual/configuration.md index 580e9fbc..4df2d0df 100644 --- a/doc/book/reference-manual/configuration.md +++ b/doc/book/reference-manual/configuration.md @@ -20,8 +20,6 @@ db_engine = "lmdb" block_size = "1M" -sled_cache_capacity = "128MiB" -sled_flush_every_ms = 2000 lmdb_map_size = "1T" compression_level = 1 @@ -96,9 +94,7 @@ Top-level configuration options: [`rpc_bind_addr`](#rpc_bind_addr), [`rpc_bind_outgoing`](#rpc_bind_outgoing), [`rpc_public_addr`](#rpc_public_addr), -[`rpc_secret`/`rpc_secret_file`](#rpc_secret), -[`sled_cache_capacity`](#sled_cache_capacity), -[`sled_flush_every_ms`](#sled_flush_every_ms). +[`rpc_secret`/`rpc_secret_file`](#rpc_secret). The `[consul_discovery]` section: [`api`](#consul_api), @@ -271,20 +267,16 @@ Since `v0.8.0`, Garage can use alternative storage backends as follows: | DB engine | `db_engine` value | Database path | | --------- | ----------------- | ------------- | -| [LMDB](https://www.lmdb.tech) (default since `v0.9.0`) | `"lmdb"` | `/db.lmdb/` | -| [Sled](https://sled.rs) (default up to `v0.8.0`) | `"sled"` | `/db/` | -| [Sqlite](https://sqlite.org) | `"sqlite"` | `/db.sqlite` | +| [LMDB](https://www.lmdb.tech) (since `v0.8.0`, default since `v0.9.0`) | `"lmdb"` | `/db.lmdb/` | +| [Sqlite](https://sqlite.org) (since `v0.8.0`) | `"sqlite"` | `/db.sqlite` | +| [Sled](https://sled.rs) (old default, removed since `v1.0`) | `"sled"` | `/db/` | -Sled was the only database engine up to Garage v0.7.0. Performance issues and -API limitations of Sled prompted the addition of alternative engines in v0.8.0. -Since v0.9.0, LMDB is the default engine instead of Sled, and Sled is -deprecated. We plan to remove Sled in Garage v1.0. +Sled was supported until Garage v0.9.x, and was removed in Garage v1.0. +You can still use an older binary of Garage (e.g. v0.9.3) to migrate +old Sled metadata databases to another engine. Performance characteristics of the different DB engines are as follows: -- Sled: tends to produce large data files and also has performance issues, - especially when the metadata folder is on a traditional HDD and not on SSD. - - LMDB: the recommended database engine on 64-bit systems, much more space-efficient and slightly faster. Note that the data format of LMDB is not portable between architectures, so for instance the Garage database of an @@ -333,7 +325,6 @@ Here is how this option impacts the different database engines: | Database | `metadata_fsync = false` (default) | `metadata_fsync = true` | |----------|------------------------------------|-------------------------------| -| Sled | default options | *unsupported* | | Sqlite | `PRAGMA synchronous = OFF` | `PRAGMA synchronous = NORMAL` | | LMDB | `MDB_NOMETASYNC` + `MDB_NOSYNC` | `MDB_NOMETASYNC` | @@ -367,21 +358,6 @@ files will remain available. This however means that chunks from existing files will not be deduplicated with chunks from newly uploaded files, meaning you might use more storage space that is optimally possible. -#### `sled_cache_capacity` {#sled_cache_capacity} - -This parameter can be used to tune the capacity of the cache used by -[sled](https://sled.rs), the database Garage uses internally to store metadata. -Tune this to fit the RAM you wish to make available to your Garage instance. -This value has a conservative default (128MB) so that Garage doesn't use too much -RAM by default, but feel free to increase this for higher performance. - -#### `sled_flush_every_ms` {#sled_flush_every_ms} - -This parameters can be used to tune the flushing interval of sled. -Increase this if sled is thrashing your SSD, at the risk of losing more data in case -of a power outage (though this should not matter much as data is replicated on other -nodes). The default value, 2000ms, should be appropriate for most use cases. - #### `lmdb_map_size` {#lmdb_map_size} This parameters can be used to set the map size used by LMDB, diff --git a/doc/drafts/admin-api.md b/doc/drafts/admin-api.md index e7851ab1..40c82f5a 100644 --- a/doc/drafts/admin-api.md +++ b/doc/drafts/admin-api.md @@ -73,7 +73,6 @@ Example response body: "garageVersion": "v0.10.0", "garageFeatures": [ "k2v", - "sled", "lmdb", "sqlite", "metrics", diff --git a/doc/drafts/k2v-spec.md b/doc/drafts/k2v-spec.md index faa1a247..3956fa31 100644 --- a/doc/drafts/k2v-spec.md +++ b/doc/drafts/k2v-spec.md @@ -146,7 +146,7 @@ in a bucket, as the partition key becomes the sort key in the index. How indexing works: - Each node keeps a local count of how many items it stores for each partition, - in a local Sled tree that is updated atomically when an item is modified. + in a local database tree that is updated atomically when an item is modified. - These local counters are asynchronously stored in the index table which is a regular Garage table spread in the network. Counters are stored as LWW values, so basically the final table will have the following structure: diff --git a/nix/compile.nix b/nix/compile.nix index 1e712710..78cbd80e 100644 --- a/nix/compile.nix +++ b/nix/compile.nix @@ -168,7 +168,7 @@ let rootFeatures = if features != null then features else - ([ "garage/bundled-libs" "garage/sled" "garage/lmdb" "garage/k2v" ] ++ (if release then [ + ([ "garage/bundled-libs" "garage/lmdb" "garage/k2v" ] ++ (if release then [ "garage/consul-discovery" "garage/kubernetes-discovery" "garage/metrics" diff --git a/script/helm/garage/values.yaml b/script/helm/garage/values.yaml index 02a6651b..56afa2b2 100644 --- a/script/helm/garage/values.yaml +++ b/script/helm/garage/values.yaml @@ -6,18 +6,13 @@ garage: # Can be changed for better performance on certain systems # https://garagehq.deuxfleurs.fr/documentation/reference-manual/configuration/#db-engine-since-v0-8-0 - dbEngine: "sled" + dbEngine: "lmdb" # Defaults is 1MB # An increase can result in better performance in certain scenarios # https://garagehq.deuxfleurs.fr/documentation/reference-manual/configuration/#block-size blockSize: "1048576" - # Tuning parameters for the sled DB engine - # https://garagehq.deuxfleurs.fr/documentation/reference-manual/configuration/#sled-cache-capacity - sledCacheCapacity: "134217728" - sledFlushEveryMs: "2000" - # Default to 3 replicas, see the replication_mode section at # https://garagehq.deuxfleurs.fr/documentation/reference-manual/configuration/#replication-mode replicationMode: "3" @@ -50,11 +45,6 @@ garage: block_size = {{ .Values.garage.blockSize }} - {{- if eq .Values.garage.dbEngine "sled"}} - sled_cache_capacity = {{ .Values.garage.sledCacheCapacity }} - sled_flush_every_ms = {{ .Values.garage.sledFlushEveryMs }} - {{- end }} - replication_mode = "{{ .Values.garage.replicationMode }}" compression_level = {{ .Values.garage.compressionLevel }} diff --git a/src/block/resync.rs b/src/block/resync.rs index 15f210e4..2516ba08 100644 --- a/src/block/resync.rs +++ b/src/block/resync.rs @@ -180,7 +180,7 @@ impl BlockResyncManager { // deleted once the garbage collection delay has passed. // // Here are some explanations on how the resync queue works. - // There are two Sled trees that are used to have information + // There are two db trees that are used to have information // about the status of blocks that need to be resynchronized: // // - resync.queue: a tree that is ordered first by a timestamp @@ -541,9 +541,9 @@ impl Worker for ResyncWorker { Ok(WorkerState::Idle) } Err(e) => { - // The errors that we have here are only Sled errors + // The errors that we have here are only db errors // We don't really know how to handle them so just ¯\_(ツ)_/¯ - // (there is kind of an assumption that Sled won't error on us, + // (there is kind of an assumption that the db won't error on us, // if it does there is not much we can do -- TODO should we just panic?) // Here we just give the error to the worker manager, // it will print it to the logs and increment a counter diff --git a/src/db/Cargo.toml b/src/db/Cargo.toml index fddc5cca..a8f6d586 100644 --- a/src/db/Cargo.toml +++ b/src/db/Cargo.toml @@ -18,13 +18,12 @@ tracing.workspace = true heed = { workspace = true, optional = true } rusqlite = { workspace = true, optional = true } -sled = { workspace = true, optional = true } [dev-dependencies] mktemp.workspace = true [features] -default = [ "sled", "lmdb", "sqlite" ] +default = [ "lmdb", "sqlite" ] bundled-libs = [ "rusqlite?/bundled" ] lmdb = [ "heed" ] sqlite = [ "rusqlite" ] diff --git a/src/db/lib.rs b/src/db/lib.rs index 0fb457ce..8975f295 100644 --- a/src/db/lib.rs +++ b/src/db/lib.rs @@ -3,8 +3,6 @@ extern crate tracing; #[cfg(feature = "lmdb")] pub mod lmdb_adapter; -#[cfg(feature = "sled")] -pub mod sled_adapter; #[cfg(feature = "sqlite")] pub mod sqlite_adapter; diff --git a/src/db/open.rs b/src/db/open.rs index ae135c4e..03476a42 100644 --- a/src/db/open.rs +++ b/src/db/open.rs @@ -11,7 +11,6 @@ use crate::{Db, Error, Result}; pub enum Engine { Lmdb, Sqlite, - Sled, } impl Engine { @@ -20,7 +19,6 @@ impl Engine { match self { Self::Lmdb => "lmdb", Self::Sqlite => "sqlite", - Self::Sled => "sled", } } } @@ -38,10 +36,10 @@ impl std::str::FromStr for Engine { match text { "lmdb" | "heed" => Ok(Self::Lmdb), "sqlite" | "sqlite3" | "rusqlite" => Ok(Self::Sqlite), - "sled" => Ok(Self::Sled), + "sled" => Err(Error("Sled is no longer supported as a database engine. Converting your old metadata db can be done using an older Garage binary (e.g. v0.9.3).".into())), kind => Err(Error( format!( - "Invalid DB engine: {} (options are: lmdb, sled, sqlite)", + "Invalid DB engine: {} (options are: lmdb, sqlite)", kind ) .into(), @@ -53,8 +51,6 @@ impl std::str::FromStr for Engine { pub struct OpenOpt { pub fsync: bool, pub lmdb_map_size: Option, - pub sled_cache_capacity: usize, - pub sled_flush_every_ms: u64, } impl Default for OpenOpt { @@ -62,31 +58,12 @@ impl Default for OpenOpt { Self { fsync: false, lmdb_map_size: None, - sled_cache_capacity: 1024 * 1024 * 1024, - sled_flush_every_ms: 2000, } } } pub fn open_db(path: &PathBuf, engine: Engine, opt: &OpenOpt) -> Result { match engine { - // ---- Sled DB ---- - #[cfg(feature = "sled")] - Engine::Sled => { - if opt.fsync { - return Err(Error( - "`metadata_fsync = true` is not supported with the Sled database engine".into(), - )); - } - info!("Opening Sled database at: {}", path.display()); - let db = crate::sled_adapter::sled::Config::default() - .path(&path) - .cache_capacity(opt.sled_cache_capacity as u64) - .flush_every_ms(Some(opt.sled_flush_every_ms)) - .open()?; - Ok(crate::sled_adapter::SledDb::init(db)) - } - // ---- Sqlite DB ---- #[cfg(feature = "sqlite")] Engine::Sqlite => { diff --git a/src/db/sled_adapter.rs b/src/db/sled_adapter.rs deleted file mode 100644 index 84f2001b..00000000 --- a/src/db/sled_adapter.rs +++ /dev/null @@ -1,274 +0,0 @@ -use core::ops::Bound; - -use std::cell::Cell; -use std::collections::HashMap; -use std::sync::{Arc, RwLock}; - -use sled::transaction::{ - ConflictableTransactionError, TransactionError, Transactional, TransactionalTree, - UnabortableTransactionError, -}; - -use crate::{ - Db, Error, IDb, ITx, ITxFn, OnCommit, Result, TxError, TxFnResult, TxOpError, TxOpResult, - TxResult, TxValueIter, Value, ValueIter, -}; - -pub use sled; - -// -- err - -impl From for Error { - fn from(e: sled::Error) -> Error { - Error(format!("Sled: {}", e).into()) - } -} - -impl From for TxOpError { - fn from(e: sled::Error) -> TxOpError { - TxOpError(e.into()) - } -} - -// -- db - -pub struct SledDb { - db: sled::Db, - trees: RwLock<(Vec, HashMap)>, -} - -impl SledDb { - #[deprecated( - since = "0.9.0", - note = "The Sled database is now deprecated and will be removed in Garage v1.0. Please migrate to LMDB or Sqlite as soon as possible." - )] - pub fn init(db: sled::Db) -> Db { - tracing::warn!("-------------------- IMPORTANT WARNING !!! ----------------------"); - tracing::warn!("The Sled database is now deprecated and will be removed in Garage v1.0."); - tracing::warn!("Please migrate to LMDB or Sqlite as soon as possible."); - tracing::warn!("-----------------------------------------------------------------------"); - let s = Self { - db, - trees: RwLock::new((Vec::new(), HashMap::new())), - }; - Db(Arc::new(s)) - } - - fn get_tree(&self, i: usize) -> Result { - self.trees - .read() - .unwrap() - .0 - .get(i) - .cloned() - .ok_or_else(|| Error("invalid tree id".into())) - } -} - -impl IDb for SledDb { - fn engine(&self) -> String { - "Sled".into() - } - - fn open_tree(&self, name: &str) -> Result { - let mut trees = self.trees.write().unwrap(); - if let Some(i) = trees.1.get(name) { - Ok(*i) - } else { - let tree = self.db.open_tree(name)?; - let i = trees.0.len(); - trees.0.push(tree); - trees.1.insert(name.to_string(), i); - Ok(i) - } - } - - fn list_trees(&self) -> Result> { - let mut trees = vec![]; - for name in self.db.tree_names() { - let name = std::str::from_utf8(&name) - .map_err(|e| Error(format!("{}", e).into()))? - .to_string(); - if name != "__sled__default" { - trees.push(name); - } - } - Ok(trees) - } - - // ---- - - fn get(&self, tree: usize, key: &[u8]) -> Result> { - let tree = self.get_tree(tree)?; - let val = tree.get(key)?; - Ok(val.map(|x| x.to_vec())) - } - - fn len(&self, tree: usize) -> Result { - let tree = self.get_tree(tree)?; - Ok(tree.len()) - } - - fn insert(&self, tree: usize, key: &[u8], value: &[u8]) -> Result> { - let tree = self.get_tree(tree)?; - let old_val = tree.insert(key, value)?; - Ok(old_val.map(|x| x.to_vec())) - } - - fn remove(&self, tree: usize, key: &[u8]) -> Result> { - let tree = self.get_tree(tree)?; - let old_val = tree.remove(key)?; - Ok(old_val.map(|x| x.to_vec())) - } - - fn clear(&self, tree: usize) -> Result<()> { - let tree = self.get_tree(tree)?; - tree.clear()?; - Ok(()) - } - - fn iter(&self, tree: usize) -> Result> { - let tree = self.get_tree(tree)?; - Ok(Box::new(tree.iter().map(|v| { - v.map(|(x, y)| (x.to_vec(), y.to_vec())).map_err(Into::into) - }))) - } - - fn iter_rev(&self, tree: usize) -> Result> { - let tree = self.get_tree(tree)?; - Ok(Box::new(tree.iter().rev().map(|v| { - v.map(|(x, y)| (x.to_vec(), y.to_vec())).map_err(Into::into) - }))) - } - - fn range<'r>( - &self, - tree: usize, - low: Bound<&'r [u8]>, - high: Bound<&'r [u8]>, - ) -> Result> { - let tree = self.get_tree(tree)?; - Ok(Box::new(tree.range::<&'r [u8], _>((low, high)).map(|v| { - v.map(|(x, y)| (x.to_vec(), y.to_vec())).map_err(Into::into) - }))) - } - fn range_rev<'r>( - &self, - tree: usize, - low: Bound<&'r [u8]>, - high: Bound<&'r [u8]>, - ) -> Result> { - let tree = self.get_tree(tree)?; - Ok(Box::new(tree.range::<&'r [u8], _>((low, high)).rev().map( - |v| v.map(|(x, y)| (x.to_vec(), y.to_vec())).map_err(Into::into), - ))) - } - - // ---- - - fn transaction(&self, f: &dyn ITxFn) -> TxResult { - let trees = self.trees.read().unwrap(); - let res = trees.0.transaction(|txtrees| { - let mut tx = SledTx { - trees: txtrees, - err: Cell::new(None), - }; - match f.try_on(&mut tx) { - TxFnResult::Ok(on_commit) => { - assert!(tx.err.into_inner().is_none()); - Ok(on_commit) - } - TxFnResult::Abort => { - assert!(tx.err.into_inner().is_none()); - Err(ConflictableTransactionError::Abort(())) - } - TxFnResult::DbErr => { - let e = tx.err.into_inner().expect("No DB error"); - Err(e.into()) - } - } - }); - match res { - Ok(on_commit) => Ok(on_commit), - Err(TransactionError::Abort(())) => Err(TxError::Abort(())), - Err(TransactionError::Storage(s)) => Err(TxError::Db(s.into())), - } - } -} - -// ---- - -struct SledTx<'a> { - trees: &'a [TransactionalTree], - err: Cell>, -} - -impl<'a> SledTx<'a> { - fn get_tree(&self, i: usize) -> TxOpResult<&TransactionalTree> { - self.trees.get(i).ok_or_else(|| { - TxOpError(Error( - "invalid tree id (it might have been openned after the transaction started)".into(), - )) - }) - } - - fn save_error( - &self, - v: std::result::Result, - ) -> TxOpResult { - match v { - Ok(x) => Ok(x), - Err(e) => { - let txt = format!("{}", e); - self.err.set(Some(e)); - Err(TxOpError(Error(txt.into()))) - } - } - } -} - -impl<'a> ITx for SledTx<'a> { - fn get(&self, tree: usize, key: &[u8]) -> TxOpResult> { - let tree = self.get_tree(tree)?; - let tmp = self.save_error(tree.get(key))?; - Ok(tmp.map(|x| x.to_vec())) - } - fn len(&self, _tree: usize) -> TxOpResult { - unimplemented!(".len() in transaction not supported with Sled backend") - } - - fn insert(&mut self, tree: usize, key: &[u8], value: &[u8]) -> TxOpResult> { - let tree = self.get_tree(tree)?; - let old_val = self.save_error(tree.insert(key, value))?; - Ok(old_val.map(|x| x.to_vec())) - } - fn remove(&mut self, tree: usize, key: &[u8]) -> TxOpResult> { - let tree = self.get_tree(tree)?; - let old_val = self.save_error(tree.remove(key))?; - Ok(old_val.map(|x| x.to_vec())) - } - - fn iter(&self, _tree: usize) -> TxOpResult> { - unimplemented!("Iterators in transactions not supported with Sled backend"); - } - fn iter_rev(&self, _tree: usize) -> TxOpResult> { - unimplemented!("Iterators in transactions not supported with Sled backend"); - } - - fn range<'r>( - &self, - _tree: usize, - _low: Bound<&'r [u8]>, - _high: Bound<&'r [u8]>, - ) -> TxOpResult> { - unimplemented!("Iterators in transactions not supported with Sled backend"); - } - fn range_rev<'r>( - &self, - _tree: usize, - _low: Bound<&'r [u8]>, - _high: Bound<&'r [u8]>, - ) -> TxOpResult> { - unimplemented!("Iterators in transactions not supported with Sled backend"); - } -} diff --git a/src/db/test.rs b/src/db/test.rs index cd99eafa..d4c875f0 100644 --- a/src/db/test.rs +++ b/src/db/test.rs @@ -90,17 +90,6 @@ fn test_lmdb_db() { drop(path); } -#[test] -#[cfg(feature = "sled")] -fn test_sled_db() { - use crate::sled_adapter::SledDb; - - let path = mktemp::Temp::new_dir().unwrap(); - let db = SledDb::init(sled::open(path.to_path_buf()).unwrap()); - test_suite(db); - drop(path); -} - #[test] #[cfg(feature = "sqlite")] fn test_sqlite_db() { diff --git a/src/garage/Cargo.toml b/src/garage/Cargo.toml index 00ecb35e..53449a1c 100644 --- a/src/garage/Cargo.toml +++ b/src/garage/Cargo.toml @@ -80,12 +80,11 @@ k2v-client.workspace = true [features] -default = [ "bundled-libs", "metrics", "sled", "lmdb", "sqlite", "k2v" ] +default = [ "bundled-libs", "metrics", "lmdb", "sqlite", "k2v" ] k2v = [ "garage_util/k2v", "garage_api/k2v" ] -# Database engines, Sled is still our default even though we don't like it -sled = [ "garage_model/sled" ] +# Database engines lmdb = [ "garage_model/lmdb" ] sqlite = [ "garage_model/sqlite" ] diff --git a/src/garage/cli/convert_db.rs b/src/garage/cli/convert_db.rs index 2aadb1d6..5346d55a 100644 --- a/src/garage/cli/convert_db.rs +++ b/src/garage/cli/convert_db.rs @@ -11,7 +11,7 @@ pub struct ConvertDbOpt { /// https://garagehq.deuxfleurs.fr/documentation/reference-manual/configuration/#db-engine-since-v0-8-0) #[structopt(short = "i")] input_path: PathBuf, - /// Input database engine (sled, lmdb or sqlite; limited by db engines + /// Input database engine (lmdb or sqlite; limited by db engines /// enabled in this build) #[structopt(short = "a")] input_engine: Engine, diff --git a/src/garage/main.rs b/src/garage/main.rs index e489fff0..5e9c061f 100644 --- a/src/garage/main.rs +++ b/src/garage/main.rs @@ -18,8 +18,8 @@ compile_error!("Either bundled-libs or system-libs Cargo feature must be enabled #[cfg(all(feature = "bundled-libs", feature = "system-libs"))] compile_error!("Only one of bundled-libs and system-libs Cargo features must be enabled"); -#[cfg(not(any(feature = "lmdb", feature = "sled", feature = "sqlite")))] -compile_error!("Must activate the Cargo feature for at least one DB engine: lmdb, sled or sqlite."); +#[cfg(not(any(feature = "lmdb", feature = "sqlite")))] +compile_error!("Must activate the Cargo feature for at least one DB engine: lmdb or sqlite."); use std::net::SocketAddr; use std::path::PathBuf; @@ -72,8 +72,6 @@ async fn main() { let features = &[ #[cfg(feature = "k2v")] "k2v", - #[cfg(feature = "sled")] - "sled", #[cfg(feature = "lmdb")] "lmdb", #[cfg(feature = "sqlite")] diff --git a/src/model/Cargo.toml b/src/model/Cargo.toml index 776671d0..a6bcfbe7 100644 --- a/src/model/Cargo.toml +++ b/src/model/Cargo.toml @@ -42,8 +42,7 @@ tokio.workspace = true opentelemetry.workspace = true [features] -default = [ "sled", "lmdb", "sqlite" ] +default = [ "lmdb", "sqlite" ] k2v = [ "garage_util/k2v" ] lmdb = [ "garage_db/lmdb" ] -sled = [ "garage_db/sled" ] sqlite = [ "garage_db/sqlite" ] diff --git a/src/model/garage.rs b/src/model/garage.rs index 7ec8b22e..8987c594 100644 --- a/src/model/garage.rs +++ b/src/model/garage.rs @@ -118,9 +118,6 @@ impl Garage { .ok_or_message("Invalid `db_engine` value in configuration file")?; let mut db_path = config.metadata_dir.clone(); match db_engine { - db::Engine::Sled => { - db_path.push("db"); - } db::Engine::Sqlite => { db_path.push("db.sqlite"); } @@ -134,8 +131,6 @@ impl Garage { v if v == usize::default() => None, v => Some(v), }, - sled_cache_capacity: config.sled_cache_capacity, - sled_flush_every_ms: config.sled_flush_every_ms, }; let db = db::open_db(&db_path, db_engine, &db_opt) .ok_or_message("Unable to open metadata db")?; diff --git a/src/table/gc.rs b/src/table/gc.rs index ef788749..65ad0c42 100644 --- a/src/table/gc.rs +++ b/src/table/gc.rs @@ -334,9 +334,9 @@ impl Worker for GcWorker { } } -/// An entry stored in the gc_todo Sled tree associated with the table +/// An entry stored in the gc_todo db tree associated with the table /// Contains helper function for parsing, saving, and removing -/// such entry in Sled +/// such entry in the db /// /// Format of an entry: /// - key = 8 bytes: timestamp of tombstone @@ -353,7 +353,7 @@ pub(crate) struct GcTodoEntry { } impl GcTodoEntry { - /// Creates a new GcTodoEntry (not saved in Sled) from its components: + /// Creates a new GcTodoEntry (not saved in the db) from its components: /// the key of an entry in the table, and the hash of the associated /// serialized value pub(crate) fn new(key: Vec, value_hash: Hash) -> Self { diff --git a/src/table/merkle.rs b/src/table/merkle.rs index 01271c58..be0ae243 100644 --- a/src/table/merkle.rs +++ b/src/table/merkle.rs @@ -31,14 +31,14 @@ pub struct MerkleUpdater { // - value = the hash of the full serialized item, if present, // or an empty vec if item is absent (deleted) // Fields in data: - // pub(crate) merkle_todo: sled::Tree, + // pub(crate) merkle_todo: db::Tree, // pub(crate) merkle_todo_notify: Notify, // Content of the merkle tree: items where // - key = .bytes() for MerkleNodeKey // - value = serialization of a MerkleNode, assumed to be MerkleNode::empty if not found // Field in data: - // pub(crate) merkle_tree: sled::Tree, + // pub(crate) merkle_tree: db::Tree, empty_node_hash: Hash, } diff --git a/src/util/config.rs b/src/util/config.rs index b7f27676..e243c813 100644 --- a/src/util/config.rs +++ b/src/util/config.rs @@ -87,20 +87,10 @@ pub struct Config { pub kubernetes_discovery: Option, // -- DB - /// Database engine to use for metadata (options: sled, sqlite, lmdb) + /// Database engine to use for metadata (options: sqlite, lmdb) #[serde(default = "default_db_engine")] pub db_engine: String, - /// Sled cache size, in bytes - #[serde( - deserialize_with = "deserialize_capacity", - default = "default_sled_cache_capacity" - )] - pub sled_cache_capacity: usize, - /// Sled flush interval in milliseconds - #[serde(default = "default_sled_flush_every_ms")] - pub sled_flush_every_ms: u64, - /// LMDB map size #[serde(deserialize_with = "deserialize_capacity", default)] pub lmdb_map_size: usize, @@ -246,13 +236,6 @@ fn default_db_engine() -> String { "lmdb".into() } -fn default_sled_cache_capacity() -> usize { - 128 * 1024 * 1024 -} -fn default_sled_flush_every_ms() -> u64 { - 2000 -} - fn default_block_size() -> usize { 1048576 } From 05c92204ecab87540806073ac4deedfd58519240 Mon Sep 17 00:00:00 2001 From: Alex Auvolat Date: Fri, 8 Mar 2024 14:59:56 +0100 Subject: [PATCH 2/6] [rm-sled] Remove counted_tree_hack --- src/block/manager.rs | 7 +- src/block/metrics.rs | 17 +++-- src/block/resync.rs | 15 +--- src/db/counted_tree_hack.rs | 127 ------------------------------- src/db/lib.rs | 9 --- src/db/lmdb_adapter.rs | 4 - src/db/sqlite_adapter.rs | 4 - src/garage/admin/mod.rs | 51 +++---------- src/garage/cli/structs.rs | 4 - src/model/s3/lifecycle_worker.rs | 8 +- src/table/data.rs | 6 +- src/table/gc.rs | 18 +++-- src/table/merkle.rs | 4 - src/table/metrics.rs | 21 ++--- 14 files changed, 48 insertions(+), 247 deletions(-) delete mode 100644 src/db/counted_tree_hack.rs diff --git a/src/block/manager.rs b/src/block/manager.rs index c7e4df17..18fadf85 100644 --- a/src/block/manager.rs +++ b/src/block/manager.rs @@ -378,11 +378,6 @@ impl BlockManager { Ok(self.rc.rc.len()?) } - /// Get number of items in the refcount table - pub fn rc_fast_len(&self) -> Result, Error> { - Ok(self.rc.rc.fast_len()?) - } - /// Send command to start/stop/manager scrub worker pub async fn send_scrub_command(&self, cmd: ScrubWorkerCommand) -> Result<(), Error> { let tx = self.tx_scrub_command.load(); @@ -398,7 +393,7 @@ impl BlockManager { /// List all resync errors pub fn list_resync_errors(&self) -> Result, Error> { - let mut blocks = Vec::with_capacity(self.resync.errors.len()); + let mut blocks = Vec::with_capacity(self.resync.errors.len()?); for ent in self.resync.errors.iter()? { let (hash, cnt) = ent?; let cnt = ErrorCounter::decode(&cnt); diff --git a/src/block/metrics.rs b/src/block/metrics.rs index 6659df32..8e10afdf 100644 --- a/src/block/metrics.rs +++ b/src/block/metrics.rs @@ -1,7 +1,6 @@ use opentelemetry::{global, metrics::*}; use garage_db as db; -use garage_db::counted_tree_hack::CountedTree; /// TableMetrics reference all counter used for metrics pub struct BlockManagerMetrics { @@ -29,8 +28,8 @@ impl BlockManagerMetrics { pub fn new( compression_level: Option, rc_tree: db::Tree, - resync_queue: CountedTree, - resync_errors: CountedTree, + resync_queue: db::Tree, + resync_errors: db::Tree, ) -> Self { let meter = global::meter("garage_model/block"); Self { @@ -45,15 +44,17 @@ impl BlockManagerMetrics { .init(), _rc_size: meter .u64_value_observer("block.rc_size", move |observer| { - if let Ok(Some(v)) = rc_tree.fast_len() { - observer.observe(v as u64, &[]) + if let Ok(value) = rc_tree.len() { + observer.observe(value as u64, &[]) } }) .with_description("Number of blocks known to the reference counter") .init(), _resync_queue_len: meter .u64_value_observer("block.resync_queue_length", move |observer| { - observer.observe(resync_queue.len() as u64, &[]) + if let Ok(value) = resync_queue.len() { + observer.observe(value as u64, &[]); + } }) .with_description( "Number of block hashes queued for local check and possible resync", @@ -61,7 +62,9 @@ impl BlockManagerMetrics { .init(), _resync_errored_blocks: meter .u64_value_observer("block.resync_errored_blocks", move |observer| { - observer.observe(resync_errors.len() as u64, &[]) + if let Ok(value) = resync_errors.len() { + observer.observe(value as u64, &[]); + } }) .with_description("Number of block hashes whose last resync resulted in an error") .init(), diff --git a/src/block/resync.rs b/src/block/resync.rs index 2516ba08..48c2cef1 100644 --- a/src/block/resync.rs +++ b/src/block/resync.rs @@ -15,7 +15,6 @@ use opentelemetry::{ }; use garage_db as db; -use garage_db::counted_tree_hack::CountedTree; use garage_util::background::*; use garage_util::data::*; @@ -47,9 +46,9 @@ pub(crate) const MAX_RESYNC_WORKERS: usize = 8; const INITIAL_RESYNC_TRANQUILITY: u32 = 2; pub struct BlockResyncManager { - pub(crate) queue: CountedTree, + pub(crate) queue: db::Tree, pub(crate) notify: Arc, - pub(crate) errors: CountedTree, + pub(crate) errors: db::Tree, busy_set: BusySet, @@ -90,12 +89,10 @@ impl BlockResyncManager { let queue = db .open_tree("block_local_resync_queue") .expect("Unable to open block_local_resync_queue tree"); - let queue = CountedTree::new(queue).expect("Could not count block_local_resync_queue"); let errors = db .open_tree("block_local_resync_errors") .expect("Unable to open block_local_resync_errors tree"); - let errors = CountedTree::new(errors).expect("Could not count block_local_resync_errors"); let persister = PersisterShared::new(&system.metadata_dir, "resync_cfg"); @@ -110,16 +107,12 @@ impl BlockResyncManager { /// Get lenght of resync queue pub fn queue_len(&self) -> Result { - // This currently can't return an error because the CountedTree hack - // doesn't error on .len(), but this will change when we remove the hack - // (hopefully someday!) - Ok(self.queue.len()) + Ok(self.queue.len()?) } /// Get number of blocks that have an error pub fn errors_len(&self) -> Result { - // (see queue_len comment) - Ok(self.errors.len()) + Ok(self.errors.len()?) } /// Clear the error counter for a block and put it in queue immediately diff --git a/src/db/counted_tree_hack.rs b/src/db/counted_tree_hack.rs deleted file mode 100644 index a4ce12e0..00000000 --- a/src/db/counted_tree_hack.rs +++ /dev/null @@ -1,127 +0,0 @@ -//! This hack allows a db tree to keep in RAM a counter of the number of entries -//! it contains, which is used to call .len() on it. This is usefull only for -//! the sled backend where .len() otherwise would have to traverse the whole -//! tree to count items. For sqlite and lmdb, this is mostly useless (but -//! hopefully not harmfull!). Note that a CountedTree cannot be part of a -//! transaction. - -use std::sync::{ - atomic::{AtomicUsize, Ordering}, - Arc, -}; - -use crate::{Result, Tree, TxError, Value, ValueIter}; - -#[derive(Clone)] -pub struct CountedTree(Arc); - -struct CountedTreeInternal { - tree: Tree, - len: AtomicUsize, -} - -impl CountedTree { - pub fn new(tree: Tree) -> Result { - let len = tree.len()?; - Ok(Self(Arc::new(CountedTreeInternal { - tree, - len: AtomicUsize::new(len), - }))) - } - - pub fn len(&self) -> usize { - self.0.len.load(Ordering::SeqCst) - } - - pub fn is_empty(&self) -> bool { - self.len() == 0 - } - - pub fn get>(&self, key: K) -> Result> { - self.0.tree.get(key) - } - - pub fn first(&self) -> Result> { - self.0.tree.first() - } - - pub fn iter(&self) -> Result> { - self.0.tree.iter() - } - - // ---- writing functions ---- - - pub fn insert(&self, key: K, value: V) -> Result> - where - K: AsRef<[u8]>, - V: AsRef<[u8]>, - { - let old_val = self.0.tree.insert(key, value)?; - if old_val.is_none() { - self.0.len.fetch_add(1, Ordering::SeqCst); - } - Ok(old_val) - } - - pub fn remove>(&self, key: K) -> Result> { - let old_val = self.0.tree.remove(key)?; - if old_val.is_some() { - self.0.len.fetch_sub(1, Ordering::SeqCst); - } - Ok(old_val) - } - - pub fn compare_and_swap( - &self, - key: K, - expected_old: Option, - new: Option, - ) -> Result - where - K: AsRef<[u8]>, - OV: AsRef<[u8]>, - NV: AsRef<[u8]>, - { - let old_some = expected_old.is_some(); - let new_some = new.is_some(); - - let tx_res = self.0.tree.db().transaction(|tx| { - let old_val = tx.get(&self.0.tree, &key)?; - let is_same = match (&old_val, &expected_old) { - (None, None) => true, - (Some(x), Some(y)) if x == y.as_ref() => true, - _ => false, - }; - if is_same { - match &new { - Some(v) => { - tx.insert(&self.0.tree, &key, v)?; - } - None => { - tx.remove(&self.0.tree, &key)?; - } - } - Ok(()) - } else { - Err(TxError::Abort(())) - } - }); - - match tx_res { - Ok(()) => { - match (old_some, new_some) { - (false, true) => { - self.0.len.fetch_add(1, Ordering::SeqCst); - } - (true, false) => { - self.0.len.fetch_sub(1, Ordering::SeqCst); - } - _ => (), - } - Ok(true) - } - Err(TxError::Abort(())) => Ok(false), - Err(TxError::Db(e)) => Err(e), - } - } -} diff --git a/src/db/lib.rs b/src/db/lib.rs index 8975f295..e81c712c 100644 --- a/src/db/lib.rs +++ b/src/db/lib.rs @@ -6,8 +6,6 @@ pub mod lmdb_adapter; #[cfg(feature = "sqlite")] pub mod sqlite_adapter; -pub mod counted_tree_hack; - pub mod open; #[cfg(test)] @@ -187,10 +185,6 @@ impl Tree { pub fn len(&self) -> Result { self.0.len(self.1) } - #[inline] - pub fn fast_len(&self) -> Result> { - self.0.fast_len(self.1) - } #[inline] pub fn first(&self) -> Result> { @@ -326,9 +320,6 @@ pub(crate) trait IDb: Send + Sync { fn get(&self, tree: usize, key: &[u8]) -> Result>; fn len(&self, tree: usize) -> Result; - fn fast_len(&self, _tree: usize) -> Result> { - Ok(None) - } fn insert(&self, tree: usize, key: &[u8], value: &[u8]) -> Result>; fn remove(&self, tree: usize, key: &[u8]) -> Result>; diff --git a/src/db/lmdb_adapter.rs b/src/db/lmdb_adapter.rs index 59fa132d..99b29a74 100644 --- a/src/db/lmdb_adapter.rs +++ b/src/db/lmdb_adapter.rs @@ -121,10 +121,6 @@ impl IDb for LmdbDb { Ok(tree.len(&tx)?.try_into().unwrap()) } - fn fast_len(&self, tree: usize) -> Result> { - Ok(Some(self.len(tree)?)) - } - fn insert(&self, tree: usize, key: &[u8], value: &[u8]) -> Result> { let tree = self.get_tree(tree)?; let mut tx = self.db.write_txn()?; diff --git a/src/db/sqlite_adapter.rs b/src/db/sqlite_adapter.rs index 9f967c66..1a7ae5f0 100644 --- a/src/db/sqlite_adapter.rs +++ b/src/db/sqlite_adapter.rs @@ -144,10 +144,6 @@ impl IDb for SqliteDb { } } - fn fast_len(&self, tree: usize) -> Result> { - Ok(Some(self.len(tree)?)) - } - fn insert(&self, tree: usize, key: &[u8], value: &[u8]) -> Result> { trace!("insert {}: lock db", tree); let this = self.0.lock().unwrap(); diff --git a/src/garage/admin/mod.rs b/src/garage/admin/mod.rs index de7851e1..896751cc 100644 --- a/src/garage/admin/mod.rs +++ b/src/garage/admin/mod.rs @@ -217,11 +217,11 @@ impl AdminRpcHandler { // Gather table statistics let mut table = vec![" Table\tItems\tMklItems\tMklTodo\tGcTodo".into()]; - table.push(self.gather_table_stats(&self.garage.bucket_table, opt.detailed)?); - table.push(self.gather_table_stats(&self.garage.key_table, opt.detailed)?); - table.push(self.gather_table_stats(&self.garage.object_table, opt.detailed)?); - table.push(self.gather_table_stats(&self.garage.version_table, opt.detailed)?); - table.push(self.gather_table_stats(&self.garage.block_ref_table, opt.detailed)?); + table.push(self.gather_table_stats(&self.garage.bucket_table)?); + table.push(self.gather_table_stats(&self.garage.key_table)?); + table.push(self.gather_table_stats(&self.garage.object_table)?); + table.push(self.gather_table_stats(&self.garage.version_table)?); + table.push(self.gather_table_stats(&self.garage.block_ref_table)?); write!( &mut ret, "\nTable stats:\n{}", @@ -231,15 +231,7 @@ impl AdminRpcHandler { // Gather block manager statistics writeln!(&mut ret, "\nBlock manager stats:").unwrap(); - let rc_len = if opt.detailed { - self.garage.block_manager.rc_len()?.to_string() - } else { - self.garage - .block_manager - .rc_fast_len()? - .map(|x| x.to_string()) - .unwrap_or_else(|| "NC".into()) - }; + let rc_len = self.garage.block_manager.rc_len()?.to_string(); writeln!( &mut ret, @@ -260,10 +252,6 @@ impl AdminRpcHandler { ) .unwrap(); - if !opt.detailed { - writeln!(&mut ret, "\nIf values are missing above (marked as NC), consider adding the --detailed flag (this will be slow).").unwrap(); - } - if !opt.skip_global { write!(&mut ret, "\n{}", self.gather_cluster_stats()).unwrap(); } @@ -365,34 +353,13 @@ impl AdminRpcHandler { ret } - fn gather_table_stats( - &self, - t: &Arc>, - detailed: bool, - ) -> Result + fn gather_table_stats(&self, t: &Arc>) -> Result where F: TableSchema + 'static, R: TableReplication + 'static, { - let (data_len, mkl_len) = if detailed { - ( - t.data.store.len().map_err(GarageError::from)?.to_string(), - t.merkle_updater.merkle_tree_len()?.to_string(), - ) - } else { - ( - t.data - .store - .fast_len() - .map_err(GarageError::from)? - .map(|x| x.to_string()) - .unwrap_or_else(|| "NC".into()), - t.merkle_updater - .merkle_tree_fast_len()? - .map(|x| x.to_string()) - .unwrap_or_else(|| "NC".into()), - ) - }; + let data_len = t.data.store.len().map_err(GarageError::from)?.to_string(); + let mkl_len = t.merkle_updater.merkle_tree_len()?.to_string(); Ok(format!( " {}\t{}\t{}\t{}\t{}", diff --git a/src/garage/cli/structs.rs b/src/garage/cli/structs.rs index 40e47ee1..7e7ab71b 100644 --- a/src/garage/cli/structs.rs +++ b/src/garage/cli/structs.rs @@ -553,10 +553,6 @@ pub struct StatsOpt { #[structopt(short = "a", long = "all-nodes")] pub all_nodes: bool, - /// Gather detailed statistics (this can be long) - #[structopt(short = "d", long = "detailed")] - pub detailed: bool, - /// Don't show global cluster stats (internal use in RPC) #[structopt(skip)] #[serde(default)] diff --git a/src/model/s3/lifecycle_worker.rs b/src/model/s3/lifecycle_worker.rs index 50d4283f..9ecf168c 100644 --- a/src/model/s3/lifecycle_worker.rs +++ b/src/model/s3/lifecycle_worker.rs @@ -121,13 +121,7 @@ impl Worker for LifecycleWorker { mpu_aborted, .. } => { - let n_objects = self - .garage - .object_table - .data - .store - .fast_len() - .unwrap_or(None); + let n_objects = self.garage.object_table.data.store.len().ok(); let progress = match n_objects { None => "...".to_string(), Some(total) => format!( diff --git a/src/table/data.rs b/src/table/data.rs index 7f6b7847..09f4e008 100644 --- a/src/table/data.rs +++ b/src/table/data.rs @@ -6,7 +6,6 @@ use serde_bytes::ByteBuf; use tokio::sync::Notify; use garage_db as db; -use garage_db::counted_tree_hack::CountedTree; use garage_util::data::*; use garage_util::error::*; @@ -36,7 +35,7 @@ pub struct TableData { pub(crate) insert_queue: db::Tree, pub(crate) insert_queue_notify: Arc, - pub(crate) gc_todo: CountedTree, + pub(crate) gc_todo: db::Tree, pub(crate) metrics: TableMetrics, } @@ -61,7 +60,6 @@ impl TableData { let gc_todo = db .open_tree(format!("{}:gc_todo_v2", F::TABLE_NAME)) .expect("Unable to open GC DB tree"); - let gc_todo = CountedTree::new(gc_todo).expect("Cannot count gc_todo_v2"); let metrics = TableMetrics::new( F::TABLE_NAME, @@ -370,6 +368,6 @@ impl TableData { } pub fn gc_todo_len(&self) -> Result { - Ok(self.gc_todo.len()) + Ok(self.gc_todo.len()?) } } diff --git a/src/table/gc.rs b/src/table/gc.rs index 65ad0c42..d30a1849 100644 --- a/src/table/gc.rs +++ b/src/table/gc.rs @@ -10,7 +10,7 @@ use serde_bytes::ByteBuf; use futures::future::join_all; use tokio::sync::watch; -use garage_db::counted_tree_hack::CountedTree; +use garage_db as db; use garage_util::background::*; use garage_util::data::*; @@ -376,7 +376,7 @@ impl GcTodoEntry { } /// Saves the GcTodoEntry in the gc_todo tree - pub(crate) fn save(&self, gc_todo_tree: &CountedTree) -> Result<(), Error> { + pub(crate) fn save(&self, gc_todo_tree: &db::Tree) -> Result<(), Error> { gc_todo_tree.insert(self.todo_table_key(), self.value_hash.as_slice())?; Ok(()) } @@ -386,12 +386,14 @@ impl GcTodoEntry { /// This is usefull to remove a todo entry only under the condition /// that it has not changed since the time it was read, i.e. /// what we have to do is still the same - pub(crate) fn remove_if_equal(&self, gc_todo_tree: &CountedTree) -> Result<(), Error> { - gc_todo_tree.compare_and_swap::<_, _, &[u8]>( - &self.todo_table_key(), - Some(self.value_hash), - None, - )?; + pub(crate) fn remove_if_equal(&self, gc_todo_tree: &db::Tree) -> Result<(), Error> { + gc_todo_tree.db().transaction(|txn| { + let key = self.todo_table_key(); + if txn.get(gc_todo_tree, &key)?.as_deref() == Some(self.value_hash.as_slice()) { + txn.remove(gc_todo_tree, &key)?; + } + Ok(()) + })?; Ok(()) } diff --git a/src/table/merkle.rs b/src/table/merkle.rs index be0ae243..596d5805 100644 --- a/src/table/merkle.rs +++ b/src/table/merkle.rs @@ -291,10 +291,6 @@ impl MerkleUpdater { Ok(self.data.merkle_tree.len()?) } - pub fn merkle_tree_fast_len(&self) -> Result, Error> { - Ok(self.data.merkle_tree.fast_len()?) - } - pub fn todo_len(&self) -> Result { Ok(self.data.merkle_todo.len()?) } diff --git a/src/table/metrics.rs b/src/table/metrics.rs index 8318a84f..7bb0959a 100644 --- a/src/table/metrics.rs +++ b/src/table/metrics.rs @@ -1,7 +1,6 @@ use opentelemetry::{global, metrics::*, KeyValue}; use garage_db as db; -use garage_db::counted_tree_hack::CountedTree; /// TableMetrics reference all counter used for metrics pub struct TableMetrics { @@ -27,7 +26,7 @@ impl TableMetrics { store: db::Tree, merkle_tree: db::Tree, merkle_todo: db::Tree, - gc_todo: CountedTree, + gc_todo: db::Tree, ) -> Self { let meter = global::meter(table_name); TableMetrics { @@ -35,9 +34,9 @@ impl TableMetrics { .u64_value_observer( "table.size", move |observer| { - if let Ok(Some(v)) = store.fast_len() { + if let Ok(value) = store.len() { observer.observe( - v as u64, + value as u64, &[KeyValue::new("table_name", table_name)], ); } @@ -49,9 +48,9 @@ impl TableMetrics { .u64_value_observer( "table.merkle_tree_size", move |observer| { - if let Ok(Some(v)) = merkle_tree.fast_len() { + if let Ok(value) = merkle_tree.len() { observer.observe( - v as u64, + value as u64, &[KeyValue::new("table_name", table_name)], ); } @@ -77,10 +76,12 @@ impl TableMetrics { .u64_value_observer( "table.gc_todo_queue_length", move |observer| { - observer.observe( - gc_todo.len() as u64, - &[KeyValue::new("table_name", table_name)], - ); + if let Ok(value) = gc_todo.len() { + observer.observe( + value as u64, + &[KeyValue::new("table_name", table_name)], + ); + } }, ) .with_description("Table garbage collector TODO queue length") From 66c23890c1a6e73fd6c5246642e087cd2866451e Mon Sep 17 00:00:00 2001 From: Alex Auvolat Date: Fri, 8 Mar 2024 16:02:58 +0100 Subject: [PATCH 3/6] [rm-sled] Implement some missing functionality in garage_db --- src/db/lib.rs | 6 ++++++ src/db/lmdb_adapter.rs | 10 ++++++++-- src/db/sqlite_adapter.rs | 5 +++++ 3 files changed, 19 insertions(+), 2 deletions(-) diff --git a/src/db/lib.rs b/src/db/lib.rs index e81c712c..5881954d 100644 --- a/src/db/lib.rs +++ b/src/db/lib.rs @@ -274,6 +274,11 @@ impl<'a> Transaction<'a> { pub fn remove>(&mut self, tree: &Tree, key: T) -> TxOpResult> { self.tx.remove(tree.1, key.as_ref()) } + /// Clears all values in a tree + #[inline] + pub fn clear(&mut self, tree: &Tree) -> TxOpResult<()> { + self.tx.clear(tree.1) + } #[inline] pub fn iter(&self, tree: &Tree) -> TxOpResult> { @@ -350,6 +355,7 @@ pub(crate) trait ITx { fn insert(&mut self, tree: usize, key: &[u8], value: &[u8]) -> TxOpResult>; fn remove(&mut self, tree: usize, key: &[u8]) -> TxOpResult>; + fn clear(&mut self, tree: usize) -> TxOpResult<()>; fn iter(&self, tree: usize) -> TxOpResult>; fn iter_rev(&self, tree: usize) -> TxOpResult>; diff --git a/src/db/lmdb_adapter.rs b/src/db/lmdb_adapter.rs index 99b29a74..01c360b4 100644 --- a/src/db/lmdb_adapter.rs +++ b/src/db/lmdb_adapter.rs @@ -238,8 +238,9 @@ impl<'a> ITx for LmdbTx<'a> { None => Ok(None), } } - fn len(&self, _tree: usize) -> TxOpResult { - unimplemented!(".len() in transaction not supported with LMDB backend") + fn len(&self, tree: usize) -> TxOpResult { + let tree = self.get_tree(tree)?; + Ok(tree.len(&self.tx)? as usize) } fn insert(&mut self, tree: usize, key: &[u8], value: &[u8]) -> TxOpResult> { @@ -254,6 +255,11 @@ impl<'a> ITx for LmdbTx<'a> { tree.delete(&mut self.tx, key)?; Ok(old_val) } + fn clear(&mut self, tree: usize) -> TxOpResult<()> { + let tree = *self.get_tree(tree)?; + tree.clear(&mut self.tx)?; + Ok(()) + } fn iter(&self, _tree: usize) -> TxOpResult> { unimplemented!("Iterators in transactions not supported with LMDB backend"); diff --git a/src/db/sqlite_adapter.rs b/src/db/sqlite_adapter.rs index 1a7ae5f0..d1394355 100644 --- a/src/db/sqlite_adapter.rs +++ b/src/db/sqlite_adapter.rs @@ -363,6 +363,11 @@ impl<'a> ITx for SqliteTx<'a> { Ok(old_val) } + fn clear(&mut self, tree: usize) -> TxOpResult<()> { + let tree = self.get_tree(tree)?; + self.tx.execute(&format!("DELETE FROM {}", tree), [])?; + Ok(()) + } fn iter(&self, _tree: usize) -> TxOpResult> { unimplemented!(); From b942949940b5a0dec8e8640c44a2705a4482a2e4 Mon Sep 17 00:00:00 2001 From: Alex Auvolat Date: Fri, 8 Mar 2024 16:38:01 +0100 Subject: [PATCH 4/6] [rm-sled] Implement iterators in sqlite & lmdb transactions with way too much unsafe code --- src/db/lib.rs | 1 + src/db/lmdb_adapter.rs | 48 ++++++++++----- src/db/sqlite_adapter.rs | 125 ++++++++++++++++++++++++++++++++++----- src/db/test.rs | 49 +++++++++++++++ 4 files changed, 195 insertions(+), 28 deletions(-) diff --git a/src/db/lib.rs b/src/db/lib.rs index 5881954d..ff511b5f 100644 --- a/src/db/lib.rs +++ b/src/db/lib.rs @@ -51,6 +51,7 @@ pub type Result = std::result::Result; pub struct TxOpError(pub(crate) Error); pub type TxOpResult = std::result::Result; +#[derive(Debug)] pub enum TxError { Abort(E), Db(Error), diff --git a/src/db/lmdb_adapter.rs b/src/db/lmdb_adapter.rs index 01c360b4..ddfb6ed5 100644 --- a/src/db/lmdb_adapter.rs +++ b/src/db/lmdb_adapter.rs @@ -261,32 +261,42 @@ impl<'a> ITx for LmdbTx<'a> { Ok(()) } - fn iter(&self, _tree: usize) -> TxOpResult> { - unimplemented!("Iterators in transactions not supported with LMDB backend"); + fn iter(&self, tree: usize) -> TxOpResult> { + let tree = *self.get_tree(tree)?; + Ok(Box::new(tree.iter(&self.tx)?.map(tx_iter_item))) } - fn iter_rev(&self, _tree: usize) -> TxOpResult> { - unimplemented!("Iterators in transactions not supported with LMDB backend"); + fn iter_rev(&self, tree: usize) -> TxOpResult> { + let tree = *self.get_tree(tree)?; + Ok(Box::new(tree.rev_iter(&self.tx)?.map(tx_iter_item))) } fn range<'r>( &self, - _tree: usize, - _low: Bound<&'r [u8]>, - _high: Bound<&'r [u8]>, + tree: usize, + low: Bound<&'r [u8]>, + high: Bound<&'r [u8]>, ) -> TxOpResult> { - unimplemented!("Iterators in transactions not supported with LMDB backend"); + let tree = *self.get_tree(tree)?; + Ok(Box::new( + tree.range(&self.tx, &(low, high))?.map(tx_iter_item), + )) } fn range_rev<'r>( &self, - _tree: usize, - _low: Bound<&'r [u8]>, - _high: Bound<&'r [u8]>, + tree: usize, + low: Bound<&'r [u8]>, + high: Bound<&'r [u8]>, ) -> TxOpResult> { - unimplemented!("Iterators in transactions not supported with LMDB backend"); + let tree = *self.get_tree(tree)?; + Ok(Box::new( + tree.rev_range(&self.tx, &(low, high))?.map(tx_iter_item), + )) } } -// ---- +// ---- iterators outside transactions ---- +// complicated, they must hold the transaction object +// therefore a bit of unsafe code (it is a self-referential struct) type IteratorItem<'a> = heed::Result<( >::DItem, @@ -323,6 +333,7 @@ where I: Iterator> + 'a, { fn drop(&mut self) { + // ensure the iterator is dropped before the RoTxn it references drop(self.iter.take()); } } @@ -342,7 +353,16 @@ where } } -// ---- +// ---- iterators within transactions ---- + +fn tx_iter_item<'a>( + item: std::result::Result<(&'a [u8], &'a [u8]), heed::Error>, +) -> TxOpResult<(Vec, Vec)> { + item.map(|(k, v)| (k.to_vec(), v.to_vec())) + .map_err(|e| TxOpError(Error::from(e))) +} + +// ---- utility ---- #[cfg(target_pointer_width = "64")] pub fn recommended_map_size() -> usize { diff --git a/src/db/sqlite_adapter.rs b/src/db/sqlite_adapter.rs index d1394355..077c1f1b 100644 --- a/src/db/sqlite_adapter.rs +++ b/src/db/sqlite_adapter.rs @@ -369,32 +369,58 @@ impl<'a> ITx for SqliteTx<'a> { Ok(()) } - fn iter(&self, _tree: usize) -> TxOpResult> { - unimplemented!(); + fn iter(&self, tree: usize) -> TxOpResult> { + let tree = self.get_tree(tree)?; + let sql = format!("SELECT k, v FROM {} ORDER BY k ASC", tree); + TxValueIterator::make(self, &sql, []) } - fn iter_rev(&self, _tree: usize) -> TxOpResult> { - unimplemented!(); + fn iter_rev(&self, tree: usize) -> TxOpResult> { + let tree = self.get_tree(tree)?; + let sql = format!("SELECT k, v FROM {} ORDER BY k DESC", tree); + TxValueIterator::make(self, &sql, []) } fn range<'r>( &self, - _tree: usize, - _low: Bound<&'r [u8]>, - _high: Bound<&'r [u8]>, + tree: usize, + low: Bound<&'r [u8]>, + high: Bound<&'r [u8]>, ) -> TxOpResult> { - unimplemented!(); + let tree = self.get_tree(tree)?; + + let (bounds_sql, params) = bounds_sql(low, high); + let sql = format!("SELECT k, v FROM {} {} ORDER BY k ASC", tree, bounds_sql); + + let params = params + .iter() + .map(|x| x as &dyn rusqlite::ToSql) + .collect::>(); + + TxValueIterator::make::<&[&dyn rusqlite::ToSql]>(self, &sql, params.as_ref()) } fn range_rev<'r>( &self, - _tree: usize, - _low: Bound<&'r [u8]>, - _high: Bound<&'r [u8]>, + tree: usize, + low: Bound<&'r [u8]>, + high: Bound<&'r [u8]>, ) -> TxOpResult> { - unimplemented!(); + let tree = self.get_tree(tree)?; + + let (bounds_sql, params) = bounds_sql(low, high); + let sql = format!("SELECT k, v FROM {} {} ORDER BY k DESC", tree, bounds_sql); + + let params = params + .iter() + .map(|x| x as &dyn rusqlite::ToSql) + .collect::>(); + + TxValueIterator::make::<&[&dyn rusqlite::ToSql]>(self, &sql, params.as_ref()) } } -// ---- +// ---- iterators outside transactions ---- +// complicated, they must hold the Statement and Row objects +// therefore quite some unsafe code (it is a self-referential struct) struct DbValueIterator<'a> { db: MutexGuard<'a, SqliteDbInner>, @@ -471,7 +497,78 @@ impl<'a> Iterator for DbValueIteratorPin<'a> { } } -// ---- +// ---- iterators within transactions ---- +// it's the same except we don't hold a mutex guard, +// only a Statement and a Rows object + +struct TxValueIterator<'a> { + stmt: Statement<'a>, + iter: Option>, + _pin: PhantomPinned, +} + +impl<'a> TxValueIterator<'a> { + fn make( + tx: &'a SqliteTx<'a>, + sql: &str, + args: P, + ) -> TxOpResult> { + let stmt = tx.tx.prepare(sql)?; + let res = TxValueIterator { + stmt, + iter: None, + _pin: PhantomPinned, + }; + let mut boxed = Box::pin(res); + trace!("make iterator with sql: {}", sql); + + unsafe { + let mut stmt = NonNull::from(&boxed.stmt); + let iter = stmt.as_mut().query(args)?; + + let mut_ref: Pin<&mut TxValueIterator<'a>> = Pin::as_mut(&mut boxed); + Pin::get_unchecked_mut(mut_ref).iter = Some(iter); + } + + Ok(Box::new(TxValueIteratorPin(boxed))) + } +} + +impl<'a> Drop for TxValueIterator<'a> { + fn drop(&mut self) { + trace!("drop iter"); + drop(self.iter.take()); + } +} + +struct TxValueIteratorPin<'a>(Pin>>); + +impl<'a> Iterator for TxValueIteratorPin<'a> { + type Item = TxOpResult<(Value, Value)>; + + fn next(&mut self) -> Option { + let next = unsafe { + let mut_ref: Pin<&mut TxValueIterator<'a>> = Pin::as_mut(&mut self.0); + Pin::get_unchecked_mut(mut_ref).iter.as_mut()?.next() + }; + let row = match next { + Err(e) => return Some(Err(e.into())), + Ok(None) => return None, + Ok(Some(r)) => r, + }; + let k = match row.get::<_, Vec>(0) { + Err(e) => return Some(Err(e.into())), + Ok(x) => x, + }; + let v = match row.get::<_, Vec>(1) { + Err(e) => return Some(Err(e.into())), + Ok(y) => y, + }; + Some(Ok((k, v))) + } +} + +// ---- utility ---- fn bounds_sql<'r>(low: Bound<&'r [u8]>, high: Bound<&'r [u8]>) -> (String, Vec>) { let mut sql = String::new(); diff --git a/src/db/test.rs b/src/db/test.rs index d4c875f0..3add89fb 100644 --- a/src/db/test.rs +++ b/src/db/test.rs @@ -10,8 +10,13 @@ fn test_suite(db: Db) { let vb: &[u8] = &b"plip"[..]; let vc: &[u8] = &b"plup"[..]; + // ---- test simple insert/delete ---- + assert!(tree.insert(ka, va).unwrap().is_none()); assert_eq!(tree.get(ka).unwrap().unwrap(), va); + assert_eq!(tree.len().unwrap(), 1); + + // ---- test transaction logic ---- let res = db.transaction::<_, (), _>(|tx| { assert_eq!(tx.get(&tree, ka).unwrap().unwrap(), va); @@ -37,6 +42,8 @@ fn test_suite(db: Db) { assert!(matches!(res, Err(TxError::Abort(42)))); assert_eq!(tree.get(ka).unwrap().unwrap(), vb); + // ---- test iteration outside of transactions ---- + let mut iter = tree.iter().unwrap(); let next = iter.next().unwrap().unwrap(); assert_eq!((next.0.as_ref(), next.1.as_ref()), (ka, vb)); @@ -73,6 +80,48 @@ fn test_suite(db: Db) { assert_eq!((next.0.as_ref(), next.1.as_ref()), (ka, vb)); assert!(iter.next().is_none()); drop(iter); + + // ---- test iteration within transactions ---- + + db.transaction::<_, (), _>(|tx| { + let mut iter = tx.iter(&tree).unwrap(); + let next = iter.next().unwrap().unwrap(); + assert_eq!((next.0.as_ref(), next.1.as_ref()), (ka, vb)); + let next = iter.next().unwrap().unwrap(); + assert_eq!((next.0.as_ref(), next.1.as_ref()), (kb, vc)); + assert!(iter.next().is_none()); + Ok(()) + }) + .unwrap(); + + db.transaction::<_, (), _>(|tx| { + let mut iter = tx.range(&tree, kint..).unwrap(); + let next = iter.next().unwrap().unwrap(); + assert_eq!((next.0.as_ref(), next.1.as_ref()), (kb, vc)); + assert!(iter.next().is_none()); + Ok(()) + }) + .unwrap(); + + db.transaction::<_, (), _>(|tx| { + let mut iter = tx.range_rev(&tree, ..kint).unwrap(); + let next = iter.next().unwrap().unwrap(); + assert_eq!((next.0.as_ref(), next.1.as_ref()), (ka, vb)); + assert!(iter.next().is_none()); + Ok(()) + }) + .unwrap(); + + db.transaction::<_, (), _>(|tx| { + let mut iter = tx.iter_rev(&tree).unwrap(); + let next = iter.next().unwrap().unwrap(); + assert_eq!((next.0.as_ref(), next.1.as_ref()), (kb, vc)); + let next = iter.next().unwrap().unwrap(); + assert_eq!((next.0.as_ref(), next.1.as_ref()), (ka, vb)); + assert!(iter.next().is_none()); + Ok(()) + }) + .unwrap(); } #[test] From 32aa2463001c0af9f87633a1ff78858dd4157eb9 Mon Sep 17 00:00:00 2001 From: Alex Auvolat Date: Fri, 8 Mar 2024 17:39:17 +0100 Subject: [PATCH 5/6] [rm-sled] Make proper use of pinning in LMDB adapter + comment unsafe --- src/db/lmdb_adapter.rs | 28 +++++++++++++++++++------ src/db/sqlite_adapter.rs | 44 +++++++++++++++++++++++----------------- 2 files changed, 47 insertions(+), 25 deletions(-) diff --git a/src/db/lmdb_adapter.rs b/src/db/lmdb_adapter.rs index ddfb6ed5..5ce7d3e3 100644 --- a/src/db/lmdb_adapter.rs +++ b/src/db/lmdb_adapter.rs @@ -3,6 +3,7 @@ use core::ptr::NonNull; use std::collections::HashMap; use std::convert::TryInto; +use std::pin::Pin; use std::sync::{Arc, RwLock}; use heed::types::ByteSlice; @@ -319,12 +320,20 @@ where where F: FnOnce(&'a RoTxn<'a>) -> Result, { - let mut res = TxAndIterator { tx, iter: None }; + let res = TxAndIterator { tx, iter: None }; + let mut boxed = Box::pin(res); - let tx = unsafe { NonNull::from(&res.tx).as_ref() }; - res.iter = Some(iterfun(tx)?); + // This unsafe allows us to bypass lifetime checks + let tx = unsafe { NonNull::from(&boxed.tx).as_ref() }; + let iter = iterfun(tx)?; - Ok(Box::new(res)) + let mut_ref = Pin::as_mut(&mut boxed); + // This unsafe allows us to write in a field of the pinned struct + unsafe { + Pin::get_unchecked_mut(mut_ref).iter = Some(iter); + } + + Ok(Box::new(TxAndIteratorPin(boxed))) } } @@ -338,14 +347,21 @@ where } } -impl<'a, I> Iterator for TxAndIterator<'a, I> +struct TxAndIteratorPin<'a, I>(Pin>>) +where + I: Iterator> + 'a; + +impl<'a, I> Iterator for TxAndIteratorPin<'a, I> where I: Iterator> + 'a, { type Item = Result<(Value, Value)>; fn next(&mut self) -> Option { - match self.iter.as_mut().unwrap().next() { + let mut_ref = Pin::as_mut(&mut self.0); + // This unsafe allows us to mutably access the iterator field + let next = unsafe { Pin::get_unchecked_mut(mut_ref).iter.as_mut()?.next() }; + match next { None => None, Some(Err(e)) => Some(Err(e.into())), Some(Ok((k, v))) => Some(Ok((k.to_vec(), v.to_vec()))), diff --git a/src/db/sqlite_adapter.rs b/src/db/sqlite_adapter.rs index 077c1f1b..2c6a4159 100644 --- a/src/db/sqlite_adapter.rs +++ b/src/db/sqlite_adapter.rs @@ -444,17 +444,23 @@ impl<'a> DbValueIterator<'a> { let mut boxed = Box::pin(res); trace!("make iterator with sql: {}", sql); + // This unsafe allows us to bypass lifetime checks + let db = unsafe { NonNull::from(&boxed.db).as_ref() }; + let stmt = db.db.prepare(sql)?; + + let mut_ref = Pin::as_mut(&mut boxed); + // This unsafe allows us to write in a field of the pinned struct unsafe { - let db = NonNull::from(&boxed.db); - let stmt = db.as_ref().db.prepare(sql)?; - - let mut_ref: Pin<&mut DbValueIterator<'a>> = Pin::as_mut(&mut boxed); Pin::get_unchecked_mut(mut_ref).stmt = Some(stmt); + } - let mut stmt = NonNull::from(&boxed.stmt); - let iter = stmt.as_mut().as_mut().unwrap().query(args)?; + // This unsafe allows us to bypass lifetime checks + let stmt = unsafe { NonNull::from(&boxed.stmt).as_mut() }; + let iter = stmt.as_mut().unwrap().query(args)?; - let mut_ref: Pin<&mut DbValueIterator<'a>> = Pin::as_mut(&mut boxed); + let mut_ref = Pin::as_mut(&mut boxed); + // This unsafe allows us to write in a field of the pinned struct + unsafe { Pin::get_unchecked_mut(mut_ref).iter = Some(iter); } @@ -476,10 +482,9 @@ impl<'a> Iterator for DbValueIteratorPin<'a> { type Item = Result<(Value, Value)>; fn next(&mut self) -> Option { - let next = unsafe { - let mut_ref: Pin<&mut DbValueIterator<'a>> = Pin::as_mut(&mut self.0); - Pin::get_unchecked_mut(mut_ref).iter.as_mut()?.next() - }; + let mut_ref = Pin::as_mut(&mut self.0); + // This unsafe allows us to mutably access the iterator field + let next = unsafe { Pin::get_unchecked_mut(mut_ref).iter.as_mut()?.next() }; let row = match next { Err(e) => return Some(Err(e.into())), Ok(None) => return None, @@ -522,11 +527,13 @@ impl<'a> TxValueIterator<'a> { let mut boxed = Box::pin(res); trace!("make iterator with sql: {}", sql); - unsafe { - let mut stmt = NonNull::from(&boxed.stmt); - let iter = stmt.as_mut().query(args)?; + // This unsafe allows us to bypass lifetime checks + let stmt = unsafe { NonNull::from(&boxed.stmt).as_mut() }; + let iter = stmt.query(args)?; - let mut_ref: Pin<&mut TxValueIterator<'a>> = Pin::as_mut(&mut boxed); + let mut_ref = Pin::as_mut(&mut boxed); + // This unsafe allows us to write in a field of the pinned struct + unsafe { Pin::get_unchecked_mut(mut_ref).iter = Some(iter); } @@ -547,10 +554,9 @@ impl<'a> Iterator for TxValueIteratorPin<'a> { type Item = TxOpResult<(Value, Value)>; fn next(&mut self) -> Option { - let next = unsafe { - let mut_ref: Pin<&mut TxValueIterator<'a>> = Pin::as_mut(&mut self.0); - Pin::get_unchecked_mut(mut_ref).iter.as_mut()?.next() - }; + let mut_ref = Pin::as_mut(&mut self.0); + // This unsafe allows us to mutably access the iterator field + let next = unsafe { Pin::get_unchecked_mut(mut_ref).iter.as_mut()?.next() }; let row = match next { Err(e) => return Some(Err(e.into())), Ok(None) => return None, From 2795b53b8b3ebd162df6b0244b73889e72f67ce0 Mon Sep 17 00:00:00 2001 From: Alex Auvolat Date: Tue, 12 Mar 2024 11:15:26 +0100 Subject: [PATCH 6/6] [rm-sled] factorize some code in sqlite backend --- src/db/sqlite_adapter.rs | 52 +++++++++++++++++++--------------------- 1 file changed, 24 insertions(+), 28 deletions(-) diff --git a/src/db/sqlite_adapter.rs b/src/db/sqlite_adapter.rs index 2c6a4159..6c556c97 100644 --- a/src/db/sqlite_adapter.rs +++ b/src/db/sqlite_adapter.rs @@ -485,20 +485,7 @@ impl<'a> Iterator for DbValueIteratorPin<'a> { let mut_ref = Pin::as_mut(&mut self.0); // This unsafe allows us to mutably access the iterator field let next = unsafe { Pin::get_unchecked_mut(mut_ref).iter.as_mut()?.next() }; - let row = match next { - Err(e) => return Some(Err(e.into())), - Ok(None) => return None, - Ok(Some(r)) => r, - }; - let k = match row.get::<_, Vec>(0) { - Err(e) => return Some(Err(e.into())), - Ok(x) => x, - }; - let v = match row.get::<_, Vec>(1) { - Err(e) => return Some(Err(e.into())), - Ok(y) => y, - }; - Some(Ok((k, v))) + iter_next_row(next) } } @@ -557,20 +544,7 @@ impl<'a> Iterator for TxValueIteratorPin<'a> { let mut_ref = Pin::as_mut(&mut self.0); // This unsafe allows us to mutably access the iterator field let next = unsafe { Pin::get_unchecked_mut(mut_ref).iter.as_mut()?.next() }; - let row = match next { - Err(e) => return Some(Err(e.into())), - Ok(None) => return None, - Ok(Some(r)) => r, - }; - let k = match row.get::<_, Vec>(0) { - Err(e) => return Some(Err(e.into())), - Ok(x) => x, - }; - let v = match row.get::<_, Vec>(1) { - Err(e) => return Some(Err(e.into())), - Ok(y) => y, - }; - Some(Ok((k, v))) + iter_next_row(next) } } @@ -614,3 +588,25 @@ fn bounds_sql<'r>(low: Bound<&'r [u8]>, high: Bound<&'r [u8]>) -> (String, Vec( + next_row: rusqlite::Result>, +) -> Option> +where + E: From, +{ + let row = match next_row { + Err(e) => return Some(Err(e.into())), + Ok(None) => return None, + Ok(Some(r)) => r, + }; + let k = match row.get::<_, Vec>(0) { + Err(e) => return Some(Err(e.into())), + Ok(x) => x, + }; + let v = match row.get::<_, Vec>(1) { + Err(e) => return Some(Err(e.into())), + Ok(y) => y, + }; + Some(Ok((k, v))) +}