Merge pull request 'Remove Sled' (#767) from rm-sled into next-0.10

Reviewed-on: Deuxfleurs/garage#767
This commit is contained in:
Alex 2024-03-12 10:45:57 +00:00
commit 81191d2d92
38 changed files with 347 additions and 829 deletions

45
Cargo.lock generated
View file

@ -921,15 +921,6 @@ dependencies = [
"crossbeam-utils",
]
[[package]]
name = "crossbeam-epoch"
version = "0.9.18"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "5b82ac4a3c2ca9c3460964f020e1402edd5753411d7737aa39c3714ad1b5420e"
dependencies = [
"crossbeam-utils",
]
[[package]]
name = "crossbeam-queue"
version = "0.3.11"
@ -1222,16 +1213,6 @@ dependencies = [
name = "format_table"
version = "0.1.1"
[[package]]
name = "fs2"
version = "0.4.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "9564fc758e15025b46aa6643b1b77d047d1a56a1aea6e01002ac0c7026876213"
dependencies = [
"libc",
"winapi",
]
[[package]]
name = "futures"
version = "0.3.30"
@ -1321,15 +1302,6 @@ dependencies = [
"slab",
]
[[package]]
name = "fxhash"
version = "0.2.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "c31b6d751ae2c7f11320402d34e41349dd1016f8d5d45e48c4312bc8625af50c"
dependencies = [
"byteorder",
]
[[package]]
name = "garage"
version = "0.10.0"
@ -1472,7 +1444,6 @@ dependencies = [
"hexdump",
"mktemp",
"rusqlite",
"sled",
"tracing",
]
@ -3831,22 +3802,6 @@ dependencies = [
"autocfg",
]
[[package]]
name = "sled"
version = "0.34.7"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "7f96b4737c2ce5987354855aed3797279def4ebf734436c6aa4552cf8e169935"
dependencies = [
"crc32fast",
"crossbeam-epoch",
"crossbeam-utils",
"fs2",
"fxhash",
"libc",
"log",
"parking_lot 0.11.2",
]
[[package]]
name = "smallvec"
version = "1.13.1"

View file

@ -34,7 +34,7 @@ args@{
ignoreLockHash,
}:
let
nixifiedLockHash = "c3296a54f1c6f385e0d4a4a937734f1fe0fee4405b44d7462249d72675f7ac40";
nixifiedLockHash = "23e1504df44ec18cfc5c872c858154304c16da2a6c1f7c9f06608ef833815f30";
workspaceSrc = if args.workspaceSrc == null then ./. else args.workspaceSrc;
currentLockHash = builtins.hashFile "sha256" (workspaceSrc + /Cargo.lock);
lockHashIgnored = if ignoreLockHash
@ -1315,21 +1315,6 @@ in
};
});
"registry+https://github.com/rust-lang/crates.io-index".crossbeam-epoch."0.9.18" = overridableMkRustCrate (profileName: rec {
name = "crossbeam-epoch";
version = "0.9.18";
registry = "registry+https://github.com/rust-lang/crates.io-index";
src = fetchCratesIo { inherit name version; sha256 = "5b82ac4a3c2ca9c3460964f020e1402edd5753411d7737aa39c3714ad1b5420e"; };
features = builtins.concatLists [
(lib.optional (rootFeatures' ? "garage/default" || rootFeatures' ? "garage/sled" || rootFeatures' ? "garage_db/default" || rootFeatures' ? "garage_db/sled" || rootFeatures' ? "garage_model/default" || rootFeatures' ? "garage_model/sled") "alloc")
(lib.optional (rootFeatures' ? "garage/default" || rootFeatures' ? "garage/sled" || rootFeatures' ? "garage_db/default" || rootFeatures' ? "garage_db/sled" || rootFeatures' ? "garage_model/default" || rootFeatures' ? "garage_model/sled") "default")
(lib.optional (rootFeatures' ? "garage/default" || rootFeatures' ? "garage/sled" || rootFeatures' ? "garage_db/default" || rootFeatures' ? "garage_db/sled" || rootFeatures' ? "garage_model/default" || rootFeatures' ? "garage_model/sled") "std")
];
dependencies = {
${ if rootFeatures' ? "garage/default" || rootFeatures' ? "garage/sled" || rootFeatures' ? "garage_db/default" || rootFeatures' ? "garage_db/sled" || rootFeatures' ? "garage_model/default" || rootFeatures' ? "garage_model/sled" then "crossbeam_utils" else null } = (rustPackages."registry+https://github.com/rust-lang/crates.io-index".crossbeam-utils."0.8.19" { inherit profileName; }).out;
};
});
"registry+https://github.com/rust-lang/crates.io-index".crossbeam-queue."0.3.11" = overridableMkRustCrate (profileName: rec {
name = "crossbeam-queue";
version = "0.3.11";
@ -1351,7 +1336,6 @@ in
registry = "registry+https://github.com/rust-lang/crates.io-index";
src = fetchCratesIo { inherit name version; sha256 = "248e3bacc7dc6baa3b21e405ee045c3047101a49145e7e9eca583ab4c2ca5345"; };
features = builtins.concatLists [
(lib.optional (rootFeatures' ? "garage/default" || rootFeatures' ? "garage/sled" || rootFeatures' ? "garage_db/default" || rootFeatures' ? "garage_db/sled" || rootFeatures' ? "garage_model/default" || rootFeatures' ? "garage_model/sled") "default")
[ "std" ]
];
});
@ -1775,17 +1759,6 @@ in
src = fetchCrateLocal (workspaceSrc + "/src/format-table");
});
"registry+https://github.com/rust-lang/crates.io-index".fs2."0.4.3" = overridableMkRustCrate (profileName: rec {
name = "fs2";
version = "0.4.3";
registry = "registry+https://github.com/rust-lang/crates.io-index";
src = fetchCratesIo { inherit name version; sha256 = "9564fc758e15025b46aa6643b1b77d047d1a56a1aea6e01002ac0c7026876213"; };
dependencies = {
${ if (rootFeatures' ? "garage/default" || rootFeatures' ? "garage/sled" || rootFeatures' ? "garage_db/default" || rootFeatures' ? "garage_db/sled" || rootFeatures' ? "garage_model/default" || rootFeatures' ? "garage_model/sled") && hostPlatform.isUnix then "libc" else null } = (rustPackages."registry+https://github.com/rust-lang/crates.io-index".libc."0.2.153" { inherit profileName; }).out;
${ if (rootFeatures' ? "garage/default" || rootFeatures' ? "garage/sled" || rootFeatures' ? "garage_db/default" || rootFeatures' ? "garage_db/sled" || rootFeatures' ? "garage_model/default" || rootFeatures' ? "garage_model/sled") && hostPlatform.isWindows then "winapi" else null } = (rustPackages."registry+https://github.com/rust-lang/crates.io-index".winapi."0.3.9" { inherit profileName; }).out;
};
});
"registry+https://github.com/rust-lang/crates.io-index".futures."0.3.30" = overridableMkRustCrate (profileName: rec {
name = "futures";
version = "0.3.30";
@ -1937,16 +1910,6 @@ in
};
});
"registry+https://github.com/rust-lang/crates.io-index".fxhash."0.2.1" = overridableMkRustCrate (profileName: rec {
name = "fxhash";
version = "0.2.1";
registry = "registry+https://github.com/rust-lang/crates.io-index";
src = fetchCratesIo { inherit name version; sha256 = "c31b6d751ae2c7f11320402d34e41349dd1016f8d5d45e48c4312bc8625af50c"; };
dependencies = {
${ if rootFeatures' ? "garage/default" || rootFeatures' ? "garage/sled" || rootFeatures' ? "garage_db/default" || rootFeatures' ? "garage_db/sled" || rootFeatures' ? "garage_model/default" || rootFeatures' ? "garage_model/sled" then "byteorder" else null } = (rustPackages."registry+https://github.com/rust-lang/crates.io-index".byteorder."1.5.0" { inherit profileName; }).out;
};
});
"unknown".garage."0.10.0" = overridableMkRustCrate (profileName: rec {
name = "garage";
version = "0.10.0";
@ -1963,7 +1926,6 @@ in
(lib.optional (rootFeatures' ? "garage/opentelemetry-otlp" || rootFeatures' ? "garage/telemetry-otlp") "opentelemetry-otlp")
(lib.optional (rootFeatures' ? "garage/default" || rootFeatures' ? "garage/metrics" || rootFeatures' ? "garage/opentelemetry-prometheus") "opentelemetry-prometheus")
(lib.optional (rootFeatures' ? "garage/default" || rootFeatures' ? "garage/metrics" || rootFeatures' ? "garage/prometheus") "prometheus")
(lib.optional (rootFeatures' ? "garage/default" || rootFeatures' ? "garage/sled") "sled")
(lib.optional (rootFeatures' ? "garage/default" || rootFeatures' ? "garage/sqlite") "sqlite")
(lib.optional (rootFeatures' ? "garage/system-libs") "system-libs")
(lib.optional (rootFeatures' ? "garage/telemetry-otlp") "telemetry-otlp")
@ -2127,7 +2089,6 @@ in
(lib.optional (rootFeatures' ? "garage/default" || rootFeatures' ? "garage/lmdb" || rootFeatures' ? "garage_db/default" || rootFeatures' ? "garage_db/heed" || rootFeatures' ? "garage_db/lmdb" || rootFeatures' ? "garage_model/default" || rootFeatures' ? "garage_model/lmdb") "heed")
(lib.optional (rootFeatures' ? "garage/default" || rootFeatures' ? "garage/lmdb" || rootFeatures' ? "garage_db/default" || rootFeatures' ? "garage_db/lmdb" || rootFeatures' ? "garage_model/default" || rootFeatures' ? "garage_model/lmdb") "lmdb")
(lib.optional (rootFeatures' ? "garage/default" || rootFeatures' ? "garage/sqlite" || rootFeatures' ? "garage_db/default" || rootFeatures' ? "garage_db/rusqlite" || rootFeatures' ? "garage_db/sqlite" || rootFeatures' ? "garage_model/default" || rootFeatures' ? "garage_model/sqlite") "rusqlite")
(lib.optional (rootFeatures' ? "garage/default" || rootFeatures' ? "garage/sled" || rootFeatures' ? "garage_db/default" || rootFeatures' ? "garage_db/sled" || rootFeatures' ? "garage_model/default" || rootFeatures' ? "garage_model/sled") "sled")
(lib.optional (rootFeatures' ? "garage/default" || rootFeatures' ? "garage/sqlite" || rootFeatures' ? "garage_db/default" || rootFeatures' ? "garage_db/sqlite" || rootFeatures' ? "garage_model/default" || rootFeatures' ? "garage_model/sqlite") "sqlite")
];
dependencies = {
@ -2135,7 +2096,6 @@ in
${ if rootFeatures' ? "garage/default" || rootFeatures' ? "garage/lmdb" || rootFeatures' ? "garage_db/default" || rootFeatures' ? "garage_db/heed" || rootFeatures' ? "garage_db/lmdb" || rootFeatures' ? "garage_model/default" || rootFeatures' ? "garage_model/lmdb" then "heed" else null } = (rustPackages."registry+https://github.com/rust-lang/crates.io-index".heed."0.11.0" { inherit profileName; }).out;
hexdump = (rustPackages."registry+https://github.com/rust-lang/crates.io-index".hexdump."0.1.1" { inherit profileName; }).out;
${ if rootFeatures' ? "garage/bundled-libs" || rootFeatures' ? "garage/default" || rootFeatures' ? "garage/sqlite" || rootFeatures' ? "garage_db/bundled-libs" || rootFeatures' ? "garage_db/default" || rootFeatures' ? "garage_db/rusqlite" || rootFeatures' ? "garage_db/sqlite" || rootFeatures' ? "garage_model/default" || rootFeatures' ? "garage_model/sqlite" then "rusqlite" else null } = (rustPackages."registry+https://github.com/rust-lang/crates.io-index".rusqlite."0.30.0" { inherit profileName; }).out;
${ if rootFeatures' ? "garage/default" || rootFeatures' ? "garage/sled" || rootFeatures' ? "garage_db/default" || rootFeatures' ? "garage_db/sled" || rootFeatures' ? "garage_model/default" || rootFeatures' ? "garage_model/sled" then "sled" else null } = (rustPackages."registry+https://github.com/rust-lang/crates.io-index".sled."0.34.7" { inherit profileName; }).out;
tracing = (rustPackages."registry+https://github.com/rust-lang/crates.io-index".tracing."0.1.40" { inherit profileName; }).out;
};
devDependencies = {
@ -2152,7 +2112,6 @@ in
(lib.optional (rootFeatures' ? "garage_model/default") "default")
(lib.optional (rootFeatures' ? "garage/default" || rootFeatures' ? "garage/k2v" || rootFeatures' ? "garage_api/k2v" || rootFeatures' ? "garage_model/k2v") "k2v")
(lib.optional (rootFeatures' ? "garage/default" || rootFeatures' ? "garage/lmdb" || rootFeatures' ? "garage_model/default" || rootFeatures' ? "garage_model/lmdb") "lmdb")
(lib.optional (rootFeatures' ? "garage/default" || rootFeatures' ? "garage/sled" || rootFeatures' ? "garage_model/default" || rootFeatures' ? "garage_model/sled") "sled")
(lib.optional (rootFeatures' ? "garage/default" || rootFeatures' ? "garage/sqlite" || rootFeatures' ? "garage_model/default" || rootFeatures' ? "garage_model/sqlite") "sqlite")
];
dependencies = {
@ -5459,27 +5418,6 @@ in
};
});
"registry+https://github.com/rust-lang/crates.io-index".sled."0.34.7" = overridableMkRustCrate (profileName: rec {
name = "sled";
version = "0.34.7";
registry = "registry+https://github.com/rust-lang/crates.io-index";
src = fetchCratesIo { inherit name version; sha256 = "7f96b4737c2ce5987354855aed3797279def4ebf734436c6aa4552cf8e169935"; };
features = builtins.concatLists [
(lib.optional (rootFeatures' ? "garage/default" || rootFeatures' ? "garage/sled" || rootFeatures' ? "garage_db/default" || rootFeatures' ? "garage_db/sled" || rootFeatures' ? "garage_model/default" || rootFeatures' ? "garage_model/sled") "default")
(lib.optional (rootFeatures' ? "garage/default" || rootFeatures' ? "garage/sled" || rootFeatures' ? "garage_db/default" || rootFeatures' ? "garage_db/sled" || rootFeatures' ? "garage_model/default" || rootFeatures' ? "garage_model/sled") "no_metrics")
];
dependencies = {
${ if rootFeatures' ? "garage/default" || rootFeatures' ? "garage/sled" || rootFeatures' ? "garage_db/default" || rootFeatures' ? "garage_db/sled" || rootFeatures' ? "garage_model/default" || rootFeatures' ? "garage_model/sled" then "crc32fast" else null } = (rustPackages."registry+https://github.com/rust-lang/crates.io-index".crc32fast."1.3.2" { inherit profileName; }).out;
${ if rootFeatures' ? "garage/default" || rootFeatures' ? "garage/sled" || rootFeatures' ? "garage_db/default" || rootFeatures' ? "garage_db/sled" || rootFeatures' ? "garage_model/default" || rootFeatures' ? "garage_model/sled" then "crossbeam_epoch" else null } = (rustPackages."registry+https://github.com/rust-lang/crates.io-index".crossbeam-epoch."0.9.18" { inherit profileName; }).out;
${ if rootFeatures' ? "garage/default" || rootFeatures' ? "garage/sled" || rootFeatures' ? "garage_db/default" || rootFeatures' ? "garage_db/sled" || rootFeatures' ? "garage_model/default" || rootFeatures' ? "garage_model/sled" then "crossbeam_utils" else null } = (rustPackages."registry+https://github.com/rust-lang/crates.io-index".crossbeam-utils."0.8.19" { inherit profileName; }).out;
${ if (rootFeatures' ? "garage/default" || rootFeatures' ? "garage/sled" || rootFeatures' ? "garage_db/default" || rootFeatures' ? "garage_db/sled" || rootFeatures' ? "garage_model/default" || rootFeatures' ? "garage_model/sled") && (hostPlatform.parsed.kernel.name == "linux" || hostPlatform.parsed.kernel.name == "darwin" || hostPlatform.parsed.kernel.name == "windows") then "fs2" else null } = (rustPackages."registry+https://github.com/rust-lang/crates.io-index".fs2."0.4.3" { inherit profileName; }).out;
${ if rootFeatures' ? "garage/default" || rootFeatures' ? "garage/sled" || rootFeatures' ? "garage_db/default" || rootFeatures' ? "garage_db/sled" || rootFeatures' ? "garage_model/default" || rootFeatures' ? "garage_model/sled" then "fxhash" else null } = (rustPackages."registry+https://github.com/rust-lang/crates.io-index".fxhash."0.2.1" { inherit profileName; }).out;
${ if rootFeatures' ? "garage/default" || rootFeatures' ? "garage/sled" || rootFeatures' ? "garage_db/default" || rootFeatures' ? "garage_db/sled" || rootFeatures' ? "garage_model/default" || rootFeatures' ? "garage_model/sled" then "libc" else null } = (rustPackages."registry+https://github.com/rust-lang/crates.io-index".libc."0.2.153" { inherit profileName; }).out;
${ if rootFeatures' ? "garage/default" || rootFeatures' ? "garage/sled" || rootFeatures' ? "garage_db/default" || rootFeatures' ? "garage_db/sled" || rootFeatures' ? "garage_model/default" || rootFeatures' ? "garage_model/sled" then "log" else null } = (rustPackages."registry+https://github.com/rust-lang/crates.io-index".log."0.4.20" { inherit profileName; }).out;
${ if rootFeatures' ? "garage/default" || rootFeatures' ? "garage/sled" || rootFeatures' ? "garage_db/default" || rootFeatures' ? "garage_db/sled" || rootFeatures' ? "garage_model/default" || rootFeatures' ? "garage_model/sled" then "parking_lot" else null } = (rustPackages."registry+https://github.com/rust-lang/crates.io-index".parking_lot."0.11.2" { inherit profileName; }).out;
};
});
"registry+https://github.com/rust-lang/crates.io-index".smallvec."1.13.1" = overridableMkRustCrate (profileName: rec {
name = "smallvec";
version = "1.13.1";
@ -6723,7 +6661,6 @@ in
[ "minwindef" ]
[ "ntstatus" ]
[ "processenv" ]
(lib.optional (rootFeatures' ? "garage/default" || rootFeatures' ? "garage/sled" || rootFeatures' ? "garage_db/default" || rootFeatures' ? "garage_db/sled" || rootFeatures' ? "garage_model/default" || rootFeatures' ? "garage_model/sled") "processthreadsapi")
[ "std" ]
[ "synchapi" ]
[ "sysinfoapi" ]

View file

@ -78,7 +78,6 @@ tracing-subscriber = { version = "0.3", features = ["env-filter"] }
heed = { version = "0.11", default-features = false, features = ["lmdb"] }
rusqlite = "0.30.0"
sled = "0.34"
async-compression = { version = "0.4", features = ["tokio", "zstd"] }
zstd = { version = "0.13", default-features = false }

View file

@ -40,7 +40,6 @@ in {
features = [
"garage/bundled-libs"
"garage/k2v"
"garage/sled"
"garage/lmdb"
"garage/sqlite"
];

View file

@ -98,7 +98,6 @@ paths:
type: string
example:
- "k2v"
- "sled"
- "lmdb"
- "sqlite"
- "consul-discovery"

View file

@ -292,7 +292,7 @@ with average object size ranging from 50 KB to 150 KB.
As such, your Garage cluster should be configured appropriately for good performance:
- use Garage v0.8.0 or higher with the [LMDB database engine](@documentation/reference-manual/configuration.md#db-engine-since-v0-8-0).
With the default Sled database engine, your database could quickly end up taking tens of GB of disk space.
Older versions of Garage used the Sled database engine which had issues, such as databases quickly ending up taking tens of GB of disk space.
- the Garage database should be stored on a SSD
### Creating your bucket

View file

@ -90,6 +90,5 @@ The following feature flags are available in v0.8.0:
| `kubernetes-discovery` | optional | Enable automatic registration and discovery<br>of cluster nodes through the Kubernetes API |
| `metrics` | *by default* | Enable collection of metrics in Prometheus format on the admin API |
| `telemetry-otlp` | optional | Enable collection of execution traces using OpenTelemetry |
| `sled` | *by default* | Enable using Sled to store Garage's metadata |
| `lmdb` | optional | Enable using LMDB to store Garage's metadata |
| `lmdb` | *by default* | Enable using LMDB to store Garage's metadata |
| `sqlite` | optional | Enable using Sqlite3 to store Garage's metadata |

View file

@ -70,9 +70,8 @@ to store 2 TB of data in total.
- If you only have an HDD and no SSD, it's fine to put your metadata alongside the data
on the same drive. Having lots of RAM for your kernel to cache the metadata will
help a lot with performance. Make sure to use the LMDB database engine,
instead of Sled, which suffers from quite bad performance degradation on HDDs.
Sled is still the default for legacy reasons, but is not recommended anymore.
help a lot with performance. The default LMDB database engine is the most tested
and has good performance.
- For the metadata storage, Garage does not do checksumming and integrity
verification on its own. If you are afraid of bitrot/data corruption,

View file

@ -97,7 +97,7 @@ delete a tombstone, the following condition has to be met:
superseded by the tombstone. This ensures that deleting the tombstone is
safe and that no deleted value will come back in the system.
Garage makes use of Sled's atomic operations (such as compare-and-swap and
Garage uses atomic database operations (such as compare-and-swap and
transactions) to ensure that only tombstones that have been correctly
propagated to other nodes are ever deleted from the local entry tree.

View file

@ -20,8 +20,6 @@ db_engine = "lmdb"
block_size = "1M"
sled_cache_capacity = "128MiB"
sled_flush_every_ms = 2000
lmdb_map_size = "1T"
compression_level = 1
@ -96,9 +94,7 @@ Top-level configuration options:
[`rpc_bind_addr`](#rpc_bind_addr),
[`rpc_bind_outgoing`](#rpc_bind_outgoing),
[`rpc_public_addr`](#rpc_public_addr),
[`rpc_secret`/`rpc_secret_file`](#rpc_secret),
[`sled_cache_capacity`](#sled_cache_capacity),
[`sled_flush_every_ms`](#sled_flush_every_ms).
[`rpc_secret`/`rpc_secret_file`](#rpc_secret).
The `[consul_discovery]` section:
[`api`](#consul_api),
@ -271,20 +267,16 @@ Since `v0.8.0`, Garage can use alternative storage backends as follows:
| DB engine | `db_engine` value | Database path |
| --------- | ----------------- | ------------- |
| [LMDB](https://www.lmdb.tech) (default since `v0.9.0`) | `"lmdb"` | `<metadata_dir>/db.lmdb/` |
| [Sled](https://sled.rs) (default up to `v0.8.0`) | `"sled"` | `<metadata_dir>/db/` |
| [Sqlite](https://sqlite.org) | `"sqlite"` | `<metadata_dir>/db.sqlite` |
| [LMDB](https://www.lmdb.tech) (since `v0.8.0`, default since `v0.9.0`) | `"lmdb"` | `<metadata_dir>/db.lmdb/` |
| [Sqlite](https://sqlite.org) (since `v0.8.0`) | `"sqlite"` | `<metadata_dir>/db.sqlite` |
| [Sled](https://sled.rs) (old default, removed since `v1.0`) | `"sled"` | `<metadata_dir>/db/` |
Sled was the only database engine up to Garage v0.7.0. Performance issues and
API limitations of Sled prompted the addition of alternative engines in v0.8.0.
Since v0.9.0, LMDB is the default engine instead of Sled, and Sled is
deprecated. We plan to remove Sled in Garage v1.0.
Sled was supported until Garage v0.9.x, and was removed in Garage v1.0.
You can still use an older binary of Garage (e.g. v0.9.3) to migrate
old Sled metadata databases to another engine.
Performance characteristics of the different DB engines are as follows:
- Sled: tends to produce large data files and also has performance issues,
especially when the metadata folder is on a traditional HDD and not on SSD.
- LMDB: the recommended database engine on 64-bit systems, much more
space-efficient and slightly faster. Note that the data format of LMDB is not
portable between architectures, so for instance the Garage database of an
@ -333,7 +325,6 @@ Here is how this option impacts the different database engines:
| Database | `metadata_fsync = false` (default) | `metadata_fsync = true` |
|----------|------------------------------------|-------------------------------|
| Sled | default options | *unsupported* |
| Sqlite | `PRAGMA synchronous = OFF` | `PRAGMA synchronous = NORMAL` |
| LMDB | `MDB_NOMETASYNC` + `MDB_NOSYNC` | `MDB_NOMETASYNC` |
@ -367,21 +358,6 @@ files will remain available. This however means that chunks from existing files
will not be deduplicated with chunks from newly uploaded files, meaning you
might use more storage space than is optimally possible.
#### `sled_cache_capacity` {#sled_cache_capacity}
This parameter can be used to tune the capacity of the cache used by
[sled](https://sled.rs), the database Garage uses internally to store metadata.
Tune this to fit the RAM you wish to make available to your Garage instance.
This value has a conservative default (128MB) so that Garage doesn't use too much
RAM by default, but feel free to increase this for higher performance.
#### `sled_flush_every_ms` {#sled_flush_every_ms}
This parameter can be used to tune the flushing interval of sled.
Increase this if sled is thrashing your SSD, at the risk of losing more data in case
of a power outage (though this should not matter much as data is replicated on other
nodes). The default value, 2000ms, should be appropriate for most use cases.
#### `lmdb_map_size` {#lmdb_map_size}
This parameter can be used to set the map size used by LMDB,

View file

@ -73,7 +73,6 @@ Example response body:
"garageVersion": "v0.10.0",
"garageFeatures": [
"k2v",
"sled",
"lmdb",
"sqlite",
"metrics",

View file

@ -146,7 +146,7 @@ in a bucket, as the partition key becomes the sort key in the index.
How indexing works:
- Each node keeps a local count of how many items it stores for each partition,
in a local Sled tree that is updated atomically when an item is modified.
in a local database tree that is updated atomically when an item is modified.
- These local counters are asynchronously stored in the index table which is
a regular Garage table spread in the network. Counters are stored as LWW values,
so basically the final table will have the following structure:

View file

@ -168,7 +168,7 @@ let
rootFeatures = if features != null then
features
else
([ "garage/bundled-libs" "garage/sled" "garage/lmdb" "garage/k2v" ] ++ (if release then [
([ "garage/bundled-libs" "garage/lmdb" "garage/k2v" ] ++ (if release then [
"garage/consul-discovery"
"garage/kubernetes-discovery"
"garage/metrics"

View file

@ -6,18 +6,13 @@
garage:
# Can be changed for better performance on certain systems
# https://garagehq.deuxfleurs.fr/documentation/reference-manual/configuration/#db-engine-since-v0-8-0
dbEngine: "sled"
dbEngine: "lmdb"
# Default is 1MB
# An increase can result in better performance in certain scenarios
# https://garagehq.deuxfleurs.fr/documentation/reference-manual/configuration/#block-size
blockSize: "1048576"
# Tuning parameters for the sled DB engine
# https://garagehq.deuxfleurs.fr/documentation/reference-manual/configuration/#sled-cache-capacity
sledCacheCapacity: "134217728"
sledFlushEveryMs: "2000"
# Default to 3 replicas, see the replication_mode section at
# https://garagehq.deuxfleurs.fr/documentation/reference-manual/configuration/#replication-mode
replicationMode: "3"
@ -50,11 +45,6 @@ garage:
block_size = {{ .Values.garage.blockSize }}
{{- if eq .Values.garage.dbEngine "sled"}}
sled_cache_capacity = {{ .Values.garage.sledCacheCapacity }}
sled_flush_every_ms = {{ .Values.garage.sledFlushEveryMs }}
{{- end }}
replication_mode = "{{ .Values.garage.replicationMode }}"
compression_level = {{ .Values.garage.compressionLevel }}

View file

@ -378,11 +378,6 @@ impl BlockManager {
Ok(self.rc.rc.len()?)
}
/// Get number of items in the refcount table, as reported by the tree's `fast_len()`.
///
/// Any error returned by that call is converted into this function's `Error` by `?`.
pub fn rc_fast_len(&self) -> Result<Option<usize>, Error> {
    let count = self.rc.rc.fast_len()?;
    Ok(count)
}
/// Send command to start/stop/manage the scrub worker
pub async fn send_scrub_command(&self, cmd: ScrubWorkerCommand) -> Result<(), Error> {
let tx = self.tx_scrub_command.load();
@ -398,7 +393,7 @@ impl BlockManager {
/// List all resync errors
pub fn list_resync_errors(&self) -> Result<Vec<BlockResyncErrorInfo>, Error> {
let mut blocks = Vec::with_capacity(self.resync.errors.len());
let mut blocks = Vec::with_capacity(self.resync.errors.len()?);
for ent in self.resync.errors.iter()? {
let (hash, cnt) = ent?;
let cnt = ErrorCounter::decode(&cnt);

View file

@ -1,7 +1,6 @@
use opentelemetry::{global, metrics::*};
use garage_db as db;
use garage_db::counted_tree_hack::CountedTree;
/// BlockManagerMetrics references all counters used for metrics
pub struct BlockManagerMetrics {
@ -29,8 +28,8 @@ impl BlockManagerMetrics {
pub fn new(
compression_level: Option<i32>,
rc_tree: db::Tree,
resync_queue: CountedTree,
resync_errors: CountedTree,
resync_queue: db::Tree,
resync_errors: db::Tree,
) -> Self {
let meter = global::meter("garage_model/block");
Self {
@ -45,15 +44,17 @@ impl BlockManagerMetrics {
.init(),
_rc_size: meter
.u64_value_observer("block.rc_size", move |observer| {
if let Ok(Some(v)) = rc_tree.fast_len() {
observer.observe(v as u64, &[])
if let Ok(value) = rc_tree.len() {
observer.observe(value as u64, &[])
}
})
.with_description("Number of blocks known to the reference counter")
.init(),
_resync_queue_len: meter
.u64_value_observer("block.resync_queue_length", move |observer| {
observer.observe(resync_queue.len() as u64, &[])
if let Ok(value) = resync_queue.len() {
observer.observe(value as u64, &[]);
}
})
.with_description(
"Number of block hashes queued for local check and possible resync",
@ -61,7 +62,9 @@ impl BlockManagerMetrics {
.init(),
_resync_errored_blocks: meter
.u64_value_observer("block.resync_errored_blocks", move |observer| {
observer.observe(resync_errors.len() as u64, &[])
if let Ok(value) = resync_errors.len() {
observer.observe(value as u64, &[]);
}
})
.with_description("Number of block hashes whose last resync resulted in an error")
.init(),

View file

@ -15,7 +15,6 @@ use opentelemetry::{
};
use garage_db as db;
use garage_db::counted_tree_hack::CountedTree;
use garage_util::background::*;
use garage_util::data::*;
@ -47,9 +46,9 @@ pub(crate) const MAX_RESYNC_WORKERS: usize = 8;
const INITIAL_RESYNC_TRANQUILITY: u32 = 2;
pub struct BlockResyncManager {
pub(crate) queue: CountedTree,
pub(crate) queue: db::Tree,
pub(crate) notify: Arc<Notify>,
pub(crate) errors: CountedTree,
pub(crate) errors: db::Tree,
busy_set: BusySet,
@ -90,12 +89,10 @@ impl BlockResyncManager {
let queue = db
.open_tree("block_local_resync_queue")
.expect("Unable to open block_local_resync_queue tree");
let queue = CountedTree::new(queue).expect("Could not count block_local_resync_queue");
let errors = db
.open_tree("block_local_resync_errors")
.expect("Unable to open block_local_resync_errors tree");
let errors = CountedTree::new(errors).expect("Could not count block_local_resync_errors");
let persister = PersisterShared::new(&system.metadata_dir, "resync_cfg");
@ -110,16 +107,12 @@ impl BlockResyncManager {
/// Get length of resync queue
pub fn queue_len(&self) -> Result<usize, Error> {
// This currently can't return an error because the CountedTree hack
// doesn't error on .len(), but this will change when we remove the hack
// (hopefully someday!)
Ok(self.queue.len())
Ok(self.queue.len()?)
}
/// Get number of blocks that have an error
pub fn errors_len(&self) -> Result<usize, Error> {
// (see queue_len comment)
Ok(self.errors.len())
Ok(self.errors.len()?)
}
/// Clear the error counter for a block and put it in queue immediately
@ -180,7 +173,7 @@ impl BlockResyncManager {
// deleted once the garbage collection delay has passed.
//
// Here are some explanations on how the resync queue works.
// There are two Sled trees that are used to have information
// There are two db trees that are used to have information
// about the status of blocks that need to be resynchronized:
//
// - resync.queue: a tree that is ordered first by a timestamp
@ -541,9 +534,9 @@ impl Worker for ResyncWorker {
Ok(WorkerState::Idle)
}
Err(e) => {
// The errors that we have here are only Sled errors
// The errors that we have here are only db errors
// We don't really know how to handle them so just ¯\_(ツ)_/¯
// (there is kind of an assumption that Sled won't error on us,
// (there is kind of an assumption that the db won't error on us,
// if it does there is not much we can do -- TODO should we just panic?)
// Here we just give the error to the worker manager,
// it will print it to the logs and increment a counter

View file

@ -18,13 +18,12 @@ tracing.workspace = true
heed = { workspace = true, optional = true }
rusqlite = { workspace = true, optional = true }
sled = { workspace = true, optional = true }
[dev-dependencies]
mktemp.workspace = true
[features]
default = [ "sled", "lmdb", "sqlite" ]
default = [ "lmdb", "sqlite" ]
bundled-libs = [ "rusqlite?/bundled" ]
lmdb = [ "heed" ]
sqlite = [ "rusqlite" ]

View file

@ -1,127 +0,0 @@
//! This hack allows a db tree to keep in RAM a counter of the number of entries
//! it contains, which is used to call .len() on it. This is useful only for
//! the sled backend where .len() otherwise would have to traverse the whole
//! tree to count items. For sqlite and lmdb, this is mostly useless (but
//! hopefully not harmful!). Note that a CountedTree cannot be part of a
//! transaction.
use std::sync::{
atomic::{AtomicUsize, Ordering},
Arc,
};
use crate::{Result, Tree, TxError, Value, ValueIter};
/// Handle to a [`Tree`] paired with an in-memory count of its entries.
///
/// Clones share the same tree and the same counter through the inner `Arc`.
/// The counter is only adjusted by the write methods of this wrapper.
#[derive(Clone)]
pub struct CountedTree(Arc<CountedTreeInternal>);
/// Shared state behind a [`CountedTree`].
struct CountedTreeInternal {
/// The wrapped database tree; all reads and writes are forwarded to it.
tree: Tree,
/// Entry count: seeded from `tree.len()` in `CountedTree::new`, then
/// incremented/decremented by `insert`, `remove` and `compare_and_swap`.
len: AtomicUsize,
}
impl CountedTree {
    /// Wraps `tree`, seeding the in-memory counter with the tree's current length.
    ///
    /// Returns an error if the initial `tree.len()` call fails.
    pub fn new(tree: Tree) -> Result<Self> {
        let initial = AtomicUsize::new(tree.len()?);
        Ok(Self(Arc::new(CountedTreeInternal { tree, len: initial })))
    }

    /// Returns the cached number of entries without touching the database.
    pub fn len(&self) -> usize {
        let counter = &self.0.len;
        counter.load(Ordering::SeqCst)
    }

    /// Returns `true` when the cached entry count is zero.
    pub fn is_empty(&self) -> bool {
        matches!(self.len(), 0)
    }

    /// Looks up `key` in the wrapped tree.
    pub fn get<K: AsRef<[u8]>>(&self, key: K) -> Result<Option<Value>> {
        let tree = &self.0.tree;
        tree.get(key)
    }

    /// Forwards to `first()` on the wrapped tree.
    pub fn first(&self) -> Result<Option<(Value, Value)>> {
        let tree = &self.0.tree;
        tree.first()
    }

    /// Forwards to `iter()` on the wrapped tree.
    pub fn iter(&self) -> Result<ValueIter<'_>> {
        let tree = &self.0.tree;
        tree.iter()
    }

    // ---- writing functions ----

    /// Inserts `value` under `key` and returns the previous value, if any.
    ///
    /// The counter grows by one only when no previous value was present.
    pub fn insert<K, V>(&self, key: K, value: V) -> Result<Option<Value>>
    where
        K: AsRef<[u8]>,
        V: AsRef<[u8]>,
    {
        let CountedTreeInternal { tree, len } = &*self.0;
        let previous = tree.insert(key, value)?;
        match &previous {
            None => {
                len.fetch_add(1, Ordering::SeqCst);
            }
            Some(_) => {}
        }
        Ok(previous)
    }

    /// Removes `key` and returns the value that was stored, if any.
    ///
    /// The counter shrinks by one only when a value was actually removed.
    pub fn remove<K: AsRef<[u8]>>(&self, key: K) -> Result<Option<Value>> {
        let CountedTreeInternal { tree, len } = &*self.0;
        let previous = tree.remove(key)?;
        match &previous {
            Some(_) => {
                len.fetch_sub(1, Ordering::SeqCst);
            }
            None => {}
        }
        Ok(previous)
    }

    /// Replaces the value at `key` with `new` if the stored value equals `expected_old`.
    ///
    /// `None` stands for "no entry": as `expected_old` it requires the key to be
    /// absent, as `new` it removes the key. The check and the write run inside a
    /// single database transaction.
    ///
    /// Returns `Ok(true)` when the swap was applied, `Ok(false)` when the stored
    /// value did not match, and `Err` on a database error.
    pub fn compare_and_swap<K, OV, NV>(
        &self,
        key: K,
        expected_old: Option<OV>,
        new: Option<NV>,
    ) -> Result<bool>
    where
        K: AsRef<[u8]>,
        OV: AsRef<[u8]>,
        NV: AsRef<[u8]>,
    {
        let CountedTreeInternal { tree, len } = &*self.0;

        // On success the stored value matched `expected_old`, so these two flags
        // describe the real before/after presence of the entry.
        let had_entry = expected_old.is_some();
        let has_entry = new.is_some();

        let outcome = tree.db().transaction(|tx| {
            let current = tx.get(tree, &key)?;
            let matches_expected = match (&current, &expected_old) {
                (None, None) => true,
                (Some(stored), Some(wanted)) => stored == wanted.as_ref(),
                _ => false,
            };
            if !matches_expected {
                return Err(TxError::Abort(()));
            }
            if let Some(v) = &new {
                tx.insert(tree, &key, v)?;
            } else {
                tx.remove(tree, &key)?;
            }
            Ok(())
        });

        match outcome {
            Err(TxError::Db(e)) => return Err(e),
            Err(TxError::Abort(())) => return Ok(false),
            Ok(()) => {}
        }

        if !had_entry && has_entry {
            len.fetch_add(1, Ordering::SeqCst);
        } else if had_entry && !has_entry {
            len.fetch_sub(1, Ordering::SeqCst);
        }
        Ok(true)
    }
}

View file

@ -3,13 +3,9 @@ extern crate tracing;
#[cfg(feature = "lmdb")]
pub mod lmdb_adapter;
#[cfg(feature = "sled")]
pub mod sled_adapter;
#[cfg(feature = "sqlite")]
pub mod sqlite_adapter;
pub mod counted_tree_hack;
pub mod open;
#[cfg(test)]
@ -55,6 +51,7 @@ pub type Result<T> = std::result::Result<T, Error>;
pub struct TxOpError(pub(crate) Error);
pub type TxOpResult<T> = std::result::Result<T, TxOpError>;
#[derive(Debug)]
pub enum TxError<E> {
Abort(E),
Db(Error),
@ -189,10 +186,6 @@ impl Tree {
pub fn len(&self) -> Result<usize> {
self.0.len(self.1)
}
#[inline]
pub fn fast_len(&self) -> Result<Option<usize>> {
self.0.fast_len(self.1)
}
#[inline]
pub fn first(&self) -> Result<Option<(Value, Value)>> {
@ -282,6 +275,11 @@ impl<'a> Transaction<'a> {
pub fn remove<T: AsRef<[u8]>>(&mut self, tree: &Tree, key: T) -> TxOpResult<Option<Value>> {
self.tx.remove(tree.1, key.as_ref())
}
/// Clears all values in a tree
#[inline]
pub fn clear(&mut self, tree: &Tree) -> TxOpResult<()> {
self.tx.clear(tree.1)
}
#[inline]
pub fn iter(&self, tree: &Tree) -> TxOpResult<TxValueIter<'_>> {
@ -328,9 +326,6 @@ pub(crate) trait IDb: Send + Sync {
fn get(&self, tree: usize, key: &[u8]) -> Result<Option<Value>>;
fn len(&self, tree: usize) -> Result<usize>;
fn fast_len(&self, _tree: usize) -> Result<Option<usize>> {
Ok(None)
}
fn insert(&self, tree: usize, key: &[u8], value: &[u8]) -> Result<Option<Value>>;
fn remove(&self, tree: usize, key: &[u8]) -> Result<Option<Value>>;
@ -361,6 +356,7 @@ pub(crate) trait ITx {
fn insert(&mut self, tree: usize, key: &[u8], value: &[u8]) -> TxOpResult<Option<Value>>;
fn remove(&mut self, tree: usize, key: &[u8]) -> TxOpResult<Option<Value>>;
fn clear(&mut self, tree: usize) -> TxOpResult<()>;
fn iter(&self, tree: usize) -> TxOpResult<TxValueIter<'_>>;
fn iter_rev(&self, tree: usize) -> TxOpResult<TxValueIter<'_>>;

View file

@ -3,6 +3,7 @@ use core::ptr::NonNull;
use std::collections::HashMap;
use std::convert::TryInto;
use std::pin::Pin;
use std::sync::{Arc, RwLock};
use heed::types::ByteSlice;
@ -121,10 +122,6 @@ impl IDb for LmdbDb {
Ok(tree.len(&tx)?.try_into().unwrap())
}
fn fast_len(&self, tree: usize) -> Result<Option<usize>> {
Ok(Some(self.len(tree)?))
}
fn insert(&self, tree: usize, key: &[u8], value: &[u8]) -> Result<Option<Value>> {
let tree = self.get_tree(tree)?;
let mut tx = self.db.write_txn()?;
@ -242,8 +239,9 @@ impl<'a> ITx for LmdbTx<'a> {
None => Ok(None),
}
}
fn len(&self, _tree: usize) -> TxOpResult<usize> {
unimplemented!(".len() in transaction not supported with LMDB backend")
fn len(&self, tree: usize) -> TxOpResult<usize> {
let tree = self.get_tree(tree)?;
Ok(tree.len(&self.tx)? as usize)
}
fn insert(&mut self, tree: usize, key: &[u8], value: &[u8]) -> TxOpResult<Option<Value>> {
@ -258,33 +256,48 @@ impl<'a> ITx for LmdbTx<'a> {
tree.delete(&mut self.tx, key)?;
Ok(old_val)
}
fn iter(&self, _tree: usize) -> TxOpResult<TxValueIter<'_>> {
unimplemented!("Iterators in transactions not supported with LMDB backend");
fn clear(&mut self, tree: usize) -> TxOpResult<()> {
let tree = *self.get_tree(tree)?;
tree.clear(&mut self.tx)?;
Ok(())
}
fn iter_rev(&self, _tree: usize) -> TxOpResult<TxValueIter<'_>> {
unimplemented!("Iterators in transactions not supported with LMDB backend");
fn iter(&self, tree: usize) -> TxOpResult<TxValueIter<'_>> {
let tree = *self.get_tree(tree)?;
Ok(Box::new(tree.iter(&self.tx)?.map(tx_iter_item)))
}
fn iter_rev(&self, tree: usize) -> TxOpResult<TxValueIter<'_>> {
let tree = *self.get_tree(tree)?;
Ok(Box::new(tree.rev_iter(&self.tx)?.map(tx_iter_item)))
}
fn range<'r>(
&self,
_tree: usize,
_low: Bound<&'r [u8]>,
_high: Bound<&'r [u8]>,
tree: usize,
low: Bound<&'r [u8]>,
high: Bound<&'r [u8]>,
) -> TxOpResult<TxValueIter<'_>> {
unimplemented!("Iterators in transactions not supported with LMDB backend");
let tree = *self.get_tree(tree)?;
Ok(Box::new(
tree.range(&self.tx, &(low, high))?.map(tx_iter_item),
))
}
fn range_rev<'r>(
&self,
_tree: usize,
_low: Bound<&'r [u8]>,
_high: Bound<&'r [u8]>,
tree: usize,
low: Bound<&'r [u8]>,
high: Bound<&'r [u8]>,
) -> TxOpResult<TxValueIter<'_>> {
unimplemented!("Iterators in transactions not supported with LMDB backend");
let tree = *self.get_tree(tree)?;
Ok(Box::new(
tree.rev_range(&self.tx, &(low, high))?.map(tx_iter_item),
))
}
}
// ----
// ---- iterators outside transactions ----
// complicated, they must hold the transaction object
// therefore a bit of unsafe code (it is a self-referential struct)
type IteratorItem<'a> = heed::Result<(
<ByteSlice as BytesDecode<'a>>::DItem,
@ -307,12 +320,20 @@ where
where
F: FnOnce(&'a RoTxn<'a>) -> Result<I>,
{
let mut res = TxAndIterator { tx, iter: None };
let res = TxAndIterator { tx, iter: None };
let mut boxed = Box::pin(res);
let tx = unsafe { NonNull::from(&res.tx).as_ref() };
res.iter = Some(iterfun(tx)?);
// This unsafe allows us to bypass lifetime checks
let tx = unsafe { NonNull::from(&boxed.tx).as_ref() };
let iter = iterfun(tx)?;
Ok(Box::new(res))
let mut_ref = Pin::as_mut(&mut boxed);
// This unsafe allows us to write in a field of the pinned struct
unsafe {
Pin::get_unchecked_mut(mut_ref).iter = Some(iter);
}
Ok(Box::new(TxAndIteratorPin(boxed)))
}
}
@ -321,18 +342,26 @@ where
I: Iterator<Item = IteratorItem<'a>> + 'a,
{
fn drop(&mut self) {
// ensure the iterator is dropped before the RoTxn it references
drop(self.iter.take());
}
}
impl<'a, I> Iterator for TxAndIterator<'a, I>
struct TxAndIteratorPin<'a, I>(Pin<Box<TxAndIterator<'a, I>>>)
where
I: Iterator<Item = IteratorItem<'a>> + 'a;
impl<'a, I> Iterator for TxAndIteratorPin<'a, I>
where
I: Iterator<Item = IteratorItem<'a>> + 'a,
{
type Item = Result<(Value, Value)>;
fn next(&mut self) -> Option<Self::Item> {
match self.iter.as_mut().unwrap().next() {
let mut_ref = Pin::as_mut(&mut self.0);
// This unsafe allows us to mutably access the iterator field
let next = unsafe { Pin::get_unchecked_mut(mut_ref).iter.as_mut()?.next() };
match next {
None => None,
Some(Err(e)) => Some(Err(e.into())),
Some(Ok((k, v))) => Some(Ok((k.to_vec(), v.to_vec()))),
@ -340,7 +369,16 @@ where
}
}
// ----
// ---- iterators within transactions ----
fn tx_iter_item<'a>(
item: std::result::Result<(&'a [u8], &'a [u8]), heed::Error>,
) -> TxOpResult<(Vec<u8>, Vec<u8>)> {
item.map(|(k, v)| (k.to_vec(), v.to_vec()))
.map_err(|e| TxOpError(Error::from(e)))
}
// ---- utility ----
#[cfg(target_pointer_width = "64")]
pub fn recommended_map_size() -> usize {

View file

@ -11,7 +11,6 @@ use crate::{Db, Error, Result};
pub enum Engine {
Lmdb,
Sqlite,
Sled,
}
impl Engine {
@ -20,7 +19,6 @@ impl Engine {
match self {
Self::Lmdb => "lmdb",
Self::Sqlite => "sqlite",
Self::Sled => "sled",
}
}
}
@ -38,10 +36,10 @@ impl std::str::FromStr for Engine {
match text {
"lmdb" | "heed" => Ok(Self::Lmdb),
"sqlite" | "sqlite3" | "rusqlite" => Ok(Self::Sqlite),
"sled" => Ok(Self::Sled),
"sled" => Err(Error("Sled is no longer supported as a database engine. Converting your old metadata db can be done using an older Garage binary (e.g. v0.9.3).".into())),
kind => Err(Error(
format!(
"Invalid DB engine: {} (options are: lmdb, sled, sqlite)",
"Invalid DB engine: {} (options are: lmdb, sqlite)",
kind
)
.into(),
@ -53,8 +51,6 @@ impl std::str::FromStr for Engine {
pub struct OpenOpt {
pub fsync: bool,
pub lmdb_map_size: Option<usize>,
pub sled_cache_capacity: usize,
pub sled_flush_every_ms: u64,
}
impl Default for OpenOpt {
@ -62,31 +58,12 @@ impl Default for OpenOpt {
Self {
fsync: false,
lmdb_map_size: None,
sled_cache_capacity: 1024 * 1024 * 1024,
sled_flush_every_ms: 2000,
}
}
}
pub fn open_db(path: &PathBuf, engine: Engine, opt: &OpenOpt) -> Result<Db> {
match engine {
// ---- Sled DB ----
#[cfg(feature = "sled")]
Engine::Sled => {
if opt.fsync {
return Err(Error(
"`metadata_fsync = true` is not supported with the Sled database engine".into(),
));
}
info!("Opening Sled database at: {}", path.display());
let db = crate::sled_adapter::sled::Config::default()
.path(&path)
.cache_capacity(opt.sled_cache_capacity as u64)
.flush_every_ms(Some(opt.sled_flush_every_ms))
.open()?;
Ok(crate::sled_adapter::SledDb::init(db))
}
// ---- Sqlite DB ----
#[cfg(feature = "sqlite")]
Engine::Sqlite => {

View file

@ -1,274 +0,0 @@
use core::ops::Bound;
use std::cell::Cell;
use std::collections::HashMap;
use std::sync::{Arc, RwLock};
use sled::transaction::{
ConflictableTransactionError, TransactionError, Transactional, TransactionalTree,
UnabortableTransactionError,
};
use crate::{
Db, Error, IDb, ITx, ITxFn, OnCommit, Result, TxError, TxFnResult, TxOpError, TxOpResult,
TxResult, TxValueIter, Value, ValueIter,
};
pub use sled;
// -- err
impl From<sled::Error> for Error {
fn from(e: sled::Error) -> Error {
Error(format!("Sled: {}", e).into())
}
}
impl From<sled::Error> for TxOpError {
fn from(e: sled::Error) -> TxOpError {
TxOpError(e.into())
}
}
// -- db
/// Database backend built on top of a sled database.
pub struct SledDb {
/// Handle to the underlying sled database.
db: sled::Db,
/// Registry of opened trees: the `Vec` holds the tree handles (a tree's id
/// is its index in it) and the `HashMap` maps tree names to those ids.
trees: RwLock<(Vec<sled::Tree>, HashMap<String, usize>)>,
}
impl SledDb {
#[deprecated(
since = "0.9.0",
note = "The Sled database is now deprecated and will be removed in Garage v1.0. Please migrate to LMDB or Sqlite as soon as possible."
)]
pub fn init(db: sled::Db) -> Db {
tracing::warn!("-------------------- IMPORTANT WARNING !!! ----------------------");
tracing::warn!("The Sled database is now deprecated and will be removed in Garage v1.0.");
tracing::warn!("Please migrate to LMDB or Sqlite as soon as possible.");
tracing::warn!("-----------------------------------------------------------------------");
let s = Self {
db,
trees: RwLock::new((Vec::new(), HashMap::new())),
};
Db(Arc::new(s))
}
fn get_tree(&self, i: usize) -> Result<sled::Tree> {
self.trees
.read()
.unwrap()
.0
.get(i)
.cloned()
.ok_or_else(|| Error("invalid tree id".into()))
}
}
impl IDb for SledDb {
fn engine(&self) -> String {
"Sled".into()
}
fn open_tree(&self, name: &str) -> Result<usize> {
let mut trees = self.trees.write().unwrap();
if let Some(i) = trees.1.get(name) {
Ok(*i)
} else {
let tree = self.db.open_tree(name)?;
let i = trees.0.len();
trees.0.push(tree);
trees.1.insert(name.to_string(), i);
Ok(i)
}
}
fn list_trees(&self) -> Result<Vec<String>> {
let mut trees = vec![];
for name in self.db.tree_names() {
let name = std::str::from_utf8(&name)
.map_err(|e| Error(format!("{}", e).into()))?
.to_string();
if name != "__sled__default" {
trees.push(name);
}
}
Ok(trees)
}
// ----
fn get(&self, tree: usize, key: &[u8]) -> Result<Option<Value>> {
let tree = self.get_tree(tree)?;
let val = tree.get(key)?;
Ok(val.map(|x| x.to_vec()))
}
fn len(&self, tree: usize) -> Result<usize> {
let tree = self.get_tree(tree)?;
Ok(tree.len())
}
fn insert(&self, tree: usize, key: &[u8], value: &[u8]) -> Result<Option<Value>> {
let tree = self.get_tree(tree)?;
let old_val = tree.insert(key, value)?;
Ok(old_val.map(|x| x.to_vec()))
}
fn remove(&self, tree: usize, key: &[u8]) -> Result<Option<Value>> {
let tree = self.get_tree(tree)?;
let old_val = tree.remove(key)?;
Ok(old_val.map(|x| x.to_vec()))
}
fn clear(&self, tree: usize) -> Result<()> {
let tree = self.get_tree(tree)?;
tree.clear()?;
Ok(())
}
fn iter(&self, tree: usize) -> Result<ValueIter<'_>> {
let tree = self.get_tree(tree)?;
Ok(Box::new(tree.iter().map(|v| {
v.map(|(x, y)| (x.to_vec(), y.to_vec())).map_err(Into::into)
})))
}
fn iter_rev(&self, tree: usize) -> Result<ValueIter<'_>> {
let tree = self.get_tree(tree)?;
Ok(Box::new(tree.iter().rev().map(|v| {
v.map(|(x, y)| (x.to_vec(), y.to_vec())).map_err(Into::into)
})))
}
fn range<'r>(
&self,
tree: usize,
low: Bound<&'r [u8]>,
high: Bound<&'r [u8]>,
) -> Result<ValueIter<'_>> {
let tree = self.get_tree(tree)?;
Ok(Box::new(tree.range::<&'r [u8], _>((low, high)).map(|v| {
v.map(|(x, y)| (x.to_vec(), y.to_vec())).map_err(Into::into)
})))
}
fn range_rev<'r>(
&self,
tree: usize,
low: Bound<&'r [u8]>,
high: Bound<&'r [u8]>,
) -> Result<ValueIter<'_>> {
let tree = self.get_tree(tree)?;
Ok(Box::new(tree.range::<&'r [u8], _>((low, high)).rev().map(
|v| v.map(|(x, y)| (x.to_vec(), y.to_vec())).map_err(Into::into),
)))
}
// ----
fn transaction(&self, f: &dyn ITxFn) -> TxResult<OnCommit, ()> {
let trees = self.trees.read().unwrap();
let res = trees.0.transaction(|txtrees| {
let mut tx = SledTx {
trees: txtrees,
err: Cell::new(None),
};
match f.try_on(&mut tx) {
TxFnResult::Ok(on_commit) => {
assert!(tx.err.into_inner().is_none());
Ok(on_commit)
}
TxFnResult::Abort => {
assert!(tx.err.into_inner().is_none());
Err(ConflictableTransactionError::Abort(()))
}
TxFnResult::DbErr => {
let e = tx.err.into_inner().expect("No DB error");
Err(e.into())
}
}
});
match res {
Ok(on_commit) => Ok(on_commit),
Err(TransactionError::Abort(())) => Err(TxError::Abort(())),
Err(TransactionError::Storage(s)) => Err(TxError::Db(s.into())),
}
}
}
// ----
/// State of one running sled transaction, as handed to the transaction function.
struct SledTx<'a> {
/// Transactional views of the trees; `get_tree` indexes this slice by tree id.
trees: &'a [TransactionalTree],
/// Sled error recorded by `save_error`, so that `SledDb::transaction` can
/// give it back to sled when the transaction function reports a DB error.
err: Cell<Option<UnabortableTransactionError>>,
}
impl<'a> SledTx<'a> {
fn get_tree(&self, i: usize) -> TxOpResult<&TransactionalTree> {
self.trees.get(i).ok_or_else(|| {
TxOpError(Error(
"invalid tree id (it might have been openned after the transaction started)".into(),
))
})
}
fn save_error<R>(
&self,
v: std::result::Result<R, UnabortableTransactionError>,
) -> TxOpResult<R> {
match v {
Ok(x) => Ok(x),
Err(e) => {
let txt = format!("{}", e);
self.err.set(Some(e));
Err(TxOpError(Error(txt.into())))
}
}
}
}
impl<'a> ITx for SledTx<'a> {
    /// Reads the value stored at `key` within the transaction.
    fn get(&self, tree: usize, key: &[u8]) -> TxOpResult<Option<Value>> {
        let view = self.get_tree(tree)?;
        self.save_error(view.get(key))
            .map(|found| found.map(|bytes| bytes.to_vec()))
    }

    /// Not available with this backend: always panics.
    fn len(&self, _tree: usize) -> TxOpResult<usize> {
        unimplemented!(".len() in transaction not supported with Sled backend")
    }

    /// Stores `value` at `key` within the transaction and returns the value
    /// it replaced, if any.
    fn insert(&mut self, tree: usize, key: &[u8], value: &[u8]) -> TxOpResult<Option<Value>> {
        let view = self.get_tree(tree)?;
        self.save_error(view.insert(key, value))
            .map(|previous| previous.map(|bytes| bytes.to_vec()))
    }

    /// Deletes `key` within the transaction and returns the value it held, if any.
    fn remove(&mut self, tree: usize, key: &[u8]) -> TxOpResult<Option<Value>> {
        let view = self.get_tree(tree)?;
        self.save_error(view.remove(key))
            .map(|previous| previous.map(|bytes| bytes.to_vec()))
    }

    // None of the iteration functions below are available with this backend:
    // they all panic.

    /// Not available with this backend: always panics.
    fn iter(&self, _tree: usize) -> TxOpResult<TxValueIter<'_>> {
        unimplemented!("Iterators in transactions not supported with Sled backend")
    }

    /// Not available with this backend: always panics.
    fn iter_rev(&self, _tree: usize) -> TxOpResult<TxValueIter<'_>> {
        unimplemented!("Iterators in transactions not supported with Sled backend")
    }

    /// Not available with this backend: always panics.
    fn range<'r>(
        &self,
        _tree: usize,
        _low: Bound<&'r [u8]>,
        _high: Bound<&'r [u8]>,
    ) -> TxOpResult<TxValueIter<'_>> {
        unimplemented!("Iterators in transactions not supported with Sled backend")
    }

    /// Not available with this backend: always panics.
    fn range_rev<'r>(
        &self,
        _tree: usize,
        _low: Bound<&'r [u8]>,
        _high: Bound<&'r [u8]>,
    ) -> TxOpResult<TxValueIter<'_>> {
        unimplemented!("Iterators in transactions not supported with Sled backend")
    }
}

View file

@ -144,10 +144,6 @@ impl IDb for SqliteDb {
}
}
fn fast_len(&self, tree: usize) -> Result<Option<usize>> {
Ok(Some(self.len(tree)?))
}
fn insert(&self, tree: usize, key: &[u8], value: &[u8]) -> Result<Option<Value>> {
trace!("insert {}: lock db", tree);
let this = self.0.lock().unwrap();
@ -367,33 +363,64 @@ impl<'a> ITx for SqliteTx<'a> {
Ok(old_val)
}
fn iter(&self, _tree: usize) -> TxOpResult<TxValueIter<'_>> {
unimplemented!();
fn clear(&mut self, tree: usize) -> TxOpResult<()> {
let tree = self.get_tree(tree)?;
self.tx.execute(&format!("DELETE FROM {}", tree), [])?;
Ok(())
}
fn iter_rev(&self, _tree: usize) -> TxOpResult<TxValueIter<'_>> {
unimplemented!();
fn iter(&self, tree: usize) -> TxOpResult<TxValueIter<'_>> {
let tree = self.get_tree(tree)?;
let sql = format!("SELECT k, v FROM {} ORDER BY k ASC", tree);
TxValueIterator::make(self, &sql, [])
}
fn iter_rev(&self, tree: usize) -> TxOpResult<TxValueIter<'_>> {
let tree = self.get_tree(tree)?;
let sql = format!("SELECT k, v FROM {} ORDER BY k DESC", tree);
TxValueIterator::make(self, &sql, [])
}
fn range<'r>(
&self,
_tree: usize,
_low: Bound<&'r [u8]>,
_high: Bound<&'r [u8]>,
tree: usize,
low: Bound<&'r [u8]>,
high: Bound<&'r [u8]>,
) -> TxOpResult<TxValueIter<'_>> {
unimplemented!();
let tree = self.get_tree(tree)?;
let (bounds_sql, params) = bounds_sql(low, high);
let sql = format!("SELECT k, v FROM {} {} ORDER BY k ASC", tree, bounds_sql);
let params = params
.iter()
.map(|x| x as &dyn rusqlite::ToSql)
.collect::<Vec<_>>();
TxValueIterator::make::<&[&dyn rusqlite::ToSql]>(self, &sql, params.as_ref())
}
fn range_rev<'r>(
&self,
_tree: usize,
_low: Bound<&'r [u8]>,
_high: Bound<&'r [u8]>,
tree: usize,
low: Bound<&'r [u8]>,
high: Bound<&'r [u8]>,
) -> TxOpResult<TxValueIter<'_>> {
unimplemented!();
let tree = self.get_tree(tree)?;
let (bounds_sql, params) = bounds_sql(low, high);
let sql = format!("SELECT k, v FROM {} {} ORDER BY k DESC", tree, bounds_sql);
let params = params
.iter()
.map(|x| x as &dyn rusqlite::ToSql)
.collect::<Vec<_>>();
TxValueIterator::make::<&[&dyn rusqlite::ToSql]>(self, &sql, params.as_ref())
}
}
// ----
// ---- iterators outside transactions ----
// complicated, they must hold the Statement and Row objects
// therefore quite some unsafe code (it is a self-referential struct)
struct DbValueIterator<'a> {
db: MutexGuard<'a, SqliteDbInner>,
@ -417,17 +444,23 @@ impl<'a> DbValueIterator<'a> {
let mut boxed = Box::pin(res);
trace!("make iterator with sql: {}", sql);
// This unsafe allows us to bypass lifetime checks
let db = unsafe { NonNull::from(&boxed.db).as_ref() };
let stmt = db.db.prepare(sql)?;
let mut_ref = Pin::as_mut(&mut boxed);
// This unsafe allows us to write in a field of the pinned struct
unsafe {
let db = NonNull::from(&boxed.db);
let stmt = db.as_ref().db.prepare(sql)?;
let mut_ref: Pin<&mut DbValueIterator<'a>> = Pin::as_mut(&mut boxed);
Pin::get_unchecked_mut(mut_ref).stmt = Some(stmt);
}
let mut stmt = NonNull::from(&boxed.stmt);
let iter = stmt.as_mut().as_mut().unwrap().query(args)?;
// This unsafe allows us to bypass lifetime checks
let stmt = unsafe { NonNull::from(&boxed.stmt).as_mut() };
let iter = stmt.as_mut().unwrap().query(args)?;
let mut_ref: Pin<&mut DbValueIterator<'a>> = Pin::as_mut(&mut boxed);
let mut_ref = Pin::as_mut(&mut boxed);
// This unsafe allows us to write in a field of the pinned struct
unsafe {
Pin::get_unchecked_mut(mut_ref).iter = Some(iter);
}
@ -449,28 +482,73 @@ impl<'a> Iterator for DbValueIteratorPin<'a> {
type Item = Result<(Value, Value)>;
fn next(&mut self) -> Option<Self::Item> {
let next = unsafe {
let mut_ref: Pin<&mut DbValueIterator<'a>> = Pin::as_mut(&mut self.0);
Pin::get_unchecked_mut(mut_ref).iter.as_mut()?.next()
};
let row = match next {
Err(e) => return Some(Err(e.into())),
Ok(None) => return None,
Ok(Some(r)) => r,
};
let k = match row.get::<_, Vec<u8>>(0) {
Err(e) => return Some(Err(e.into())),
Ok(x) => x,
};
let v = match row.get::<_, Vec<u8>>(1) {
Err(e) => return Some(Err(e.into())),
Ok(y) => y,
};
Some(Ok((k, v)))
let mut_ref = Pin::as_mut(&mut self.0);
// This unsafe allows us to mutably access the iterator field
let next = unsafe { Pin::get_unchecked_mut(mut_ref).iter.as_mut()?.next() };
iter_next_row(next)
}
}
// ----
// ---- iterators within transactions ----
// it's the same except we don't hold a mutex guard,
// only a Statement and a Rows object
/// State of an iterator over query results inside a transaction: a prepared
/// statement together with the `Rows` cursor obtained from it.
struct TxValueIterator<'a> {
/// Prepared statement that `iter` is created from in `make`.
stmt: Statement<'a>,
/// Row cursor returned by `stmt.query(..)`; `None` until `make` fills it in,
/// and taken out again by the `Drop` impl.
iter: Option<Rows<'a>>,
/// Marker field that makes the struct `!Unpin`.
_pin: PhantomPinned,
}
impl<'a> TxValueIterator<'a> {
/// Prepares `sql` on the transaction's connection, runs it with `args`, and
/// returns a boxed iterator over the resulting rows.
///
/// The statement is first moved into a pinned box; the `Rows` cursor is then
/// created from the pinned statement and stored next to it in the same struct.
///
/// Returns an error if preparing the statement or starting the query fails.
fn make<P: rusqlite::Params>(
tx: &'a SqliteTx<'a>,
sql: &str,
args: P,
) -> TxOpResult<TxValueIter<'a>> {
let stmt = tx.tx.prepare(sql)?;
// `iter` starts empty: it can only be created once `stmt` sits at its
// final (pinned) location.
let res = TxValueIterator {
stmt,
iter: None,
_pin: PhantomPinned,
};
let mut boxed = Box::pin(res);
trace!("make iterator with sql: {}", sql);
// This unsafe allows us to bypass lifetime checks
// NOTE(review): this produces a `&mut Statement` from a shared reference
// through `NonNull`; presumably relies on `boxed` never being moved and on
// nothing else touching `stmt` while `iter` is alive — confirm.
let stmt = unsafe { NonNull::from(&boxed.stmt).as_mut() };
let iter = stmt.query(args)?;
let mut_ref = Pin::as_mut(&mut boxed);
// This unsafe allows us to write in a field of the pinned struct
unsafe {
Pin::get_unchecked_mut(mut_ref).iter = Some(iter);
}
Ok(Box::new(TxValueIteratorPin(boxed)))
}
}
impl<'a> Drop for TxValueIterator<'a> {
/// Drops the `Rows` cursor explicitly, before the struct's fields (including
/// the statement the cursor was created from) are dropped.
fn drop(&mut self) {
trace!("drop iter");
drop(self.iter.take());
}
}
/// Owning wrapper around the pinned, boxed `TxValueIterator`; this is the type
/// that implements `Iterator`.
struct TxValueIteratorPin<'a>(Pin<Box<TxValueIterator<'a>>>);
impl<'a> Iterator for TxValueIteratorPin<'a> {
type Item = TxOpResult<(Value, Value)>;
/// Advances the row cursor and converts the next row into a `(key, value)`
/// pair via `iter_next_row`. Returns `None` when the cursor is exhausted, or
/// when no cursor is stored (`iter` is `None`).
fn next(&mut self) -> Option<Self::Item> {
let mut_ref = Pin::as_mut(&mut self.0);
// This unsafe allows us to mutably access the iterator field
let next = unsafe { Pin::get_unchecked_mut(mut_ref).iter.as_mut()?.next() };
iter_next_row(next)
}
}
// ---- utility ----
fn bounds_sql<'r>(low: Bound<&'r [u8]>, high: Bound<&'r [u8]>) -> (String, Vec<Vec<u8>>) {
let mut sql = String::new();
@ -510,3 +588,25 @@ fn bounds_sql<'r>(low: Bound<&'r [u8]>, high: Bound<&'r [u8]>) -> (String, Vec<V
(sql, params)
}
/// Turns the result of advancing a rusqlite row cursor into an iterator item.
///
/// - `Ok(None)` (no more rows) becomes `None`.
/// - An error from the cursor, or from reading column 0 (key) or column 1
///   (value) as a byte vector, becomes `Some(Err(..))`, converted into `E`.
/// - Otherwise the result is `Some(Ok((key, value)))`.
fn iter_next_row<E>(
    next_row: rusqlite::Result<Option<&rusqlite::Row>>,
) -> Option<std::result::Result<(Value, Value), E>>
where
    E: From<rusqlite::Error>,
{
    // `transpose` moves the "no more rows" case to the outer Option so that
    // `?` can return `None` for it directly.
    let fetched = next_row.transpose()?;
    let decoded = fetched.and_then(|row| {
        let key = row.get::<_, Vec<u8>>(0)?;
        let value = row.get::<_, Vec<u8>>(1)?;
        Ok((key, value))
    });
    Some(decoded.map_err(E::from))
}

View file

@ -10,8 +10,13 @@ fn test_suite(db: Db) {
let vb: &[u8] = &b"plip"[..];
let vc: &[u8] = &b"plup"[..];
// ---- test simple insert/delete ----
assert!(tree.insert(ka, va).unwrap().is_none());
assert_eq!(tree.get(ka).unwrap().unwrap(), va);
assert_eq!(tree.len().unwrap(), 1);
// ---- test transaction logic ----
let res = db.transaction::<_, (), _>(|tx| {
assert_eq!(tx.get(&tree, ka).unwrap().unwrap(), va);
@ -37,6 +42,8 @@ fn test_suite(db: Db) {
assert!(matches!(res, Err(TxError::Abort(42))));
assert_eq!(tree.get(ka).unwrap().unwrap(), vb);
// ---- test iteration outside of transactions ----
let mut iter = tree.iter().unwrap();
let next = iter.next().unwrap().unwrap();
assert_eq!((next.0.as_ref(), next.1.as_ref()), (ka, vb));
@ -73,6 +80,48 @@ fn test_suite(db: Db) {
assert_eq!((next.0.as_ref(), next.1.as_ref()), (ka, vb));
assert!(iter.next().is_none());
drop(iter);
// ---- test iteration within transactions ----
db.transaction::<_, (), _>(|tx| {
let mut iter = tx.iter(&tree).unwrap();
let next = iter.next().unwrap().unwrap();
assert_eq!((next.0.as_ref(), next.1.as_ref()), (ka, vb));
let next = iter.next().unwrap().unwrap();
assert_eq!((next.0.as_ref(), next.1.as_ref()), (kb, vc));
assert!(iter.next().is_none());
Ok(())
})
.unwrap();
db.transaction::<_, (), _>(|tx| {
let mut iter = tx.range(&tree, kint..).unwrap();
let next = iter.next().unwrap().unwrap();
assert_eq!((next.0.as_ref(), next.1.as_ref()), (kb, vc));
assert!(iter.next().is_none());
Ok(())
})
.unwrap();
db.transaction::<_, (), _>(|tx| {
let mut iter = tx.range_rev(&tree, ..kint).unwrap();
let next = iter.next().unwrap().unwrap();
assert_eq!((next.0.as_ref(), next.1.as_ref()), (ka, vb));
assert!(iter.next().is_none());
Ok(())
})
.unwrap();
db.transaction::<_, (), _>(|tx| {
let mut iter = tx.iter_rev(&tree).unwrap();
let next = iter.next().unwrap().unwrap();
assert_eq!((next.0.as_ref(), next.1.as_ref()), (kb, vc));
let next = iter.next().unwrap().unwrap();
assert_eq!((next.0.as_ref(), next.1.as_ref()), (ka, vb));
assert!(iter.next().is_none());
Ok(())
})
.unwrap();
}
#[test]
@ -90,17 +139,6 @@ fn test_lmdb_db() {
drop(path);
}
/// Runs the shared backend test suite against a sled database opened in a
/// fresh temporary directory.
#[test]
#[cfg(feature = "sled")]
fn test_sled_db() {
    use crate::sled_adapter::SledDb;

    let tmp_dir = mktemp::Temp::new_dir().unwrap();
    let sled_handle = sled::open(tmp_dir.to_path_buf()).unwrap();
    test_suite(SledDb::init(sled_handle));
    // Keep the temporary directory alive until the suite has finished.
    drop(tmp_dir);
}
#[test]
#[cfg(feature = "sqlite")]
fn test_sqlite_db() {

View file

@ -80,12 +80,11 @@ k2v-client.workspace = true
[features]
default = [ "bundled-libs", "metrics", "sled", "lmdb", "sqlite", "k2v" ]
default = [ "bundled-libs", "metrics", "lmdb", "sqlite", "k2v" ]
k2v = [ "garage_util/k2v", "garage_api/k2v" ]
# Database engines, Sled is still our default even though we don't like it
sled = [ "garage_model/sled" ]
# Database engines
lmdb = [ "garage_model/lmdb" ]
sqlite = [ "garage_model/sqlite" ]

View file

@ -197,11 +197,11 @@ impl AdminRpcHandler {
// Gather table statistics
let mut table = vec![" Table\tItems\tMklItems\tMklTodo\tGcTodo".into()];
table.push(self.gather_table_stats(&self.garage.bucket_table, opt.detailed)?);
table.push(self.gather_table_stats(&self.garage.key_table, opt.detailed)?);
table.push(self.gather_table_stats(&self.garage.object_table, opt.detailed)?);
table.push(self.gather_table_stats(&self.garage.version_table, opt.detailed)?);
table.push(self.gather_table_stats(&self.garage.block_ref_table, opt.detailed)?);
table.push(self.gather_table_stats(&self.garage.bucket_table)?);
table.push(self.gather_table_stats(&self.garage.key_table)?);
table.push(self.gather_table_stats(&self.garage.object_table)?);
table.push(self.gather_table_stats(&self.garage.version_table)?);
table.push(self.gather_table_stats(&self.garage.block_ref_table)?);
write!(
&mut ret,
"\nTable stats:\n{}",
@ -211,15 +211,7 @@ impl AdminRpcHandler {
// Gather block manager statistics
writeln!(&mut ret, "\nBlock manager stats:").unwrap();
let rc_len = if opt.detailed {
self.garage.block_manager.rc_len()?.to_string()
} else {
self.garage
.block_manager
.rc_fast_len()?
.map(|x| x.to_string())
.unwrap_or_else(|| "NC".into())
};
let rc_len = self.garage.block_manager.rc_len()?.to_string();
writeln!(
&mut ret,
@ -240,10 +232,6 @@ impl AdminRpcHandler {
)
.unwrap();
if !opt.detailed {
writeln!(&mut ret, "\nIf values are missing above (marked as NC), consider adding the --detailed flag (this will be slow).").unwrap();
}
if !opt.skip_global {
write!(&mut ret, "\n{}", self.gather_cluster_stats()).unwrap();
}
@ -345,34 +333,13 @@ impl AdminRpcHandler {
ret
}
fn gather_table_stats<F, R>(
&self,
t: &Arc<Table<F, R>>,
detailed: bool,
) -> Result<String, Error>
fn gather_table_stats<F, R>(&self, t: &Arc<Table<F, R>>) -> Result<String, Error>
where
F: TableSchema + 'static,
R: TableReplication + 'static,
{
let (data_len, mkl_len) = if detailed {
(
t.data.store.len().map_err(GarageError::from)?.to_string(),
t.merkle_updater.merkle_tree_len()?.to_string(),
)
} else {
(
t.data
.store
.fast_len()
.map_err(GarageError::from)?
.map(|x| x.to_string())
.unwrap_or_else(|| "NC".into()),
t.merkle_updater
.merkle_tree_fast_len()?
.map(|x| x.to_string())
.unwrap_or_else(|| "NC".into()),
)
};
let data_len = t.data.store.len().map_err(GarageError::from)?.to_string();
let mkl_len = t.merkle_updater.merkle_tree_len()?.to_string();
Ok(format!(
" {}\t{}\t{}\t{}\t{}",

View file

@ -11,7 +11,7 @@ pub struct ConvertDbOpt {
/// https://garagehq.deuxfleurs.fr/documentation/reference-manual/configuration/#db-engine-since-v0-8-0)
#[structopt(short = "i")]
input_path: PathBuf,
/// Input database engine (sled, lmdb or sqlite; limited by db engines
/// Input database engine (lmdb or sqlite; limited by db engines
/// enabled in this build)
#[structopt(short = "a")]
input_engine: Engine,

View file

@ -531,10 +531,6 @@ pub struct StatsOpt {
#[structopt(short = "a", long = "all-nodes")]
pub all_nodes: bool,
/// Gather detailed statistics (this can be long)
#[structopt(short = "d", long = "detailed")]
pub detailed: bool,
/// Don't show global cluster stats (internal use in RPC)
#[structopt(skip)]
#[serde(default)]

View file

@ -18,8 +18,8 @@ compile_error!("Either bundled-libs or system-libs Cargo feature must be enabled
#[cfg(all(feature = "bundled-libs", feature = "system-libs"))]
compile_error!("Only one of bundled-libs and system-libs Cargo features must be enabled");
#[cfg(not(any(feature = "lmdb", feature = "sled", feature = "sqlite")))]
compile_error!("Must activate the Cargo feature for at least one DB engine: lmdb, sled or sqlite.");
#[cfg(not(any(feature = "lmdb", feature = "sqlite")))]
compile_error!("Must activate the Cargo feature for at least one DB engine: lmdb or sqlite.");
use std::net::SocketAddr;
use std::path::PathBuf;
@ -72,8 +72,6 @@ async fn main() {
let features = &[
#[cfg(feature = "k2v")]
"k2v",
#[cfg(feature = "sled")]
"sled",
#[cfg(feature = "lmdb")]
"lmdb",
#[cfg(feature = "sqlite")]

View file

@ -42,8 +42,7 @@ tokio.workspace = true
opentelemetry.workspace = true
[features]
default = [ "sled", "lmdb", "sqlite" ]
default = [ "lmdb", "sqlite" ]
k2v = [ "garage_util/k2v" ]
lmdb = [ "garage_db/lmdb" ]
sled = [ "garage_db/sled" ]
sqlite = [ "garage_db/sqlite" ]

View file

@ -118,9 +118,6 @@ impl Garage {
.ok_or_message("Invalid `db_engine` value in configuration file")?;
let mut db_path = config.metadata_dir.clone();
match db_engine {
db::Engine::Sled => {
db_path.push("db");
}
db::Engine::Sqlite => {
db_path.push("db.sqlite");
}
@ -134,8 +131,6 @@ impl Garage {
v if v == usize::default() => None,
v => Some(v),
},
sled_cache_capacity: config.sled_cache_capacity,
sled_flush_every_ms: config.sled_flush_every_ms,
};
let db = db::open_db(&db_path, db_engine, &db_opt)
.ok_or_message("Unable to open metadata db")?;

View file

@ -121,13 +121,7 @@ impl Worker for LifecycleWorker {
mpu_aborted,
..
} => {
let n_objects = self
.garage
.object_table
.data
.store
.fast_len()
.unwrap_or(None);
let n_objects = self.garage.object_table.data.store.len().ok();
let progress = match n_objects {
None => "...".to_string(),
Some(total) => format!(

View file

@ -6,7 +6,6 @@ use serde_bytes::ByteBuf;
use tokio::sync::Notify;
use garage_db as db;
use garage_db::counted_tree_hack::CountedTree;
use garage_util::data::*;
use garage_util::error::*;
@ -36,7 +35,7 @@ pub struct TableData<F: TableSchema, R: TableReplication> {
pub(crate) insert_queue: db::Tree,
pub(crate) insert_queue_notify: Arc<Notify>,
pub(crate) gc_todo: CountedTree,
pub(crate) gc_todo: db::Tree,
pub(crate) metrics: TableMetrics,
}
@ -61,7 +60,6 @@ impl<F: TableSchema, R: TableReplication> TableData<F, R> {
let gc_todo = db
.open_tree(format!("{}:gc_todo_v2", F::TABLE_NAME))
.expect("Unable to open GC DB tree");
let gc_todo = CountedTree::new(gc_todo).expect("Cannot count gc_todo_v2");
let metrics = TableMetrics::new(
F::TABLE_NAME,
@ -370,6 +368,6 @@ impl<F: TableSchema, R: TableReplication> TableData<F, R> {
}
pub fn gc_todo_len(&self) -> Result<usize, Error> {
Ok(self.gc_todo.len())
Ok(self.gc_todo.len()?)
}
}

View file

@ -10,7 +10,7 @@ use serde_bytes::ByteBuf;
use futures::future::join_all;
use tokio::sync::watch;
use garage_db::counted_tree_hack::CountedTree;
use garage_db as db;
use garage_util::background::*;
use garage_util::data::*;
@ -334,9 +334,9 @@ impl<F: TableSchema, R: TableReplication> Worker for GcWorker<F, R> {
}
}
/// An entry stored in the gc_todo Sled tree associated with the table
/// An entry stored in the gc_todo db tree associated with the table
/// Contains helper function for parsing, saving, and removing
/// such entry in Sled
/// such entry in the db
///
/// Format of an entry:
/// - key = 8 bytes: timestamp of tombstone
@ -353,7 +353,7 @@ pub(crate) struct GcTodoEntry {
}
impl GcTodoEntry {
/// Creates a new GcTodoEntry (not saved in Sled) from its components:
/// Creates a new GcTodoEntry (not saved in the db) from its components:
/// the key of an entry in the table, and the hash of the associated
/// serialized value
pub(crate) fn new(key: Vec<u8>, value_hash: Hash) -> Self {
@ -376,7 +376,7 @@ impl GcTodoEntry {
}
/// Saves the GcTodoEntry in the gc_todo tree
pub(crate) fn save(&self, gc_todo_tree: &CountedTree) -> Result<(), Error> {
pub(crate) fn save(&self, gc_todo_tree: &db::Tree) -> Result<(), Error> {
gc_todo_tree.insert(self.todo_table_key(), self.value_hash.as_slice())?;
Ok(())
}
@ -386,12 +386,14 @@ impl GcTodoEntry {
/// This is useful to remove a todo entry only under the condition
/// that it has not changed since the time it was read, i.e.
/// what we have to do is still the same
pub(crate) fn remove_if_equal(&self, gc_todo_tree: &CountedTree) -> Result<(), Error> {
gc_todo_tree.compare_and_swap::<_, _, &[u8]>(
&self.todo_table_key(),
Some(self.value_hash),
None,
)?;
pub(crate) fn remove_if_equal(&self, gc_todo_tree: &db::Tree) -> Result<(), Error> {
gc_todo_tree.db().transaction(|txn| {
let key = self.todo_table_key();
if txn.get(gc_todo_tree, &key)?.as_deref() == Some(self.value_hash.as_slice()) {
txn.remove(gc_todo_tree, &key)?;
}
Ok(())
})?;
Ok(())
}

View file

@ -31,14 +31,14 @@ pub struct MerkleUpdater<F: TableSchema, R: TableReplication> {
// - value = the hash of the full serialized item, if present,
// or an empty vec if item is absent (deleted)
// Fields in data:
// pub(crate) merkle_todo: sled::Tree,
// pub(crate) merkle_todo: db::Tree,
// pub(crate) merkle_todo_notify: Notify,
// Content of the merkle tree: items where
// - key = .bytes() for MerkleNodeKey
// - value = serialization of a MerkleNode, assumed to be MerkleNode::empty if not found
// Field in data:
// pub(crate) merkle_tree: sled::Tree,
// pub(crate) merkle_tree: db::Tree,
empty_node_hash: Hash,
}
@ -291,10 +291,6 @@ impl<F: TableSchema, R: TableReplication> MerkleUpdater<F, R> {
Ok(self.data.merkle_tree.len()?)
}
pub fn merkle_tree_fast_len(&self) -> Result<Option<usize>, Error> {
Ok(self.data.merkle_tree.fast_len()?)
}
pub fn todo_len(&self) -> Result<usize, Error> {
Ok(self.data.merkle_todo.len()?)
}

View file

@ -1,7 +1,6 @@
use opentelemetry::{global, metrics::*, KeyValue};
use garage_db as db;
use garage_db::counted_tree_hack::CountedTree;
/// TableMetrics references all counters used for metrics
pub struct TableMetrics {
@ -27,7 +26,7 @@ impl TableMetrics {
store: db::Tree,
merkle_tree: db::Tree,
merkle_todo: db::Tree,
gc_todo: CountedTree,
gc_todo: db::Tree,
) -> Self {
let meter = global::meter(table_name);
TableMetrics {
@ -35,9 +34,9 @@ impl TableMetrics {
.u64_value_observer(
"table.size",
move |observer| {
if let Ok(Some(v)) = store.fast_len() {
if let Ok(value) = store.len() {
observer.observe(
v as u64,
value as u64,
&[KeyValue::new("table_name", table_name)],
);
}
@ -49,9 +48,9 @@ impl TableMetrics {
.u64_value_observer(
"table.merkle_tree_size",
move |observer| {
if let Ok(Some(v)) = merkle_tree.fast_len() {
if let Ok(value) = merkle_tree.len() {
observer.observe(
v as u64,
value as u64,
&[KeyValue::new("table_name", table_name)],
);
}
@ -77,10 +76,12 @@ impl TableMetrics {
.u64_value_observer(
"table.gc_todo_queue_length",
move |observer| {
observer.observe(
gc_todo.len() as u64,
&[KeyValue::new("table_name", table_name)],
);
if let Ok(value) = gc_todo.len() {
observer.observe(
value as u64,
&[KeyValue::new("table_name", table_name)],
);
}
},
)
.with_description("Table garbage collector TODO queue length")

View file

@ -87,20 +87,10 @@ pub struct Config {
pub kubernetes_discovery: Option<KubernetesDiscoveryConfig>,
// -- DB
/// Database engine to use for metadata (options: sled, sqlite, lmdb)
/// Database engine to use for metadata (options: sqlite, lmdb)
#[serde(default = "default_db_engine")]
pub db_engine: String,
/// Sled cache size, in bytes
#[serde(
deserialize_with = "deserialize_capacity",
default = "default_sled_cache_capacity"
)]
pub sled_cache_capacity: usize,
/// Sled flush interval in milliseconds
#[serde(default = "default_sled_flush_every_ms")]
pub sled_flush_every_ms: u64,
/// LMDB map size
#[serde(deserialize_with = "deserialize_capacity", default)]
pub lmdb_map_size: usize,
@ -246,13 +236,6 @@ fn default_db_engine() -> String {
"lmdb".into()
}
fn default_sled_cache_capacity() -> usize {
128 * 1024 * 1024
}
fn default_sled_flush_every_ms() -> u64 {
2000
}
fn default_block_size() -> usize {
1048576
}