Compare commits
No commits in common. "81191d2d92e58ff82ace0f4d82b275c157673ade" and "2128b5febd97eb6c95e3bdd380dcba39b860c0e3" have entirely different histories.
81191d2d92
...
2128b5febd
38 changed files with 826 additions and 344 deletions
45
Cargo.lock
generated
45
Cargo.lock
generated
|
@ -921,6 +921,15 @@ dependencies = [
|
|||
"crossbeam-utils",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "crossbeam-epoch"
|
||||
version = "0.9.18"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "5b82ac4a3c2ca9c3460964f020e1402edd5753411d7737aa39c3714ad1b5420e"
|
||||
dependencies = [
|
||||
"crossbeam-utils",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "crossbeam-queue"
|
||||
version = "0.3.11"
|
||||
|
@ -1213,6 +1222,16 @@ dependencies = [
|
|||
name = "format_table"
|
||||
version = "0.1.1"
|
||||
|
||||
[[package]]
|
||||
name = "fs2"
|
||||
version = "0.4.3"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "9564fc758e15025b46aa6643b1b77d047d1a56a1aea6e01002ac0c7026876213"
|
||||
dependencies = [
|
||||
"libc",
|
||||
"winapi",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "futures"
|
||||
version = "0.3.30"
|
||||
|
@ -1302,6 +1321,15 @@ dependencies = [
|
|||
"slab",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "fxhash"
|
||||
version = "0.2.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "c31b6d751ae2c7f11320402d34e41349dd1016f8d5d45e48c4312bc8625af50c"
|
||||
dependencies = [
|
||||
"byteorder",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "garage"
|
||||
version = "0.10.0"
|
||||
|
@ -1444,6 +1472,7 @@ dependencies = [
|
|||
"hexdump",
|
||||
"mktemp",
|
||||
"rusqlite",
|
||||
"sled",
|
||||
"tracing",
|
||||
]
|
||||
|
||||
|
@ -3802,6 +3831,22 @@ dependencies = [
|
|||
"autocfg",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "sled"
|
||||
version = "0.34.7"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "7f96b4737c2ce5987354855aed3797279def4ebf734436c6aa4552cf8e169935"
|
||||
dependencies = [
|
||||
"crc32fast",
|
||||
"crossbeam-epoch",
|
||||
"crossbeam-utils",
|
||||
"fs2",
|
||||
"fxhash",
|
||||
"libc",
|
||||
"log",
|
||||
"parking_lot 0.11.2",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "smallvec"
|
||||
version = "1.13.1"
|
||||
|
|
65
Cargo.nix
65
Cargo.nix
|
@ -34,7 +34,7 @@ args@{
|
|||
ignoreLockHash,
|
||||
}:
|
||||
let
|
||||
nixifiedLockHash = "23e1504df44ec18cfc5c872c858154304c16da2a6c1f7c9f06608ef833815f30";
|
||||
nixifiedLockHash = "c3296a54f1c6f385e0d4a4a937734f1fe0fee4405b44d7462249d72675f7ac40";
|
||||
workspaceSrc = if args.workspaceSrc == null then ./. else args.workspaceSrc;
|
||||
currentLockHash = builtins.hashFile "sha256" (workspaceSrc + /Cargo.lock);
|
||||
lockHashIgnored = if ignoreLockHash
|
||||
|
@ -1315,6 +1315,21 @@ in
|
|||
};
|
||||
});
|
||||
|
||||
"registry+https://github.com/rust-lang/crates.io-index".crossbeam-epoch."0.9.18" = overridableMkRustCrate (profileName: rec {
|
||||
name = "crossbeam-epoch";
|
||||
version = "0.9.18";
|
||||
registry = "registry+https://github.com/rust-lang/crates.io-index";
|
||||
src = fetchCratesIo { inherit name version; sha256 = "5b82ac4a3c2ca9c3460964f020e1402edd5753411d7737aa39c3714ad1b5420e"; };
|
||||
features = builtins.concatLists [
|
||||
(lib.optional (rootFeatures' ? "garage/default" || rootFeatures' ? "garage/sled" || rootFeatures' ? "garage_db/default" || rootFeatures' ? "garage_db/sled" || rootFeatures' ? "garage_model/default" || rootFeatures' ? "garage_model/sled") "alloc")
|
||||
(lib.optional (rootFeatures' ? "garage/default" || rootFeatures' ? "garage/sled" || rootFeatures' ? "garage_db/default" || rootFeatures' ? "garage_db/sled" || rootFeatures' ? "garage_model/default" || rootFeatures' ? "garage_model/sled") "default")
|
||||
(lib.optional (rootFeatures' ? "garage/default" || rootFeatures' ? "garage/sled" || rootFeatures' ? "garage_db/default" || rootFeatures' ? "garage_db/sled" || rootFeatures' ? "garage_model/default" || rootFeatures' ? "garage_model/sled") "std")
|
||||
];
|
||||
dependencies = {
|
||||
${ if rootFeatures' ? "garage/default" || rootFeatures' ? "garage/sled" || rootFeatures' ? "garage_db/default" || rootFeatures' ? "garage_db/sled" || rootFeatures' ? "garage_model/default" || rootFeatures' ? "garage_model/sled" then "crossbeam_utils" else null } = (rustPackages."registry+https://github.com/rust-lang/crates.io-index".crossbeam-utils."0.8.19" { inherit profileName; }).out;
|
||||
};
|
||||
});
|
||||
|
||||
"registry+https://github.com/rust-lang/crates.io-index".crossbeam-queue."0.3.11" = overridableMkRustCrate (profileName: rec {
|
||||
name = "crossbeam-queue";
|
||||
version = "0.3.11";
|
||||
|
@ -1336,6 +1351,7 @@ in
|
|||
registry = "registry+https://github.com/rust-lang/crates.io-index";
|
||||
src = fetchCratesIo { inherit name version; sha256 = "248e3bacc7dc6baa3b21e405ee045c3047101a49145e7e9eca583ab4c2ca5345"; };
|
||||
features = builtins.concatLists [
|
||||
(lib.optional (rootFeatures' ? "garage/default" || rootFeatures' ? "garage/sled" || rootFeatures' ? "garage_db/default" || rootFeatures' ? "garage_db/sled" || rootFeatures' ? "garage_model/default" || rootFeatures' ? "garage_model/sled") "default")
|
||||
[ "std" ]
|
||||
];
|
||||
});
|
||||
|
@ -1759,6 +1775,17 @@ in
|
|||
src = fetchCrateLocal (workspaceSrc + "/src/format-table");
|
||||
});
|
||||
|
||||
"registry+https://github.com/rust-lang/crates.io-index".fs2."0.4.3" = overridableMkRustCrate (profileName: rec {
|
||||
name = "fs2";
|
||||
version = "0.4.3";
|
||||
registry = "registry+https://github.com/rust-lang/crates.io-index";
|
||||
src = fetchCratesIo { inherit name version; sha256 = "9564fc758e15025b46aa6643b1b77d047d1a56a1aea6e01002ac0c7026876213"; };
|
||||
dependencies = {
|
||||
${ if (rootFeatures' ? "garage/default" || rootFeatures' ? "garage/sled" || rootFeatures' ? "garage_db/default" || rootFeatures' ? "garage_db/sled" || rootFeatures' ? "garage_model/default" || rootFeatures' ? "garage_model/sled") && hostPlatform.isUnix then "libc" else null } = (rustPackages."registry+https://github.com/rust-lang/crates.io-index".libc."0.2.153" { inherit profileName; }).out;
|
||||
${ if (rootFeatures' ? "garage/default" || rootFeatures' ? "garage/sled" || rootFeatures' ? "garage_db/default" || rootFeatures' ? "garage_db/sled" || rootFeatures' ? "garage_model/default" || rootFeatures' ? "garage_model/sled") && hostPlatform.isWindows then "winapi" else null } = (rustPackages."registry+https://github.com/rust-lang/crates.io-index".winapi."0.3.9" { inherit profileName; }).out;
|
||||
};
|
||||
});
|
||||
|
||||
"registry+https://github.com/rust-lang/crates.io-index".futures."0.3.30" = overridableMkRustCrate (profileName: rec {
|
||||
name = "futures";
|
||||
version = "0.3.30";
|
||||
|
@ -1910,6 +1937,16 @@ in
|
|||
};
|
||||
});
|
||||
|
||||
"registry+https://github.com/rust-lang/crates.io-index".fxhash."0.2.1" = overridableMkRustCrate (profileName: rec {
|
||||
name = "fxhash";
|
||||
version = "0.2.1";
|
||||
registry = "registry+https://github.com/rust-lang/crates.io-index";
|
||||
src = fetchCratesIo { inherit name version; sha256 = "c31b6d751ae2c7f11320402d34e41349dd1016f8d5d45e48c4312bc8625af50c"; };
|
||||
dependencies = {
|
||||
${ if rootFeatures' ? "garage/default" || rootFeatures' ? "garage/sled" || rootFeatures' ? "garage_db/default" || rootFeatures' ? "garage_db/sled" || rootFeatures' ? "garage_model/default" || rootFeatures' ? "garage_model/sled" then "byteorder" else null } = (rustPackages."registry+https://github.com/rust-lang/crates.io-index".byteorder."1.5.0" { inherit profileName; }).out;
|
||||
};
|
||||
});
|
||||
|
||||
"unknown".garage."0.10.0" = overridableMkRustCrate (profileName: rec {
|
||||
name = "garage";
|
||||
version = "0.10.0";
|
||||
|
@ -1926,6 +1963,7 @@ in
|
|||
(lib.optional (rootFeatures' ? "garage/opentelemetry-otlp" || rootFeatures' ? "garage/telemetry-otlp") "opentelemetry-otlp")
|
||||
(lib.optional (rootFeatures' ? "garage/default" || rootFeatures' ? "garage/metrics" || rootFeatures' ? "garage/opentelemetry-prometheus") "opentelemetry-prometheus")
|
||||
(lib.optional (rootFeatures' ? "garage/default" || rootFeatures' ? "garage/metrics" || rootFeatures' ? "garage/prometheus") "prometheus")
|
||||
(lib.optional (rootFeatures' ? "garage/default" || rootFeatures' ? "garage/sled") "sled")
|
||||
(lib.optional (rootFeatures' ? "garage/default" || rootFeatures' ? "garage/sqlite") "sqlite")
|
||||
(lib.optional (rootFeatures' ? "garage/system-libs") "system-libs")
|
||||
(lib.optional (rootFeatures' ? "garage/telemetry-otlp") "telemetry-otlp")
|
||||
|
@ -2089,6 +2127,7 @@ in
|
|||
(lib.optional (rootFeatures' ? "garage/default" || rootFeatures' ? "garage/lmdb" || rootFeatures' ? "garage_db/default" || rootFeatures' ? "garage_db/heed" || rootFeatures' ? "garage_db/lmdb" || rootFeatures' ? "garage_model/default" || rootFeatures' ? "garage_model/lmdb") "heed")
|
||||
(lib.optional (rootFeatures' ? "garage/default" || rootFeatures' ? "garage/lmdb" || rootFeatures' ? "garage_db/default" || rootFeatures' ? "garage_db/lmdb" || rootFeatures' ? "garage_model/default" || rootFeatures' ? "garage_model/lmdb") "lmdb")
|
||||
(lib.optional (rootFeatures' ? "garage/default" || rootFeatures' ? "garage/sqlite" || rootFeatures' ? "garage_db/default" || rootFeatures' ? "garage_db/rusqlite" || rootFeatures' ? "garage_db/sqlite" || rootFeatures' ? "garage_model/default" || rootFeatures' ? "garage_model/sqlite") "rusqlite")
|
||||
(lib.optional (rootFeatures' ? "garage/default" || rootFeatures' ? "garage/sled" || rootFeatures' ? "garage_db/default" || rootFeatures' ? "garage_db/sled" || rootFeatures' ? "garage_model/default" || rootFeatures' ? "garage_model/sled") "sled")
|
||||
(lib.optional (rootFeatures' ? "garage/default" || rootFeatures' ? "garage/sqlite" || rootFeatures' ? "garage_db/default" || rootFeatures' ? "garage_db/sqlite" || rootFeatures' ? "garage_model/default" || rootFeatures' ? "garage_model/sqlite") "sqlite")
|
||||
];
|
||||
dependencies = {
|
||||
|
@ -2096,6 +2135,7 @@ in
|
|||
${ if rootFeatures' ? "garage/default" || rootFeatures' ? "garage/lmdb" || rootFeatures' ? "garage_db/default" || rootFeatures' ? "garage_db/heed" || rootFeatures' ? "garage_db/lmdb" || rootFeatures' ? "garage_model/default" || rootFeatures' ? "garage_model/lmdb" then "heed" else null } = (rustPackages."registry+https://github.com/rust-lang/crates.io-index".heed."0.11.0" { inherit profileName; }).out;
|
||||
hexdump = (rustPackages."registry+https://github.com/rust-lang/crates.io-index".hexdump."0.1.1" { inherit profileName; }).out;
|
||||
${ if rootFeatures' ? "garage/bundled-libs" || rootFeatures' ? "garage/default" || rootFeatures' ? "garage/sqlite" || rootFeatures' ? "garage_db/bundled-libs" || rootFeatures' ? "garage_db/default" || rootFeatures' ? "garage_db/rusqlite" || rootFeatures' ? "garage_db/sqlite" || rootFeatures' ? "garage_model/default" || rootFeatures' ? "garage_model/sqlite" then "rusqlite" else null } = (rustPackages."registry+https://github.com/rust-lang/crates.io-index".rusqlite."0.30.0" { inherit profileName; }).out;
|
||||
${ if rootFeatures' ? "garage/default" || rootFeatures' ? "garage/sled" || rootFeatures' ? "garage_db/default" || rootFeatures' ? "garage_db/sled" || rootFeatures' ? "garage_model/default" || rootFeatures' ? "garage_model/sled" then "sled" else null } = (rustPackages."registry+https://github.com/rust-lang/crates.io-index".sled."0.34.7" { inherit profileName; }).out;
|
||||
tracing = (rustPackages."registry+https://github.com/rust-lang/crates.io-index".tracing."0.1.40" { inherit profileName; }).out;
|
||||
};
|
||||
devDependencies = {
|
||||
|
@ -2112,6 +2152,7 @@ in
|
|||
(lib.optional (rootFeatures' ? "garage_model/default") "default")
|
||||
(lib.optional (rootFeatures' ? "garage/default" || rootFeatures' ? "garage/k2v" || rootFeatures' ? "garage_api/k2v" || rootFeatures' ? "garage_model/k2v") "k2v")
|
||||
(lib.optional (rootFeatures' ? "garage/default" || rootFeatures' ? "garage/lmdb" || rootFeatures' ? "garage_model/default" || rootFeatures' ? "garage_model/lmdb") "lmdb")
|
||||
(lib.optional (rootFeatures' ? "garage/default" || rootFeatures' ? "garage/sled" || rootFeatures' ? "garage_model/default" || rootFeatures' ? "garage_model/sled") "sled")
|
||||
(lib.optional (rootFeatures' ? "garage/default" || rootFeatures' ? "garage/sqlite" || rootFeatures' ? "garage_model/default" || rootFeatures' ? "garage_model/sqlite") "sqlite")
|
||||
];
|
||||
dependencies = {
|
||||
|
@ -5418,6 +5459,27 @@ in
|
|||
};
|
||||
});
|
||||
|
||||
"registry+https://github.com/rust-lang/crates.io-index".sled."0.34.7" = overridableMkRustCrate (profileName: rec {
|
||||
name = "sled";
|
||||
version = "0.34.7";
|
||||
registry = "registry+https://github.com/rust-lang/crates.io-index";
|
||||
src = fetchCratesIo { inherit name version; sha256 = "7f96b4737c2ce5987354855aed3797279def4ebf734436c6aa4552cf8e169935"; };
|
||||
features = builtins.concatLists [
|
||||
(lib.optional (rootFeatures' ? "garage/default" || rootFeatures' ? "garage/sled" || rootFeatures' ? "garage_db/default" || rootFeatures' ? "garage_db/sled" || rootFeatures' ? "garage_model/default" || rootFeatures' ? "garage_model/sled") "default")
|
||||
(lib.optional (rootFeatures' ? "garage/default" || rootFeatures' ? "garage/sled" || rootFeatures' ? "garage_db/default" || rootFeatures' ? "garage_db/sled" || rootFeatures' ? "garage_model/default" || rootFeatures' ? "garage_model/sled") "no_metrics")
|
||||
];
|
||||
dependencies = {
|
||||
${ if rootFeatures' ? "garage/default" || rootFeatures' ? "garage/sled" || rootFeatures' ? "garage_db/default" || rootFeatures' ? "garage_db/sled" || rootFeatures' ? "garage_model/default" || rootFeatures' ? "garage_model/sled" then "crc32fast" else null } = (rustPackages."registry+https://github.com/rust-lang/crates.io-index".crc32fast."1.3.2" { inherit profileName; }).out;
|
||||
${ if rootFeatures' ? "garage/default" || rootFeatures' ? "garage/sled" || rootFeatures' ? "garage_db/default" || rootFeatures' ? "garage_db/sled" || rootFeatures' ? "garage_model/default" || rootFeatures' ? "garage_model/sled" then "crossbeam_epoch" else null } = (rustPackages."registry+https://github.com/rust-lang/crates.io-index".crossbeam-epoch."0.9.18" { inherit profileName; }).out;
|
||||
${ if rootFeatures' ? "garage/default" || rootFeatures' ? "garage/sled" || rootFeatures' ? "garage_db/default" || rootFeatures' ? "garage_db/sled" || rootFeatures' ? "garage_model/default" || rootFeatures' ? "garage_model/sled" then "crossbeam_utils" else null } = (rustPackages."registry+https://github.com/rust-lang/crates.io-index".crossbeam-utils."0.8.19" { inherit profileName; }).out;
|
||||
${ if (rootFeatures' ? "garage/default" || rootFeatures' ? "garage/sled" || rootFeatures' ? "garage_db/default" || rootFeatures' ? "garage_db/sled" || rootFeatures' ? "garage_model/default" || rootFeatures' ? "garage_model/sled") && (hostPlatform.parsed.kernel.name == "linux" || hostPlatform.parsed.kernel.name == "darwin" || hostPlatform.parsed.kernel.name == "windows") then "fs2" else null } = (rustPackages."registry+https://github.com/rust-lang/crates.io-index".fs2."0.4.3" { inherit profileName; }).out;
|
||||
${ if rootFeatures' ? "garage/default" || rootFeatures' ? "garage/sled" || rootFeatures' ? "garage_db/default" || rootFeatures' ? "garage_db/sled" || rootFeatures' ? "garage_model/default" || rootFeatures' ? "garage_model/sled" then "fxhash" else null } = (rustPackages."registry+https://github.com/rust-lang/crates.io-index".fxhash."0.2.1" { inherit profileName; }).out;
|
||||
${ if rootFeatures' ? "garage/default" || rootFeatures' ? "garage/sled" || rootFeatures' ? "garage_db/default" || rootFeatures' ? "garage_db/sled" || rootFeatures' ? "garage_model/default" || rootFeatures' ? "garage_model/sled" then "libc" else null } = (rustPackages."registry+https://github.com/rust-lang/crates.io-index".libc."0.2.153" { inherit profileName; }).out;
|
||||
${ if rootFeatures' ? "garage/default" || rootFeatures' ? "garage/sled" || rootFeatures' ? "garage_db/default" || rootFeatures' ? "garage_db/sled" || rootFeatures' ? "garage_model/default" || rootFeatures' ? "garage_model/sled" then "log" else null } = (rustPackages."registry+https://github.com/rust-lang/crates.io-index".log."0.4.20" { inherit profileName; }).out;
|
||||
${ if rootFeatures' ? "garage/default" || rootFeatures' ? "garage/sled" || rootFeatures' ? "garage_db/default" || rootFeatures' ? "garage_db/sled" || rootFeatures' ? "garage_model/default" || rootFeatures' ? "garage_model/sled" then "parking_lot" else null } = (rustPackages."registry+https://github.com/rust-lang/crates.io-index".parking_lot."0.11.2" { inherit profileName; }).out;
|
||||
};
|
||||
});
|
||||
|
||||
"registry+https://github.com/rust-lang/crates.io-index".smallvec."1.13.1" = overridableMkRustCrate (profileName: rec {
|
||||
name = "smallvec";
|
||||
version = "1.13.1";
|
||||
|
@ -6661,6 +6723,7 @@ in
|
|||
[ "minwindef" ]
|
||||
[ "ntstatus" ]
|
||||
[ "processenv" ]
|
||||
(lib.optional (rootFeatures' ? "garage/default" || rootFeatures' ? "garage/sled" || rootFeatures' ? "garage_db/default" || rootFeatures' ? "garage_db/sled" || rootFeatures' ? "garage_model/default" || rootFeatures' ? "garage_model/sled") "processthreadsapi")
|
||||
[ "std" ]
|
||||
[ "synchapi" ]
|
||||
[ "sysinfoapi" ]
|
||||
|
|
|
@ -78,6 +78,7 @@ tracing-subscriber = { version = "0.3", features = ["env-filter"] }
|
|||
|
||||
heed = { version = "0.11", default-features = false, features = ["lmdb"] }
|
||||
rusqlite = "0.30.0"
|
||||
sled = "0.34"
|
||||
|
||||
async-compression = { version = "0.4", features = ["tokio", "zstd"] }
|
||||
zstd = { version = "0.13", default-features = false }
|
||||
|
|
|
@ -40,6 +40,7 @@ in {
|
|||
features = [
|
||||
"garage/bundled-libs"
|
||||
"garage/k2v"
|
||||
"garage/sled"
|
||||
"garage/lmdb"
|
||||
"garage/sqlite"
|
||||
];
|
||||
|
|
|
@ -98,6 +98,7 @@ paths:
|
|||
type: string
|
||||
example:
|
||||
- "k2v"
|
||||
- "sled"
|
||||
- "lmdb"
|
||||
- "sqlite"
|
||||
- "consul-discovery"
|
||||
|
|
|
@ -292,7 +292,7 @@ with average object size ranging from 50 KB to 150 KB.
|
|||
As such, your Garage cluster should be configured appropriately for good performance:
|
||||
|
||||
- use Garage v0.8.0 or higher with the [LMDB database engine](@documentation/reference-manual/configuration.md#db-engine-since-v0-8-0).
|
||||
Older versions of Garage used the Sled database engine which had issues, such as databases quickly ending up taking tens of GB of disk space.
|
||||
With the default Sled database engine, your database could quickly end up taking tens of GB of disk space.
|
||||
- the Garage database should be stored on a SSD
|
||||
|
||||
### Creating your bucket
|
||||
|
|
|
@ -90,5 +90,6 @@ The following feature flags are available in v0.8.0:
|
|||
| `kubernetes-discovery` | optional | Enable automatic registration and discovery<br>of cluster nodes through the Kubernetes API |
|
||||
| `metrics` | *by default* | Enable collection of metrics in Prometheus format on the admin API |
|
||||
| `telemetry-otlp` | optional | Enable collection of execution traces using OpenTelemetry |
|
||||
| `lmdb` | *by default* | Enable using LMDB to store Garage's metadata |
|
||||
| `sled` | *by default* | Enable using Sled to store Garage's metadata |
|
||||
| `lmdb` | optional | Enable using LMDB to store Garage's metadata |
|
||||
| `sqlite` | optional | Enable using Sqlite3 to store Garage's metadata |
|
||||
|
|
|
@ -70,8 +70,9 @@ to store 2 TB of data in total.
|
|||
|
||||
- If you only have an HDD and no SSD, it's fine to put your metadata alongside the data
|
||||
on the same drive. Having lots of RAM for your kernel to cache the metadata will
|
||||
help a lot with performance. The default LMDB database engine is the most tested
|
||||
and has good performance.
|
||||
help a lot with performance. Make sure to use the LMDB database engine,
|
||||
instead of Sled, which suffers from quite bad performance degradation on HDDs.
|
||||
Sled is still the default for legacy reasons, but is not recommended anymore.
|
||||
|
||||
- For the metadata storage, Garage does not do checksumming and integrity
|
||||
verification on its own. If you are afraid of bitrot/data corruption,
|
||||
|
|
|
@ -97,7 +97,7 @@ delete a tombstone, the following condition has to be met:
|
|||
superseeded by the tombstone. This ensures that deleting the tombstone is
|
||||
safe and that no deleted value will come back in the system.
|
||||
|
||||
Garage uses atomic database operations (such as compare-and-swap and
|
||||
Garage makes use of Sled's atomic operations (such as compare-and-swap and
|
||||
transactions) to ensure that only tombstones that have been correctly
|
||||
propagated to other nodes are ever deleted from the local entry tree.
|
||||
|
||||
|
|
|
@ -20,6 +20,8 @@ db_engine = "lmdb"
|
|||
|
||||
block_size = "1M"
|
||||
|
||||
sled_cache_capacity = "128MiB"
|
||||
sled_flush_every_ms = 2000
|
||||
lmdb_map_size = "1T"
|
||||
|
||||
compression_level = 1
|
||||
|
@ -94,7 +96,9 @@ Top-level configuration options:
|
|||
[`rpc_bind_addr`](#rpc_bind_addr),
|
||||
[`rpc_bind_outgoing`](#rpc_bind_outgoing),
|
||||
[`rpc_public_addr`](#rpc_public_addr),
|
||||
[`rpc_secret`/`rpc_secret_file`](#rpc_secret).
|
||||
[`rpc_secret`/`rpc_secret_file`](#rpc_secret),
|
||||
[`sled_cache_capacity`](#sled_cache_capacity),
|
||||
[`sled_flush_every_ms`](#sled_flush_every_ms).
|
||||
|
||||
The `[consul_discovery]` section:
|
||||
[`api`](#consul_api),
|
||||
|
@ -267,16 +271,20 @@ Since `v0.8.0`, Garage can use alternative storage backends as follows:
|
|||
|
||||
| DB engine | `db_engine` value | Database path |
|
||||
| --------- | ----------------- | ------------- |
|
||||
| [LMDB](https://www.lmdb.tech) (since `v0.8.0`, default since `v0.9.0`) | `"lmdb"` | `<metadata_dir>/db.lmdb/` |
|
||||
| [Sqlite](https://sqlite.org) (since `v0.8.0`) | `"sqlite"` | `<metadata_dir>/db.sqlite` |
|
||||
| [Sled](https://sled.rs) (old default, removed since `v1.0`) | `"sled"` | `<metadata_dir>/db/` |
|
||||
| [LMDB](https://www.lmdb.tech) (default since `v0.9.0`) | `"lmdb"` | `<metadata_dir>/db.lmdb/` |
|
||||
| [Sled](https://sled.rs) (default up to `v0.8.0`) | `"sled"` | `<metadata_dir>/db/` |
|
||||
| [Sqlite](https://sqlite.org) | `"sqlite"` | `<metadata_dir>/db.sqlite` |
|
||||
|
||||
Sled was supported until Garage v0.9.x, and was removed in Garage v1.0.
|
||||
You can still use an older binary of Garage (e.g. v0.9.3) to migrate
|
||||
old Sled metadata databases to another engine.
|
||||
Sled was the only database engine up to Garage v0.7.0. Performance issues and
|
||||
API limitations of Sled prompted the addition of alternative engines in v0.8.0.
|
||||
Since v0.9.0, LMDB is the default engine instead of Sled, and Sled is
|
||||
deprecated. We plan to remove Sled in Garage v1.0.
|
||||
|
||||
Performance characteristics of the different DB engines are as follows:
|
||||
|
||||
- Sled: tends to produce large data files and also has performance issues,
|
||||
especially when the metadata folder is on a traditional HDD and not on SSD.
|
||||
|
||||
- LMDB: the recommended database engine on 64-bit systems, much more
|
||||
space-efficient and slightly faster. Note that the data format of LMDB is not
|
||||
portable between architectures, so for instance the Garage database of an
|
||||
|
@ -325,6 +333,7 @@ Here is how this option impacts the different database engines:
|
|||
|
||||
| Database | `metadata_fsync = false` (default) | `metadata_fsync = true` |
|
||||
|----------|------------------------------------|-------------------------------|
|
||||
| Sled | default options | *unsupported* |
|
||||
| Sqlite | `PRAGMA synchronous = OFF` | `PRAGMA synchronous = NORMAL` |
|
||||
| LMDB | `MDB_NOMETASYNC` + `MDB_NOSYNC` | `MDB_NOMETASYNC` |
|
||||
|
||||
|
@ -358,6 +367,21 @@ files will remain available. This however means that chunks from existing files
|
|||
will not be deduplicated with chunks from newly uploaded files, meaning you
|
||||
might use more storage space that is optimally possible.
|
||||
|
||||
#### `sled_cache_capacity` {#sled_cache_capacity}
|
||||
|
||||
This parameter can be used to tune the capacity of the cache used by
|
||||
[sled](https://sled.rs), the database Garage uses internally to store metadata.
|
||||
Tune this to fit the RAM you wish to make available to your Garage instance.
|
||||
This value has a conservative default (128MB) so that Garage doesn't use too much
|
||||
RAM by default, but feel free to increase this for higher performance.
|
||||
|
||||
#### `sled_flush_every_ms` {#sled_flush_every_ms}
|
||||
|
||||
This parameters can be used to tune the flushing interval of sled.
|
||||
Increase this if sled is thrashing your SSD, at the risk of losing more data in case
|
||||
of a power outage (though this should not matter much as data is replicated on other
|
||||
nodes). The default value, 2000ms, should be appropriate for most use cases.
|
||||
|
||||
#### `lmdb_map_size` {#lmdb_map_size}
|
||||
|
||||
This parameters can be used to set the map size used by LMDB,
|
||||
|
|
|
@ -73,6 +73,7 @@ Example response body:
|
|||
"garageVersion": "v0.10.0",
|
||||
"garageFeatures": [
|
||||
"k2v",
|
||||
"sled",
|
||||
"lmdb",
|
||||
"sqlite",
|
||||
"metrics",
|
||||
|
|
|
@ -146,7 +146,7 @@ in a bucket, as the partition key becomes the sort key in the index.
|
|||
How indexing works:
|
||||
|
||||
- Each node keeps a local count of how many items it stores for each partition,
|
||||
in a local database tree that is updated atomically when an item is modified.
|
||||
in a local Sled tree that is updated atomically when an item is modified.
|
||||
- These local counters are asynchronously stored in the index table which is
|
||||
a regular Garage table spread in the network. Counters are stored as LWW values,
|
||||
so basically the final table will have the following structure:
|
||||
|
|
|
@ -168,7 +168,7 @@ let
|
|||
rootFeatures = if features != null then
|
||||
features
|
||||
else
|
||||
([ "garage/bundled-libs" "garage/lmdb" "garage/k2v" ] ++ (if release then [
|
||||
([ "garage/bundled-libs" "garage/sled" "garage/lmdb" "garage/k2v" ] ++ (if release then [
|
||||
"garage/consul-discovery"
|
||||
"garage/kubernetes-discovery"
|
||||
"garage/metrics"
|
||||
|
|
|
@ -6,13 +6,18 @@
|
|||
garage:
|
||||
# Can be changed for better performance on certain systems
|
||||
# https://garagehq.deuxfleurs.fr/documentation/reference-manual/configuration/#db-engine-since-v0-8-0
|
||||
dbEngine: "lmdb"
|
||||
dbEngine: "sled"
|
||||
|
||||
# Defaults is 1MB
|
||||
# An increase can result in better performance in certain scenarios
|
||||
# https://garagehq.deuxfleurs.fr/documentation/reference-manual/configuration/#block-size
|
||||
blockSize: "1048576"
|
||||
|
||||
# Tuning parameters for the sled DB engine
|
||||
# https://garagehq.deuxfleurs.fr/documentation/reference-manual/configuration/#sled-cache-capacity
|
||||
sledCacheCapacity: "134217728"
|
||||
sledFlushEveryMs: "2000"
|
||||
|
||||
# Default to 3 replicas, see the replication_mode section at
|
||||
# https://garagehq.deuxfleurs.fr/documentation/reference-manual/configuration/#replication-mode
|
||||
replicationMode: "3"
|
||||
|
@ -45,6 +50,11 @@ garage:
|
|||
|
||||
block_size = {{ .Values.garage.blockSize }}
|
||||
|
||||
{{- if eq .Values.garage.dbEngine "sled"}}
|
||||
sled_cache_capacity = {{ .Values.garage.sledCacheCapacity }}
|
||||
sled_flush_every_ms = {{ .Values.garage.sledFlushEveryMs }}
|
||||
{{- end }}
|
||||
|
||||
replication_mode = "{{ .Values.garage.replicationMode }}"
|
||||
|
||||
compression_level = {{ .Values.garage.compressionLevel }}
|
||||
|
|
|
@ -378,6 +378,11 @@ impl BlockManager {
|
|||
Ok(self.rc.rc.len()?)
|
||||
}
|
||||
|
||||
/// Get number of items in the refcount table
|
||||
pub fn rc_fast_len(&self) -> Result<Option<usize>, Error> {
|
||||
Ok(self.rc.rc.fast_len()?)
|
||||
}
|
||||
|
||||
/// Send command to start/stop/manager scrub worker
|
||||
pub async fn send_scrub_command(&self, cmd: ScrubWorkerCommand) -> Result<(), Error> {
|
||||
let tx = self.tx_scrub_command.load();
|
||||
|
@ -393,7 +398,7 @@ impl BlockManager {
|
|||
|
||||
/// List all resync errors
|
||||
pub fn list_resync_errors(&self) -> Result<Vec<BlockResyncErrorInfo>, Error> {
|
||||
let mut blocks = Vec::with_capacity(self.resync.errors.len()?);
|
||||
let mut blocks = Vec::with_capacity(self.resync.errors.len());
|
||||
for ent in self.resync.errors.iter()? {
|
||||
let (hash, cnt) = ent?;
|
||||
let cnt = ErrorCounter::decode(&cnt);
|
||||
|
|
|
@ -1,6 +1,7 @@
|
|||
use opentelemetry::{global, metrics::*};
|
||||
|
||||
use garage_db as db;
|
||||
use garage_db::counted_tree_hack::CountedTree;
|
||||
|
||||
/// TableMetrics reference all counter used for metrics
|
||||
pub struct BlockManagerMetrics {
|
||||
|
@ -28,8 +29,8 @@ impl BlockManagerMetrics {
|
|||
pub fn new(
|
||||
compression_level: Option<i32>,
|
||||
rc_tree: db::Tree,
|
||||
resync_queue: db::Tree,
|
||||
resync_errors: db::Tree,
|
||||
resync_queue: CountedTree,
|
||||
resync_errors: CountedTree,
|
||||
) -> Self {
|
||||
let meter = global::meter("garage_model/block");
|
||||
Self {
|
||||
|
@ -44,17 +45,15 @@ impl BlockManagerMetrics {
|
|||
.init(),
|
||||
_rc_size: meter
|
||||
.u64_value_observer("block.rc_size", move |observer| {
|
||||
if let Ok(value) = rc_tree.len() {
|
||||
observer.observe(value as u64, &[])
|
||||
if let Ok(Some(v)) = rc_tree.fast_len() {
|
||||
observer.observe(v as u64, &[])
|
||||
}
|
||||
})
|
||||
.with_description("Number of blocks known to the reference counter")
|
||||
.init(),
|
||||
_resync_queue_len: meter
|
||||
.u64_value_observer("block.resync_queue_length", move |observer| {
|
||||
if let Ok(value) = resync_queue.len() {
|
||||
observer.observe(value as u64, &[]);
|
||||
}
|
||||
observer.observe(resync_queue.len() as u64, &[])
|
||||
})
|
||||
.with_description(
|
||||
"Number of block hashes queued for local check and possible resync",
|
||||
|
@ -62,9 +61,7 @@ impl BlockManagerMetrics {
|
|||
.init(),
|
||||
_resync_errored_blocks: meter
|
||||
.u64_value_observer("block.resync_errored_blocks", move |observer| {
|
||||
if let Ok(value) = resync_errors.len() {
|
||||
observer.observe(value as u64, &[]);
|
||||
}
|
||||
observer.observe(resync_errors.len() as u64, &[])
|
||||
})
|
||||
.with_description("Number of block hashes whose last resync resulted in an error")
|
||||
.init(),
|
||||
|
|
|
@ -15,6 +15,7 @@ use opentelemetry::{
|
|||
};
|
||||
|
||||
use garage_db as db;
|
||||
use garage_db::counted_tree_hack::CountedTree;
|
||||
|
||||
use garage_util::background::*;
|
||||
use garage_util::data::*;
|
||||
|
@ -46,9 +47,9 @@ pub(crate) const MAX_RESYNC_WORKERS: usize = 8;
|
|||
const INITIAL_RESYNC_TRANQUILITY: u32 = 2;
|
||||
|
||||
pub struct BlockResyncManager {
|
||||
pub(crate) queue: db::Tree,
|
||||
pub(crate) queue: CountedTree,
|
||||
pub(crate) notify: Arc<Notify>,
|
||||
pub(crate) errors: db::Tree,
|
||||
pub(crate) errors: CountedTree,
|
||||
|
||||
busy_set: BusySet,
|
||||
|
||||
|
@ -89,10 +90,12 @@ impl BlockResyncManager {
|
|||
let queue = db
|
||||
.open_tree("block_local_resync_queue")
|
||||
.expect("Unable to open block_local_resync_queue tree");
|
||||
let queue = CountedTree::new(queue).expect("Could not count block_local_resync_queue");
|
||||
|
||||
let errors = db
|
||||
.open_tree("block_local_resync_errors")
|
||||
.expect("Unable to open block_local_resync_errors tree");
|
||||
let errors = CountedTree::new(errors).expect("Could not count block_local_resync_errors");
|
||||
|
||||
let persister = PersisterShared::new(&system.metadata_dir, "resync_cfg");
|
||||
|
||||
|
@ -107,12 +110,16 @@ impl BlockResyncManager {
|
|||
|
||||
/// Get lenght of resync queue
|
||||
pub fn queue_len(&self) -> Result<usize, Error> {
|
||||
Ok(self.queue.len()?)
|
||||
// This currently can't return an error because the CountedTree hack
|
||||
// doesn't error on .len(), but this will change when we remove the hack
|
||||
// (hopefully someday!)
|
||||
Ok(self.queue.len())
|
||||
}
|
||||
|
||||
/// Get number of blocks that have an error
|
||||
pub fn errors_len(&self) -> Result<usize, Error> {
|
||||
Ok(self.errors.len()?)
|
||||
// (see queue_len comment)
|
||||
Ok(self.errors.len())
|
||||
}
|
||||
|
||||
/// Clear the error counter for a block and put it in queue immediately
|
||||
|
@ -173,7 +180,7 @@ impl BlockResyncManager {
|
|||
// deleted once the garbage collection delay has passed.
|
||||
//
|
||||
// Here are some explanations on how the resync queue works.
|
||||
// There are two db trees that are used to have information
|
||||
// There are two Sled trees that are used to have information
|
||||
// about the status of blocks that need to be resynchronized:
|
||||
//
|
||||
// - resync.queue: a tree that is ordered first by a timestamp
|
||||
|
@ -534,9 +541,9 @@ impl Worker for ResyncWorker {
|
|||
Ok(WorkerState::Idle)
|
||||
}
|
||||
Err(e) => {
|
||||
// The errors that we have here are only db errors
|
||||
// The errors that we have here are only Sled errors
|
||||
// We don't really know how to handle them so just ¯\_(ツ)_/¯
|
||||
// (there is kind of an assumption that the db won't error on us,
|
||||
// (there is kind of an assumption that Sled won't error on us,
|
||||
// if it does there is not much we can do -- TODO should we just panic?)
|
||||
// Here we just give the error to the worker manager,
|
||||
// it will print it to the logs and increment a counter
|
||||
|
|
|
@ -18,12 +18,13 @@ tracing.workspace = true
|
|||
|
||||
heed = { workspace = true, optional = true }
|
||||
rusqlite = { workspace = true, optional = true }
|
||||
sled = { workspace = true, optional = true }
|
||||
|
||||
[dev-dependencies]
|
||||
mktemp.workspace = true
|
||||
|
||||
[features]
|
||||
default = [ "lmdb", "sqlite" ]
|
||||
default = [ "sled", "lmdb", "sqlite" ]
|
||||
bundled-libs = [ "rusqlite?/bundled" ]
|
||||
lmdb = [ "heed" ]
|
||||
sqlite = [ "rusqlite" ]
|
||||
|
|
127
src/db/counted_tree_hack.rs
Normal file
127
src/db/counted_tree_hack.rs
Normal file
|
@ -0,0 +1,127 @@
|
|||
//! This hack allows a db tree to keep in RAM a counter of the number of entries
|
||||
//! it contains, which is used to call .len() on it. This is usefull only for
|
||||
//! the sled backend where .len() otherwise would have to traverse the whole
|
||||
//! tree to count items. For sqlite and lmdb, this is mostly useless (but
|
||||
//! hopefully not harmfull!). Note that a CountedTree cannot be part of a
|
||||
//! transaction.
|
||||
|
||||
use std::sync::{
|
||||
atomic::{AtomicUsize, Ordering},
|
||||
Arc,
|
||||
};
|
||||
|
||||
use crate::{Result, Tree, TxError, Value, ValueIter};
|
||||
|
||||
#[derive(Clone)]
|
||||
pub struct CountedTree(Arc<CountedTreeInternal>);
|
||||
|
||||
struct CountedTreeInternal {
|
||||
tree: Tree,
|
||||
len: AtomicUsize,
|
||||
}
|
||||
|
||||
impl CountedTree {
|
||||
pub fn new(tree: Tree) -> Result<Self> {
|
||||
let len = tree.len()?;
|
||||
Ok(Self(Arc::new(CountedTreeInternal {
|
||||
tree,
|
||||
len: AtomicUsize::new(len),
|
||||
})))
|
||||
}
|
||||
|
||||
pub fn len(&self) -> usize {
|
||||
self.0.len.load(Ordering::SeqCst)
|
||||
}
|
||||
|
||||
pub fn is_empty(&self) -> bool {
|
||||
self.len() == 0
|
||||
}
|
||||
|
||||
pub fn get<K: AsRef<[u8]>>(&self, key: K) -> Result<Option<Value>> {
|
||||
self.0.tree.get(key)
|
||||
}
|
||||
|
||||
pub fn first(&self) -> Result<Option<(Value, Value)>> {
|
||||
self.0.tree.first()
|
||||
}
|
||||
|
||||
pub fn iter(&self) -> Result<ValueIter<'_>> {
|
||||
self.0.tree.iter()
|
||||
}
|
||||
|
||||
// ---- writing functions ----
|
||||
|
||||
pub fn insert<K, V>(&self, key: K, value: V) -> Result<Option<Value>>
|
||||
where
|
||||
K: AsRef<[u8]>,
|
||||
V: AsRef<[u8]>,
|
||||
{
|
||||
let old_val = self.0.tree.insert(key, value)?;
|
||||
if old_val.is_none() {
|
||||
self.0.len.fetch_add(1, Ordering::SeqCst);
|
||||
}
|
||||
Ok(old_val)
|
||||
}
|
||||
|
||||
pub fn remove<K: AsRef<[u8]>>(&self, key: K) -> Result<Option<Value>> {
|
||||
let old_val = self.0.tree.remove(key)?;
|
||||
if old_val.is_some() {
|
||||
self.0.len.fetch_sub(1, Ordering::SeqCst);
|
||||
}
|
||||
Ok(old_val)
|
||||
}
|
||||
|
||||
pub fn compare_and_swap<K, OV, NV>(
|
||||
&self,
|
||||
key: K,
|
||||
expected_old: Option<OV>,
|
||||
new: Option<NV>,
|
||||
) -> Result<bool>
|
||||
where
|
||||
K: AsRef<[u8]>,
|
||||
OV: AsRef<[u8]>,
|
||||
NV: AsRef<[u8]>,
|
||||
{
|
||||
let old_some = expected_old.is_some();
|
||||
let new_some = new.is_some();
|
||||
|
||||
let tx_res = self.0.tree.db().transaction(|tx| {
|
||||
let old_val = tx.get(&self.0.tree, &key)?;
|
||||
let is_same = match (&old_val, &expected_old) {
|
||||
(None, None) => true,
|
||||
(Some(x), Some(y)) if x == y.as_ref() => true,
|
||||
_ => false,
|
||||
};
|
||||
if is_same {
|
||||
match &new {
|
||||
Some(v) => {
|
||||
tx.insert(&self.0.tree, &key, v)?;
|
||||
}
|
||||
None => {
|
||||
tx.remove(&self.0.tree, &key)?;
|
||||
}
|
||||
}
|
||||
Ok(())
|
||||
} else {
|
||||
Err(TxError::Abort(()))
|
||||
}
|
||||
});
|
||||
|
||||
match tx_res {
|
||||
Ok(()) => {
|
||||
match (old_some, new_some) {
|
||||
(false, true) => {
|
||||
self.0.len.fetch_add(1, Ordering::SeqCst);
|
||||
}
|
||||
(true, false) => {
|
||||
self.0.len.fetch_sub(1, Ordering::SeqCst);
|
||||
}
|
||||
_ => (),
|
||||
}
|
||||
Ok(true)
|
||||
}
|
||||
Err(TxError::Abort(())) => Ok(false),
|
||||
Err(TxError::Db(e)) => Err(e),
|
||||
}
|
||||
}
|
||||
}
|
|
@ -3,9 +3,13 @@ extern crate tracing;
|
|||
|
||||
#[cfg(feature = "lmdb")]
|
||||
pub mod lmdb_adapter;
|
||||
#[cfg(feature = "sled")]
|
||||
pub mod sled_adapter;
|
||||
#[cfg(feature = "sqlite")]
|
||||
pub mod sqlite_adapter;
|
||||
|
||||
pub mod counted_tree_hack;
|
||||
|
||||
pub mod open;
|
||||
|
||||
#[cfg(test)]
|
||||
|
@ -51,7 +55,6 @@ pub type Result<T> = std::result::Result<T, Error>;
|
|||
pub struct TxOpError(pub(crate) Error);
|
||||
pub type TxOpResult<T> = std::result::Result<T, TxOpError>;
|
||||
|
||||
#[derive(Debug)]
|
||||
pub enum TxError<E> {
|
||||
Abort(E),
|
||||
Db(Error),
|
||||
|
@ -186,6 +189,10 @@ impl Tree {
|
|||
pub fn len(&self) -> Result<usize> {
|
||||
self.0.len(self.1)
|
||||
}
|
||||
#[inline]
|
||||
pub fn fast_len(&self) -> Result<Option<usize>> {
|
||||
self.0.fast_len(self.1)
|
||||
}
|
||||
|
||||
#[inline]
|
||||
pub fn first(&self) -> Result<Option<(Value, Value)>> {
|
||||
|
@ -275,11 +282,6 @@ impl<'a> Transaction<'a> {
|
|||
pub fn remove<T: AsRef<[u8]>>(&mut self, tree: &Tree, key: T) -> TxOpResult<Option<Value>> {
|
||||
self.tx.remove(tree.1, key.as_ref())
|
||||
}
|
||||
/// Clears all values in a tree
|
||||
#[inline]
|
||||
pub fn clear(&mut self, tree: &Tree) -> TxOpResult<()> {
|
||||
self.tx.clear(tree.1)
|
||||
}
|
||||
|
||||
#[inline]
|
||||
pub fn iter(&self, tree: &Tree) -> TxOpResult<TxValueIter<'_>> {
|
||||
|
@ -326,6 +328,9 @@ pub(crate) trait IDb: Send + Sync {
|
|||
|
||||
fn get(&self, tree: usize, key: &[u8]) -> Result<Option<Value>>;
|
||||
fn len(&self, tree: usize) -> Result<usize>;
|
||||
fn fast_len(&self, _tree: usize) -> Result<Option<usize>> {
|
||||
Ok(None)
|
||||
}
|
||||
|
||||
fn insert(&self, tree: usize, key: &[u8], value: &[u8]) -> Result<Option<Value>>;
|
||||
fn remove(&self, tree: usize, key: &[u8]) -> Result<Option<Value>>;
|
||||
|
@ -356,7 +361,6 @@ pub(crate) trait ITx {
|
|||
|
||||
fn insert(&mut self, tree: usize, key: &[u8], value: &[u8]) -> TxOpResult<Option<Value>>;
|
||||
fn remove(&mut self, tree: usize, key: &[u8]) -> TxOpResult<Option<Value>>;
|
||||
fn clear(&mut self, tree: usize) -> TxOpResult<()>;
|
||||
|
||||
fn iter(&self, tree: usize) -> TxOpResult<TxValueIter<'_>>;
|
||||
fn iter_rev(&self, tree: usize) -> TxOpResult<TxValueIter<'_>>;
|
||||
|
|
|
@ -3,7 +3,6 @@ use core::ptr::NonNull;
|
|||
|
||||
use std::collections::HashMap;
|
||||
use std::convert::TryInto;
|
||||
use std::pin::Pin;
|
||||
use std::sync::{Arc, RwLock};
|
||||
|
||||
use heed::types::ByteSlice;
|
||||
|
@ -122,6 +121,10 @@ impl IDb for LmdbDb {
|
|||
Ok(tree.len(&tx)?.try_into().unwrap())
|
||||
}
|
||||
|
||||
fn fast_len(&self, tree: usize) -> Result<Option<usize>> {
|
||||
Ok(Some(self.len(tree)?))
|
||||
}
|
||||
|
||||
fn insert(&self, tree: usize, key: &[u8], value: &[u8]) -> Result<Option<Value>> {
|
||||
let tree = self.get_tree(tree)?;
|
||||
let mut tx = self.db.write_txn()?;
|
||||
|
@ -239,9 +242,8 @@ impl<'a> ITx for LmdbTx<'a> {
|
|||
None => Ok(None),
|
||||
}
|
||||
}
|
||||
fn len(&self, tree: usize) -> TxOpResult<usize> {
|
||||
let tree = self.get_tree(tree)?;
|
||||
Ok(tree.len(&self.tx)? as usize)
|
||||
fn len(&self, _tree: usize) -> TxOpResult<usize> {
|
||||
unimplemented!(".len() in transaction not supported with LMDB backend")
|
||||
}
|
||||
|
||||
fn insert(&mut self, tree: usize, key: &[u8], value: &[u8]) -> TxOpResult<Option<Value>> {
|
||||
|
@ -256,48 +258,33 @@ impl<'a> ITx for LmdbTx<'a> {
|
|||
tree.delete(&mut self.tx, key)?;
|
||||
Ok(old_val)
|
||||
}
|
||||
fn clear(&mut self, tree: usize) -> TxOpResult<()> {
|
||||
let tree = *self.get_tree(tree)?;
|
||||
tree.clear(&mut self.tx)?;
|
||||
Ok(())
|
||||
}
|
||||
|
||||
fn iter(&self, tree: usize) -> TxOpResult<TxValueIter<'_>> {
|
||||
let tree = *self.get_tree(tree)?;
|
||||
Ok(Box::new(tree.iter(&self.tx)?.map(tx_iter_item)))
|
||||
fn iter(&self, _tree: usize) -> TxOpResult<TxValueIter<'_>> {
|
||||
unimplemented!("Iterators in transactions not supported with LMDB backend");
|
||||
}
|
||||
fn iter_rev(&self, tree: usize) -> TxOpResult<TxValueIter<'_>> {
|
||||
let tree = *self.get_tree(tree)?;
|
||||
Ok(Box::new(tree.rev_iter(&self.tx)?.map(tx_iter_item)))
|
||||
fn iter_rev(&self, _tree: usize) -> TxOpResult<TxValueIter<'_>> {
|
||||
unimplemented!("Iterators in transactions not supported with LMDB backend");
|
||||
}
|
||||
|
||||
fn range<'r>(
|
||||
&self,
|
||||
tree: usize,
|
||||
low: Bound<&'r [u8]>,
|
||||
high: Bound<&'r [u8]>,
|
||||
_tree: usize,
|
||||
_low: Bound<&'r [u8]>,
|
||||
_high: Bound<&'r [u8]>,
|
||||
) -> TxOpResult<TxValueIter<'_>> {
|
||||
let tree = *self.get_tree(tree)?;
|
||||
Ok(Box::new(
|
||||
tree.range(&self.tx, &(low, high))?.map(tx_iter_item),
|
||||
))
|
||||
unimplemented!("Iterators in transactions not supported with LMDB backend");
|
||||
}
|
||||
fn range_rev<'r>(
|
||||
&self,
|
||||
tree: usize,
|
||||
low: Bound<&'r [u8]>,
|
||||
high: Bound<&'r [u8]>,
|
||||
_tree: usize,
|
||||
_low: Bound<&'r [u8]>,
|
||||
_high: Bound<&'r [u8]>,
|
||||
) -> TxOpResult<TxValueIter<'_>> {
|
||||
let tree = *self.get_tree(tree)?;
|
||||
Ok(Box::new(
|
||||
tree.rev_range(&self.tx, &(low, high))?.map(tx_iter_item),
|
||||
))
|
||||
unimplemented!("Iterators in transactions not supported with LMDB backend");
|
||||
}
|
||||
}
|
||||
|
||||
// ---- iterators outside transactions ----
|
||||
// complicated, they must hold the transaction object
|
||||
// therefore a bit of unsafe code (it is a self-referential struct)
|
||||
// ----
|
||||
|
||||
type IteratorItem<'a> = heed::Result<(
|
||||
<ByteSlice as BytesDecode<'a>>::DItem,
|
||||
|
@ -320,20 +307,12 @@ where
|
|||
where
|
||||
F: FnOnce(&'a RoTxn<'a>) -> Result<I>,
|
||||
{
|
||||
let res = TxAndIterator { tx, iter: None };
|
||||
let mut boxed = Box::pin(res);
|
||||
let mut res = TxAndIterator { tx, iter: None };
|
||||
|
||||
// This unsafe allows us to bypass lifetime checks
|
||||
let tx = unsafe { NonNull::from(&boxed.tx).as_ref() };
|
||||
let iter = iterfun(tx)?;
|
||||
let tx = unsafe { NonNull::from(&res.tx).as_ref() };
|
||||
res.iter = Some(iterfun(tx)?);
|
||||
|
||||
let mut_ref = Pin::as_mut(&mut boxed);
|
||||
// This unsafe allows us to write in a field of the pinned struct
|
||||
unsafe {
|
||||
Pin::get_unchecked_mut(mut_ref).iter = Some(iter);
|
||||
}
|
||||
|
||||
Ok(Box::new(TxAndIteratorPin(boxed)))
|
||||
Ok(Box::new(res))
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -342,26 +321,18 @@ where
|
|||
I: Iterator<Item = IteratorItem<'a>> + 'a,
|
||||
{
|
||||
fn drop(&mut self) {
|
||||
// ensure the iterator is dropped before the RoTxn it references
|
||||
drop(self.iter.take());
|
||||
}
|
||||
}
|
||||
|
||||
struct TxAndIteratorPin<'a, I>(Pin<Box<TxAndIterator<'a, I>>>)
|
||||
where
|
||||
I: Iterator<Item = IteratorItem<'a>> + 'a;
|
||||
|
||||
impl<'a, I> Iterator for TxAndIteratorPin<'a, I>
|
||||
impl<'a, I> Iterator for TxAndIterator<'a, I>
|
||||
where
|
||||
I: Iterator<Item = IteratorItem<'a>> + 'a,
|
||||
{
|
||||
type Item = Result<(Value, Value)>;
|
||||
|
||||
fn next(&mut self) -> Option<Self::Item> {
|
||||
let mut_ref = Pin::as_mut(&mut self.0);
|
||||
// This unsafe allows us to mutably access the iterator field
|
||||
let next = unsafe { Pin::get_unchecked_mut(mut_ref).iter.as_mut()?.next() };
|
||||
match next {
|
||||
match self.iter.as_mut().unwrap().next() {
|
||||
None => None,
|
||||
Some(Err(e)) => Some(Err(e.into())),
|
||||
Some(Ok((k, v))) => Some(Ok((k.to_vec(), v.to_vec()))),
|
||||
|
@ -369,16 +340,7 @@ where
|
|||
}
|
||||
}
|
||||
|
||||
// ---- iterators within transactions ----
|
||||
|
||||
fn tx_iter_item<'a>(
|
||||
item: std::result::Result<(&'a [u8], &'a [u8]), heed::Error>,
|
||||
) -> TxOpResult<(Vec<u8>, Vec<u8>)> {
|
||||
item.map(|(k, v)| (k.to_vec(), v.to_vec()))
|
||||
.map_err(|e| TxOpError(Error::from(e)))
|
||||
}
|
||||
|
||||
// ---- utility ----
|
||||
// ----
|
||||
|
||||
#[cfg(target_pointer_width = "64")]
|
||||
pub fn recommended_map_size() -> usize {
|
||||
|
|
|
@ -11,6 +11,7 @@ use crate::{Db, Error, Result};
|
|||
pub enum Engine {
|
||||
Lmdb,
|
||||
Sqlite,
|
||||
Sled,
|
||||
}
|
||||
|
||||
impl Engine {
|
||||
|
@ -19,6 +20,7 @@ impl Engine {
|
|||
match self {
|
||||
Self::Lmdb => "lmdb",
|
||||
Self::Sqlite => "sqlite",
|
||||
Self::Sled => "sled",
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -36,10 +38,10 @@ impl std::str::FromStr for Engine {
|
|||
match text {
|
||||
"lmdb" | "heed" => Ok(Self::Lmdb),
|
||||
"sqlite" | "sqlite3" | "rusqlite" => Ok(Self::Sqlite),
|
||||
"sled" => Err(Error("Sled is no longer supported as a database engine. Converting your old metadata db can be done using an older Garage binary (e.g. v0.9.3).".into())),
|
||||
"sled" => Ok(Self::Sled),
|
||||
kind => Err(Error(
|
||||
format!(
|
||||
"Invalid DB engine: {} (options are: lmdb, sqlite)",
|
||||
"Invalid DB engine: {} (options are: lmdb, sled, sqlite)",
|
||||
kind
|
||||
)
|
||||
.into(),
|
||||
|
@ -51,6 +53,8 @@ impl std::str::FromStr for Engine {
|
|||
pub struct OpenOpt {
|
||||
pub fsync: bool,
|
||||
pub lmdb_map_size: Option<usize>,
|
||||
pub sled_cache_capacity: usize,
|
||||
pub sled_flush_every_ms: u64,
|
||||
}
|
||||
|
||||
impl Default for OpenOpt {
|
||||
|
@ -58,12 +62,31 @@ impl Default for OpenOpt {
|
|||
Self {
|
||||
fsync: false,
|
||||
lmdb_map_size: None,
|
||||
sled_cache_capacity: 1024 * 1024 * 1024,
|
||||
sled_flush_every_ms: 2000,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
pub fn open_db(path: &PathBuf, engine: Engine, opt: &OpenOpt) -> Result<Db> {
|
||||
match engine {
|
||||
// ---- Sled DB ----
|
||||
#[cfg(feature = "sled")]
|
||||
Engine::Sled => {
|
||||
if opt.fsync {
|
||||
return Err(Error(
|
||||
"`metadata_fsync = true` is not supported with the Sled database engine".into(),
|
||||
));
|
||||
}
|
||||
info!("Opening Sled database at: {}", path.display());
|
||||
let db = crate::sled_adapter::sled::Config::default()
|
||||
.path(&path)
|
||||
.cache_capacity(opt.sled_cache_capacity as u64)
|
||||
.flush_every_ms(Some(opt.sled_flush_every_ms))
|
||||
.open()?;
|
||||
Ok(crate::sled_adapter::SledDb::init(db))
|
||||
}
|
||||
|
||||
// ---- Sqlite DB ----
|
||||
#[cfg(feature = "sqlite")]
|
||||
Engine::Sqlite => {
|
||||
|
|
274
src/db/sled_adapter.rs
Normal file
274
src/db/sled_adapter.rs
Normal file
|
@ -0,0 +1,274 @@
|
|||
use core::ops::Bound;
|
||||
|
||||
use std::cell::Cell;
|
||||
use std::collections::HashMap;
|
||||
use std::sync::{Arc, RwLock};
|
||||
|
||||
use sled::transaction::{
|
||||
ConflictableTransactionError, TransactionError, Transactional, TransactionalTree,
|
||||
UnabortableTransactionError,
|
||||
};
|
||||
|
||||
use crate::{
|
||||
Db, Error, IDb, ITx, ITxFn, OnCommit, Result, TxError, TxFnResult, TxOpError, TxOpResult,
|
||||
TxResult, TxValueIter, Value, ValueIter,
|
||||
};
|
||||
|
||||
pub use sled;
|
||||
|
||||
// -- err
|
||||
|
||||
impl From<sled::Error> for Error {
|
||||
fn from(e: sled::Error) -> Error {
|
||||
Error(format!("Sled: {}", e).into())
|
||||
}
|
||||
}
|
||||
|
||||
impl From<sled::Error> for TxOpError {
|
||||
fn from(e: sled::Error) -> TxOpError {
|
||||
TxOpError(e.into())
|
||||
}
|
||||
}
|
||||
|
||||
// -- db
|
||||
|
||||
pub struct SledDb {
|
||||
db: sled::Db,
|
||||
trees: RwLock<(Vec<sled::Tree>, HashMap<String, usize>)>,
|
||||
}
|
||||
|
||||
impl SledDb {
|
||||
#[deprecated(
|
||||
since = "0.9.0",
|
||||
note = "The Sled database is now deprecated and will be removed in Garage v1.0. Please migrate to LMDB or Sqlite as soon as possible."
|
||||
)]
|
||||
pub fn init(db: sled::Db) -> Db {
|
||||
tracing::warn!("-------------------- IMPORTANT WARNING !!! ----------------------");
|
||||
tracing::warn!("The Sled database is now deprecated and will be removed in Garage v1.0.");
|
||||
tracing::warn!("Please migrate to LMDB or Sqlite as soon as possible.");
|
||||
tracing::warn!("-----------------------------------------------------------------------");
|
||||
let s = Self {
|
||||
db,
|
||||
trees: RwLock::new((Vec::new(), HashMap::new())),
|
||||
};
|
||||
Db(Arc::new(s))
|
||||
}
|
||||
|
||||
fn get_tree(&self, i: usize) -> Result<sled::Tree> {
|
||||
self.trees
|
||||
.read()
|
||||
.unwrap()
|
||||
.0
|
||||
.get(i)
|
||||
.cloned()
|
||||
.ok_or_else(|| Error("invalid tree id".into()))
|
||||
}
|
||||
}
|
||||
|
||||
impl IDb for SledDb {
|
||||
fn engine(&self) -> String {
|
||||
"Sled".into()
|
||||
}
|
||||
|
||||
fn open_tree(&self, name: &str) -> Result<usize> {
|
||||
let mut trees = self.trees.write().unwrap();
|
||||
if let Some(i) = trees.1.get(name) {
|
||||
Ok(*i)
|
||||
} else {
|
||||
let tree = self.db.open_tree(name)?;
|
||||
let i = trees.0.len();
|
||||
trees.0.push(tree);
|
||||
trees.1.insert(name.to_string(), i);
|
||||
Ok(i)
|
||||
}
|
||||
}
|
||||
|
||||
fn list_trees(&self) -> Result<Vec<String>> {
|
||||
let mut trees = vec![];
|
||||
for name in self.db.tree_names() {
|
||||
let name = std::str::from_utf8(&name)
|
||||
.map_err(|e| Error(format!("{}", e).into()))?
|
||||
.to_string();
|
||||
if name != "__sled__default" {
|
||||
trees.push(name);
|
||||
}
|
||||
}
|
||||
Ok(trees)
|
||||
}
|
||||
|
||||
// ----
|
||||
|
||||
fn get(&self, tree: usize, key: &[u8]) -> Result<Option<Value>> {
|
||||
let tree = self.get_tree(tree)?;
|
||||
let val = tree.get(key)?;
|
||||
Ok(val.map(|x| x.to_vec()))
|
||||
}
|
||||
|
||||
fn len(&self, tree: usize) -> Result<usize> {
|
||||
let tree = self.get_tree(tree)?;
|
||||
Ok(tree.len())
|
||||
}
|
||||
|
||||
fn insert(&self, tree: usize, key: &[u8], value: &[u8]) -> Result<Option<Value>> {
|
||||
let tree = self.get_tree(tree)?;
|
||||
let old_val = tree.insert(key, value)?;
|
||||
Ok(old_val.map(|x| x.to_vec()))
|
||||
}
|
||||
|
||||
fn remove(&self, tree: usize, key: &[u8]) -> Result<Option<Value>> {
|
||||
let tree = self.get_tree(tree)?;
|
||||
let old_val = tree.remove(key)?;
|
||||
Ok(old_val.map(|x| x.to_vec()))
|
||||
}
|
||||
|
||||
fn clear(&self, tree: usize) -> Result<()> {
|
||||
let tree = self.get_tree(tree)?;
|
||||
tree.clear()?;
|
||||
Ok(())
|
||||
}
|
||||
|
||||
fn iter(&self, tree: usize) -> Result<ValueIter<'_>> {
|
||||
let tree = self.get_tree(tree)?;
|
||||
Ok(Box::new(tree.iter().map(|v| {
|
||||
v.map(|(x, y)| (x.to_vec(), y.to_vec())).map_err(Into::into)
|
||||
})))
|
||||
}
|
||||
|
||||
fn iter_rev(&self, tree: usize) -> Result<ValueIter<'_>> {
|
||||
let tree = self.get_tree(tree)?;
|
||||
Ok(Box::new(tree.iter().rev().map(|v| {
|
||||
v.map(|(x, y)| (x.to_vec(), y.to_vec())).map_err(Into::into)
|
||||
})))
|
||||
}
|
||||
|
||||
fn range<'r>(
|
||||
&self,
|
||||
tree: usize,
|
||||
low: Bound<&'r [u8]>,
|
||||
high: Bound<&'r [u8]>,
|
||||
) -> Result<ValueIter<'_>> {
|
||||
let tree = self.get_tree(tree)?;
|
||||
Ok(Box::new(tree.range::<&'r [u8], _>((low, high)).map(|v| {
|
||||
v.map(|(x, y)| (x.to_vec(), y.to_vec())).map_err(Into::into)
|
||||
})))
|
||||
}
|
||||
fn range_rev<'r>(
|
||||
&self,
|
||||
tree: usize,
|
||||
low: Bound<&'r [u8]>,
|
||||
high: Bound<&'r [u8]>,
|
||||
) -> Result<ValueIter<'_>> {
|
||||
let tree = self.get_tree(tree)?;
|
||||
Ok(Box::new(tree.range::<&'r [u8], _>((low, high)).rev().map(
|
||||
|v| v.map(|(x, y)| (x.to_vec(), y.to_vec())).map_err(Into::into),
|
||||
)))
|
||||
}
|
||||
|
||||
// ----
|
||||
|
||||
fn transaction(&self, f: &dyn ITxFn) -> TxResult<OnCommit, ()> {
|
||||
let trees = self.trees.read().unwrap();
|
||||
let res = trees.0.transaction(|txtrees| {
|
||||
let mut tx = SledTx {
|
||||
trees: txtrees,
|
||||
err: Cell::new(None),
|
||||
};
|
||||
match f.try_on(&mut tx) {
|
||||
TxFnResult::Ok(on_commit) => {
|
||||
assert!(tx.err.into_inner().is_none());
|
||||
Ok(on_commit)
|
||||
}
|
||||
TxFnResult::Abort => {
|
||||
assert!(tx.err.into_inner().is_none());
|
||||
Err(ConflictableTransactionError::Abort(()))
|
||||
}
|
||||
TxFnResult::DbErr => {
|
||||
let e = tx.err.into_inner().expect("No DB error");
|
||||
Err(e.into())
|
||||
}
|
||||
}
|
||||
});
|
||||
match res {
|
||||
Ok(on_commit) => Ok(on_commit),
|
||||
Err(TransactionError::Abort(())) => Err(TxError::Abort(())),
|
||||
Err(TransactionError::Storage(s)) => Err(TxError::Db(s.into())),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// ----
|
||||
|
||||
struct SledTx<'a> {
|
||||
trees: &'a [TransactionalTree],
|
||||
err: Cell<Option<UnabortableTransactionError>>,
|
||||
}
|
||||
|
||||
impl<'a> SledTx<'a> {
|
||||
fn get_tree(&self, i: usize) -> TxOpResult<&TransactionalTree> {
|
||||
self.trees.get(i).ok_or_else(|| {
|
||||
TxOpError(Error(
|
||||
"invalid tree id (it might have been openned after the transaction started)".into(),
|
||||
))
|
||||
})
|
||||
}
|
||||
|
||||
fn save_error<R>(
|
||||
&self,
|
||||
v: std::result::Result<R, UnabortableTransactionError>,
|
||||
) -> TxOpResult<R> {
|
||||
match v {
|
||||
Ok(x) => Ok(x),
|
||||
Err(e) => {
|
||||
let txt = format!("{}", e);
|
||||
self.err.set(Some(e));
|
||||
Err(TxOpError(Error(txt.into())))
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl<'a> ITx for SledTx<'a> {
|
||||
fn get(&self, tree: usize, key: &[u8]) -> TxOpResult<Option<Value>> {
|
||||
let tree = self.get_tree(tree)?;
|
||||
let tmp = self.save_error(tree.get(key))?;
|
||||
Ok(tmp.map(|x| x.to_vec()))
|
||||
}
|
||||
fn len(&self, _tree: usize) -> TxOpResult<usize> {
|
||||
unimplemented!(".len() in transaction not supported with Sled backend")
|
||||
}
|
||||
|
||||
fn insert(&mut self, tree: usize, key: &[u8], value: &[u8]) -> TxOpResult<Option<Value>> {
|
||||
let tree = self.get_tree(tree)?;
|
||||
let old_val = self.save_error(tree.insert(key, value))?;
|
||||
Ok(old_val.map(|x| x.to_vec()))
|
||||
}
|
||||
fn remove(&mut self, tree: usize, key: &[u8]) -> TxOpResult<Option<Value>> {
|
||||
let tree = self.get_tree(tree)?;
|
||||
let old_val = self.save_error(tree.remove(key))?;
|
||||
Ok(old_val.map(|x| x.to_vec()))
|
||||
}
|
||||
|
||||
fn iter(&self, _tree: usize) -> TxOpResult<TxValueIter<'_>> {
|
||||
unimplemented!("Iterators in transactions not supported with Sled backend");
|
||||
}
|
||||
fn iter_rev(&self, _tree: usize) -> TxOpResult<TxValueIter<'_>> {
|
||||
unimplemented!("Iterators in transactions not supported with Sled backend");
|
||||
}
|
||||
|
||||
fn range<'r>(
|
||||
&self,
|
||||
_tree: usize,
|
||||
_low: Bound<&'r [u8]>,
|
||||
_high: Bound<&'r [u8]>,
|
||||
) -> TxOpResult<TxValueIter<'_>> {
|
||||
unimplemented!("Iterators in transactions not supported with Sled backend");
|
||||
}
|
||||
fn range_rev<'r>(
|
||||
&self,
|
||||
_tree: usize,
|
||||
_low: Bound<&'r [u8]>,
|
||||
_high: Bound<&'r [u8]>,
|
||||
) -> TxOpResult<TxValueIter<'_>> {
|
||||
unimplemented!("Iterators in transactions not supported with Sled backend");
|
||||
}
|
||||
}
|
|
@ -144,6 +144,10 @@ impl IDb for SqliteDb {
|
|||
}
|
||||
}
|
||||
|
||||
fn fast_len(&self, tree: usize) -> Result<Option<usize>> {
|
||||
Ok(Some(self.len(tree)?))
|
||||
}
|
||||
|
||||
fn insert(&self, tree: usize, key: &[u8], value: &[u8]) -> Result<Option<Value>> {
|
||||
trace!("insert {}: lock db", tree);
|
||||
let this = self.0.lock().unwrap();
|
||||
|
@ -363,64 +367,33 @@ impl<'a> ITx for SqliteTx<'a> {
|
|||
|
||||
Ok(old_val)
|
||||
}
|
||||
fn clear(&mut self, tree: usize) -> TxOpResult<()> {
|
||||
let tree = self.get_tree(tree)?;
|
||||
self.tx.execute(&format!("DELETE FROM {}", tree), [])?;
|
||||
Ok(())
|
||||
}
|
||||
|
||||
fn iter(&self, tree: usize) -> TxOpResult<TxValueIter<'_>> {
|
||||
let tree = self.get_tree(tree)?;
|
||||
let sql = format!("SELECT k, v FROM {} ORDER BY k ASC", tree);
|
||||
TxValueIterator::make(self, &sql, [])
|
||||
fn iter(&self, _tree: usize) -> TxOpResult<TxValueIter<'_>> {
|
||||
unimplemented!();
|
||||
}
|
||||
fn iter_rev(&self, tree: usize) -> TxOpResult<TxValueIter<'_>> {
|
||||
let tree = self.get_tree(tree)?;
|
||||
let sql = format!("SELECT k, v FROM {} ORDER BY k DESC", tree);
|
||||
TxValueIterator::make(self, &sql, [])
|
||||
fn iter_rev(&self, _tree: usize) -> TxOpResult<TxValueIter<'_>> {
|
||||
unimplemented!();
|
||||
}
|
||||
|
||||
fn range<'r>(
|
||||
&self,
|
||||
tree: usize,
|
||||
low: Bound<&'r [u8]>,
|
||||
high: Bound<&'r [u8]>,
|
||||
_tree: usize,
|
||||
_low: Bound<&'r [u8]>,
|
||||
_high: Bound<&'r [u8]>,
|
||||
) -> TxOpResult<TxValueIter<'_>> {
|
||||
let tree = self.get_tree(tree)?;
|
||||
|
||||
let (bounds_sql, params) = bounds_sql(low, high);
|
||||
let sql = format!("SELECT k, v FROM {} {} ORDER BY k ASC", tree, bounds_sql);
|
||||
|
||||
let params = params
|
||||
.iter()
|
||||
.map(|x| x as &dyn rusqlite::ToSql)
|
||||
.collect::<Vec<_>>();
|
||||
|
||||
TxValueIterator::make::<&[&dyn rusqlite::ToSql]>(self, &sql, params.as_ref())
|
||||
unimplemented!();
|
||||
}
|
||||
fn range_rev<'r>(
|
||||
&self,
|
||||
tree: usize,
|
||||
low: Bound<&'r [u8]>,
|
||||
high: Bound<&'r [u8]>,
|
||||
_tree: usize,
|
||||
_low: Bound<&'r [u8]>,
|
||||
_high: Bound<&'r [u8]>,
|
||||
) -> TxOpResult<TxValueIter<'_>> {
|
||||
let tree = self.get_tree(tree)?;
|
||||
|
||||
let (bounds_sql, params) = bounds_sql(low, high);
|
||||
let sql = format!("SELECT k, v FROM {} {} ORDER BY k DESC", tree, bounds_sql);
|
||||
|
||||
let params = params
|
||||
.iter()
|
||||
.map(|x| x as &dyn rusqlite::ToSql)
|
||||
.collect::<Vec<_>>();
|
||||
|
||||
TxValueIterator::make::<&[&dyn rusqlite::ToSql]>(self, &sql, params.as_ref())
|
||||
unimplemented!();
|
||||
}
|
||||
}
|
||||
|
||||
// ---- iterators outside transactions ----
|
||||
// complicated, they must hold the Statement and Row objects
|
||||
// therefore quite some unsafe code (it is a self-referential struct)
|
||||
// ----
|
||||
|
||||
struct DbValueIterator<'a> {
|
||||
db: MutexGuard<'a, SqliteDbInner>,
|
||||
|
@ -444,23 +417,17 @@ impl<'a> DbValueIterator<'a> {
|
|||
let mut boxed = Box::pin(res);
|
||||
trace!("make iterator with sql: {}", sql);
|
||||
|
||||
// This unsafe allows us to bypass lifetime checks
|
||||
let db = unsafe { NonNull::from(&boxed.db).as_ref() };
|
||||
let stmt = db.db.prepare(sql)?;
|
||||
|
||||
let mut_ref = Pin::as_mut(&mut boxed);
|
||||
// This unsafe allows us to write in a field of the pinned struct
|
||||
unsafe {
|
||||
let db = NonNull::from(&boxed.db);
|
||||
let stmt = db.as_ref().db.prepare(sql)?;
|
||||
|
||||
let mut_ref: Pin<&mut DbValueIterator<'a>> = Pin::as_mut(&mut boxed);
|
||||
Pin::get_unchecked_mut(mut_ref).stmt = Some(stmt);
|
||||
}
|
||||
|
||||
// This unsafe allows us to bypass lifetime checks
|
||||
let stmt = unsafe { NonNull::from(&boxed.stmt).as_mut() };
|
||||
let iter = stmt.as_mut().unwrap().query(args)?;
|
||||
let mut stmt = NonNull::from(&boxed.stmt);
|
||||
let iter = stmt.as_mut().as_mut().unwrap().query(args)?;
|
||||
|
||||
let mut_ref = Pin::as_mut(&mut boxed);
|
||||
// This unsafe allows us to write in a field of the pinned struct
|
||||
unsafe {
|
||||
let mut_ref: Pin<&mut DbValueIterator<'a>> = Pin::as_mut(&mut boxed);
|
||||
Pin::get_unchecked_mut(mut_ref).iter = Some(iter);
|
||||
}
|
||||
|
||||
|
@ -482,73 +449,28 @@ impl<'a> Iterator for DbValueIteratorPin<'a> {
|
|||
type Item = Result<(Value, Value)>;
|
||||
|
||||
fn next(&mut self) -> Option<Self::Item> {
|
||||
let mut_ref = Pin::as_mut(&mut self.0);
|
||||
// This unsafe allows us to mutably access the iterator field
|
||||
let next = unsafe { Pin::get_unchecked_mut(mut_ref).iter.as_mut()?.next() };
|
||||
iter_next_row(next)
|
||||
}
|
||||
}
|
||||
|
||||
// ---- iterators within transactions ----
|
||||
// it's the same except we don't hold a mutex guard,
|
||||
// only a Statement and a Rows object
|
||||
|
||||
struct TxValueIterator<'a> {
|
||||
stmt: Statement<'a>,
|
||||
iter: Option<Rows<'a>>,
|
||||
_pin: PhantomPinned,
|
||||
}
|
||||
|
||||
impl<'a> TxValueIterator<'a> {
|
||||
fn make<P: rusqlite::Params>(
|
||||
tx: &'a SqliteTx<'a>,
|
||||
sql: &str,
|
||||
args: P,
|
||||
) -> TxOpResult<TxValueIter<'a>> {
|
||||
let stmt = tx.tx.prepare(sql)?;
|
||||
let res = TxValueIterator {
|
||||
stmt,
|
||||
iter: None,
|
||||
_pin: PhantomPinned,
|
||||
let next = unsafe {
|
||||
let mut_ref: Pin<&mut DbValueIterator<'a>> = Pin::as_mut(&mut self.0);
|
||||
Pin::get_unchecked_mut(mut_ref).iter.as_mut()?.next()
|
||||
};
|
||||
let mut boxed = Box::pin(res);
|
||||
trace!("make iterator with sql: {}", sql);
|
||||
|
||||
// This unsafe allows us to bypass lifetime checks
|
||||
let stmt = unsafe { NonNull::from(&boxed.stmt).as_mut() };
|
||||
let iter = stmt.query(args)?;
|
||||
|
||||
let mut_ref = Pin::as_mut(&mut boxed);
|
||||
// This unsafe allows us to write in a field of the pinned struct
|
||||
unsafe {
|
||||
Pin::get_unchecked_mut(mut_ref).iter = Some(iter);
|
||||
}
|
||||
|
||||
Ok(Box::new(TxValueIteratorPin(boxed)))
|
||||
let row = match next {
|
||||
Err(e) => return Some(Err(e.into())),
|
||||
Ok(None) => return None,
|
||||
Ok(Some(r)) => r,
|
||||
};
|
||||
let k = match row.get::<_, Vec<u8>>(0) {
|
||||
Err(e) => return Some(Err(e.into())),
|
||||
Ok(x) => x,
|
||||
};
|
||||
let v = match row.get::<_, Vec<u8>>(1) {
|
||||
Err(e) => return Some(Err(e.into())),
|
||||
Ok(y) => y,
|
||||
};
|
||||
Some(Ok((k, v)))
|
||||
}
|
||||
}
|
||||
|
||||
impl<'a> Drop for TxValueIterator<'a> {
|
||||
fn drop(&mut self) {
|
||||
trace!("drop iter");
|
||||
drop(self.iter.take());
|
||||
}
|
||||
}
|
||||
|
||||
struct TxValueIteratorPin<'a>(Pin<Box<TxValueIterator<'a>>>);
|
||||
|
||||
impl<'a> Iterator for TxValueIteratorPin<'a> {
|
||||
type Item = TxOpResult<(Value, Value)>;
|
||||
|
||||
fn next(&mut self) -> Option<Self::Item> {
|
||||
let mut_ref = Pin::as_mut(&mut self.0);
|
||||
// This unsafe allows us to mutably access the iterator field
|
||||
let next = unsafe { Pin::get_unchecked_mut(mut_ref).iter.as_mut()?.next() };
|
||||
iter_next_row(next)
|
||||
}
|
||||
}
|
||||
|
||||
// ---- utility ----
|
||||
// ----
|
||||
|
||||
fn bounds_sql<'r>(low: Bound<&'r [u8]>, high: Bound<&'r [u8]>) -> (String, Vec<Vec<u8>>) {
|
||||
let mut sql = String::new();
|
||||
|
@ -588,25 +510,3 @@ fn bounds_sql<'r>(low: Bound<&'r [u8]>, high: Bound<&'r [u8]>) -> (String, Vec<V
|
|||
|
||||
(sql, params)
|
||||
}
|
||||
|
||||
fn iter_next_row<E>(
|
||||
next_row: rusqlite::Result<Option<&rusqlite::Row>>,
|
||||
) -> Option<std::result::Result<(Value, Value), E>>
|
||||
where
|
||||
E: From<rusqlite::Error>,
|
||||
{
|
||||
let row = match next_row {
|
||||
Err(e) => return Some(Err(e.into())),
|
||||
Ok(None) => return None,
|
||||
Ok(Some(r)) => r,
|
||||
};
|
||||
let k = match row.get::<_, Vec<u8>>(0) {
|
||||
Err(e) => return Some(Err(e.into())),
|
||||
Ok(x) => x,
|
||||
};
|
||||
let v = match row.get::<_, Vec<u8>>(1) {
|
||||
Err(e) => return Some(Err(e.into())),
|
||||
Ok(y) => y,
|
||||
};
|
||||
Some(Ok((k, v)))
|
||||
}
|
||||
|
|
|
@ -10,13 +10,8 @@ fn test_suite(db: Db) {
|
|||
let vb: &[u8] = &b"plip"[..];
|
||||
let vc: &[u8] = &b"plup"[..];
|
||||
|
||||
// ---- test simple insert/delete ----
|
||||
|
||||
assert!(tree.insert(ka, va).unwrap().is_none());
|
||||
assert_eq!(tree.get(ka).unwrap().unwrap(), va);
|
||||
assert_eq!(tree.len().unwrap(), 1);
|
||||
|
||||
// ---- test transaction logic ----
|
||||
|
||||
let res = db.transaction::<_, (), _>(|tx| {
|
||||
assert_eq!(tx.get(&tree, ka).unwrap().unwrap(), va);
|
||||
|
@ -42,8 +37,6 @@ fn test_suite(db: Db) {
|
|||
assert!(matches!(res, Err(TxError::Abort(42))));
|
||||
assert_eq!(tree.get(ka).unwrap().unwrap(), vb);
|
||||
|
||||
// ---- test iteration outside of transactions ----
|
||||
|
||||
let mut iter = tree.iter().unwrap();
|
||||
let next = iter.next().unwrap().unwrap();
|
||||
assert_eq!((next.0.as_ref(), next.1.as_ref()), (ka, vb));
|
||||
|
@ -80,48 +73,6 @@ fn test_suite(db: Db) {
|
|||
assert_eq!((next.0.as_ref(), next.1.as_ref()), (ka, vb));
|
||||
assert!(iter.next().is_none());
|
||||
drop(iter);
|
||||
|
||||
// ---- test iteration within transactions ----
|
||||
|
||||
db.transaction::<_, (), _>(|tx| {
|
||||
let mut iter = tx.iter(&tree).unwrap();
|
||||
let next = iter.next().unwrap().unwrap();
|
||||
assert_eq!((next.0.as_ref(), next.1.as_ref()), (ka, vb));
|
||||
let next = iter.next().unwrap().unwrap();
|
||||
assert_eq!((next.0.as_ref(), next.1.as_ref()), (kb, vc));
|
||||
assert!(iter.next().is_none());
|
||||
Ok(())
|
||||
})
|
||||
.unwrap();
|
||||
|
||||
db.transaction::<_, (), _>(|tx| {
|
||||
let mut iter = tx.range(&tree, kint..).unwrap();
|
||||
let next = iter.next().unwrap().unwrap();
|
||||
assert_eq!((next.0.as_ref(), next.1.as_ref()), (kb, vc));
|
||||
assert!(iter.next().is_none());
|
||||
Ok(())
|
||||
})
|
||||
.unwrap();
|
||||
|
||||
db.transaction::<_, (), _>(|tx| {
|
||||
let mut iter = tx.range_rev(&tree, ..kint).unwrap();
|
||||
let next = iter.next().unwrap().unwrap();
|
||||
assert_eq!((next.0.as_ref(), next.1.as_ref()), (ka, vb));
|
||||
assert!(iter.next().is_none());
|
||||
Ok(())
|
||||
})
|
||||
.unwrap();
|
||||
|
||||
db.transaction::<_, (), _>(|tx| {
|
||||
let mut iter = tx.iter_rev(&tree).unwrap();
|
||||
let next = iter.next().unwrap().unwrap();
|
||||
assert_eq!((next.0.as_ref(), next.1.as_ref()), (kb, vc));
|
||||
let next = iter.next().unwrap().unwrap();
|
||||
assert_eq!((next.0.as_ref(), next.1.as_ref()), (ka, vb));
|
||||
assert!(iter.next().is_none());
|
||||
Ok(())
|
||||
})
|
||||
.unwrap();
|
||||
}
|
||||
|
||||
#[test]
|
||||
|
@ -139,6 +90,17 @@ fn test_lmdb_db() {
|
|||
drop(path);
|
||||
}
|
||||
|
||||
#[test]
|
||||
#[cfg(feature = "sled")]
|
||||
fn test_sled_db() {
|
||||
use crate::sled_adapter::SledDb;
|
||||
|
||||
let path = mktemp::Temp::new_dir().unwrap();
|
||||
let db = SledDb::init(sled::open(path.to_path_buf()).unwrap());
|
||||
test_suite(db);
|
||||
drop(path);
|
||||
}
|
||||
|
||||
#[test]
|
||||
#[cfg(feature = "sqlite")]
|
||||
fn test_sqlite_db() {
|
||||
|
|
|
@ -80,11 +80,12 @@ k2v-client.workspace = true
|
|||
|
||||
|
||||
[features]
|
||||
default = [ "bundled-libs", "metrics", "lmdb", "sqlite", "k2v" ]
|
||||
default = [ "bundled-libs", "metrics", "sled", "lmdb", "sqlite", "k2v" ]
|
||||
|
||||
k2v = [ "garage_util/k2v", "garage_api/k2v" ]
|
||||
|
||||
# Database engines
|
||||
# Database engines, Sled is still our default even though we don't like it
|
||||
sled = [ "garage_model/sled" ]
|
||||
lmdb = [ "garage_model/lmdb" ]
|
||||
sqlite = [ "garage_model/sqlite" ]
|
||||
|
||||
|
|
|
@ -197,11 +197,11 @@ impl AdminRpcHandler {
|
|||
|
||||
// Gather table statistics
|
||||
let mut table = vec![" Table\tItems\tMklItems\tMklTodo\tGcTodo".into()];
|
||||
table.push(self.gather_table_stats(&self.garage.bucket_table)?);
|
||||
table.push(self.gather_table_stats(&self.garage.key_table)?);
|
||||
table.push(self.gather_table_stats(&self.garage.object_table)?);
|
||||
table.push(self.gather_table_stats(&self.garage.version_table)?);
|
||||
table.push(self.gather_table_stats(&self.garage.block_ref_table)?);
|
||||
table.push(self.gather_table_stats(&self.garage.bucket_table, opt.detailed)?);
|
||||
table.push(self.gather_table_stats(&self.garage.key_table, opt.detailed)?);
|
||||
table.push(self.gather_table_stats(&self.garage.object_table, opt.detailed)?);
|
||||
table.push(self.gather_table_stats(&self.garage.version_table, opt.detailed)?);
|
||||
table.push(self.gather_table_stats(&self.garage.block_ref_table, opt.detailed)?);
|
||||
write!(
|
||||
&mut ret,
|
||||
"\nTable stats:\n{}",
|
||||
|
@ -211,7 +211,15 @@ impl AdminRpcHandler {
|
|||
|
||||
// Gather block manager statistics
|
||||
writeln!(&mut ret, "\nBlock manager stats:").unwrap();
|
||||
let rc_len = self.garage.block_manager.rc_len()?.to_string();
|
||||
let rc_len = if opt.detailed {
|
||||
self.garage.block_manager.rc_len()?.to_string()
|
||||
} else {
|
||||
self.garage
|
||||
.block_manager
|
||||
.rc_fast_len()?
|
||||
.map(|x| x.to_string())
|
||||
.unwrap_or_else(|| "NC".into())
|
||||
};
|
||||
|
||||
writeln!(
|
||||
&mut ret,
|
||||
|
@ -232,6 +240,10 @@ impl AdminRpcHandler {
|
|||
)
|
||||
.unwrap();
|
||||
|
||||
if !opt.detailed {
|
||||
writeln!(&mut ret, "\nIf values are missing above (marked as NC), consider adding the --detailed flag (this will be slow).").unwrap();
|
||||
}
|
||||
|
||||
if !opt.skip_global {
|
||||
write!(&mut ret, "\n{}", self.gather_cluster_stats()).unwrap();
|
||||
}
|
||||
|
@ -333,13 +345,34 @@ impl AdminRpcHandler {
|
|||
ret
|
||||
}
|
||||
|
||||
fn gather_table_stats<F, R>(&self, t: &Arc<Table<F, R>>) -> Result<String, Error>
|
||||
fn gather_table_stats<F, R>(
|
||||
&self,
|
||||
t: &Arc<Table<F, R>>,
|
||||
detailed: bool,
|
||||
) -> Result<String, Error>
|
||||
where
|
||||
F: TableSchema + 'static,
|
||||
R: TableReplication + 'static,
|
||||
{
|
||||
let data_len = t.data.store.len().map_err(GarageError::from)?.to_string();
|
||||
let mkl_len = t.merkle_updater.merkle_tree_len()?.to_string();
|
||||
let (data_len, mkl_len) = if detailed {
|
||||
(
|
||||
t.data.store.len().map_err(GarageError::from)?.to_string(),
|
||||
t.merkle_updater.merkle_tree_len()?.to_string(),
|
||||
)
|
||||
} else {
|
||||
(
|
||||
t.data
|
||||
.store
|
||||
.fast_len()
|
||||
.map_err(GarageError::from)?
|
||||
.map(|x| x.to_string())
|
||||
.unwrap_or_else(|| "NC".into()),
|
||||
t.merkle_updater
|
||||
.merkle_tree_fast_len()?
|
||||
.map(|x| x.to_string())
|
||||
.unwrap_or_else(|| "NC".into()),
|
||||
)
|
||||
};
|
||||
|
||||
Ok(format!(
|
||||
" {}\t{}\t{}\t{}\t{}",
|
||||
|
|
|
@ -11,7 +11,7 @@ pub struct ConvertDbOpt {
|
|||
/// https://garagehq.deuxfleurs.fr/documentation/reference-manual/configuration/#db-engine-since-v0-8-0)
|
||||
#[structopt(short = "i")]
|
||||
input_path: PathBuf,
|
||||
/// Input database engine (lmdb or sqlite; limited by db engines
|
||||
/// Input database engine (sled, lmdb or sqlite; limited by db engines
|
||||
/// enabled in this build)
|
||||
#[structopt(short = "a")]
|
||||
input_engine: Engine,
|
||||
|
|
|
@ -531,6 +531,10 @@ pub struct StatsOpt {
|
|||
#[structopt(short = "a", long = "all-nodes")]
|
||||
pub all_nodes: bool,
|
||||
|
||||
/// Gather detailed statistics (this can be long)
|
||||
#[structopt(short = "d", long = "detailed")]
|
||||
pub detailed: bool,
|
||||
|
||||
/// Don't show global cluster stats (internal use in RPC)
|
||||
#[structopt(skip)]
|
||||
#[serde(default)]
|
||||
|
|
|
@ -18,8 +18,8 @@ compile_error!("Either bundled-libs or system-libs Cargo feature must be enabled
|
|||
#[cfg(all(feature = "bundled-libs", feature = "system-libs"))]
|
||||
compile_error!("Only one of bundled-libs and system-libs Cargo features must be enabled");
|
||||
|
||||
#[cfg(not(any(feature = "lmdb", feature = "sqlite")))]
|
||||
compile_error!("Must activate the Cargo feature for at least one DB engine: lmdb or sqlite.");
|
||||
#[cfg(not(any(feature = "lmdb", feature = "sled", feature = "sqlite")))]
|
||||
compile_error!("Must activate the Cargo feature for at least one DB engine: lmdb, sled or sqlite.");
|
||||
|
||||
use std::net::SocketAddr;
|
||||
use std::path::PathBuf;
|
||||
|
@ -72,6 +72,8 @@ async fn main() {
|
|||
let features = &[
|
||||
#[cfg(feature = "k2v")]
|
||||
"k2v",
|
||||
#[cfg(feature = "sled")]
|
||||
"sled",
|
||||
#[cfg(feature = "lmdb")]
|
||||
"lmdb",
|
||||
#[cfg(feature = "sqlite")]
|
||||
|
|
|
@ -42,7 +42,8 @@ tokio.workspace = true
|
|||
opentelemetry.workspace = true
|
||||
|
||||
[features]
|
||||
default = [ "lmdb", "sqlite" ]
|
||||
default = [ "sled", "lmdb", "sqlite" ]
|
||||
k2v = [ "garage_util/k2v" ]
|
||||
lmdb = [ "garage_db/lmdb" ]
|
||||
sled = [ "garage_db/sled" ]
|
||||
sqlite = [ "garage_db/sqlite" ]
|
||||
|
|
|
@ -118,6 +118,9 @@ impl Garage {
|
|||
.ok_or_message("Invalid `db_engine` value in configuration file")?;
|
||||
let mut db_path = config.metadata_dir.clone();
|
||||
match db_engine {
|
||||
db::Engine::Sled => {
|
||||
db_path.push("db");
|
||||
}
|
||||
db::Engine::Sqlite => {
|
||||
db_path.push("db.sqlite");
|
||||
}
|
||||
|
@ -131,6 +134,8 @@ impl Garage {
|
|||
v if v == usize::default() => None,
|
||||
v => Some(v),
|
||||
},
|
||||
sled_cache_capacity: config.sled_cache_capacity,
|
||||
sled_flush_every_ms: config.sled_flush_every_ms,
|
||||
};
|
||||
let db = db::open_db(&db_path, db_engine, &db_opt)
|
||||
.ok_or_message("Unable to open metadata db")?;
|
||||
|
|
|
@ -121,7 +121,13 @@ impl Worker for LifecycleWorker {
|
|||
mpu_aborted,
|
||||
..
|
||||
} => {
|
||||
let n_objects = self.garage.object_table.data.store.len().ok();
|
||||
let n_objects = self
|
||||
.garage
|
||||
.object_table
|
||||
.data
|
||||
.store
|
||||
.fast_len()
|
||||
.unwrap_or(None);
|
||||
let progress = match n_objects {
|
||||
None => "...".to_string(),
|
||||
Some(total) => format!(
|
||||
|
|
|
@ -6,6 +6,7 @@ use serde_bytes::ByteBuf;
|
|||
use tokio::sync::Notify;
|
||||
|
||||
use garage_db as db;
|
||||
use garage_db::counted_tree_hack::CountedTree;
|
||||
|
||||
use garage_util::data::*;
|
||||
use garage_util::error::*;
|
||||
|
@ -35,7 +36,7 @@ pub struct TableData<F: TableSchema, R: TableReplication> {
|
|||
pub(crate) insert_queue: db::Tree,
|
||||
pub(crate) insert_queue_notify: Arc<Notify>,
|
||||
|
||||
pub(crate) gc_todo: db::Tree,
|
||||
pub(crate) gc_todo: CountedTree,
|
||||
|
||||
pub(crate) metrics: TableMetrics,
|
||||
}
|
||||
|
@ -60,6 +61,7 @@ impl<F: TableSchema, R: TableReplication> TableData<F, R> {
|
|||
let gc_todo = db
|
||||
.open_tree(format!("{}:gc_todo_v2", F::TABLE_NAME))
|
||||
.expect("Unable to open GC DB tree");
|
||||
let gc_todo = CountedTree::new(gc_todo).expect("Cannot count gc_todo_v2");
|
||||
|
||||
let metrics = TableMetrics::new(
|
||||
F::TABLE_NAME,
|
||||
|
@ -368,6 +370,6 @@ impl<F: TableSchema, R: TableReplication> TableData<F, R> {
|
|||
}
|
||||
|
||||
pub fn gc_todo_len(&self) -> Result<usize, Error> {
|
||||
Ok(self.gc_todo.len()?)
|
||||
Ok(self.gc_todo.len())
|
||||
}
|
||||
}
|
||||
|
|
|
@ -10,7 +10,7 @@ use serde_bytes::ByteBuf;
|
|||
use futures::future::join_all;
|
||||
use tokio::sync::watch;
|
||||
|
||||
use garage_db as db;
|
||||
use garage_db::counted_tree_hack::CountedTree;
|
||||
|
||||
use garage_util::background::*;
|
||||
use garage_util::data::*;
|
||||
|
@ -334,9 +334,9 @@ impl<F: TableSchema, R: TableReplication> Worker for GcWorker<F, R> {
|
|||
}
|
||||
}
|
||||
|
||||
/// An entry stored in the gc_todo db tree associated with the table
|
||||
/// An entry stored in the gc_todo Sled tree associated with the table
|
||||
/// Contains helper function for parsing, saving, and removing
|
||||
/// such entry in the db
|
||||
/// such entry in Sled
|
||||
///
|
||||
/// Format of an entry:
|
||||
/// - key = 8 bytes: timestamp of tombstone
|
||||
|
@ -353,7 +353,7 @@ pub(crate) struct GcTodoEntry {
|
|||
}
|
||||
|
||||
impl GcTodoEntry {
|
||||
/// Creates a new GcTodoEntry (not saved in the db) from its components:
|
||||
/// Creates a new GcTodoEntry (not saved in Sled) from its components:
|
||||
/// the key of an entry in the table, and the hash of the associated
|
||||
/// serialized value
|
||||
pub(crate) fn new(key: Vec<u8>, value_hash: Hash) -> Self {
|
||||
|
@ -376,7 +376,7 @@ impl GcTodoEntry {
|
|||
}
|
||||
|
||||
/// Saves the GcTodoEntry in the gc_todo tree
|
||||
pub(crate) fn save(&self, gc_todo_tree: &db::Tree) -> Result<(), Error> {
|
||||
pub(crate) fn save(&self, gc_todo_tree: &CountedTree) -> Result<(), Error> {
|
||||
gc_todo_tree.insert(self.todo_table_key(), self.value_hash.as_slice())?;
|
||||
Ok(())
|
||||
}
|
||||
|
@ -386,14 +386,12 @@ impl GcTodoEntry {
|
|||
/// This is usefull to remove a todo entry only under the condition
|
||||
/// that it has not changed since the time it was read, i.e.
|
||||
/// what we have to do is still the same
|
||||
pub(crate) fn remove_if_equal(&self, gc_todo_tree: &db::Tree) -> Result<(), Error> {
|
||||
gc_todo_tree.db().transaction(|txn| {
|
||||
let key = self.todo_table_key();
|
||||
if txn.get(gc_todo_tree, &key)?.as_deref() == Some(self.value_hash.as_slice()) {
|
||||
txn.remove(gc_todo_tree, &key)?;
|
||||
}
|
||||
Ok(())
|
||||
})?;
|
||||
pub(crate) fn remove_if_equal(&self, gc_todo_tree: &CountedTree) -> Result<(), Error> {
|
||||
gc_todo_tree.compare_and_swap::<_, _, &[u8]>(
|
||||
&self.todo_table_key(),
|
||||
Some(self.value_hash),
|
||||
None,
|
||||
)?;
|
||||
Ok(())
|
||||
}
|
||||
|
||||
|
|
|
@ -31,14 +31,14 @@ pub struct MerkleUpdater<F: TableSchema, R: TableReplication> {
|
|||
// - value = the hash of the full serialized item, if present,
|
||||
// or an empty vec if item is absent (deleted)
|
||||
// Fields in data:
|
||||
// pub(crate) merkle_todo: db::Tree,
|
||||
// pub(crate) merkle_todo: sled::Tree,
|
||||
// pub(crate) merkle_todo_notify: Notify,
|
||||
|
||||
// Content of the merkle tree: items where
|
||||
// - key = .bytes() for MerkleNodeKey
|
||||
// - value = serialization of a MerkleNode, assumed to be MerkleNode::empty if not found
|
||||
// Field in data:
|
||||
// pub(crate) merkle_tree: db::Tree,
|
||||
// pub(crate) merkle_tree: sled::Tree,
|
||||
empty_node_hash: Hash,
|
||||
}
|
||||
|
||||
|
@ -291,6 +291,10 @@ impl<F: TableSchema, R: TableReplication> MerkleUpdater<F, R> {
|
|||
Ok(self.data.merkle_tree.len()?)
|
||||
}
|
||||
|
||||
pub fn merkle_tree_fast_len(&self) -> Result<Option<usize>, Error> {
|
||||
Ok(self.data.merkle_tree.fast_len()?)
|
||||
}
|
||||
|
||||
pub fn todo_len(&self) -> Result<usize, Error> {
|
||||
Ok(self.data.merkle_todo.len()?)
|
||||
}
|
||||
|
|
|
@ -1,6 +1,7 @@
|
|||
use opentelemetry::{global, metrics::*, KeyValue};
|
||||
|
||||
use garage_db as db;
|
||||
use garage_db::counted_tree_hack::CountedTree;
|
||||
|
||||
/// TableMetrics reference all counter used for metrics
|
||||
pub struct TableMetrics {
|
||||
|
@ -26,7 +27,7 @@ impl TableMetrics {
|
|||
store: db::Tree,
|
||||
merkle_tree: db::Tree,
|
||||
merkle_todo: db::Tree,
|
||||
gc_todo: db::Tree,
|
||||
gc_todo: CountedTree,
|
||||
) -> Self {
|
||||
let meter = global::meter(table_name);
|
||||
TableMetrics {
|
||||
|
@ -34,9 +35,9 @@ impl TableMetrics {
|
|||
.u64_value_observer(
|
||||
"table.size",
|
||||
move |observer| {
|
||||
if let Ok(value) = store.len() {
|
||||
if let Ok(Some(v)) = store.fast_len() {
|
||||
observer.observe(
|
||||
value as u64,
|
||||
v as u64,
|
||||
&[KeyValue::new("table_name", table_name)],
|
||||
);
|
||||
}
|
||||
|
@ -48,9 +49,9 @@ impl TableMetrics {
|
|||
.u64_value_observer(
|
||||
"table.merkle_tree_size",
|
||||
move |observer| {
|
||||
if let Ok(value) = merkle_tree.len() {
|
||||
if let Ok(Some(v)) = merkle_tree.fast_len() {
|
||||
observer.observe(
|
||||
value as u64,
|
||||
v as u64,
|
||||
&[KeyValue::new("table_name", table_name)],
|
||||
);
|
||||
}
|
||||
|
@ -76,12 +77,10 @@ impl TableMetrics {
|
|||
.u64_value_observer(
|
||||
"table.gc_todo_queue_length",
|
||||
move |observer| {
|
||||
if let Ok(value) = gc_todo.len() {
|
||||
observer.observe(
|
||||
value as u64,
|
||||
&[KeyValue::new("table_name", table_name)],
|
||||
);
|
||||
}
|
||||
observer.observe(
|
||||
gc_todo.len() as u64,
|
||||
&[KeyValue::new("table_name", table_name)],
|
||||
);
|
||||
},
|
||||
)
|
||||
.with_description("Table garbage collector TODO queue length")
|
||||
|
|
|
@ -87,10 +87,20 @@ pub struct Config {
|
|||
pub kubernetes_discovery: Option<KubernetesDiscoveryConfig>,
|
||||
|
||||
// -- DB
|
||||
/// Database engine to use for metadata (options: sqlite, lmdb)
|
||||
/// Database engine to use for metadata (options: sled, sqlite, lmdb)
|
||||
#[serde(default = "default_db_engine")]
|
||||
pub db_engine: String,
|
||||
|
||||
/// Sled cache size, in bytes
|
||||
#[serde(
|
||||
deserialize_with = "deserialize_capacity",
|
||||
default = "default_sled_cache_capacity"
|
||||
)]
|
||||
pub sled_cache_capacity: usize,
|
||||
/// Sled flush interval in milliseconds
|
||||
#[serde(default = "default_sled_flush_every_ms")]
|
||||
pub sled_flush_every_ms: u64,
|
||||
|
||||
/// LMDB map size
|
||||
#[serde(deserialize_with = "deserialize_capacity", default)]
|
||||
pub lmdb_map_size: usize,
|
||||
|
@ -236,6 +246,13 @@ fn default_db_engine() -> String {
|
|||
"lmdb".into()
|
||||
}
|
||||
|
||||
fn default_sled_cache_capacity() -> usize {
|
||||
128 * 1024 * 1024
|
||||
}
|
||||
fn default_sled_flush_every_ms() -> u64 {
|
||||
2000
|
||||
}
|
||||
|
||||
fn default_block_size() -> usize {
|
||||
1048576
|
||||
}
|
||||
|
|
Loading…
Reference in a new issue