make lmdb's map_size configurable (fix #628) #630

Merged
lx merged 2 commits from configurable-map-size into main 2023-09-11 16:48:15 +00:00
6 changed files with 81 additions and 12 deletions

1
Cargo.lock generated
View file

@ -1421,6 +1421,7 @@ dependencies = [
"async-trait", "async-trait",
"blake2", "blake2",
"bytes", "bytes",
"bytesize",
"chrono", "chrono",
"digest", "digest",
"err-derive", "err-derive",

View file

@ -33,7 +33,7 @@ args@{
ignoreLockHash, ignoreLockHash,
}: }:
let let
nixifiedLockHash = "b958f9aca0ee3fb1f7b52b15508132d0a96480a7f43f83e0da6609c0fe1812ef"; nixifiedLockHash = "3e3f41f614ab470ecb4b06c670cd6a84c443d799d01f1d48f1d251872099c468";
workspaceSrc = if args.workspaceSrc == null then ./. else args.workspaceSrc; workspaceSrc = if args.workspaceSrc == null then ./. else args.workspaceSrc;
currentLockHash = builtins.hashFile "sha256" (workspaceSrc + /Cargo.lock); currentLockHash = builtins.hashFile "sha256" (workspaceSrc + /Cargo.lock);
lockHashIgnored = if ignoreLockHash lockHashIgnored = if ignoreLockHash
@ -2014,6 +2014,7 @@ in
async_trait = (buildRustPackages."registry+https://github.com/rust-lang/crates.io-index".async-trait."0.1.73" { profileName = "__noProfile"; }).out; async_trait = (buildRustPackages."registry+https://github.com/rust-lang/crates.io-index".async-trait."0.1.73" { profileName = "__noProfile"; }).out;
blake2 = (rustPackages."registry+https://github.com/rust-lang/crates.io-index".blake2."0.10.6" { inherit profileName; }).out; blake2 = (rustPackages."registry+https://github.com/rust-lang/crates.io-index".blake2."0.10.6" { inherit profileName; }).out;
bytes = (rustPackages."registry+https://github.com/rust-lang/crates.io-index".bytes."1.4.0" { inherit profileName; }).out; bytes = (rustPackages."registry+https://github.com/rust-lang/crates.io-index".bytes."1.4.0" { inherit profileName; }).out;
bytesize = (rustPackages."registry+https://github.com/rust-lang/crates.io-index".bytesize."1.3.0" { inherit profileName; }).out;
chrono = (rustPackages."registry+https://github.com/rust-lang/crates.io-index".chrono."0.4.26" { inherit profileName; }).out; chrono = (rustPackages."registry+https://github.com/rust-lang/crates.io-index".chrono."0.4.26" { inherit profileName; }).out;
digest = (rustPackages."registry+https://github.com/rust-lang/crates.io-index".digest."0.10.7" { inherit profileName; }).out; digest = (rustPackages."registry+https://github.com/rust-lang/crates.io-index".digest."0.10.7" { inherit profileName; }).out;
err_derive = (buildRustPackages."registry+https://github.com/rust-lang/crates.io-index".err-derive."0.3.1" { profileName = "__noProfile"; }).out; err_derive = (buildRustPackages."registry+https://github.com/rust-lang/crates.io-index".err-derive."0.3.1" { profileName = "__noProfile"; }).out;

View file

@ -15,8 +15,9 @@ db_engine = "lmdb"
block_size = 1048576 block_size = 1048576
sled_cache_capacity = 134217728 sled_cache_capacity = "128MiB"
sled_flush_every_ms = 2000 sled_flush_every_ms = 2000
lmdb_map_size = "1T"
replication_mode = "3" replication_mode = "3"
@ -133,8 +134,8 @@ and not just the path to the metadata directory.
### `block_size` ### `block_size`
Garage splits stored objects in consecutive chunks of size `block_size` Garage splits stored objects in consecutive chunks of size `block_size`
(except the last one which might be smaller). The default size is 1MB and (except the last one which might be smaller). The default size is 1MiB and
should work in most cases. We recommend increasing it to e.g. 10MB if should work in most cases. We recommend increasing it to e.g. 10MiB if
you are using Garage to store large files and have fast network connections you are using Garage to store large files and have fast network connections
between all nodes (e.g. 1gbps). between all nodes (e.g. 1gbps).
@ -160,6 +161,14 @@ Increase this if sled is thrashing your SSD, at the risk of losing more data in
of a power outage (though this should not matter much as data is replicated on other of a power outage (though this should not matter much as data is replicated on other
nodes). The default value, 2000ms, should be appropriate for most use cases. nodes). The default value, 2000ms, should be appropriate for most use cases.
### `lmdb_map_size`
This parameter can be used to set the map size used by LMDB,
which is the size of the virtual memory region used for mapping the database file.
The value of this parameter is the maximum size the metadata database can take.
This value is not bound by the physical RAM size of the machine running Garage.
If not specified, it defaults to 1GiB on 32-bit machines and 1TiB on 64-bit machines.
### `replication_mode` ### `replication_mode`
Garage supports the following replication modes: Garage supports the following replication modes:

View file

@ -95,7 +95,7 @@ impl Garage {
info!("Opening Sled database at: {}", db_path.display()); info!("Opening Sled database at: {}", db_path.display());
let db = db::sled_adapter::sled::Config::default() let db = db::sled_adapter::sled::Config::default()
.path(&db_path) .path(&db_path)
.cache_capacity(config.sled_cache_capacity) .cache_capacity(config.sled_cache_capacity as u64)
.flush_every_ms(Some(config.sled_flush_every_ms)) .flush_every_ms(Some(config.sled_flush_every_ms))
.open() .open()
.ok_or_message("Unable to open sled DB")?; .ok_or_message("Unable to open sled DB")?;
@ -125,7 +125,10 @@ impl Garage {
info!("Opening LMDB database at: {}", db_path.display()); info!("Opening LMDB database at: {}", db_path.display());
std::fs::create_dir_all(&db_path) std::fs::create_dir_all(&db_path)
.ok_or_message("Unable to create LMDB data directory")?; .ok_or_message("Unable to create LMDB data directory")?;
let map_size = garage_db::lmdb_adapter::recommended_map_size(); let map_size = match config.lmdb_map_size {
v if v == usize::default() => garage_db::lmdb_adapter::recommended_map_size(),
v => v - (v % 4096),
};
use db::lmdb_adapter::heed; use db::lmdb_adapter::heed;
let mut env_builder = heed::EnvOpenOptions::new(); let mut env_builder = heed::EnvOpenOptions::new();
@ -142,6 +145,7 @@ impl Garage {
"OutOfMemory error while trying to open LMDB database. This can happen \ "OutOfMemory error while trying to open LMDB database. This can happen \
if your operating system is not allowing you to use sufficient virtual \ if your operating system is not allowing you to use sufficient virtual \
memory address space. Please check that no limit is set (ulimit -v). \ memory address space. Please check that no limit is set (ulimit -v). \
You may also try to set a smaller `lmdb_map_size` configuration parameter. \
On 32-bit machines, you should probably switch to another database engine.".into())) On 32-bit machines, you should probably switch to another database engine.".into()))
} }
x => x.ok_or_message("Unable to open LMDB DB")?, x => x.ok_or_message("Unable to open LMDB DB")?,

View file

@ -20,6 +20,7 @@ arc-swap = "1.0"
async-trait = "0.1" async-trait = "0.1"
blake2 = "0.10" blake2 = "0.10"
bytes = "1.0" bytes = "1.0"
bytesize = "1.2"
digest = "0.10" digest = "0.10"
err-derive = "0.3" err-derive = "0.3"
hexdump = "0.1" hexdump = "0.1"

View file

@ -1,4 +1,5 @@
//! Contains type and functions related to Garage configuration file //! Contains type and functions related to Garage configuration file
use std::convert::TryFrom;
use std::io::Read; use std::io::Read;
use std::net::SocketAddr; use std::net::SocketAddr;
use std::path::PathBuf; use std::path::PathBuf;
@ -16,7 +17,10 @@ pub struct Config {
pub data_dir: PathBuf, pub data_dir: PathBuf,
/// Size of data blocks to save to disk /// Size of data blocks to save to disk
#[serde(default = "default_block_size")] #[serde(
deserialize_with = "deserialize_capacity",
default = "default_block_size"
)]
pub block_size: usize, pub block_size: usize,
/// Replication mode. Supported values: /// Replication mode. Supported values:
@ -66,12 +70,19 @@ pub struct Config {
pub db_engine: String, pub db_engine: String,
/// Sled cache size, in bytes /// Sled cache size, in bytes
#[serde(default = "default_sled_cache_capacity")] #[serde(
pub sled_cache_capacity: u64, deserialize_with = "deserialize_capacity",
default = "default_sled_cache_capacity"
)]
pub sled_cache_capacity: usize,
/// Sled flush interval in milliseconds /// Sled flush interval in milliseconds
#[serde(default = "default_sled_flush_every_ms")] #[serde(default = "default_sled_flush_every_ms")]
pub sled_flush_every_ms: u64, pub sled_flush_every_ms: u64,
/// LMDB map size
#[serde(deserialize_with = "deserialize_capacity", default)]
pub lmdb_map_size: usize,
// -- APIs // -- APIs
/// Configuration for S3 api /// Configuration for S3 api
pub s3_api: S3ApiConfig, pub s3_api: S3ApiConfig,
@ -186,7 +197,7 @@ fn default_db_engine() -> String {
"sled".into() "sled".into()
} }
fn default_sled_cache_capacity() -> u64 { fn default_sled_cache_capacity() -> usize {
128 * 1024 * 1024 128 * 1024 * 1024
} }
fn default_sled_flush_every_ms() -> u64 { fn default_sled_flush_every_ms() -> u64 {
@ -266,8 +277,6 @@ fn deserialize_compression<'de, D>(deserializer: D) -> Result<Option<i32>, D::Er
where where
D: de::Deserializer<'de>, D: de::Deserializer<'de>,
{ {
use std::convert::TryFrom;
struct OptionVisitor; struct OptionVisitor;
impl<'de> serde::de::Visitor<'de> for OptionVisitor { impl<'de> serde::de::Visitor<'de> for OptionVisitor {
@ -312,6 +321,50 @@ where
deserializer.deserialize_any(OptionVisitor) deserializer.deserialize_any(OptionVisitor)
} }
/// Deserializes a capacity into a `usize`.
///
/// The input may be either an integer or a string that
/// `bytesize::ByteSize` is able to parse. A custom serde error is returned
/// when the string cannot be parsed, or when the resulting value does not
/// fit into a `usize`.
fn deserialize_capacity<'de, D>(deserializer: D) -> Result<usize, D::Error>
where
    D: de::Deserializer<'de>,
{
    struct CapacityVisitor;

    /// Narrows an integer to `usize`, mapping a failed conversion to the
    /// shared "out of bound" error.
    fn to_capacity<T, E>(raw: T) -> Result<usize, E>
    where
        usize: TryFrom<T>,
        E: de::Error,
    {
        match usize::try_from(raw) {
            Ok(capacity) => Ok(capacity),
            Err(_) => Err(E::custom("capacity value out of bound".to_owned())),
        }
    }

    impl<'de> serde::de::Visitor<'de> for CapacityVisitor {
        type Value = usize;

        fn expecting(&self, formatter: &mut std::fmt::Formatter) -> std::fmt::Result {
            formatter.write_str("int or '<capacity>'")
        }

        // String form: parse as a byte size first, then narrow to `usize`.
        fn visit_str<E>(self, value: &str) -> Result<Self::Value, E>
        where
            E: de::Error,
        {
            let parsed = match value.parse::<bytesize::ByteSize>() {
                Ok(size) => size,
                Err(e) => return Err(E::custom(format!("invalid capacity value: {}", e))),
            };
            to_capacity(parsed.as_u64())
        }

        // Signed integer form: negative values fail the `usize` conversion.
        fn visit_i64<E>(self, v: i64) -> Result<Self::Value, E>
        where
            E: de::Error,
        {
            to_capacity(v)
        }

        // Unsigned integer form: may still overflow `usize` on narrow targets.
        fn visit_u64<E>(self, v: u64) -> Result<Self::Value, E>
        where
            E: de::Error,
        {
            to_capacity(v)
        }
    }

    deserializer.deserialize_any(CapacityVisitor)
}
#[cfg(test)] #[cfg(test)]
mod tests { mod tests {
use crate::error::Error; use crate::error::Error;