diff --git a/Cargo.lock b/Cargo.lock index 8e142f42..9f4380c0 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1381,6 +1381,7 @@ dependencies = [ "kube", "kuska-sodiumoxide", "netapp", + "nix", "opentelemetry", "pnet_datalink", "rand", @@ -1389,7 +1390,6 @@ dependencies = [ "serde", "serde_bytes", "serde_json", - "systemstat", "tokio", "tokio-stream", "tracing", @@ -1425,6 +1425,7 @@ dependencies = [ "async-trait", "blake2", "bytes", + "bytesize", "chrono", "digest", "err-derive", @@ -2295,6 +2296,17 @@ dependencies = [ "tokio-util 0.7.8", ] +[[package]] +name = "nix" +version = "0.27.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2eb04e9c688eff1c89d72b407f168cf79bb9e867a9d3323ed6c01519eb9cc053" +dependencies = [ + "bitflags 2.4.0", + "cfg-if", + "libc", +] + [[package]] name = "no-std-net" version = "0.6.0" @@ -3537,20 +3549,6 @@ dependencies = [ "unicode-xid", ] -[[package]] -name = "systemstat" -version = "0.2.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a24aec24a9312c83999a28e3ef9db7e2afd5c64bf47725b758cdc1cafd5b0bd2" -dependencies = [ - "bytesize", - "lazy_static", - "libc", - "nom", - "time 0.3.28", - "winapi", -] - [[package]] name = "tempfile" version = "3.8.0" diff --git a/Cargo.nix b/Cargo.nix index f392f0e4..d6109ab5 100644 --- a/Cargo.nix +++ b/Cargo.nix @@ -33,7 +33,7 @@ args@{ ignoreLockHash, }: let - nixifiedLockHash = "5ed4453c19fb71d30817a98578ec946d1a0f9b23e56fb8c9a41b51599636b488"; + nixifiedLockHash = "7bef0004fa84feec502c75d50632d54202c272d56d2549fc09e2a356141685bb"; workspaceSrc = if args.workspaceSrc == null then ./. else args.workspaceSrc; currentLockHash = builtins.hashFile "sha256" (workspaceSrc + /Cargo.lock); lockHashIgnored = if ignoreLockHash @@ -1965,6 +1965,7 @@ in ${ if rootFeatures' ? "garage/kubernetes-discovery" || rootFeatures' ? "garage_rpc/kube" || rootFeatures' ? 
"garage_rpc/kubernetes-discovery" then "kube" else null } = (rustPackages."registry+https://github.com/rust-lang/crates.io-index".kube."0.75.0" { inherit profileName; }).out; sodiumoxide = (rustPackages."registry+https://github.com/rust-lang/crates.io-index".kuska-sodiumoxide."0.2.5-0" { inherit profileName; }).out; netapp = (rustPackages."registry+https://github.com/rust-lang/crates.io-index".netapp."0.5.2" { inherit profileName; }).out; + nix = (rustPackages."registry+https://github.com/rust-lang/crates.io-index".nix."0.27.1" { inherit profileName; }).out; opentelemetry = (rustPackages."registry+https://github.com/rust-lang/crates.io-index".opentelemetry."0.17.0" { inherit profileName; }).out; pnet_datalink = (rustPackages."registry+https://github.com/rust-lang/crates.io-index".pnet_datalink."0.33.0" { inherit profileName; }).out; rand = (rustPackages."registry+https://github.com/rust-lang/crates.io-index".rand."0.8.5" { inherit profileName; }).out; @@ -1973,7 +1974,6 @@ in serde = (rustPackages."registry+https://github.com/rust-lang/crates.io-index".serde."1.0.188" { inherit profileName; }).out; serde_bytes = (rustPackages."registry+https://github.com/rust-lang/crates.io-index".serde_bytes."0.11.12" { inherit profileName; }).out; serde_json = (rustPackages."registry+https://github.com/rust-lang/crates.io-index".serde_json."1.0.105" { inherit profileName; }).out; - systemstat = (rustPackages."registry+https://github.com/rust-lang/crates.io-index".systemstat."0.2.3" { inherit profileName; }).out; tokio = (rustPackages."registry+https://github.com/rust-lang/crates.io-index".tokio."1.32.0" { inherit profileName; }).out; tokio_stream = (rustPackages."registry+https://github.com/rust-lang/crates.io-index".tokio-stream."0.1.14" { inherit profileName; }).out; tracing = (rustPackages."registry+https://github.com/rust-lang/crates.io-index".tracing."0.1.37" { inherit profileName; }).out; @@ -2018,6 +2018,7 @@ in async_trait = 
(buildRustPackages."registry+https://github.com/rust-lang/crates.io-index".async-trait."0.1.73" { profileName = "__noProfile"; }).out; blake2 = (rustPackages."registry+https://github.com/rust-lang/crates.io-index".blake2."0.10.6" { inherit profileName; }).out; bytes = (rustPackages."registry+https://github.com/rust-lang/crates.io-index".bytes."1.4.0" { inherit profileName; }).out; + bytesize = (rustPackages."registry+https://github.com/rust-lang/crates.io-index".bytesize."1.3.0" { inherit profileName; }).out; chrono = (rustPackages."registry+https://github.com/rust-lang/crates.io-index".chrono."0.4.26" { inherit profileName; }).out; digest = (rustPackages."registry+https://github.com/rust-lang/crates.io-index".digest."0.10.7" { inherit profileName; }).out; err_derive = (buildRustPackages."registry+https://github.com/rust-lang/crates.io-index".err-derive."0.3.1" { profileName = "__noProfile"; }).out; @@ -2959,7 +2960,7 @@ in src = fetchCratesIo { inherit name version; sha256 = "b4668fb0ea861c1df094127ac5f1da3409a82116a4ba74fca2e58ef927159bb3"; }; features = builtins.concatLists [ [ "default" ] - (lib.optional (rootFeatures' ? "garage/opentelemetry-otlp" || rootFeatures' ? "garage/telemetry-otlp" || rootFeatures' ? "garage_db/cli" || rootFeatures' ? 
"garage_db/pretty_env_logger") "extra_traits") + [ "extra_traits" ] [ "std" ] ]; }); @@ -3235,6 +3236,21 @@ in }; }); + "registry+https://github.com/rust-lang/crates.io-index".nix."0.27.1" = overridableMkRustCrate (profileName: rec { + name = "nix"; + version = "0.27.1"; + registry = "registry+https://github.com/rust-lang/crates.io-index"; + src = fetchCratesIo { inherit name version; sha256 = "2eb04e9c688eff1c89d72b407f168cf79bb9e867a9d3323ed6c01519eb9cc053"; }; + features = builtins.concatLists [ + [ "fs" ] + ]; + dependencies = { + bitflags = (rustPackages."registry+https://github.com/rust-lang/crates.io-index".bitflags."2.4.0" { inherit profileName; }).out; + cfg_if = (rustPackages."registry+https://github.com/rust-lang/crates.io-index".cfg-if."1.0.0" { inherit profileName; }).out; + libc = (rustPackages."registry+https://github.com/rust-lang/crates.io-index".libc."0.2.147" { inherit profileName; }).out; + }; + }); + "registry+https://github.com/rust-lang/crates.io-index".no-std-net."0.6.0" = overridableMkRustCrate (profileName: rec { name = "no-std-net"; version = "0.6.0"; @@ -5004,21 +5020,6 @@ in }; }); - "registry+https://github.com/rust-lang/crates.io-index".systemstat."0.2.3" = overridableMkRustCrate (profileName: rec { - name = "systemstat"; - version = "0.2.3"; - registry = "registry+https://github.com/rust-lang/crates.io-index"; - src = fetchCratesIo { inherit name version; sha256 = "a24aec24a9312c83999a28e3ef9db7e2afd5c64bf47725b758cdc1cafd5b0bd2"; }; - dependencies = { - bytesize = (rustPackages."registry+https://github.com/rust-lang/crates.io-index".bytesize."1.3.0" { inherit profileName; }).out; - lazy_static = (rustPackages."registry+https://github.com/rust-lang/crates.io-index".lazy_static."1.4.0" { inherit profileName; }).out; - libc = (rustPackages."registry+https://github.com/rust-lang/crates.io-index".libc."0.2.147" { inherit profileName; }).out; - ${ if hostPlatform.parsed.kernel.name == "linux" || hostPlatform.parsed.kernel.name == 
"android" then "nom" else null } = (rustPackages."registry+https://github.com/rust-lang/crates.io-index".nom."7.1.3" { inherit profileName; }).out; - time = (rustPackages."registry+https://github.com/rust-lang/crates.io-index".time."0.3.28" { inherit profileName; }).out; - ${ if hostPlatform.isWindows then "winapi" else null } = (rustPackages."registry+https://github.com/rust-lang/crates.io-index".winapi."0.3.9" { inherit profileName; }).out; - }; - }); - "registry+https://github.com/rust-lang/crates.io-index".tempfile."3.8.0" = overridableMkRustCrate (profileName: rec { name = "tempfile"; version = "3.8.0"; @@ -6038,7 +6039,6 @@ in [ "ntsecapi" ] [ "ntstatus" ] (lib.optional (rootFeatures' ? "garage/kubernetes-discovery" || rootFeatures' ? "garage_rpc/kube" || rootFeatures' ? "garage_rpc/kubernetes-discovery") "objbase") - [ "pdh" ] [ "processenv" ] (lib.optional (rootFeatures' ? "garage/default" || rootFeatures' ? "garage/sled" || rootFeatures' ? "garage_db/default" || rootFeatures' ? "garage_db/sled" || rootFeatures' ? "garage_model/default" || rootFeatures' ? "garage_model/sled") "processthreadsapi") [ "profileapi" ] @@ -6052,7 +6052,6 @@ in [ "winerror" ] [ "winnt" ] [ "winsock2" ] - [ "ws2def" ] [ "ws2ipdef" ] [ "ws2tcpip" ] [ "wtypesbase" ] diff --git a/doc/book/reference-manual/configuration.md b/doc/book/reference-manual/configuration.md index df1251c2..f07fb1e0 100644 --- a/doc/book/reference-manual/configuration.md +++ b/doc/book/reference-manual/configuration.md @@ -17,8 +17,9 @@ db_engine = "lmdb" block_size = 1048576 -sled_cache_capacity = 134217728 +sled_cache_capacity = "128MiB" sled_flush_every_ms = 2000 +lmdb_map_size = "1T" replication_mode = "3" @@ -191,8 +192,8 @@ if geographical replication is used. ### `block_size` Garage splits stored objects in consecutive chunks of size `block_size` -(except the last one which might be smaller). The default size is 1MB and -should work in most cases. We recommend increasing it to e.g. 
10MB if +(except the last one which might be smaller). The default size is 1MiB and +should work in most cases. We recommend increasing it to e.g. 10MiB if you are using Garage to store large files and have fast network connections between all nodes (e.g. 1gbps). @@ -218,6 +219,14 @@ Increase this if sled is thrashing your SSD, at the risk of losing more data in of a power outage (though this should not matter much as data is replicated on other nodes). The default value, 2000ms, should be appropriate for most use cases. +### `lmdb_map_size` + +This parameter can be used to set the map size used by LMDB, +which is the size of the virtual memory region used for mapping the database file. +The value of this parameter is the maximum size the metadata database can take. +This value is not bound by the physical RAM size of the machine running Garage. +If not specified, it defaults to 1GiB on 32-bit machines and 1TiB on 64-bit machines. + ### `replication_mode` Garage supports the following replication modes: diff --git a/src/model/garage.rs b/src/model/garage.rs index 721d5e3a..8c9a3af3 100644 --- a/src/model/garage.rs +++ b/src/model/garage.rs @@ -124,7 +124,7 @@ impl Garage { info!("Opening Sled database at: {}", db_path.display()); let db = db::sled_adapter::sled::Config::default() .path(&db_path) - .cache_capacity(config.sled_cache_capacity) + .cache_capacity(config.sled_cache_capacity as u64) .flush_every_ms(Some(config.sled_flush_every_ms)) .open() .ok_or_message("Unable to open sled DB")?; @@ -163,7 +163,10 @@ impl Garage { info!("Opening LMDB database at: {}", db_path.display()); std::fs::create_dir_all(&db_path) .ok_or_message("Unable to create LMDB data directory")?; - let map_size = garage_db::lmdb_adapter::recommended_map_size(); + let map_size = match config.lmdb_map_size { + v if v == usize::default() => garage_db::lmdb_adapter::recommended_map_size(), + v => v - (v % 4096), + }; use db::lmdb_adapter::heed; let mut env_builder = 
heed::EnvOpenOptions::new(); @@ -182,6 +185,7 @@ impl Garage { "OutOfMemory error while trying to open LMDB database. This can happen \ if your operating system is not allowing you to use sufficient virtual \ memory address space. Please check that no limit is set (ulimit -v). \ + You may also try to set a smaller `lmdb_map_size` configuration parameter. \ On 32-bit machines, you should probably switch to another database engine.".into())) } x => x.ok_or_message("Unable to open LMDB DB")?, diff --git a/src/rpc/Cargo.toml b/src/rpc/Cargo.toml index f66478ce..8ccee46f 100644 --- a/src/rpc/Cargo.toml +++ b/src/rpc/Cargo.toml @@ -26,7 +26,7 @@ tracing = "0.1" rand = "0.8" itertools="0.10" sodiumoxide = { version = "0.2.5-0", package = "kuska-sodiumoxide" } -systemstat = "0.2.3" +nix = { version = "0.27", default-features = false, features = ["fs"] } async-trait = "0.1.7" serde = { version = "1.0", default-features = false, features = ["derive", "rc"] } diff --git a/src/rpc/system.rs b/src/rpc/system.rs index cf480549..b76e6d4a 100644 --- a/src/rpc/system.rs +++ b/src/rpc/system.rs @@ -896,46 +896,40 @@ impl NodeStatus { data_dir: &DataDirEnum, metrics: &SystemMetrics, ) { - use systemstat::{Platform, System}; - let mounts = System::new().mounts().unwrap_or_default(); - - let mount_avail = |path: &Path| { - mounts - .iter() - .filter(|x| path.starts_with(&x.fs_mounted_on)) - .max_by_key(|x| x.fs_mounted_on.len()) - .map(|x| (x.avail.as_u64(), x.total.as_u64())) + use nix::sys::statvfs::statvfs; + let mount_avail = |path: &Path| match statvfs(path) { + Ok(x) => { + let avail = x.blocks_available() * x.fragment_size(); + let total = x.blocks() * x.fragment_size(); + Some((x.filesystem_id(), avail, total)) + } + Err(_) => None, }; - self.meta_disk_avail = mount_avail(meta_dir); + self.meta_disk_avail = mount_avail(meta_dir).map(|(_, a, t)| (a, t)); self.data_disk_avail = match data_dir { - DataDirEnum::Single(dir) => mount_avail(dir), - DataDirEnum::Multiple(dirs) => { - 
// Take mounts corresponding to all specified data directories that - // can be used for writing data - let mounts = dirs - .iter() - .filter(|dir| dir.capacity.is_some()) - .map(|dir| { - mounts - .iter() - .filter(|mnt| dir.path.starts_with(&mnt.fs_mounted_on)) - .max_by_key(|mnt| mnt.fs_mounted_on.len()) - }) - .collect::>(); - if mounts.iter().any(|x| x.is_none()) { - None // could not get info for at least one mount - } else { - // dedup mounts in case several data directories are on the same filesystem - let mut mounts = mounts.iter().map(|x| x.unwrap()).collect::>(); - mounts.sort_by(|x, y| x.fs_mounted_on.cmp(&y.fs_mounted_on)); - mounts.dedup_by(|x, y| x.fs_mounted_on == y.fs_mounted_on); - // calculate sum of available and total space - Some(mounts.iter().fold((0, 0), |(x, y), mnt| { - (x + mnt.avail.as_u64(), y + mnt.total.as_u64()) - })) + DataDirEnum::Single(dir) => mount_avail(dir).map(|(_, a, t)| (a, t)), + DataDirEnum::Multiple(dirs) => (|| { + // TODO: more precise calculation that takes into account + // how data is going to be spread among partitions + let mut mounts = HashMap::new(); + for dir in dirs.iter() { + if dir.capacity.is_none() { + continue; + } + match mount_avail(&dir.path) { + Some((fsid, avail, total)) => { + mounts.insert(fsid, (avail, total)); + } + None => return None, + } } - } + Some( + mounts + .into_iter() + .fold((0, 0), |(x, y), (_, (a, b))| (x + a, y + b)), + ) + })(), }; if let Some((avail, total)) = self.meta_disk_avail { diff --git a/src/util/Cargo.toml b/src/util/Cargo.toml index 00dae4d1..2efb0270 100644 --- a/src/util/Cargo.toml +++ b/src/util/Cargo.toml @@ -20,6 +20,7 @@ arc-swap = "1.0" async-trait = "0.1" blake2 = "0.10" bytes = "1.0" +bytesize = "1.2" digest = "0.10" err-derive = "0.3" hexdump = "0.1" diff --git a/src/util/config.rs b/src/util/config.rs index 9d00fe82..cf31c87c 100644 --- a/src/util/config.rs +++ b/src/util/config.rs @@ -1,4 +1,5 @@ //! 
Contains type and functions related to Garage configuration file +use std::convert::TryFrom; use std::io::Read; use std::net::SocketAddr; use std::path::PathBuf; @@ -23,7 +24,10 @@ pub struct Config { pub data_fsync: bool, /// Size of data blocks to save to disk - #[serde(default = "default_block_size")] + #[serde( + deserialize_with = "deserialize_capacity", + default = "default_block_size" + )] pub block_size: usize, /// Replication mode. Supported values: @@ -73,12 +77,19 @@ pub struct Config { pub db_engine: String, /// Sled cache size, in bytes - #[serde(default = "default_sled_cache_capacity")] - pub sled_cache_capacity: u64, + #[serde( + deserialize_with = "deserialize_capacity", + default = "default_sled_cache_capacity" + )] + pub sled_cache_capacity: usize, /// Sled flush interval in milliseconds #[serde(default = "default_sled_flush_every_ms")] pub sled_flush_every_ms: u64, + /// LMDB map size + #[serde(deserialize_with = "deserialize_capacity", default)] + pub lmdb_map_size: usize, + // -- APIs /// Configuration for S3 api pub s3_api: S3ApiConfig, @@ -213,7 +224,7 @@ fn default_db_engine() -> String { "lmdb".into() } -fn default_sled_cache_capacity() -> u64 { +fn default_sled_cache_capacity() -> usize { 128 * 1024 * 1024 } fn default_sled_flush_every_ms() -> u64 { @@ -293,8 +304,6 @@ fn deserialize_compression<'de, D>(deserializer: D) -> Result, D::Er where D: de::Deserializer<'de>, { - use std::convert::TryFrom; - struct OptionVisitor; impl<'de> serde::de::Visitor<'de> for OptionVisitor { @@ -339,6 +348,50 @@ where deserializer.deserialize_any(OptionVisitor) } +fn deserialize_capacity<'de, D>(deserializer: D) -> Result +where + D: de::Deserializer<'de>, +{ + struct CapacityVisitor; + + impl<'de> serde::de::Visitor<'de> for CapacityVisitor { + type Value = usize; + fn expecting(&self, formatter: &mut std::fmt::Formatter) -> std::fmt::Result { + formatter.write_str("int or ''") + } + + fn visit_str(self, value: &str) -> Result + where + E: de::Error, + { 
+ value + .parse::() + .map(|x| x.as_u64()) + .map_err(|e| E::custom(format!("invalid capacity value: {}", e))) + .and_then(|v| { + usize::try_from(v) + .map_err(|_| E::custom("capacity value out of bound".to_owned())) + }) + } + + fn visit_i64(self, v: i64) -> Result + where + E: de::Error, + { + usize::try_from(v).map_err(|_| E::custom("capacity value out of bound".to_owned())) + } + + fn visit_u64(self, v: u64) -> Result + where + E: de::Error, + { + usize::try_from(v).map_err(|_| E::custom("capacity value out of bound".to_owned())) + } + } + + deserializer.deserialize_any(CapacityVisitor) +} + #[cfg(test)] mod tests { use crate::error::Error;