staging: telemetry: Use an init task to create fake disk devices for smartctl_exporter

This commit is contained in:
Baptiste Jonglez 2025-03-24 17:47:05 +01:00
parent 67230dd60c
commit ec1fa3e540

View file

@ -8,6 +8,50 @@ job "telemetry-smartctl-exporter" {
port "smartctl_exporter" { static = 9101 }
}
# This init task creates "fake" disk devices. This way, we can
# restrict which devices we expose to smartctl_exporter while having
# the same task configuration on all hosts.
task "create_fake_disks" {
  # Init task: makes sure every device path listed in the exporter's
  # `devices` option exists on the host, creating a symlink to /dev/null
  # for each path that is missing.
  driver = "docker"
  # NOTE(review): root is presumably required to create entries under /dev — confirm.
  user = "root"

  config {
    image = "bash:5.2.37"
    # "-x" asks bash to trace each command, which makes the task logs show
    # exactly which devices were created.
    # NOTE(review): assumes the image entrypoint forwards these arguments to bash — confirm.
    args = [
      "-x", "${NOMAD_TASK_DIR}/create_fake_disks.sh"
    ]
    readonly_rootfs = true
    # The host /dev is bind-mounted read-write: the symlinks must be created
    # on the host itself, not inside the container filesystem.
    mount {
      type     = "bind"
      target   = "/dev"
      source   = "/dev"
      readonly = false
    }
  }

  template {
    # The script aborts on the first failing command (set -e) so that a
    # failed `ln` makes this task fail instead of being silently ignored.
    # The loop variable is written as $dev without braces on purpose: HCL
    # would try to interpolate the brace form inside the heredoc.
    # `<<-` strips the common leading indentation from the rendered script.
    data = <<-EOF
      set -e
      echo "Checking existing disks and creating fake devices if needed..."
      for dev in /dev/sda /dev/sdb /dev/nvme0; do
        [ -e "$dev" ] || ln -s /dev/null "$dev"
      done
      EOF
    destination = "local/create_fake_disks.sh"
    perms       = "755"
  }

  resources {
    cpu    = 10
    memory = 10
  }

  # Declared as a non-sidecar prestart hook, i.e. a one-shot init step.
  lifecycle {
    hook    = "prestart"
    sidecar = false
  }
}
task "smartctl_exporter" {
driver = "docker"
# Necessary to use low-level SMART and NVMe commands
@ -21,6 +65,7 @@ job "telemetry-smartctl-exporter" {
network_mode = "host"
# CAP_SYS_RAWIO is needed for SMART requests, while CAP_SYS_ADMIN
# is needed for NVMe requests.
# These capabilities need to be allowed in the Nomad client config.
cap_drop = ["all"]
cap_add = ["CAP_SYS_RAWIO", "CAP_SYS_ADMIN"]
# Hardening options to avoid running the container as privileged,
@ -31,14 +76,23 @@ job "telemetry-smartctl-exporter" {
"seccomp=/var/lib/nomad/alloc/${NOMAD_ALLOC_ID}/${NOMAD_TASK_NAME}/local/smartctl-seccomp.json",
]
readonly_rootfs = true
# Sadly, devices must exist for Docker to accept this option, so
# we can't declare all possible devices.
# This may help: https://docs.docker.com/reference/cli/docker/container/run/#device-cgroup-rule
# Sadly, devices must exist for Docker to accept this option, otherwise it fails to run.
# This is why we create "fake" devices in the init task above.
devices = [
{
host_path = "/dev/sda"
container_path = "/dev/sda"
cgroup_permissions = "r"
},
{
host_path = "/dev/sdb"
container_path = "/dev/sdb"
cgroup_permissions = "r"
},
{
host_path = "/dev/nvme0"
container_path = "/dev/nvme0"
cgroup_permissions = "r"
}
]
}