Compare commits

...

21 commits

Author SHA1 Message Date
8418c40250
Forgejo template 2025-04-07 21:58:31 +02:00
Baptiste Jonglez
fe68fdf54a plume: increase memory again 2025-03-26 20:21:57 +01:00
Baptiste Jonglez
187d36eb9b deploy_nixos: add help to apply changes without rebooting in production 2025-03-26 00:17:59 +01:00
Baptiste Jonglez
fd6275f5bc prod: Fix vim configuration syntax (different between staging and prod due to NixOS version difference) 2025-03-26 00:17:08 +01:00
Baptiste Jonglez
fc88a063b1 node_exporter: avoid using network mode host 2025-03-25 22:21:35 +01:00
Baptiste Jonglez
bb8c9db2ed telemetry: avoid network mode host, and poll less often 2025-03-25 22:12:42 +01:00
451068d716 Merge pull request 'prod: telemetry: Add smartctl_exporter based on staging work' (#53) from prod_smartctl_monitoring into main
Reviewed-on: #53
2025-03-25 21:09:08 +00:00
Baptiste Jonglez
797f946578 prod: telemetry: Add smartctl_exporter based on staging work 2025-03-24 17:53:17 +01:00
Baptiste Jonglez
596b7ab966 prod: telemetry: rename node-exporter job 2025-03-24 17:51:55 +01:00
Baptiste Jonglez
ec1fa3e540 staging: telemetry: Use a init task to create fake disk devices for smartctl_exporter 2025-03-24 17:47:05 +01:00
67230dd60c
guichet now advertise the correct dxfl login command 2025-03-24 16:48:18 +01:00
305c160899
guichet upgrade 2025-03-21 00:27:05 +01:00
Baptiste Jonglez
8d9aa00de5 staging: harden config of smartctl exporter
It currently requires all nodes to have /dev/sda (the device passthrough is hardcoded for now)
2025-03-19 23:46:55 +01:00
Baptiste Jonglez
5790453ff1 nix: Allow all capabilities in Nomad
This will be necessary for the smartctl exporter since it needs Linux
capabilities that are not allowed by default in Nomad.

We only have trusted Nomad jobs, and we already allow privileged
containers anyway, so there is no security impact.
2025-03-19 23:39:04 +01:00
Baptiste Jonglez
a2a470ac3d staging: promote piranha to Nomad server (caribou is dead) 2025-03-19 23:08:49 +01:00
Baptiste Jonglez
2009572fea prod: telemetry: move storage from bespin/scorpio to bespin/corrin 2025-03-12 21:22:56 +01:00
Baptiste Jonglez
8f0a45f03e staging: telemetry: add smartctl exporter 2025-03-12 21:06:56 +01:00
Baptiste Jonglez
b98e72af96 staging: telemetry: Fix metric collection due to faulty Consul connection 2025-03-12 20:51:49 +01:00
Baptiste Jonglez
e805cf5cf6 Augmentation stockage prometheus
La limite actuelle correspond à environ 2 mois d'historique prometheus,
c'est parfois trop peu pour pouvoir relever des tendances sur le long terme.
2025-03-11 23:10:07 +01:00
6b52ccd374 Merge pull request 'upgrade garage to v1.99.1' (#49) from garage-1.99 into main
Reviewed-on: #49
2025-03-09 09:48:50 +00:00
Armaël Guéneau
c5a0577cbf upgrade garage to v1.99.1 2025-03-09 10:44:12 +01:00
23 changed files with 909 additions and 136 deletions

View file

@ -3,11 +3,6 @@ job "garage" {
type = "system"
priority = 80
update {
max_parallel = 2
min_healthy_time = "60s"
}
group "garage" {
network {
port "s3" { static = 3900 }
@ -18,7 +13,11 @@ job "garage" {
}
update {
max_parallel = 10
# When upgrading the service, stop and upgrade nodes one by one.
# This should allow performing minor upgrades without downtime.
# (A higher value for max_parallel would risk stopping nodes in different
# zones at the same time, which would make the cluster unavailable)
max_parallel = 1
min_healthy_time = "30s"
healthy_deadline = "5m"
}
@ -26,7 +25,7 @@ job "garage" {
task "server" {
driver = "docker"
config {
image = "superboum/garage:v1.0.0-rc1-hotfix-red-ftr-wquorum"
image = "dxflrs/garage:v1.99.1-internal"
command = "/garage"
args = [ "server" ]
network_mode = "host"

View file

@ -13,7 +13,7 @@ job "guichet" {
task "guichet" {
driver = "docker"
config {
image = "dxflrs/guichet:0x4y7bj1qb8w8hckvpbzlgyxh63j66ij"
image = "dxflrs/guichet:aqd78kjs4dmizm8gax67d8sd68l9gcf5"
args = [ "server", "-config", "/etc/config.json" ]
readonly_rootfs = true
ports = [ "web_port" ]

View file

@ -17,8 +17,8 @@ job "plume-blog" {
network_mode = "host"
ports = [ "cache_port" ]
# cache
mount {
# cache
mount {
type = "tmpfs"
target = "/var/lib/varnish/varnishd:exec"
readonly = false
@ -64,8 +64,8 @@ job "plume-blog" {
}
resources {
memory = 1024
memory_max = 1024
memory = 1500
memory_max = 1500
cpu = 100
}

View file

@ -16,6 +16,17 @@ scrape_configs:
cert_file: /etc/prometheus/consul-client.crt
key_file: /etc/prometheus/consul-client.key
- job_name: 'smartctl-exporter'
scrape_interval: 300s
consul_sd_configs:
- server: 'https://localhost:8501'
services:
- 'smartctl-exporter'
tls_config:
ca_file: /etc/prometheus/consul-ca.crt
cert_file: /etc/prometheus/consul-client.crt
key_file: /etc/prometheus/consul-client.key
- job_name: 'tricot'
consul_sd_configs:
- server: 'https://localhost:8501'

View file

@ -0,0 +1,170 @@
{
"defaultAction": "SCMP_ACT_ERRNO",
"defaultErrnoRet": 1,
"architectures": [
"SCMP_ARCH_X86_64"
],
"syscalls": [
{
"names": [
"rt_sigaction",
"rt_sigprocmask",
"getpid",
"fcntl",
"fstatfs",
"gettid",
"futex",
"getdents64",
"epoll_ctl",
"tgkill",
"openat",
"read",
"close",
"nanosleep",
"getsockname",
"setsockopt",
"chdir",
"capget",
"prctl",
"accept4",
"fstat",
"getcwd",
"setuid",
"setgid",
"setgroups",
"capset",
"newfstatat",
"write",
"writev",
"mmap",
"brk",
"rt_sigreturn",
"access",
"execve",
"getppid",
"exit_group",
"faccessat2",
"mprotect",
"pread64",
"arch_prctl",
"set_tid_address",
"set_robust_list",
"rseq",
"munmap",
"madvise",
"sigaltstack",
"statfs",
"waitid",
"readlinkat",
"eventfd2",
"epoll_create1",
"pipe2",
"pidfd_send_signal",
"pidfd_open",
"readlink",
"epoll_pwait",
"dup3",
"bind",
"listen",
"getrlimit",
"sched_getaffinity",
"sched_yield"
],
"action": "SCMP_ACT_ALLOW",
"comment": "globally needed by the go runtime"
},
{
"names": [
"open",
"uname"
],
"action": "SCMP_ACT_ALLOW",
"comment": "Used by smartctl"
},
{
"names": [
"ioctl"
],
"action": "SCMP_ACT_ALLOW",
"comment": "allow SG_IO (aka SCSI commands) on ioctl as it's what's used to read SMART data",
"args": [
{
"index": 1,
"value": 8837,
"op": "SCMP_CMP_EQ"
}
]
},
{
"names": [
"ioctl"
],
"action": "SCMP_ACT_ALLOW",
"comment": "allow NVME_IOCTL_ID command (0x4e40) on ioctl as it's what's used to read data on NVMe devices",
"args": [
{
"index": 1,
"value": 20032,
"op": "SCMP_CMP_EQ"
}
]
},
{
"names": [
"ioctl"
],
"action": "SCMP_ACT_ALLOW",
"comment": "allow NVME_IOCTL_ADMIN_CMD command (0xc0484e41) on ioctl as it's what's used to read data on NVMe devices. For some reason, it needs to be encoded as 0xffffffffc0484e41",
"args": [
{
"index": 1,
"value": 18446744072640548417,
"op": "SCMP_CMP_EQ"
}
]
},
{
"names": [
"ioctl"
],
"action": "SCMP_ACT_ERRNO",
"comment": "Debug to allow/deny all ioctl (change to _LOG, _ALLOW, or _ERRNO appropriately)"
},
{
"names": [
"clone"
],
"action": "SCMP_ACT_ALLOW",
"comment": "partially allow clone as per docker config",
"args": [
{
"index": 0,
"value": 2114060288,
"op": "SCMP_CMP_MASKED_EQ"
}
]
},
{
"names": [
"clone3"
],
"action": "SCMP_ACT_ERRNO",
"comment": "disable clone3 in a specific way as per docker's default config",
"errnoRet": 38
},
{
"names": [
"socket"
],
"action": "SCMP_ACT_ALLOW",
"comment": "allow IPv4 sockets",
"args": [
{
"index": 0,
"value": 2,
"op": "SCMP_CMP_EQ"
}
]
}
]
}

View file

@ -0,0 +1,51 @@
job "telemetry-node-exporter" {
datacenters = ["neptune", "scorpio", "bespin", "corrin", "dathomir"]
type = "system"
priority = "100"
group "node_exporter" {
network {
port "node_exporter" { static = 9100 }
}
task "node_exporter" {
driver = "docker"
config {
image = "quay.io/prometheus/node-exporter:v1.8.1"
ports = ["node_exporter"]
volumes = [
"/:/host:ro,rslave"
]
args = [
"--web.listen-address=0.0.0.0:${NOMAD_PORT_node_exporter}",
"--path.rootfs=/host"
]
}
resources {
cpu = 50
memory = 40
}
service {
tags = [ "telemetry" ]
port = "node_exporter"
name = "node-exporter"
check {
type = "http"
path = "/"
port = 9100
address_mode = "driver"
interval = "60s"
timeout = "5s"
check_restart {
limit = 3
grace = "90s"
ignore_warnings = false
}
}
}
}
}
}

View file

@ -0,0 +1,131 @@
job "telemetry-smartctl-exporter" {
datacenters = ["neptune", "scorpio", "bespin", "corrin", "dathomir"]
type = "system"
priority = "100"
group "smartctl_exporter" {
network {
port "smartctl_exporter" { static = 9101 }
}
# This init task creates "fake" disk devices. This way, we can
# restrict which devices we expose to smartctl_exporter while having
# the same task configuration on all hosts.
task "create_fake_disks" {
driver = "docker"
user = "root"
config {
image = "bash:5.2.37"
args = [
"-x", "${NOMAD_TASK_DIR}/create_fake_disks.sh"
]
readonly_rootfs = true
mount {
type = "bind"
target = "/dev"
source = "/dev"
readonly = false
}
}
template {
data = <<EOF
echo "Checking existing disks and creating fake devices if needed..."
[ -a "/dev/sda" ] || ln -s /dev/null /dev/sda
[ -a "/dev/sdb" ] || ln -s /dev/null /dev/sdb
[ -a "/dev/nvme0" ] || ln -s /dev/null /dev/nvme0
EOF
destination = "local/create_fake_disks.sh"
perms = 755
}
resources {
cpu = 10
memory = 10
}
lifecycle {
hook = "prestart"
sidecar = false
}
}
task "smartctl_exporter" {
driver = "docker"
# Necessary to use low-level SMART and NVMe commands
user = "root"
config {
image = "prometheuscommunity/smartctl-exporter:v0.13.0"
args = [
"--web.listen-address=0.0.0.0:${NOMAD_PORT_smartctl_exporter}"
]
ports = ["smartctl_exporter"]
# CAP_SYS_RAWIO is needed for SMART requests, while CAP_SYS_ADMIN
# is needed for NVMe requests.
# These capabilities need to be allowed in the Nomad client config.
cap_drop = ["all"]
cap_add = ["CAP_SYS_RAWIO", "CAP_SYS_ADMIN"]
# Hardening options to avoid running the container as privileged,
# while still allowing just enough syscalls so that smartctl can query the disks.
security_opt = [
"no-new-privileges",
# Apparently there is no variable to determine the path to the allocation, hence this hack
"seccomp=/var/lib/nomad/alloc/${NOMAD_ALLOC_ID}/${NOMAD_TASK_NAME}/local/smartctl-seccomp.json",
]
readonly_rootfs = true
# Sadly, devices must exist for Docker to accept this option, otherwise it fails to run.
# This is why we create "fake" devices in the init task above.
devices = [
{
host_path = "/dev/sda"
container_path = "/dev/sda"
cgroup_permissions = "r"
},
{
host_path = "/dev/sdb"
container_path = "/dev/sdb"
cgroup_permissions = "r"
},
{
host_path = "/dev/nvme0"
container_path = "/dev/nvme0"
cgroup_permissions = "r"
}
]
}
template {
data = file("../config/smartctl-seccomp.json")
destination = "local/smartctl-seccomp.json"
perms = 444
}
resources {
cpu = 50
memory = 40
}
service {
tags = [ "telemetry" ]
port = "smartctl_exporter"
name = "smartctl-exporter"
check {
type = "http"
path = "/"
port = 9101
address_mode = "driver"
interval = "60s"
timeout = "5s"
check_restart {
limit = 3
grace = "90s"
ignore_warnings = false
}
}
}
}
}
}

View file

@ -1,5 +1,5 @@
job "telemetry-storage" {
datacenters = ["scorpio", "bespin"]
datacenters = ["scorpio", "bespin", "corrin"]
type = "service"
group "prometheus" {
@ -14,7 +14,7 @@ job "telemetry-storage" {
constraint {
attribute = "${attr.unique.hostname}"
operator = "set_contains_any"
value = "ananas,df-ymk"
value = "pamplemousse,df-ymk"
}
task "prometheus" {
@ -26,7 +26,7 @@ job "telemetry-storage" {
args = [
"--config.file=/etc/prometheus/prometheus.yml",
"--storage.tsdb.path=/data",
"--storage.tsdb.retention.size=20GB",
"--storage.tsdb.retention.size=40GB",
]
volumes = [
"secrets:/etc/prometheus",
@ -72,7 +72,7 @@ job "telemetry-storage" {
resources {
memory = 1500
memory_max = 4000
cpu = 1000
cpu = 400
}
service {

View file

@ -1,49 +0,0 @@
job "telemetry-system" {
datacenters = ["neptune", "scorpio", "bespin", "corrin", "dathomir"]
type = "system"
priority = "100"
group "collector" {
network {
port "node_exporter" { static = 9100 }
}
task "node_exporter" {
driver = "docker"
config {
image = "quay.io/prometheus/node-exporter:v1.8.1"
network_mode = "host"
volumes = [
"/:/host:ro,rslave"
]
args = [ "--path.rootfs=/host" ]
}
resources {
cpu = 50
memory = 40
}
service {
tags = [ "telemetry" ]
port = 9100
address_mode = "driver"
name = "node-exporter"
check {
type = "http"
path = "/"
port = 9100
address_mode = "driver"
interval = "60s"
timeout = "5s"
check_restart {
limit = 3
grace = "90s"
ignore_warnings = false
}
}
}
}
}
}

View file

@ -6,6 +6,8 @@
# The IP range to use for the Wireguard overlay of this cluster
deuxfleurs.clusterPrefix = "10.83.0.0/16";
programs.vim.defaultEditor = true;
deuxfleurs.clusterNodes = {
"df-ykl" = {
siteName = "bespin";

View file

@ -0,0 +1,137 @@
job "git" {
datacenters = ["bespin"]
type = "service"
group "forgejo" {
count = 1
network {
port "http" { static = 3000 }
port "ssh" { static = 22 }
}
ephemeral_disk {
size = 10000
}
restart {
attempts = 10
delay = "30s"
}
task "forgejo" {
driver = "docker"
config {
image = "codeberg.org/forgejo/forgejo:10.0.2"
network_mode = "host"
readonly_rootfs = true
ports = [ "http", "ssh" ]
volumes = [
"/ssd/forgejo:/data",
"/etc/timezone:/etc/timezone:ro",
"/etc/localtime:/etc/localtime:ro"
]
}
template {
data = <<ENV
USER_UID = 106
USER_GID = 112
DB_TYPE = postgres
DB_HOST = db:3306
GITEA__database__NAME = gitea
GITEA__database__USER = gitea
GITEA__database__PASSWD = "{{ key \"secrets/git/forgejo/database_password\" }}"
# Mailer credentials
GITEA__mailer__USER = _system._gitea@deuxfleurs.fr
GITEA__mailer__PASSWD = "{{ key \"secrets/git/forgejo/mailer_password\" }}"
# General configuration
GITEA__server__DOMAIN = git.staging.deuxfleurs.org
GITEA__server__SSH_DOMAIN = git.staging.deuxfleurs.org
GITEA__server__ROOT_URL = https://git.staging.deuxfleurs.org
GITEA__server__LFS_START_SERVER = true
GITEA__database__DB_TYPE = mysql
GITEA__database__HOST = db
GITEA__mailer__ENABLED = true
GITEA__mailer__SMTP_ADDR = smtp.deuxfleurs.fr
GITEA__mailer__SMTP_PORT = 465
GITEA__mailer__PROTOCOL = smtps
GITEA__mailer__FROM = Deuxfleurs Gitea <_system._forjego@staging.deuxfleurs.org>
GITEA__mailer__FORCE_TRUST_SERVER_CERT = true
GITEA__mailer__SUBJECT_PREFIX = [Deuxfleurs Forgejo Staging]
GITEA__service__REGISTER_EMAIL_CONFIRM = true
GITEA__service__ENABLE_NOTIFY_MAIL = true
GITEA__admin__DEFAULT_EMAIL_NOTIFICATIONS = enabled
GITEA__lfs__PATH = /data/git/lfs
# Prevent spam accounts
GITEA__service__DEFAULT_ALLOW_CREATE_ORGANIZATION = false
GITEA__service__DEFAULT_USER_VISIBILITY = limited
GITEA__service__DEFAULT_KEEP_EMAIL_PRIVATE = true
# Allow CORS for StaticCMS (a fork of Netlify CMS)
GITEA__cors__ENABLED = true
GITEA__cors__ALLOW_DOMAIN = *
GITEA__cors__ALLOW_CREDENTIALS = true
GITEA__cors__METHODS = GET,HEAD,POST,PUT,PATCH,DELETE,OPTIONS
GITEA__cors__SCHEME = *
GITEA__cors__HEADERS = Content-Type,User-Agent,Authorization
# Options passed to Gitea
# see https://docs.gitea.io/en-us/config-cheat-sheet/
# config is in /data/gitea/config/app.ini
GITEA__ui__ISSUE_PAGING_NUM = 20
ENV
destination = "secrets/env"
env = true
}
resources {
cpu = 1000
memory = 1000
memory_max = 1000
}
service {
name = "forgejo-ssh"
port = "ssh"
address_mode = "host"
tags = [
"forgejo-staging-ssh",
"tricot git.staging.deuxfleurs.org 100",
"d53-cname git.staging.deuxfleurs.org",
]
check {
type = "tcp"
port = "ssh"
interval = "60s"
timeout = "5s"
check_restart {
limit = 3
grace = "90s"
ignore_warnings = false
}
}
}
service {
name = "forgejo-http"
port = "http"
address_mode = "host"
tags = [
"forgejo-staging-http",
"tricot-add-header Access-Control-Allow-Origin *",
"d53-cname git.staging.deuxfleurs.org",
]
check {
type = "tcp"
port = "http"
interval = "60s"
timeout = "5s"
check_restart {
limit = 3
grace = "90s"
ignore_warnings = false
}
}
}
}
}
}

View file

@ -12,9 +12,20 @@ scrape_configs:
services:
- 'node-exporter'
tls_config:
ca_file: /etc/prom/consul.crt
cert_file: /etc/prom/consul-client.crt
key_file: /etc/prom/consul-client.key
ca_file: /etc/prometheus/consul.crt
cert_file: /etc/prometheus/consul-client.crt
key_file: /etc/prometheus/consul-client.key
- job_name: 'smartctl-exporter'
scrape_interval: 300s
consul_sd_configs:
- server: 'https://localhost:8501'
services:
- 'smartctl-exporter'
tls_config:
ca_file: /etc/prometheus/consul.crt
cert_file: /etc/prometheus/consul-client.crt
key_file: /etc/prometheus/consul-client.key
- job_name: 'garage'
authorization:
@ -25,9 +36,9 @@ scrape_configs:
services:
- 'garage-staging-admin'
tls_config:
ca_file: /etc/prom/consul.crt
cert_file: /etc/prom/consul-client.crt
key_file: /etc/prom/consul-client.key
ca_file: /etc/prometheus/consul.crt
cert_file: /etc/prometheus/consul-client.crt
key_file: /etc/prometheus/consul-client.key
- job_name: 'tricot'
consul_sd_configs:
@ -35,9 +46,9 @@ scrape_configs:
services:
- 'tricot-metrics'
tls_config:
ca_file: /etc/prom/consul.crt
cert_file: /etc/prom/consul-client.crt
key_file: /etc/prom/consul-client.key
ca_file: /etc/prometheus/consul.crt
cert_file: /etc/prometheus/consul-client.crt
key_file: /etc/prometheus/consul-client.key
# see https://prometheus.io/docs/prometheus/latest/configuration/configuration/#static_config
# and https://www.nomadproject.io/api-docs/metrics
@ -50,15 +61,15 @@ scrape_configs:
format: ['prometheus']
scheme: 'https'
tls_config:
ca_file: /etc/prom/nomad-ca.crt
cert_file: /etc/prom/nomad-client.crt
key_file: /etc/prom/nomad-client.key
ca_file: /etc/prometheus/nomad-ca.crt
cert_file: /etc/prometheus/nomad-client.crt
key_file: /etc/prometheus/nomad-client.key
insecure_skip_verify: true
consul_sd_configs:
- server: 'https://localhost:8501'
services:
- 'nomad-client'
tls_config:
ca_file: /etc/prom/consul.crt
cert_file: /etc/prom/consul-client.crt
key_file: /etc/prom/consul-client.key
ca_file: /etc/prometheus/consul.crt
cert_file: /etc/prometheus/consul-client.crt
key_file: /etc/prometheus/consul-client.key

View file

@ -0,0 +1,170 @@
{
"defaultAction": "SCMP_ACT_ERRNO",
"defaultErrnoRet": 1,
"architectures": [
"SCMP_ARCH_X86_64"
],
"syscalls": [
{
"names": [
"rt_sigaction",
"rt_sigprocmask",
"getpid",
"fcntl",
"fstatfs",
"gettid",
"futex",
"getdents64",
"epoll_ctl",
"tgkill",
"openat",
"read",
"close",
"nanosleep",
"getsockname",
"setsockopt",
"chdir",
"capget",
"prctl",
"accept4",
"fstat",
"getcwd",
"setuid",
"setgid",
"setgroups",
"capset",
"newfstatat",
"write",
"writev",
"mmap",
"brk",
"rt_sigreturn",
"access",
"execve",
"getppid",
"exit_group",
"faccessat2",
"mprotect",
"pread64",
"arch_prctl",
"set_tid_address",
"set_robust_list",
"rseq",
"munmap",
"madvise",
"sigaltstack",
"statfs",
"waitid",
"readlinkat",
"eventfd2",
"epoll_create1",
"pipe2",
"pidfd_send_signal",
"pidfd_open",
"readlink",
"epoll_pwait",
"dup3",
"bind",
"listen",
"getrlimit",
"sched_getaffinity",
"sched_yield"
],
"action": "SCMP_ACT_ALLOW",
"comment": "globally needed by the go runtime"
},
{
"names": [
"open",
"uname"
],
"action": "SCMP_ACT_ALLOW",
"comment": "Used by smartctl"
},
{
"names": [
"ioctl"
],
"action": "SCMP_ACT_ALLOW",
"comment": "allow SG_IO (aka SCSI commands) on ioctl as it's what's used to read SMART data",
"args": [
{
"index": 1,
"value": 8837,
"op": "SCMP_CMP_EQ"
}
]
},
{
"names": [
"ioctl"
],
"action": "SCMP_ACT_ALLOW",
"comment": "allow NVME_IOCTL_ID command (0x4e40) on ioctl as it's what's used to read data on NVMe devices",
"args": [
{
"index": 1,
"value": 20032,
"op": "SCMP_CMP_EQ"
}
]
},
{
"names": [
"ioctl"
],
"action": "SCMP_ACT_ALLOW",
"comment": "allow NVME_IOCTL_ADMIN_CMD command (0xc0484e41) on ioctl as it's what's used to read data on NVMe devices. For some reason, it needs to be encoded as 0xffffffffc0484e41",
"args": [
{
"index": 1,
"value": 18446744072640548417,
"op": "SCMP_CMP_EQ"
}
]
},
{
"names": [
"ioctl"
],
"action": "SCMP_ACT_ERRNO",
"comment": "Debug to allow/deny all ioctl (change to _LOG, _ALLOW, or _ERRNO appropriately)"
},
{
"names": [
"clone"
],
"action": "SCMP_ACT_ALLOW",
"comment": "partially allow clone as per docker config",
"args": [
{
"index": 0,
"value": 2114060288,
"op": "SCMP_CMP_MASKED_EQ"
}
]
},
{
"names": [
"clone3"
],
"action": "SCMP_ACT_ERRNO",
"comment": "disable clone3 in a specific way as per docker's default config",
"errnoRet": 38
},
{
"names": [
"socket"
],
"action": "SCMP_ACT_ALLOW",
"comment": "allow IPv4 sockets",
"args": [
{
"index": 0,
"value": 2,
"op": "SCMP_CMP_EQ"
}
]
}
]
}

View file

@ -0,0 +1,51 @@
job "telemetry-node-exporter" {
datacenters = ["neptune", "dathomir", "corrin", "bespin"]
type = "system"
priority = "100"
group "node_exporter" {
network {
port "node_exporter" { static = 9100 }
}
task "node_exporter" {
driver = "docker"
config {
image = "quay.io/prometheus/node-exporter:v1.8.1"
ports = ["node_exporter"]
volumes = [
"/:/host:ro,rslave"
]
args = [
"--web.listen-address=0.0.0.0:${NOMAD_PORT_node_exporter}",
"--path.rootfs=/host"
]
}
resources {
cpu = 50
memory = 40
}
service {
tags = [ "telemetry" ]
port = "node_exporter"
name = "node-exporter"
check {
type = "http"
path = "/"
port = 9100
address_mode = "driver"
interval = "60s"
timeout = "5s"
check_restart {
limit = 3
grace = "90s"
ignore_warnings = false
}
}
}
}
}
}

View file

@ -0,0 +1,131 @@
job "telemetry-smartctl-exporter" {
datacenters = ["neptune", "dathomir", "corrin", "bespin"]
type = "system"
priority = "100"
group "smartctl_exporter" {
network {
port "smartctl_exporter" { static = 9101 }
}
# This init task creates "fake" disk devices. This way, we can
# restrict which devices we expose to smartctl_exporter while having
# the same task configuration on all hosts.
task "create_fake_disks" {
driver = "docker"
user = "root"
config {
image = "bash:5.2.37"
args = [
"-x", "${NOMAD_TASK_DIR}/create_fake_disks.sh"
]
readonly_rootfs = true
mount {
type = "bind"
target = "/dev"
source = "/dev"
readonly = false
}
}
template {
data = <<EOF
echo "Checking existing disks and creating fake devices if needed..."
[ -a "/dev/sda" ] || ln -s /dev/null /dev/sda
[ -a "/dev/sdb" ] || ln -s /dev/null /dev/sdb
[ -a "/dev/nvme0" ] || ln -s /dev/null /dev/nvme0
EOF
destination = "local/create_fake_disks.sh"
perms = 755
}
resources {
cpu = 10
memory = 10
}
lifecycle {
hook = "prestart"
sidecar = false
}
}
task "smartctl_exporter" {
driver = "docker"
# Necessary to use low-level SMART and NVMe commands
user = "root"
config {
image = "prometheuscommunity/smartctl-exporter:v0.13.0"
args = [
"--web.listen-address=0.0.0.0:${NOMAD_PORT_smartctl_exporter}"
]
ports = ["smartctl_exporter"]
# CAP_SYS_RAWIO is needed for SMART requests, while CAP_SYS_ADMIN
# is needed for NVMe requests.
# These capabilities need to be allowed in the Nomad client config.
cap_drop = ["all"]
cap_add = ["CAP_SYS_RAWIO", "CAP_SYS_ADMIN"]
# Hardening options to avoid running the container as privileged,
# while still allowing just enough syscalls so that smartctl can query the disks.
security_opt = [
"no-new-privileges",
# Apparently there is no variable to determine the path to the allocation, hence this hack
"seccomp=/var/lib/nomad/alloc/${NOMAD_ALLOC_ID}/${NOMAD_TASK_NAME}/local/smartctl-seccomp.json",
]
readonly_rootfs = true
# Sadly, devices must exist for Docker to accept this option, otherwise it fails to run.
# This is why we create "fake" devices in the init task above.
devices = [
{
host_path = "/dev/sda"
container_path = "/dev/sda"
cgroup_permissions = "r"
},
{
host_path = "/dev/sdb"
container_path = "/dev/sdb"
cgroup_permissions = "r"
},
{
host_path = "/dev/nvme0"
container_path = "/dev/nvme0"
cgroup_permissions = "r"
}
]
}
template {
data = file("../config/smartctl-seccomp.json")
destination = "local/smartctl-seccomp.json"
perms = 444
}
resources {
cpu = 50
memory = 40
}
service {
tags = [ "telemetry" ]
port = "smartctl_exporter"
name = "smartctl-exporter"
check {
type = "http"
path = "/"
port = 9101
address_mode = "driver"
interval = "60s"
timeout = "5s"
check_restart {
limit = 3
grace = "90s"
ignore_warnings = false
}
}
}
}
}
}

View file

@ -1,49 +0,0 @@
job "telemetry-system" {
datacenters = ["neptune", "dathomir", "corrin", "bespin"]
type = "system"
priority = "100"
group "collector" {
network {
port "node_exporter" { static = 9100 }
}
task "node_exporter" {
driver = "docker"
config {
image = "quay.io/prometheus/node-exporter:v1.8.1"
network_mode = "host"
volumes = [
"/:/host:ro,rslave"
]
args = [ "--path.rootfs=/host" ]
}
resources {
cpu = 50
memory = 40
}
service {
tags = [ "telemetry" ]
port = 9100
address_mode = "driver"
name = "node-exporter"
check {
type = "http"
path = "/"
port = 9100
address_mode = "driver"
interval = "60s"
timeout = "5s"
check_restart {
limit = 3
grace = "90s"
ignore_warnings = false
}
}
}
}
}
}

View file

@ -36,10 +36,15 @@
deuxfleurs.wgautomeshPort = 1667;
deuxfleurs.services.wgautomesh.logLevel = "debug";
programs.vim = {
enable = true;
defaultEditor = true;
};
# Bootstrap IPs for Consul cluster,
# these are IPs on the Wireguard overlay
services.consul.extraConfig.retry_join = [
"10.14.1.3" # caribou
"10.14.4.1" # df-pw5
"10.14.2.33" # origan
"10.14.3.1" # piranha
];

View file

@ -10,7 +10,6 @@
deuxfleurs.hostName = "caribou";
deuxfleurs.staticIPv6.address = "2a01:e34:ec05:8a40::23";
deuxfleurs.isRaftServer = true;
# this denotes the version at install time, do not update
system.stateVersion = "21.05";

View file

@ -11,6 +11,7 @@
deuxfleurs.hostName = "piranha";
deuxfleurs.staticIPv4.address = "192.168.5.25";
deuxfleurs.staticIPv6.address = "2001:912:1ac0:2200::25";
deuxfleurs.isRaftServer = true;
# this denotes the version at install time, do not update
system.stateVersion = "24.05";

View file

@ -26,6 +26,11 @@ if [ "$CLUSTER" = "prod" ]; then
message "2. Reboot node manually. You can also take the opportunity to upgrade with:"
message " REBOOT_NODES=yes ./upgrade_nixos prod $NIXHOST"
message "3. Mark node as eligible again in Nomad"
message ""
message "If you are certain that the update is not disruptive, you can manually apply changes:"
message "1. Connect to node '$NIXHOST' over SSH"
message "2. Run this on the node:"
message " sudo nixos-rebuild switch"
message "-------------------------------------------------------------------------------------"
else
cmd nixos-rebuild switch

View file

@ -47,5 +47,6 @@ ports so that we can avoid conflicts when adding services.
8999 opendkim
9090 prometheus
9100 node_exporter
9101 smartctl_exporter
9334 tricot metrics
9991 guichet

View file

@ -68,11 +68,6 @@ SystemMaxUse=1G
# Enable support for all terminal emulators such as urxvt
environment.enableAllTerminfo = true;
programs.vim = {
enable = true;
defaultEditor = true;
};
# Enable network time
services.ntp.enable = false;
services.timesyncd.enable = true;

View file

@ -397,6 +397,7 @@ in
{
volumes.enabled = true;
allow_privileged = true;
allow_caps = ["all"];
}
];
}