Switch back staging telemetry to docker and update stack

This commit is contained in:
Maximilien Richer 2025-01-25 18:50:55 +01:00
parent f304dec9ce
commit 6c6af54655
Signed by untrusted user: maximilien
GPG key ID: 04FD5063D6D43365
3 changed files with 171 additions and 159 deletions

View file

@ -2,95 +2,6 @@ job "telemetry-service" {
datacenters = ["neptune", "dathomir", "corrin", "bespin"] datacenters = ["neptune", "dathomir", "corrin", "bespin"]
type = "service" type = "service"
group "prometheus" {
count = 2
network {
port "prometheus" {
static = 9090
}
}
constraint {
attribute = "${attr.unique.hostname}"
operator = "set_contains_any"
value = "df-pw5,origan"
}
task "prometheus" {
driver = "nix2"
config {
nixpkgs = "github:nixos/nixpkgs/nixos-22.11"
packages = [ "#prometheus", "#coreutils", "#findutils", "#bash" ]
command = "prometheus"
args = [
"--config.file=/etc/prom/prometheus.yml",
"--storage.tsdb.path=/data",
"--storage.tsdb.retention.size=5GB",
]
bind = {
"/mnt/ssd/prometheus" = "/data"
}
}
template {
data = file("../config/prometheus.yml")
destination = "etc/prom/prometheus.yml"
}
template {
data = "{{ key \"secrets/consul/consul-ca.crt\" }}"
destination = "etc/prom/consul.crt"
}
template {
data = "{{ key \"secrets/consul/consul-client.crt\" }}"
destination = "etc/prom/consul-client.crt"
}
template {
data = "{{ key \"secrets/consul/consul-client.key\" }}"
destination = "etc/prom/consul-client.key"
}
template {
data = "{{ key \"secrets/nomad/nomad-ca.crt\" }}"
destination = "etc/prom/nomad-ca.crt"
}
template {
data = "{{ key \"secrets/nomad/nomad-client.crt\" }}"
destination = "etc/prom/nomad-client.crt"
}
template {
data = "{{ key \"secrets/nomad/nomad-client.key\" }}"
destination = "etc/prom/nomad-client.key"
}
resources {
memory = 500
cpu = 200
}
service {
port = "prometheus"
name = "prometheus"
check {
type = "http"
path = "/"
interval = "60s"
timeout = "5s"
check_restart {
limit = 3
grace = "90s"
ignore_warnings = false
}
}
}
}
}
group "grafana" { group "grafana" {
count = 1 count = 1
@ -106,50 +17,46 @@ job "telemetry-service" {
sidecar = false sidecar = false
} }
driver = "nix2" driver = "docker"
config { config {
packages = [ "#litestream" ] image = "litestream/litestream:0.3.13"
command = "litestream"
args = [ args = [
"restore", "-config", "/etc/litestream.yml", "/ephemeral/grafana.db" "restore", "-config", "/etc/litestream.yml", "/ephemeral/grafana.db"
] ]
bind = { volumes = [
"../alloc/data" = "/ephemeral", "../alloc/data:/ephemeral",
} "secrets/litestream.yml:/etc/litestream.yml"
]
} }
user = "472"
template { template {
data = file("../config/grafana-litestream.yml") data = file("../config/grafana-litestream.yml")
destination = "etc/litestream.yml" destination = "secrets/litestream.yml"
} }
resources { resources {
memory = 100 memory = 50
memory_max = 1000 memory_max = 200
cpu = 100 cpu = 100
} }
} }
task "grafana" { task "grafana" {
driver = "nix2" driver = "docker"
config { config {
nixpkgs = "github:nixos/nixpkgs/nixos-22.11" image = "grafana/grafana:11.4.0"
packages = [ "#grafana" ] network_mode = "host"
command = "grafana-server" ports = [ "grafana" ]
args = [ volumes = [
"-homepath", "/share/grafana", "../alloc/data:/var/lib/grafana",
"cfg:default.paths.data=/grafana", "secrets/prometheus.yaml:/etc/grafana/provisioning/datasources/prometheus.yaml"
"cfg:default.paths.provisioning=/grafana-provisioning"
] ]
bind = {
"../alloc/data" = "/grafana",
}
} }
template { template {
data = file("../config/grafana-datasource-prometheus.yaml") data = file("../config/grafana-datasource-prometheus.yaml")
destination = "grafana-provisioning/datasources/prometheus.yaml" destination = "secrets/prometheus.yaml"
} }
template { template {
@ -163,8 +70,9 @@ GF_SECURITY_ADMIN_PASSWORD={{ key "secrets/telemetry/grafana/admin_password" }}
} }
resources { resources {
memory = 300 memory = 100
cpu = 300 memory_max = 400
cpu = 300
} }
restart { restart {
@ -181,9 +89,12 @@ GF_SECURITY_ADMIN_PASSWORD={{ key "secrets/telemetry/grafana/admin_password" }}
"tricot grafana.staging.deuxfleurs.org", "tricot grafana.staging.deuxfleurs.org",
"d53-cname grafana.staging.deuxfleurs.org", "d53-cname grafana.staging.deuxfleurs.org",
] ]
port = "grafana" port = 3719
address_mode = "driver"
check { check {
type = "tcp" type = "tcp"
port = 3719
address_mode = "driver"
interval = "60s" interval = "60s"
timeout = "5s" timeout = "5s"
check_restart { check_restart {
@ -196,26 +107,27 @@ GF_SECURITY_ADMIN_PASSWORD={{ key "secrets/telemetry/grafana/admin_password" }}
} }
task "replicate-db" { task "replicate-db" {
driver = "nix2" driver = "docker"
config { config {
packages = [ "#litestream" ] image = "litestream/litestream:0.3.13"
command = "litestream"
args = [ args = [
"replicate", "-config", "/etc/litestream.yml" "replicate", "-config", "/etc/litestream.yml"
] ]
bind = { volumes = [
"../alloc/data" = "/ephemeral", "../alloc/data:/ephemeral",
} "secrets/litestream.yml:/etc/litestream.yml"
]
} }
user = "472"
template { template {
data = file("../config/grafana-litestream.yml") data = file("../config/grafana-litestream.yml")
destination = "etc/litestream.yml" destination = "secrets/litestream.yml"
} }
resources { resources {
memory = 100 memory = 50
memory_max = 500 memory_max = 200
cpu = 100 cpu = 100
} }
} }

View file

@ -0,0 +1,97 @@
# Nomad service job that runs the Prometheus server used for telemetry storage.
job "telemetry-storage" {
  type        = "service"
  datacenters = ["neptune", "dathomir", "corrin", "bespin"]

  group "prometheus" {
    # Two instances; the static port below means each needs its own host.
    count = 2

    # Only place this group on the hosts named in the value list.
    constraint {
      attribute = "${attr.unique.hostname}"
      operator  = "set_contains_any"
      value     = "df-pw5,origan"
    }

    network {
      port "prometheus" {
        static = 9090
      }
    }

    task "prometheus" {
      driver = "docker"

      resources {
        cpu    = 200
        memory = 500
      }

      config {
        image        = "prom/prometheus:v3.1.0"
        network_mode = "host"
        ports        = ["prometheus"]

        # The rendered templates (written under secrets/) are exposed to the
        # container as /etc/prometheus, which is where --config.file points.
        # NOTE(review): the relative "secrets" source presumably resolves
        # against the task directory -- confirm against the docker driver docs.
        # /data is the TSDB path passed in args, backed by a host directory.
        volumes = [
          "secrets:/etc/prometheus",
          "/mnt/ssd/prometheus:/data"
        ]

        args = [
          "--config.file=/etc/prometheus/prometheus.yml",
          "--storage.tsdb.path=/data",
          "--storage.tsdb.retention.size=20GB",
        ]
      }

      # Main Prometheus configuration, read from the repository at submit time.
      template {
        destination = "secrets/prometheus.yml"
        data        = file("../config/prometheus.yml")
      }

      # Consul CA certificate, client certificate and client key,
      # each rendered from a `key` template lookup.
      template {
        destination = "secrets/consul.crt"
        data        = "{{ key \"secrets/consul/consul-ca.crt\" }}"
      }

      template {
        destination = "secrets/consul-client.crt"
        data        = "{{ key \"secrets/consul/consul-client.crt\" }}"
      }

      template {
        destination = "secrets/consul-client.key"
        data        = "{{ key \"secrets/consul/consul-client.key\" }}"
      }

      # Nomad CA certificate, client certificate and client key,
      # rendered the same way.
      template {
        destination = "secrets/nomad-ca.crt"
        data        = "{{ key \"secrets/nomad/nomad-ca.crt\" }}"
      }

      template {
        destination = "secrets/nomad-client.crt"
        data        = "{{ key \"secrets/nomad/nomad-client.crt\" }}"
      }

      template {
        destination = "secrets/nomad-client.key"
        data        = "{{ key \"secrets/nomad/nomad-client.key\" }}"
      }

      # Service registration uses a literal port number together with the
      # driver address mode, for both the service and its health check.
      service {
        name         = "prometheus"
        port         = 9090
        address_mode = "driver"

        check {
          type         = "http"
          path         = "/"
          port         = 9090
          address_mode = "driver"
          interval     = "60s"
          timeout      = "5s"

          # Restart the task after 3 failed checks, with a 90s grace period.
          check_restart {
            limit           = 3
            grace           = "90s"
            ignore_warnings = false
          }
        }
      }
    }
  }
}

View file

@ -4,43 +4,46 @@ job "telemetry-system" {
priority = "100" priority = "100"
group "collector" { group "collector" {
network { network {
port "node_exporter" { static = 9100 } port "node_exporter" { static = 9100 }
} }
task "node_exporter" { task "node_exporter" {
driver = "nix2" driver = "docker"
config { config {
packages = [ "#prometheus-node-exporter" ] image = "quay.io/prometheus/node-exporter:v1.8.1"
command = "node_exporter" network_mode = "host"
args = [ "--path.rootfs=/host" ] volumes = [
bind_read_only = { "/:/host:ro,rslave"
"/" = "/host" ]
} args = [ "--path.rootfs=/host" ]
} }
resources { resources {
cpu = 50 cpu = 50
memory = 40 memory = 40
} }
service { service {
name = "node-exporter" tags = [ "telemetry" ]
tags = [ "telemetry" ] port = 9100
port = "node_exporter" address_mode = "driver"
check { name = "node-exporter"
type = "http" check {
path = "/" type = "http"
interval = "60s" path = "/"
timeout = "5s" port = 9100
check_restart { address_mode = "driver"
limit = 3 interval = "60s"
grace = "90s" timeout = "5s"
ignore_warnings = false check_restart {
} limit = 3
} grace = "90s"
} ignore_warnings = false
} }
} }
} }
}
}
}