Switch staging telemetry back to Docker and update the stack

Maximilien Richer 2025-01-25 18:50:55 +01:00
parent f304dec9ce
commit 6c6af54655
Signed by untrusted user: maximilien
GPG key ID: 04FD5063D6D43365
3 changed files with 171 additions and 159 deletions


@@ -2,95 +2,6 @@ job "telemetry-service" {
   datacenters = ["neptune", "dathomir", "corrin", "bespin"]
   type = "service"
-  group "prometheus" {
-    count = 2
-    network {
-      port "prometheus" {
-        static = 9090
-      }
-    }
-    constraint {
-      attribute = "${attr.unique.hostname}"
-      operator = "set_contains_any"
-      value = "df-pw5,origan"
-    }
-    task "prometheus" {
-      driver = "nix2"
-      config {
-        nixpkgs = "github:nixos/nixpkgs/nixos-22.11"
-        packages = [ "#prometheus", "#coreutils", "#findutils", "#bash" ]
-        command = "prometheus"
-        args = [
-          "--config.file=/etc/prom/prometheus.yml",
-          "--storage.tsdb.path=/data",
-          "--storage.tsdb.retention.size=5GB",
-        ]
-        bind = {
-          "/mnt/ssd/prometheus" = "/data"
-        }
-      }
-      template {
-        data = file("../config/prometheus.yml")
-        destination = "etc/prom/prometheus.yml"
-      }
-      template {
-        data = "{{ key \"secrets/consul/consul-ca.crt\" }}"
-        destination = "etc/prom/consul.crt"
-      }
-      template {
-        data = "{{ key \"secrets/consul/consul-client.crt\" }}"
-        destination = "etc/prom/consul-client.crt"
-      }
-      template {
-        data = "{{ key \"secrets/consul/consul-client.key\" }}"
-        destination = "etc/prom/consul-client.key"
-      }
-      template {
-        data = "{{ key \"secrets/nomad/nomad-ca.crt\" }}"
-        destination = "etc/prom/nomad-ca.crt"
-      }
-      template {
-        data = "{{ key \"secrets/nomad/nomad-client.crt\" }}"
-        destination = "etc/prom/nomad-client.crt"
-      }
-      template {
-        data = "{{ key \"secrets/nomad/nomad-client.key\" }}"
-        destination = "etc/prom/nomad-client.key"
-      }
-      resources {
-        memory = 500
-        cpu = 200
-      }
-      service {
-        port = "prometheus"
-        name = "prometheus"
-        check {
-          type = "http"
-          path = "/"
-          interval = "60s"
-          timeout = "5s"
-          check_restart {
-            limit = 3
-            grace = "90s"
-            ignore_warnings = false
-          }
-        }
-      }
-    }
-  }
   group "grafana" {
     count = 1
@@ -106,50 +17,46 @@ job "telemetry-service" {
         sidecar = false
       }
-      driver = "nix2"
+      driver = "docker"
       config {
-        packages = [ "#litestream" ]
-        command = "litestream"
+        image = "litestream/litestream:0.3.13"
         args = [
           "restore", "-config", "/etc/litestream.yml", "/ephemeral/grafana.db"
         ]
-        bind = {
-          "../alloc/data" = "/ephemeral",
-        }
+        volumes = [
+          "../alloc/data:/ephemeral",
+          "secrets/litestream.yml:/etc/litestream.yml"
+        ]
       }
+      user = "472"
       template {
         data = file("../config/grafana-litestream.yml")
-        destination = "etc/litestream.yml"
+        destination = "secrets/litestream.yml"
       }
       resources {
-        memory = 100
-        memory_max = 1000
+        memory = 50
+        memory_max = 200
         cpu = 100
       }
     }
     task "grafana" {
-      driver = "nix2"
+      driver = "docker"
       config {
-        nixpkgs = "github:nixos/nixpkgs/nixos-22.11"
-        packages = [ "#grafana" ]
-        command = "grafana-server"
-        args = [
-          "-homepath", "/share/grafana",
-          "cfg:default.paths.data=/grafana",
-          "cfg:default.paths.provisioning=/grafana-provisioning"
+        image = "grafana/grafana:11.4.0"
+        network_mode = "host"
+        ports = [ "grafana" ]
+        volumes = [
+          "../alloc/data:/var/lib/grafana",
+          "secrets/prometheus.yaml:/etc/grafana/provisioning/datasources/prometheus.yaml"
         ]
-        bind = {
-          "../alloc/data" = "/grafana",
-        }
       }
       template {
         data = file("../config/grafana-datasource-prometheus.yaml")
-        destination = "grafana-provisioning/datasources/prometheus.yaml"
+        destination = "secrets/prometheus.yaml"
       }
       template {
@@ -163,8 +70,9 @@ GF_SECURITY_ADMIN_PASSWORD={{ key "secrets/telemetry/grafana/admin_password" }}
       }
       resources {
-        memory = 300
-        cpu = 300
+        memory = 100
+        memory_max = 400
+        cpu = 300
       }
       restart {
@@ -181,9 +89,12 @@ GF_SECURITY_ADMIN_PASSWORD={{ key "secrets/telemetry/grafana/admin_password" }}
           "tricot grafana.staging.deuxfleurs.org",
           "d53-cname grafana.staging.deuxfleurs.org",
         ]
-        port = "grafana"
+        port = 3719
+        address_mode = "driver"
         check {
           type = "tcp"
+          port = 3719
+          address_mode = "driver"
           interval = "60s"
           timeout = "5s"
           check_restart {
@@ -196,26 +107,27 @@ GF_SECURITY_ADMIN_PASSWORD={{ key "secrets/telemetry/grafana/admin_password" }}
       }
     }
     task "replicate-db" {
-      driver = "nix2"
+      driver = "docker"
       config {
-        packages = [ "#litestream" ]
-        command = "litestream"
+        image = "litestream/litestream:0.3.13"
         args = [
           "replicate", "-config", "/etc/litestream.yml"
        ]
-        bind = {
-          "../alloc/data" = "/ephemeral",
-        }
+        volumes = [
+          "../alloc/data:/ephemeral",
+          "secrets/litestream.yml:/etc/litestream.yml"
+        ]
      }
+      user = "472"
      template {
        data = file("../config/grafana-litestream.yml")
-        destination = "etc/litestream.yml"
+        destination = "secrets/litestream.yml"
      }
      resources {
-        memory = 100
-        memory_max = 500
+        memory = 50
+        memory_max = 200
        cpu = 100
      }
    }
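Both Litestream tasks above render `../config/grafana-litestream.yml` and mount it at `/etc/litestream.yml` inside the container. That config file is not part of this commit; as a rough sketch of the shape such a Litestream config takes (the replica type, endpoint, and bucket below are placeholders, not values from the repository):

# Hypothetical grafana-litestream.yml; the real template is not shown in this diff.
dbs:
  - path: /ephemeral/grafana.db        # matches the restore/replicate argument above
    replicas:
      - type: s3                       # assumed replica type
        endpoint: https://s3.example.org   # placeholder endpoint
        bucket: grafana-db                 # placeholder bucket
        path: grafana.db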

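Likewise, `grafana-datasource-prometheus.yaml` is rendered into `secrets/` and mounted at `/etc/grafana/provisioning/datasources/prometheus.yaml`. A minimal sketch of a Grafana datasource provisioning file of that shape, assuming it points at the Prometheus instances defined below (the URL is an assumption):

# Hypothetical grafana-datasource-prometheus.yaml; the actual template is not in this commit.
apiVersion: 1
datasources:
  - name: Prometheus
    type: prometheus
    access: proxy
    url: http://localhost:9090   # assumed: Prometheus binds host port 9090 (see telemetry-storage)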

@@ -0,0 +1,97 @@
+job "telemetry-storage" {
+  datacenters = ["neptune", "dathomir", "corrin", "bespin"]
+  type = "service"
+  group "prometheus" {
+    count = 2
+    network {
+      port "prometheus" {
+        static = 9090
+      }
+    }
+    constraint {
+      attribute = "${attr.unique.hostname}"
+      operator = "set_contains_any"
+      value = "df-pw5,origan"
+    }
+    task "prometheus" {
+      driver = "docker"
+      config {
+        image = "prom/prometheus:v3.1.0"
+        network_mode = "host"
+        ports = [ "prometheus" ]
+        args = [
+          "--config.file=/etc/prometheus/prometheus.yml",
+          "--storage.tsdb.path=/data",
+          "--storage.tsdb.retention.size=20GB",
+        ]
+        volumes = [
+          "secrets:/etc/prometheus",
+          "/mnt/ssd/prometheus:/data"
+        ]
+      }
+      template {
+        data = file("../config/prometheus.yml")
+        destination = "secrets/prometheus.yml"
+      }
+      template {
+        data = "{{ key \"secrets/consul/consul-ca.crt\" }}"
+        destination = "secrets/consul.crt"
+      }
+      template {
+        data = "{{ key \"secrets/consul/consul-client.crt\" }}"
+        destination = "secrets/consul-client.crt"
+      }
+      template {
+        data = "{{ key \"secrets/consul/consul-client.key\" }}"
+        destination = "secrets/consul-client.key"
+      }
+      template {
+        data = "{{ key \"secrets/nomad/nomad-ca.crt\" }}"
+        destination = "secrets/nomad-ca.crt"
+      }
+      template {
+        data = "{{ key \"secrets/nomad/nomad-client.crt\" }}"
+        destination = "secrets/nomad-client.crt"
+      }
+      template {
+        data = "{{ key \"secrets/nomad/nomad-client.key\" }}"
+        destination = "secrets/nomad-client.key"
+      }
+      resources {
+        memory = 500
+        cpu = 200
+      }
+      service {
+        port = 9090
+        address_mode = "driver"
+        name = "prometheus"
+        check {
+          type = "http"
+          path = "/"
+          port = 9090
+          address_mode = "driver"
+          interval = "60s"
+          timeout = "5s"
+          check_restart {
+            limit = 3
+            grace = "90s"
+            ignore_warnings = false
+          }
+        }
+      }
+    }
+  }
+}
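The Consul and Nomad certificates rendered into `secrets/` above become visible to Prometheus under `/etc/prometheus/` via the `secrets:/etc/prometheus` volume. The `../config/prometheus.yml` that uses them is not included in the diff; a minimal sketch of a Consul service-discovery scrape job using those paths (the server address and job name are assumptions):

# Hypothetical excerpt of prometheus.yml; only the cert paths are taken from the job above.
scrape_configs:
  - job_name: consul-services          # assumed job name
    consul_sd_configs:
      - server: localhost:8501         # assumed local Consul agent address (TLS port)
        scheme: https
        tls_config:
          ca_file: /etc/prometheus/consul.crt
          cert_file: /etc/prometheus/consul-client.crt
          key_file: /etc/prometheus/consul-client.key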


@@ -4,43 +4,46 @@ job "telemetry-system" {
   priority = "100"
   group "collector" {
-    network {
-      port "node_exporter" { static = 9100 }
-    }
+    network {
+      port "node_exporter" { static = 9100 }
+    }
-    task "node_exporter" {
-      driver = "nix2"
+    task "node_exporter" {
+      driver = "docker"
-      config {
-        packages = [ "#prometheus-node-exporter" ]
-        command = "node_exporter"
-        args = [ "--path.rootfs=/host" ]
-        bind_read_only = {
-          "/" = "/host"
-        }
-      }
+      config {
+        image = "quay.io/prometheus/node-exporter:v1.8.1"
+        network_mode = "host"
+        volumes = [
+          "/:/host:ro,rslave"
+        ]
+        args = [ "--path.rootfs=/host" ]
+      }
-      resources {
-        cpu = 50
-        memory = 40
-      }
+      resources {
+        cpu = 50
+        memory = 40
+      }
-      service {
-        name = "node-exporter"
-        tags = [ "telemetry" ]
-        port = "node_exporter"
-        check {
-          type = "http"
-          path = "/"
-          interval = "60s"
-          timeout = "5s"
-          check_restart {
-            limit = 3
-            grace = "90s"
-            ignore_warnings = false
-          }
-        }
-      }
-    }
-  }
-}
+      service {
+        tags = [ "telemetry" ]
+        port = 9100
+        address_mode = "driver"
+        name = "node-exporter"
+        check {
+          type = "http"
+          path = "/"
+          port = 9100
+          address_mode = "driver"
+          interval = "60s"
+          timeout = "5s"
+          check_restart {
+            limit = 3
+            grace = "90s"
+            ignore_warnings = false
+          }
+        }
+      }
+    }
+  }
+}
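The collector registers in Consul as `node-exporter` with the `telemetry` tag, which gives Prometheus a handle to select these targets. A sketch of a scrape job keeping only Consul services tagged `telemetry` (again assuming Consul SD as in the earlier sketch; names are illustrative):

# Hypothetical continuation of the prometheus.yml sketch above.
scrape_configs:
  - job_name: node-exporter            # assumed job name
    consul_sd_configs:
      - server: localhost:8501         # assumed, as in the earlier sketch
        scheme: https
    relabel_configs:
      - source_labels: [__meta_consul_tags]
        regex: .*,telemetry,.*         # __meta_consul_tags is comma-joined with leading/trailing commas
        action: keep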