nixcfg/cluster/staging/app/telemetry/deploy/telemetry.hcl

293 lines
6.2 KiB
HCL

job "telemetry" {
datacenters = ["neptune"]
type = "service"
# Prometheus server group: two allocations, each binding static port 9090,
# placed only on nodes matched by the hostname constraint below.
group "prometheus" {
  count = 2

  # Restrict placement to nodes whose hostname is one of the listed values.
  constraint {
    attribute = "${attr.unique.hostname}"
    operator  = "set_contains_any"
    value     = "cariacou,carcajou"
  }

  network {
    port "prometheus" {
      static = 9090
    }
  }

  task "prometheus" {
    driver = "nix2"

    config {
      nixpkgs = "github:nixos/nixpkgs/nixos-22.11"
      packages = [
        "#prometheus",
        "#coreutils",
        "#findutils",
        "#bash",
      ]

      command = "prometheus"
      args = [
        "--config.file=/etc/prom/prometheus.yml",
        "--storage.tsdb.path=/data",
        "--storage.tsdb.retention.size=5GB",
      ]

      # /data is the TSDB path given on the command line above.
      # NOTE(review): presumably the key is the host path and the value the
      # in-task path; confirm against the nix2 driver's `bind` semantics.
      bind = {
        "/mnt/ssd/prometheus" = "/data"
      }
    }

    # Main Prometheus configuration, read from the repository at submit time
    # and written to the path passed via --config.file.
    template {
      destination = "etc/prom/prometheus.yml"
      data        = file("../config/prometheus.yml")
    }

    # Consul TLS material rendered from the keys under secrets/consul/.
    template {
      destination = "etc/prom/consul.crt"
      data        = "{{ key \"secrets/consul/consul.crt\" }}"
    }

    template {
      destination = "etc/prom/consul-client.crt"
      data        = "{{ key \"secrets/consul/consul-client.crt\" }}"
    }

    template {
      destination = "etc/prom/consul-client.key"
      data        = "{{ key \"secrets/consul/consul-client.key\" }}"
    }

    resources {
      cpu    = 200
      memory = 500
    }

    service {
      name = "prometheus"
      port = "prometheus"

      # HTTP probe of "/" every 60s; the task is restarted after 3 failed
      # checks, with a 90s grace period before failures start counting.
      check {
        type     = "http"
        path     = "/"
        timeout  = "5s"
        interval = "60s"

        check_restart {
          limit           = 3
          grace           = "90s"
          ignore_warnings = false
        }
      }
    }
  }
}
# Grafana group: a prestart task restores the Grafana SQLite database with
# Litestream, then Grafana runs alongside a Litestream replication task.
# All three tasks mount ../alloc/data, so they operate on the same files.
group "grafana" {
  count = 1

  network {
    port "grafana" {
      static = 3719
    }
  }

  # Runs once before the other tasks (prestart, not a sidecar) and restores
  # /ephemeral/grafana.db according to the Litestream configuration.
  task "restore-db" {
    driver = "docker"

    # NOTE(review): presumably chosen to match the UID Grafana runs as inside
    # its container so the restored file is writable by it; confirm.
    user = "472"

    lifecycle {
      sidecar = false
      hook    = "prestart"
    }

    config {
      image = "litestream/litestream:0.3.7"
      args = [
        "restore",
        "-config",
        "/etc/litestream.yml",
        "/ephemeral/grafana.db",
      ]
      volumes = [
        "../alloc/data:/ephemeral",
        "secrets/litestream.yml:/etc/litestream.yml",
      ]
    }

    template {
      destination = "secrets/litestream.yml"
      data        = file("../config/grafana-litestream.yml")
    }

    resources {
      cpu        = 100
      memory     = 100
      memory_max = 1000
    }
  }

  task "grafana" {
    driver = "docker"

    config {
      image        = "grafana/grafana:9.2.3"
      network_mode = "host"
      ports        = ["grafana"]
      volumes = [
        "../alloc/data:/var/lib/grafana",
        "secrets/prometheus.yaml:/etc/grafana/provisioning/datasources/prometheus.yaml",
      ]
    }

    # Prometheus datasource definition, mounted into Grafana's provisioning
    # directory by the volume entry above.
    template {
      destination = "secrets/prometheus.yaml"
      data        = file("../config/grafana-datasource-prometheus.yaml")
    }

    # Environment file (env = true): plugin list and the HTTP port, which
    # matches the static "grafana" port declared in the network block.
    template {
      destination = "secrets/env"
      env         = true
      data        = <<EOH
GF_INSTALL_PLUGINS=grafana-clock-panel,grafana-simple-json-datasource,grafana-piechart-panel,grafana-worldmap-panel,grafana-polystat-panel
GF_SERVER_HTTP_PORT=3719
EOH
    }

    resources {
      cpu    = 800
      memory = 300
    }

    service {
      name         = "grafana"
      address_mode = "driver"
      port         = 3719
      tags = [
        "grafana",
        "tricot grafana.staging.deuxfleurs.org",
      ]

      # TCP probe on 3719 every 60s; the task is restarted after 3 failed
      # checks, with a 90s grace period before failures start counting.
      check {
        type         = "tcp"
        address_mode = "driver"
        port         = 3719
        timeout      = "5s"
        interval     = "60s"

        check_restart {
          limit           = 3
          grace           = "90s"
          ignore_warnings = false
        }
      }
    }
  }

  # Long-running Litestream process replicating according to the same
  # configuration file used by the restore task.
  task "replicate-db" {
    driver = "docker"

    # Same user as the restore task.
    user = "472"

    config {
      image = "litestream/litestream:0.3.7"
      args = [
        "replicate",
        "-config",
        "/etc/litestream.yml",
      ]
      volumes = [
        "../alloc/data:/ephemeral",
        "secrets/litestream.yml:/etc/litestream.yml",
      ]
    }

    template {
      destination = "secrets/litestream.yml"
      data        = file("../config/grafana-litestream.yml")
    }

    resources {
      cpu        = 100
      memory     = 100
      memory_max = 500
    }
  }
}
# Jaeger all-in-one group: a single container exposing the web frontend on a
# dynamically assigned host port (mapped to 16686 in the container) and the
# two OTLP collector endpoints on static ports 4317 and 4318.
group "jaeger" {
  count = 1

  network {
    port "jaeger-frontend" {
      to = 16686
    }
    port "jaeger-otlp-grpc" {
      static = 4317
      to     = 4317
    }
    port "jaeger-otlp-http" {
      static = 4318
      to     = 4318
    }
  }

  task "jaeger" {
    driver = "docker"

    config {
      image = "jaegertracing/all-in-one:1.36"
      ports = ["jaeger-frontend", "jaeger-otlp-grpc", "jaeger-otlp-http"]
    }

    resources {
      memory = 2000
      cpu    = 1000
    }

    # Environment file (env = true) passed to the container.
    template {
      data        = <<EOH
COLLECTOR_OTLP_ENABLED=true
EOH
      destination = "secrets/env"
      env         = true
    }

    # One service per exposed port. Each check probes the port of the service
    # it belongs to, so a service's health reflects its own endpoint.
    service {
      port         = "jaeger-frontend"
      address_mode = "host"
      name         = "jaeger-frontend"
      tags         = ["tricot jaeger.staging.deuxfleurs.org"]

      check {
        type         = "tcp"
        port         = "jaeger-frontend"
        address_mode = "host"
        interval     = "60s"
        timeout      = "5s"

        check_restart {
          limit           = 3
          grace           = "90s"
          ignore_warnings = false
        }
      }
    }

    service {
      port         = "jaeger-otlp-grpc"
      address_mode = "host"
      name         = "jaeger-otlp-grpc"

      check {
        type         = "tcp"
        port         = "jaeger-otlp-grpc"
        address_mode = "host"
        interval     = "60s"
        timeout      = "5s"

        check_restart {
          limit           = 3
          grace           = "90s"
          ignore_warnings = false
        }
      }
    }

    service {
      port         = "jaeger-otlp-http"
      address_mode = "host"
      name         = "jaeger-otlp-http"

      check {
        type = "tcp"
        # Probe the HTTP endpoint itself. This previously pointed at
        # "jaeger-otlp-grpc", so the HTTP port was never checked.
        port         = "jaeger-otlp-http"
        address_mode = "host"
        interval     = "60s"
        timeout      = "5s"

        check_restart {
          limit           = 3
          grace           = "90s"
          ignore_warnings = false
        }
      }
    }
  }
}
}