# nixcfg/cluster/staging/app/telemetry/deploy/telemetry-service.hcl
#
# 305 lines
# 6.5 KiB
# HCL

job "telemetry-service" {
datacenters = ["neptune", "jupiter", "corrin", "bespin"]
type = "service"
# Prometheus metrics server.
# Two instances, restricted by the constraint below to the hosts named
# "carcajou" and "origan"; each instance binds the fixed host port 9090.
group "prometheus" {
  count = 2

  # Placement: the node hostname must be one of the comma-separated names.
  constraint {
    operator  = "set_contains_any"
    attribute = "${attr.unique.hostname}"
    value     = "carcajou,origan"
  }

  network {
    port "prometheus" {
      static = 9090
    }
  }

  task "prometheus" {
    driver = "nix2"

    resources {
      cpu    = 200
      memory = 500
    }

    config {
      nixpkgs  = "github:nixos/nixpkgs/nixos-22.11"
      packages = [
        "#prometheus",
        "#coreutils",
        "#findutils",
        "#bash",
      ]

      # Host directory /mnt/ssd/prometheus is exposed to the task as /data,
      # which is the TSDB path passed to prometheus below.
      bind = {
        "/mnt/ssd/prometheus" = "/data"
      }

      command = "prometheus"
      args    = [
        "--config.file=/etc/prom/prometheus.yml",
        "--storage.tsdb.path=/data",
        "--storage.tsdb.retention.size=5GB",
      ]
    }

    # Main Prometheus configuration, read from the repository at submit time.
    template {
      destination = "etc/prom/prometheus.yml"
      data        = file("../config/prometheus.yml")
    }

    # Certificate and client key material, each rendered from the key of the
    # same name under secrets/consul/.
    template {
      destination = "etc/prom/consul.crt"
      data        = "{{ key \"secrets/consul/consul.crt\" }}"
    }

    template {
      destination = "etc/prom/consul-client.crt"
      data        = "{{ key \"secrets/consul/consul-client.crt\" }}"
    }

    template {
      destination = "etc/prom/consul-client.key"
      data        = "{{ key \"secrets/consul/consul-client.key\" }}"
    }

    service {
      name = "prometheus"
      port = "prometheus"

      # HTTP probe on "/" every 60s; after 3 failed checks (following a 90s
      # grace period) the task is restarted.
      check {
        type     = "http"
        path     = "/"
        timeout  = "5s"
        interval = "60s"

        check_restart {
          grace           = "90s"
          limit           = 3
          ignore_warnings = false
        }
      }
    }
  }
}
# Grafana dashboard server with its database handled by Litestream:
#   1. "restore-db"   (prestart, runs once) restores /ephemeral/grafana.db
#   2. "grafana"      serves the UI on the fixed port 3719
#   3. "replicate-db" runs `litestream replicate` next to Grafana
# All three tasks bind "../alloc/data", so they see the same directory.
group "grafana" {
  count = 1

  network {
    port "grafana" {
      static = 3719
    }
  }

  # One-shot prestart task: restore the database before Grafana starts.
  task "restore-db" {
    driver = "nix2"

    lifecycle {
      sidecar = false
      hook    = "prestart"
    }

    resources {
      cpu        = 100
      memory     = 100
      memory_max = 1000
    }

    config {
      packages = [ "#litestream" ]

      bind = {
        "../alloc/data" = "/ephemeral"
      }

      command = "litestream"
      args    = [
        "restore",
        "-config",
        "/etc/litestream.yml",
        "/ephemeral/grafana.db",
      ]
    }

    template {
      destination = "etc/litestream.yml"
      data        = file("../config/grafana-litestream.yml")
    }
  }

  # Main task: the Grafana server itself.
  task "grafana" {
    driver = "nix2"

    resources {
      cpu    = 300
      memory = 300
    }

    # NOTE(review): attempts * delay (10 x 1m) is larger than interval (30s);
    # confirm this is intended and accepted by the scheduler.
    restart {
      mode     = "delay"
      attempts = 10
      delay    = "1m"
      interval = "30s"
    }

    config {
      nixpkgs  = "github:nixos/nixpkgs/nixos-22.11"
      packages = [ "#grafana" ]

      # Same allocation directory the litestream tasks mount as /ephemeral.
      bind = {
        "../alloc/data" = "/grafana"
      }

      command = "grafana-server"
      args    = [
        "-homepath",
        "/share/grafana",
        "cfg:default.paths.data=/grafana",
        "cfg:default.paths.provisioning=/grafana-provisioning",
      ]
    }

    # Datasource provisioning file, placed under the provisioning path
    # configured in args above.
    template {
      destination = "grafana-provisioning/datasources/prometheus.yaml"
      data        = file("../config/grafana-datasource-prometheus.yaml")
    }

    # Environment for Grafana (env = true exports the rendered lines as
    # environment variables). GF_SERVER_HTTP_PORT repeats the static port of
    # the "grafana" network port declared above.
    template {
      destination = "secrets/env"
      env         = true
      data        = <<EOH
GF_INSTALL_PLUGINS=grafana-clock-panel,grafana-simple-json-datasource,grafana-piechart-panel,grafana-worldmap-panel,grafana-polystat-panel
GF_SERVER_HTTP_PORT=3719
GF_SECURITY_ADMIN_PASSWORD={{ key "secrets/telemetry/grafana/admin_password" }}
EOH
    }

    service {
      name = "grafana"
      port = "grafana"
      tags = [
        "grafana",
        "tricot grafana.staging.deuxfleurs.org",
        "d53-cname grafana.staging.deuxfleurs.org",
      ]

      # TCP probe every 60s; 3 failures after a 90s grace period restart
      # the task.
      check {
        type     = "tcp"
        timeout  = "5s"
        interval = "60s"

        check_restart {
          grace           = "90s"
          limit           = 3
          ignore_warnings = false
        }
      }
    }
  }

  # Companion task: runs `litestream replicate` with the same litestream
  # configuration file used by "restore-db".
  task "replicate-db" {
    driver = "nix2"

    resources {
      cpu        = 100
      memory     = 100
      memory_max = 500
    }

    config {
      packages = [ "#litestream" ]

      bind = {
        "../alloc/data" = "/ephemeral"
      }

      command = "litestream"
      args    = [
        "replicate",
        "-config",
        "/etc/litestream.yml",
      ]
    }

    template {
      destination = "etc/litestream.yml"
      data        = file("../config/grafana-litestream.yml")
    }
  }
}
# Jaeger all-in-one tracing backend (Docker image jaegertracing/all-in-one).
# Exposes three ports, each registered as its own service:
#   - jaeger-frontend   : container port 16686, dynamically assigned host port
#   - jaeger-otlp-grpc  : static host port 4317 -> container port 4317
#   - jaeger-otlp-http  : static host port 4318 -> container port 4318
group "jaeger" {
  count = 1

  network {
    port "jaeger-frontend" {
      to = 16686
    }

    port "jaeger-otlp-grpc" {
      static = 4317
      to     = 4317
    }

    port "jaeger-otlp-http" {
      static = 4318
      to     = 4318
    }
  }

  task "jaeger" {
    driver = "docker"

    config {
      image = "jaegertracing/all-in-one:1.36"
      ports = [ "jaeger-frontend", "jaeger-otlp-grpc", "jaeger-otlp-http" ]
    }

    resources {
      memory = 1000
      cpu    = 500
    }

    # Rendered as environment variables (env = true) for the container.
    template {
      data = <<EOH
COLLECTOR_OTLP_ENABLED=true
EOH
      destination = "secrets/env"
      env         = true
    }

    # Web UI, tagged with the public hostname jaeger.staging.deuxfleurs.org.
    service {
      name         = "jaeger-frontend"
      port         = "jaeger-frontend"
      address_mode = "host"
      tags = [
        "tricot jaeger.staging.deuxfleurs.org",
        "d53-cname jaeger.staging.deuxfleurs.org",
      ]

      check {
        type         = "tcp"
        port         = "jaeger-frontend"
        address_mode = "host"
        interval     = "60s"
        timeout      = "5s"

        check_restart {
          limit           = 3
          grace           = "90s"
          ignore_warnings = false
        }
      }
    }

    # OTLP ingestion over gRPC.
    service {
      name         = "jaeger-otlp-grpc"
      port         = "jaeger-otlp-grpc"
      address_mode = "host"

      check {
        type         = "tcp"
        port         = "jaeger-otlp-grpc"
        address_mode = "host"
        interval     = "60s"
        timeout      = "5s"

        check_restart {
          limit           = 3
          grace           = "90s"
          ignore_warnings = false
        }
      }
    }

    # OTLP ingestion over HTTP.
    service {
      name         = "jaeger-otlp-http"
      port         = "jaeger-otlp-http"
      address_mode = "host"

      check {
        type = "tcp"
        # The check must probe this service's own port. Probing the gRPC
        # port here would leave the HTTP listener unmonitored and tie this
        # service's health to the wrong endpoint.
        port         = "jaeger-otlp-http"
        address_mode = "host"
        interval     = "60s"
        timeout      = "5s"

        check_restart {
          limit           = 3
          grace           = "90s"
          ignore_warnings = false
        }
      }
    }
  }
}
}