
# (File-viewer metadata carried over with this copy: 304 lines, 6.5 KiB)

# Nomad job "telemetry-service". The task groups "prometheus", "grafana" and
# "jaeger" are declared further down in this file.
# NOTE(review): this copy of the file shows no closing delimiters (`}`, `]`,
# heredoc end markers) — confirm against the canonical job file before use.
job "telemetry-service" {
# Datacenter names listed for this job.
datacenters = ["neptune", "jupiter", "corrin"]
type = "service"
# Group "prometheus": count = 2, with a port labelled "prometheus" fixed to 9090.
group "prometheus" {
count = 2
network {
port "prometheus" {
static = 9090
# Constraint on the node's unique hostname; the value lists "cariacou" and "origan".
constraint {
attribute = "${attr.unique.hostname}"
operator = "set_contains_any"
value = "cariacou,origan"
# Task "prometheus": runs the `prometheus` command through the "nix2" driver.
task "prometheus" {
driver = "nix2"
config {
# Nixpkgs reference and the package list handed to the driver.
nixpkgs = "github:nixos/nixpkgs/nixos-22.11"
packages = [ "#prometheus", "#coreutils", "#findutils", "#bash" ]
command = "prometheus"
# NOTE(review): no argument entries are visible for this list in this copy.
args = [
# Maps "/mnt/ssd/prometheus" to "/data" (presumably host path -> in-task path; confirm with the nix2 driver docs).
bind = {
"/mnt/ssd/prometheus" = "/data"
# Prometheus configuration, loaded from a file path relative to this job file.
template {
data = file("../config/prometheus.yml")
destination = "etc/prom/prometheus.yml"
# The next three templates each write one `key` lookup under "secrets/consul/"
# to a file of the same name in etc/prom/.
template {
data = "{{ key \"secrets/consul/consul.crt\" }}"
destination = "etc/prom/consul.crt"
template {
data = "{{ key \"secrets/consul/consul-client.crt\" }}"
destination = "etc/prom/consul-client.crt"
template {
data = "{{ key \"secrets/consul/consul-client.key\" }}"
destination = "etc/prom/consul-client.key"
resources {
memory = 500
cpu = 200
# Service "prometheus" on the "prometheus" port, with an HTTP check on "/"
# every 60s (5s timeout).
service {
port = "prometheus"
name = "prometheus"
check {
type = "http"
path = "/"
interval = "60s"
timeout = "5s"
# Restart settings attached to this check: limit 3, 90s grace, warnings not ignored.
check_restart {
limit = 3
grace = "90s"
ignore_warnings = false
# Group "grafana": count = 1, with a port labelled "grafana" fixed to 3719.
group "grafana" {
count = 1
network {
port "grafana" {
static = 3719
# Task "restore-db": declared as a prestart, non-sidecar lifecycle hook. It runs
# `litestream restore` with /etc/litestream.yml, targeting /ephemeral/grafana.db.
task "restore-db" {
lifecycle {
hook = "prestart"
sidecar = false
driver = "nix2"
config {
# NOTE(review): no `nixpkgs` is set here, unlike the "prometheus" and "grafana"
# task configs in this file — confirm this is intended.
packages = [ "#litestream" ]
command = "litestream"
args = [
"restore", "-config", "/etc/litestream.yml", "/ephemeral/grafana.db"
# "../alloc/data" is bound as /ephemeral, the directory the restore target lives in.
bind = {
"../alloc/data" = "/ephemeral",
# Litestream configuration file, written to etc/litestream.yml.
template {
data = file("../config/grafana-litestream.yml")
destination = "etc/litestream.yml"
resources {
memory = 100
memory_max = 1000
cpu = 100
# Task "grafana": runs `grafana-server` through the "nix2" driver.
task "grafana" {
driver = "nix2"
config {
nixpkgs = "github:nixos/nixpkgs/nixos-22.11"
packages = [ "#grafana" ]
command = "grafana-server"
args = [
"-homepath", "/share/grafana",
# The same "../alloc/data" source used by the "restore-db" and "replicate-db"
# tasks is bound here as /grafana.
bind = {
"../alloc/data" = "/grafana",
# Prometheus datasource definition, written under grafana-provisioning/datasources/.
template {
data = file("../config/grafana-datasource-prometheus.yaml")
destination = "grafana-provisioning/datasources/prometheus.yaml"
# Environment template (env = true): sets GF_SECURITY_ADMIN_PASSWORD from a `key` lookup.
# NOTE(review): no closing EOH marker is visible for this heredoc in this copy.
template {
data = <<EOH
GF_SECURITY_ADMIN_PASSWORD={{ key "secrets/telemetry/grafana/admin_password" }}
destination = "secrets/env"
env = true
resources {
memory = 300
cpu = 800
# Restart policy: up to 10 restarts, 1m apart, counted per interval; "delay"
# mode waits for the next interval once the attempts are used up.
# The interval has to be able to hold attempts * delay (10 * 1m = 10m). A 30s
# interval is shorter than a single delay, so the attempts limit could never be
# reached and Nomad's restart-policy validation rejects that combination.
restart {
  interval = "30m"
  attempts = 10
  delay    = "1m"
  mode     = "delay"
}
# Service "grafana" on the "grafana" port, with a TCP check every 60s (5s timeout).
service {
name = "grafana"
# NOTE(review): no tag entries are visible for this list in this copy.
tags = [
port = "grafana"
check {
type = "tcp"
interval = "60s"
timeout = "5s"
# Restart settings attached to this check: limit 3, 90s grace, warnings not ignored.
check_restart {
limit = 3
grace = "90s"
ignore_warnings = false
# Task "replicate-db": runs `litestream replicate` with /etc/litestream.yml.
# No lifecycle block is declared for this task.
task "replicate-db" {
driver = "nix2"
config {
packages = [ "#litestream" ]
command = "litestream"
args = [
"replicate", "-config", "/etc/litestream.yml"
# "../alloc/data" is bound as /ephemeral, matching the "restore-db" task.
bind = {
"../alloc/data" = "/ephemeral",
# Same litestream configuration file as the "restore-db" task.
template {
data = file("../config/grafana-litestream.yml")
destination = "etc/litestream.yml"
resources {
memory = 100
memory_max = 500
cpu = 100
# Group "jaeger": count = 1, with three labelled ports.
group "jaeger" {
count = 1
network {
# Frontend port: only `to = 16686` is given, no static port.
port "jaeger-frontend" {
to = 16686
# OTLP gRPC port: static 4317 mapped to 4317.
port "jaeger-otlp-grpc" {
static = 4317
to = 4317
# OTLP HTTP port: static 4318 mapped to 4318.
port "jaeger-otlp-http" {
static = 4318
to = 4318
# Task "jaeger": Docker container from the jaegertracing/all-in-one:1.36 image,
# given the three ports declared in this group's network block.
task "jaeger" {
driver = "docker"
config {
image = "jaegertracing/all-in-one:1.36"
ports = [ "jaeger-frontend", "jaeger-otlp-grpc", "jaeger-otlp-http" ]
resources {
memory = 2000
cpu = 1000
# Environment template (env = true) written to secrets/env.
# NOTE(review): the heredoc below has no visible content or closing EOH marker
# in this copy — confirm against the canonical file.
template {
data = <<EOH
destination = "secrets/env"
env = true
# Service "jaeger-frontend" on the "jaeger-frontend" port with address_mode "host".
service {
port = "jaeger-frontend"
address_mode = "host"
name = "jaeger-frontend"
# NOTE(review): no tag entries are visible for this list in this copy.
tags = [
# TCP check against this service's own port, every 60s (5s timeout).
check {
type = "tcp"
port = "jaeger-frontend"
address_mode = "host"
interval = "60s"
timeout = "5s"
# Restart settings attached to this check: limit 3, 90s grace, warnings not ignored.
check_restart {
limit = 3
grace = "90s"
ignore_warnings = false
# Service "jaeger-otlp-grpc" on the "jaeger-otlp-grpc" port with address_mode "host".
service {
port = "jaeger-otlp-grpc"
address_mode = "host"
name = "jaeger-otlp-grpc"
# TCP check against this service's own port, every 60s (5s timeout).
check {
type = "tcp"
port = "jaeger-otlp-grpc"
address_mode = "host"
interval = "60s"
timeout = "5s"
# Restart settings attached to this check: limit 3, 90s grace, warnings not ignored.
check_restart {
limit = 3
grace = "90s"
ignore_warnings = false
# Service "jaeger-otlp-http" on the "jaeger-otlp-http" port with address_mode "host".
service {
  port         = "jaeger-otlp-http"
  address_mode = "host"
  name         = "jaeger-otlp-http"

  # TCP check every 60s (5s timeout). It probes this service's own port:
  # checking "jaeger-otlp-grpc" here would report the HTTP endpoint healthy
  # whenever the gRPC listener is up, hiding a dead HTTP listener.
  check {
    type         = "tcp"
    port         = "jaeger-otlp-http"
    address_mode = "host"
    interval     = "60s"
    timeout      = "5s"

    # Restart settings attached to this check: limit 3, 90s grace, warnings not ignored.
    check_restart {
      limit           = 3
      grace           = "90s"
      ignore_warnings = false
    }
  }
}