Prod-like telemetry into staging

This commit is contained in:
Alex 2022-09-20 17:13:46 +02:00
parent 9b6bdc7092
commit 56ff4c5cfd
Signed by untrusted user: lx
GPG key ID: 0E496D15096376BE
14 changed files with 494 additions and 206 deletions

View file

@ -0,0 +1,7 @@
# Grafana datasource provisioning file: declares a single Prometheus
# datasource that Grafana reaches through its backend (access: proxy).
apiVersion: 1

datasources:
  - name: DS_PROMETHEUS
    type: prometheus
    access: proxy
    # Prometheus is addressed by its Consul service DNS name.
    url: http://prometheus.service.staging.consul:9090

View file

@ -0,0 +1,30 @@
# Prometheus configuration. This file is rendered by a Nomad template
# stanza before Prometheus reads it, so template expressions are
# expanded first and the result must still be valid YAML.
global:
  scrape_interval: 15s  # By default, scrape targets every 15 seconds.

scrape_configs:
  # Prometheus scraping its own metrics endpoint.
  - job_name: 'prometheus'
    static_configs:
      - targets: ['localhost:9090']

  # Targets come from the 'node-exporter' service registered in Consul.
  - job_name: 'node-exporter'
    consul_sd_configs:
      - server: 'https://localhost:8501'
        services:
          - 'node-exporter'
        # Client certificate used to talk to the Consul HTTPS API.
        # NOTE(review): nesting of tls_config under the Consul SD entry is
        # reconstructed from a flattened source - confirm against the repo.
        tls_config:
          ca_file: /etc/prometheus/consul.crt
          cert_file: /etc/prometheus/consul-client.crt
          key_file: /etc/prometheus/consul-client.key

  # Targets come from the 'garage-staging-admin' service in Consul and
  # are scraped with a bearer token read from the Consul key/value store.
  - job_name: 'garage'
    authorization:
      type: Bearer
      # Quoted and trimmed so that a trailing newline in the stored value
      # or a number-looking token cannot change how YAML types the scalar.
      credentials: '{{ key "secrets/garage-staging/metrics_token" | trimSpace }}'
    consul_sd_configs:
      - server: 'https://localhost:8501'
        services:
          - 'garage-staging-admin'
        tls_config:
          ca_file: /etc/prometheus/consul.crt
          cert_file: /etc/prometheus/consul-client.crt
          key_file: /etc/prometheus/consul-client.key

View file

@ -1,182 +1,49 @@
job "telemetry-system" { job "telemetry-system" {
datacenters = ["neptune"] datacenters = ["neptune"]
type = "system" type = "system"
priority = "100"
group "elasticsearch" { group "collector" {
network { network {
port "elastic" { port "node_exporter" { static = 9100 }
static = 9200
}
port "elastic_internal" {
static = 9300
}
} }
task "elastic" { task "node_exporter" {
driver = "docker" driver = "docker"
config {
image = "docker.elastic.co/elasticsearch/elasticsearch:8.2.0" config {
network_mode = "host" image = "quay.io/prometheus/node-exporter:v1.1.2"
volumes = [ network_mode = "host"
"/mnt/ssd/telemetry/es_data:/usr/share/elasticsearch/data", volumes = [
"secrets/elastic-certificates.p12:/usr/share/elasticsearch/config/elastic-certificates.p12", "/:/host:ro,rslave"
] ]
ports = [ "elastic", "elastic_internal" ] args = [ "--path.rootfs=/host" ]
sysctl = { }
#"vm.max_map_count" = "262144",
} resources {
ulimit = { cpu = 50
memlock = "9223372036854775807:9223372036854775807", memory = 40
}
service {
tags = [ "telemetry" ]
port = 9100
address_mode = "driver"
name = "node-exporter"
check {
type = "http"
path = "/"
port = 9100
address_mode = "driver"
interval = "60s"
timeout = "5s"
check_restart {
limit = 3
grace = "90s"
ignore_warnings = false
}
} }
} }
}
user = "1000" }
resources {
memory = 1500
cpu = 500
}
template {
data = "{{ key \"secrets/telemetry/elasticsearch/elastic-certificates.p12\" }}"
destination = "secrets/elastic-certificates.p12"
}
template {
data = <<EOH
node.name={{ env "attr.unique.hostname" }}
http.port=9200
transport.port=9300
cluster.name=es-deuxfleurs
cluster.initial_master_nodes=carcajou,caribou,cariacou
discovery.seed_hosts=carcajou,caribou,cariacou
bootstrap.memory_lock=true
xpack.security.enabled=true
xpack.security.authc.api_key.enabled=true
xpack.security.transport.ssl.enabled=true
xpack.security.transport.ssl.verification_mode=certificate
xpack.security.transport.ssl.client_authentication=required
xpack.security.transport.ssl.keystore.path=/usr/share/elasticsearch/config/elastic-certificates.p12
xpack.security.transport.ssl.truststore.path=/usr/share/elasticsearch/config/elastic-certificates.p12
cluster.routing.allocation.disk.watermark.high=75%
cluster.routing.allocation.disk.watermark.low=65%
ES_JAVA_OPTS=-Xms512M -Xmx512M
EOH
destination = "secrets/env"
env = true
}
}
}
group "collector" {
network {
port "otel_grpc" {
static = 4317
}
port "apm" {
static = 8200
}
port "node_exporter" {
static = 9100
}
}
task "otel" {
driver = "docker"
config {
image = "otel/opentelemetry-collector-contrib:0.46.0"
args = [
"--config=/etc/otel-config.yaml",
]
network_mode = "host"
ports= [ "otel_grpc" ]
volumes = [
"secrets/otel-config.yaml:/etc/otel-config.yaml"
]
}
template {
data = file("../config/otel-config.yaml")
destination = "secrets/otel-config.yaml"
}
resources {
memory = 100
cpu = 100
}
}
task "apm" {
driver = "docker"
config {
image = "docker.elastic.co/apm/apm-server:8.2.0"
network_mode = "host"
ports = [ "apm" ]
args = [ "--strict.perms=false" ]
volumes = [
"secrets/apm-config.yaml:/usr/share/apm-server/apm-server.yml:ro"
]
}
template {
data = file("../config/apm-config.yaml")
destination = "secrets/apm-config.yaml"
}
resources {
memory = 100
cpu = 100
}
}
/*
task "node_exporter" {
driver = "docker"
config {
image = "quay.io/prometheus/node-exporter:v1.1.2"
network_mode = "host"
ports = [ "node_exporter" ]
volumes = [
"/:/host:ro,rslave"
]
args = [ "--path.rootfs=/host" ]
}
resources {
cpu = 50
memory = 40
}
}
*/
task "filebeat" {
driver = "docker"
config {
image = "docker.elastic.co/beats/filebeat:8.2.0"
network_mode = "host"
volumes = [
"/mnt/ssd/telemetry/filebeat:/usr/share/filebeat/data",
"secrets/filebeat.yml:/usr/share/filebeat/filebeat.yml",
"/var/run/docker.sock:/var/run/docker.sock",
"/var/lib/docker/containers/:/var/lib/docker/containers/:ro",
"/var/log/:/var/log/:ro",
]
args = [ "--strict.perms=false" ]
privileged = true
}
user = "root"
template {
data = file("../config/filebeat.yml")
destination = "secrets/filebeat.yml"
}
resources {
memory = 100
cpu = 100
}
}
}
} }

View file

@ -2,51 +2,59 @@ job "telemetry" {
datacenters = ["neptune"] datacenters = ["neptune"]
type = "service" type = "service"
group "kibana" { group "prometheus" {
count = 1 count = 1
network { network {
port "kibana" { port "prometheus" {
static = 5601 static = 9090
} }
} }
task "kibana" { task "prometheus" {
driver = "docker" driver = "docker"
config { config {
image = "docker.elastic.co/kibana/kibana:8.2.0" image = "prom/prometheus:v2.38.0"
network_mode = "host" network_mode = "host"
ports = [ "kibana" ] ports = [ "prometheus" ]
volumes = [
"secrets:/etc/prometheus"
]
} }
template { template {
data = <<EOH data = file("../config/prometheus.yml")
SERVER_NAME={{ env "attr.unique.hostname" }} destination = "secrets/prometheus.yml"
ELASTICSEARCH_HOSTS=http://localhost:9200 }
ELASTICSEARCH_USERNAME=kibana_system
ELASTICSEARCH_PASSWORD={{ key "secrets/telemetry/elastic_passwords/kibana_system" }} template {
SERVER_PUBLICBASEURL=https://kibana.home.adnab.me data = "{{ key \"secrets/consul/consul.crt\" }}"
EOH destination = "secrets/consul.crt"
destination = "secrets/env" }
env = true
template {
data = "{{ key \"secrets/consul/consul-client.crt\" }}"
destination = "secrets/consul-client.crt"
}
template {
data = "{{ key \"secrets/consul/consul-client.key\" }}"
destination = "secrets/consul-client.key"
} }
resources { resources {
memory = 1000 memory = 500
cpu = 500 cpu = 500
} }
service { service {
tags = [ port = 9090
"kibana",
"tricot kibana.staging.deuxfleurs.org",
]
port = 5601
address_mode = "driver" address_mode = "driver"
name = "kibana" name = "prometheus"
check { check {
type = "tcp" type = "http"
port = 5601 path = "/"
port = 9090
address_mode = "driver" address_mode = "driver"
interval = "60s" interval = "60s"
timeout = "5s" timeout = "5s"
@ -65,7 +73,7 @@ EOH
network { network {
port "grafana" { port "grafana" {
static = 3333 static = 3719
} }
} }
@ -107,19 +115,19 @@ EOH
ports = [ "grafana" ] ports = [ "grafana" ]
volumes = [ volumes = [
"../alloc/data:/var/lib/grafana", "../alloc/data:/var/lib/grafana",
"secrets/elastic.yaml:/etc/grafana/provisioning/datasources/elastic.yaml" "secrets/prometheus.yaml:/etc/grafana/provisioning/datasources/prometheus.yaml"
] ]
} }
template { template {
data = file("../config/grafana/provisioning/datasources/elastic.yaml") data = file("../config/grafana-datasource-prometheus.yaml")
destination = "secrets/elastic.yaml" destination = "secrets/prometheus.yaml"
} }
template { template {
data = <<EOH data = <<EOH
GF_INSTALL_PLUGINS=grafana-clock-panel,grafana-simple-json-datasource,grafana-piechart-panel,grafana-worldmap-panel,grafana-polystat-panel GF_INSTALL_PLUGINS=grafana-clock-panel,grafana-simple-json-datasource,grafana-piechart-panel,grafana-worldmap-panel,grafana-polystat-panel
GF_SERVER_HTTP_PORT=3333 GF_SERVER_HTTP_PORT=3719
EOH EOH
destination = "secrets/env" destination = "secrets/env"
env = true env = true
@ -135,12 +143,12 @@ EOH
"grafana", "grafana",
"tricot grafana.staging.deuxfleurs.org", "tricot grafana.staging.deuxfleurs.org",
] ]
port = 3333 port = 3719
address_mode = "driver" address_mode = "driver"
name = "grafana" name = "grafana"
check { check {
type = "tcp" type = "tcp"
port = 3333 port = 3719
address_mode = "driver" address_mode = "driver"
interval = "60s" interval = "60s"
timeout = "5s" timeout = "5s"

View file

@ -0,0 +1 @@
CMD openssl rand -base64 12

View file

@ -0,0 +1 @@
USER S3 access key for grafana db

View file

@ -0,0 +1 @@
USER S3 secret key for grafana db

View file

@ -0,0 +1,10 @@
# Litestream configuration for the Grafana SQLite database. Rendered by a
# Nomad template stanza, so template expressions are expanded before
# Litestream parses the YAML.
dbs:
  - path: /ephemeral/grafana.db
    replicas:
      - url: s3://grafana-db/grafana.db
        region: garage-staging
        # S3 endpoint on the IP address of the node running the allocation.
        endpoint: 'http://{{ env "attr.unique.network.ip-address" }}:3990'
        # Credentials are quoted so that a key made only of digits (or
        # digits plus "e") is still read as a string, not a number.
        access-key-id: '{{ key "secrets/telemetry/grafana/s3_access_key" | trimSpace }}'
        secret-access-key: '{{ key "secrets/telemetry/grafana/s3_secret_key" | trimSpace }}'
        force-path-style: true
        sync-interval: 60s

View file

@ -0,0 +1,182 @@
# Nomad job of type "system" for the Elasticsearch-based telemetry stack:
# one group running an Elasticsearch node and one group running the
# otel, apm-server and filebeat tasks.
job "telemetry-system" {
  datacenters = ["neptune"]
  type        = "system"

  group "elasticsearch" {
    # Fixed host ports; they match http.port / transport.port set below.
    network {
      port "elastic" { static = 9200 }
      port "elastic_internal" { static = 9300 }
    }

    task "elastic" {
      driver = "docker"

      config {
        image        = "docker.elastic.co/elasticsearch/elasticsearch:8.2.0"
        network_mode = "host"
        # Index data lives on the host; the certificate bundle is the file
        # rendered by the first template stanza below.
        volumes = [
          "/mnt/ssd/telemetry/es_data:/usr/share/elasticsearch/data",
          "secrets/elastic-certificates.p12:/usr/share/elasticsearch/config/elastic-certificates.p12",
        ]
        ports = [ "elastic", "elastic_internal" ]
        sysctl = {
          #"vm.max_map_count" = "262144",
        }
        # Paired with bootstrap.memory_lock=true in the environment below.
        ulimit = {
          memlock = "9223372036854775807:9223372036854775807",
        }
      }

      user = "1000"

      resources {
        memory = 1500
        cpu    = 500
      }

      # Certificate bundle read from the Consul key/value store.
      template {
        data        = "{{ key \"secrets/telemetry/elasticsearch/elastic-certificates.p12\" }}"
        destination = "secrets/elastic-certificates.p12"
      }

      # Elasticsearch settings, exported to the task as environment
      # variables (env = true).
      template {
        data = <<EOH
node.name={{ env "attr.unique.hostname" }}
http.port=9200
transport.port=9300
cluster.name=es-deuxfleurs
cluster.initial_master_nodes=carcajou,caribou,cariacou
discovery.seed_hosts=carcajou,caribou,cariacou
bootstrap.memory_lock=true
xpack.security.enabled=true
xpack.security.authc.api_key.enabled=true
xpack.security.transport.ssl.enabled=true
xpack.security.transport.ssl.verification_mode=certificate
xpack.security.transport.ssl.client_authentication=required
xpack.security.transport.ssl.keystore.path=/usr/share/elasticsearch/config/elastic-certificates.p12
xpack.security.transport.ssl.truststore.path=/usr/share/elasticsearch/config/elastic-certificates.p12
cluster.routing.allocation.disk.watermark.high=75%
cluster.routing.allocation.disk.watermark.low=65%
ES_JAVA_OPTS=-Xms512M -Xmx512M
EOH
        destination = "secrets/env"
        env         = true
      }
    }
  }

  group "collector" {
    # The node_exporter port stays reserved even though the task that
    # would use it is commented out further down.
    network {
      port "otel_grpc" { static = 4317 }
      port "apm" { static = 8200 }
      port "node_exporter" { static = 9100 }
    }

    # OpenTelemetry collector, configured from ../config/otel-config.yaml.
    task "otel" {
      driver = "docker"

      config {
        image = "otel/opentelemetry-collector-contrib:0.46.0"
        args = [
          "--config=/etc/otel-config.yaml",
        ]
        network_mode = "host"
        ports        = [ "otel_grpc" ]
        volumes = [
          "secrets/otel-config.yaml:/etc/otel-config.yaml"
        ]
      }

      template {
        data        = file("../config/otel-config.yaml")
        destination = "secrets/otel-config.yaml"
      }

      resources {
        memory = 100
        cpu    = 100
      }
    }

    # Elastic APM server, configured from ../config/apm-config.yaml
    # (mounted read-only).
    task "apm" {
      driver = "docker"

      config {
        image        = "docker.elastic.co/apm/apm-server:8.2.0"
        network_mode = "host"
        ports        = [ "apm" ]
        args         = [ "--strict.perms=false" ]
        volumes = [
          "secrets/apm-config.yaml:/usr/share/apm-server/apm-server.yml:ro"
        ]
      }

      template {
        data        = file("../config/apm-config.yaml")
        destination = "secrets/apm-config.yaml"
      }

      resources {
        memory = 100
        cpu    = 100
      }
    }

    /*
    task "node_exporter" {
      driver = "docker"
      config {
        image = "quay.io/prometheus/node-exporter:v1.1.2"
        network_mode = "host"
        ports = [ "node_exporter" ]
        volumes = [
          "/:/host:ro,rslave"
        ]
        args = [ "--path.rootfs=/host" ]
      }
      resources {
        cpu = 50
        memory = 40
      }
    }
    */

    # Filebeat runs privileged as root, with the Docker socket plus the
    # host's container and /var/log directories mounted into the container.
    task "filebeat" {
      driver = "docker"

      config {
        image        = "docker.elastic.co/beats/filebeat:8.2.0"
        network_mode = "host"
        volumes = [
          "/mnt/ssd/telemetry/filebeat:/usr/share/filebeat/data",
          "secrets/filebeat.yml:/usr/share/filebeat/filebeat.yml",
          "/var/run/docker.sock:/var/run/docker.sock",
          "/var/lib/docker/containers/:/var/lib/docker/containers/:ro",
          "/var/log/:/var/log/:ro",
        ]
        args       = [ "--strict.perms=false" ]
        privileged = true
      }

      user = "root"

      template {
        data        = file("../config/filebeat.yml")
        destination = "secrets/filebeat.yml"
      }

      resources {
        memory = 100
        cpu    = 100
      }
    }
  }
}

View file

@ -0,0 +1,181 @@
# Nomad service job for the telemetry front-ends: a Kibana group and a
# Grafana group whose SQLite database is restored from and replicated to
# object storage by Litestream tasks.
job "telemetry" {
  datacenters = ["neptune"]
  type        = "service"

  group "kibana" {
    count = 1

    network {
      port "kibana" { static = 5601 }
    }

    task "kibana" {
      driver = "docker"

      config {
        image        = "docker.elastic.co/kibana/kibana:8.2.0"
        network_mode = "host"
        ports        = [ "kibana" ]
      }

      # Kibana settings exported as environment variables (env = true);
      # the password is read from the Consul key/value store.
      # NOTE(review): SERVER_PUBLICBASEURL points at kibana.home.adnab.me
      # while the tricot tag below names kibana.staging.deuxfleurs.org -
      # confirm which hostname is intended.
      template {
        data = <<EOH
SERVER_NAME={{ env "attr.unique.hostname" }}
ELASTICSEARCH_HOSTS=http://localhost:9200
ELASTICSEARCH_USERNAME=kibana_system
ELASTICSEARCH_PASSWORD={{ key "secrets/telemetry/elastic_passwords/kibana_system" }}
SERVER_PUBLICBASEURL=https://kibana.home.adnab.me
EOH
        destination = "secrets/env"
        env         = true
      }

      resources {
        memory = 1000
        cpu    = 500
      }

      service {
        tags = [
          "kibana",
          "tricot kibana.staging.deuxfleurs.org",
        ]
        port         = 5601
        address_mode = "driver"
        name         = "kibana"

        # TCP check every 60s; check_restart allows 3 failures, with a
        # 90s grace period.
        check {
          type         = "tcp"
          port         = 5601
          address_mode = "driver"
          interval     = "60s"
          timeout      = "5s"

          check_restart {
            limit           = 3
            grace           = "90s"
            ignore_warnings = false
          }
        }
      }
    }
  }

  group "grafana" {
    count = 1

    network {
      port "grafana" { static = 3333 }
    }

    # Prestart, non-sidecar task: runs `litestream restore` for
    # /ephemeral/grafana.db, which is the allocation's shared data directory.
    task "restore-db" {
      lifecycle {
        hook    = "prestart"
        sidecar = false
      }

      driver = "docker"

      config {
        image = "litestream/litestream:0.3.7"
        args = [
          "restore", "-config", "/etc/litestream.yml", "/ephemeral/grafana.db"
        ]
        volumes = [
          "../alloc/data:/ephemeral",
          "secrets/litestream.yml:/etc/litestream.yml"
        ]
      }

      # presumably the uid the Grafana image runs as - confirm
      user = "472"

      template {
        data        = file("../config/grafana-litestream.yml")
        destination = "secrets/litestream.yml"
      }

      resources {
        memory = 200
        cpu    = 1000
      }
    }

    # Grafana itself; /var/lib/grafana is the same allocation data
    # directory the Litestream tasks mount as /ephemeral.
    task "grafana" {
      driver = "docker"

      config {
        image        = "grafana/grafana:8.4.3"
        network_mode = "host"
        ports        = [ "grafana" ]
        volumes = [
          "../alloc/data:/var/lib/grafana",
          "secrets/elastic.yaml:/etc/grafana/provisioning/datasources/elastic.yaml"
        ]
      }

      template {
        data        = file("../config/grafana/provisioning/datasources/elastic.yaml")
        destination = "secrets/elastic.yaml"
      }

      # Environment for Grafana; the HTTP port matches the static
      # "grafana" port reserved above.
      template {
        data = <<EOH
GF_INSTALL_PLUGINS=grafana-clock-panel,grafana-simple-json-datasource,grafana-piechart-panel,grafana-worldmap-panel,grafana-polystat-panel
GF_SERVER_HTTP_PORT=3333
EOH
        destination = "secrets/env"
        env         = true
      }

      resources {
        memory = 500
        cpu    = 100
      }

      service {
        tags = [
          "grafana",
          "tricot grafana.staging.deuxfleurs.org",
        ]
        port         = 3333
        address_mode = "driver"
        name         = "grafana"

        check {
          type         = "tcp"
          port         = 3333
          address_mode = "driver"
          interval     = "60s"
          timeout      = "5s"

          check_restart {
            limit           = 3
            grace           = "90s"
            ignore_warnings = false
          }
        }
      }
    }

    # Runs `litestream replicate` with the same config file and data
    # directory as the restore task.
    task "replicate-db" {
      driver = "docker"

      config {
        image = "litestream/litestream:0.3.7"
        args = [
          "replicate", "-config", "/etc/litestream.yml"
        ]
        volumes = [
          "../alloc/data:/ephemeral",
          "secrets/litestream.yml:/etc/litestream.yml"
        ]
      }

      # presumably the uid the Grafana image runs as - confirm
      user = "472"

      template {
        data        = file("../config/grafana-litestream.yml")
        destination = "secrets/litestream.yml"
      }

      resources {
        memory = 200
        cpu    = 100
      }
    }
  }
}