forked from Deuxfleurs/nixcfg
Remove garage files at bad location, add basic telemetry
This commit is contained in:
parent
fd3ed44dad
commit
72d033dcd4
14 changed files with 311 additions and 156 deletions
|
@ -21,4 +21,6 @@ bind_addr = "[::]:3902"
|
|||
root_domain = ".web.deuxfleurs.fr"
|
||||
|
||||
[admin]
|
||||
api_bind_addr = "[::1]:3903"
|
||||
api_bind_addr = "[::]:3903"
|
||||
metrics_token = "{{ key "secrets/garage/metrics_token" | trimSpace }}"
|
||||
admin_token = "{{ key "secrets/garage/admin_token" | trimSpace }}"
|
||||
|
|
|
@ -18,6 +18,7 @@ job "garage" {
|
|||
port "s3" { static = 3900 }
|
||||
port "rpc" { static = 3901 }
|
||||
port "web" { static = 3902 }
|
||||
port "admin" { static = 3903 }
|
||||
}
|
||||
|
||||
update {
|
||||
|
@ -125,6 +126,24 @@ job "garage" {
|
|||
}
|
||||
}
|
||||
|
||||
# Service registration for Garage's admin endpoint (port 3903), published
# under the name "garage-admin".
service {
  name         = "garage-admin"
  address_mode = "driver"
  port         = 3903

  # TCP connect probe on the same port: every 60s, 5s timeout.
  check {
    type         = "tcp"
    address_mode = "driver"
    port         = 3903
    interval     = "60s"
    timeout      = "5s"

    # Restart the task once the check has failed `limit` times; failures
    # during the first 90s after a (re)start are not counted.
    check_restart {
      limit           = 3
      grace           = "90s"
      ignore_warnings = false
    }
  }
}
|
||||
|
||||
restart {
|
||||
interval = "30m"
|
||||
attempts = 10
|
||||
|
|
1
cluster/prod/app/garage/secrets/garage/metrics_token
Normal file
1
cluster/prod/app/garage/secrets/garage/metrics_token
Normal file
|
@ -0,0 +1 @@
|
|||
CMD_ONCE openssl rand -hex 32
|
|
@ -0,0 +1,7 @@
|
|||
---
# Grafana datasource provisioning: declares a single Prometheus datasource.
apiVersion: 1

datasources:
  # Datasource named DS_PROMETHEUS, queried through the Grafana backend
  # (access: proxy) at the Prometheus service address.
  - name: DS_PROMETHEUS
    type: prometheus
    access: proxy
    url: 'http://prometheus.service.prod.consul:9090'
|
10
cluster/prod/app/telemetry/config/grafana-litestream.yml
Normal file
10
cluster/prod/app/telemetry/config/grafana-litestream.yml
Normal file
|
@ -0,0 +1,10 @@
|
|||
---
# Litestream configuration (rendered by a Nomad template stanza before use).
# Replicates the Grafana SQLite database to an S3 bucket.
#
# Every templated value is quoted so that the rendered text is always read as
# a YAML string, whatever the secret happens to look like (all digits,
# leading indicator character, ...).
dbs:
  - path: /ephemeral/grafana.db
    replicas:
      - url: 's3://grafana-db/grafana.db'
        region: garage
        # S3 endpoint on port 3900 of the node's own IP address.
        # NOTE(review): an IPv6 address would need [brackets] here - confirm
        # that this attribute is always IPv4 on these nodes.
        endpoint: 'http://{{ env "attr.unique.network.ip-address" }}:3900'
        access-key-id: '{{ key "secrets/telemetry/grafana/s3_access_key" | trimSpace }}'
        secret-access-key: '{{ key "secrets/telemetry/grafana/s3_secret_key" | trimSpace }}'
        force-path-style: true
        sync-interval: 60s
|
30
cluster/prod/app/telemetry/config/prometheus.yml
Normal file
30
cluster/prod/app/telemetry/config/prometheus.yml
Normal file
|
@ -0,0 +1,30 @@
|
|||
---
# Prometheus configuration (rendered by a Nomad template stanza before use).
global:
  scrape_interval: 15s  # By default, scrape targets every 15 seconds.

scrape_configs:
  # Prometheus scraping itself.
  - job_name: 'prometheus'
    static_configs:
      - targets: ['localhost:9090']

  # Targets are discovered through the Consul service 'node-exporter'.
  - job_name: 'node-exporter'
    consul_sd_configs:
      - server: 'https://localhost:8501'
        services:
          - 'node-exporter'
        # Client certificate and CA used for the HTTPS connection to Consul.
        tls_config:
          ca_file: /etc/prometheus/consul.crt
          cert_file: /etc/prometheus/consul-client.crt
          key_file: /etc/prometheus/consul-client.key

  # Targets are discovered through the Consul service 'garage-admin' and
  # scraped with a bearer token.
  - job_name: 'garage'
    authorization:
      type: Bearer
      # Quoted so the rendered token is always a YAML string, and trimmed so a
      # trailing newline stored with the secret never ends up in the header
      # value (garage.toml applies the same trimSpace to this key).
      credentials: '{{ key "secrets/garage/metrics_token" | trimSpace }}'
    consul_sd_configs:
      - server: 'https://localhost:8501'
        services:
          - 'garage-admin'
        # Client certificate and CA used for the HTTPS connection to Consul.
        tls_config:
          ca_file: /etc/prometheus/consul.crt
          cert_file: /etc/prometheus/consul-client.crt
          key_file: /etc/prometheus/consul-client.key
|
49
cluster/prod/app/telemetry/deploy/telemetry-system.hcl
Normal file
49
cluster/prod/app/telemetry/deploy/telemetry-system.hcl
Normal file
|
@ -0,0 +1,49 @@
|
|||
# System job: runs one Prometheus node-exporter on every eligible client of
# the listed datacenters.
job "telemetry-system" {
  datacenters = ["neptune", "orion"]
  type = "system"
  # Job priority is an integer in Nomad; written as a number (not a quoted
  # string) like the other job specs in this repository.
  priority = 100

  group "collector" {
    network {
      port "node_exporter" { static = 9100 }
    }

    task "node_exporter" {
      driver = "docker"

      config {
        image = "quay.io/prometheus/node-exporter:v1.1.2"
        network_mode = "host"
        # The host root filesystem is mounted read-only at /host and handed
        # to node-exporter through --path.rootfs.
        volumes = [
          "/:/host:ro,rslave"
        ]
        args = [ "--path.rootfs=/host" ]
      }

      resources {
        cpu = 50
        memory = 40
      }

      # Registered as "node-exporter"; prometheus.yml discovers scrape
      # targets through this service name.
      service {
        tags = [ "telemetry" ]
        port = 9100
        address_mode = "driver"
        name = "node-exporter"
        # HTTP GET / every 60s with a 5s timeout.
        check {
          type = "http"
          path = "/"
          port = 9100
          address_mode = "driver"
          interval = "60s"
          timeout = "5s"
          # Restart the task once the check has failed `limit` times;
          # failures during the first 90s after a (re)start are not counted.
          check_restart {
            limit = 3
            grace = "90s"
            ignore_warnings = false
          }
        }
      }
    }
  }
}
|
189
cluster/prod/app/telemetry/deploy/telemetry.hcl
Normal file
189
cluster/prod/app/telemetry/deploy/telemetry.hcl
Normal file
|
@ -0,0 +1,189 @@
|
|||
# Service job with two groups:
#   - "prometheus": a single Prometheus server on static port 9090
#   - "grafana":    Grafana on static port 3719, with its SQLite database
#                   restored from / replicated to S3 by Litestream
job "telemetry" {
  type        = "service"
  datacenters = ["neptune"]

  group "prometheus" {
    count = 1

    network {
      port "prometheus" {
        static = 9090
      }
    }

    task "prometheus" {
      driver = "docker"

      config {
        image        = "prom/prometheus:v2.38.0"
        network_mode = "host"
        ports        = [ "prometheus" ]
        # The whole task secrets directory becomes /etc/prometheus, so every
        # template rendered below is visible there.
        volumes = [
          "secrets:/etc/prometheus"
        ]
      }

      # Main configuration file.
      template {
        destination = "secrets/prometheus.yml"
        data        = file("../config/prometheus.yml")
      }

      # Consul CA certificate, client certificate and client key; these are
      # the files referenced by the tls_config entries of prometheus.yml.
      template {
        destination = "secrets/consul.crt"
        data        = "{{ key \"secrets/consul/consul.crt\" }}"
      }

      template {
        destination = "secrets/consul-client.crt"
        data        = "{{ key \"secrets/consul/consul-client.crt\" }}"
      }

      template {
        destination = "secrets/consul-client.key"
        data        = "{{ key \"secrets/consul/consul-client.key\" }}"
      }

      resources {
        cpu    = 500
        memory = 500
      }

      service {
        name         = "prometheus"
        address_mode = "driver"
        port         = 9090

        # HTTP GET / every 60s with a 5s timeout.
        check {
          type         = "http"
          path         = "/"
          address_mode = "driver"
          port         = 9090
          interval     = "60s"
          timeout      = "5s"

          check_restart {
            limit           = 3
            grace           = "90s"
            ignore_warnings = false
          }
        }
      }
    }
  }

  group "grafana" {
    count = 1

    network {
      port "grafana" {
        static = 3719
      }
    }

    # Prestart, non-sidecar task: restores /ephemeral/grafana.db with
    # Litestream before the other tasks of the group start.
    # NOTE(review): `restore` is called without -if-replica-exists, so it
    # presumably fails while the bucket holds no replica yet (first
    # deployment) - confirm against the Litestream documentation.
    task "restore-db" {
      driver = "docker"
      # NOTE(review): presumably the uid Grafana runs as, so that the
      # restored database stays writable by the grafana task - confirm.
      user = "472"

      lifecycle {
        hook    = "prestart"
        sidecar = false
      }

      config {
        image = "litestream/litestream:0.3.7"
        args = [
          "restore", "-config", "/etc/litestream.yml", "/ephemeral/grafana.db"
        ]
        # ../alloc/data is mounted by all three tasks of this group: as
        # /ephemeral for Litestream, as /var/lib/grafana for Grafana.
        volumes = [
          "../alloc/data:/ephemeral",
          "secrets/litestream.yml:/etc/litestream.yml"
        ]
      }

      template {
        destination = "secrets/litestream.yml"
        data        = file("../config/grafana-litestream.yml")
      }

      resources {
        cpu    = 1000
        memory = 200
      }
    }

    task "grafana" {
      driver = "docker"

      config {
        image        = "grafana/grafana:8.4.3"
        network_mode = "host"
        ports        = [ "grafana" ]
        volumes = [
          "../alloc/data:/var/lib/grafana",
          "secrets/prometheus.yaml:/etc/grafana/provisioning/datasources/prometheus.yaml"
        ]
      }

      # Datasource provisioning file, mounted into Grafana's provisioning
      # directory by the volume above.
      template {
        destination = "secrets/prometheus.yaml"
        data        = file("../config/grafana-datasource-prometheus.yaml")
      }

      # Environment file: plugin list and HTTP port. The port matches the
      # static "grafana" port of the group network stanza.
      template {
        destination = "secrets/env"
        env         = true
        data = <<EOH
GF_INSTALL_PLUGINS=grafana-clock-panel,grafana-simple-json-datasource,grafana-piechart-panel,grafana-worldmap-panel,grafana-polystat-panel
GF_SERVER_HTTP_PORT=3719
EOH
      }

      resources {
        cpu    = 100
        memory = 500
      }

      service {
        name         = "grafana"
        address_mode = "driver"
        port         = 3719
        tags = [
          "grafana",
          "tricot grafana-new.deuxfleurs.fr",
        ]

        # TCP connect probe every 60s with a 5s timeout.
        check {
          type         = "tcp"
          address_mode = "driver"
          port         = 3719
          interval     = "60s"
          timeout      = "5s"

          check_restart {
            limit           = 3
            grace           = "90s"
            ignore_warnings = false
          }
        }
      }
    }

    # Runs `litestream replicate` next to Grafana, using the same Litestream
    # configuration file as the restore task.
    task "replicate-db" {
      driver = "docker"
      user   = "472"

      config {
        image = "litestream/litestream:0.3.7"
        args = [
          "replicate", "-config", "/etc/litestream.yml"
        ]
        volumes = [
          "../alloc/data:/ephemeral",
          "secrets/litestream.yml:/etc/litestream.yml"
        ]
      }

      template {
        destination = "secrets/litestream.yml"
        data        = file("../config/grafana-litestream.yml")
      }

      resources {
        cpu    = 100
        memory = 200
      }
    }
  }
}
|
|
@ -0,0 +1 @@
|
|||
CMD openssl rand -base64 12
|
|
@ -0,0 +1 @@
|
|||
USER S3 access key for grafana db
|
|
@ -0,0 +1 @@
|
|||
USER S3 secret key for grafana db
|
|
@ -1,24 +0,0 @@
|
|||
# Garage configuration (rendered by a Nomad template stanza before use).

# 1048576 bytes = 1 MiB
block_size = 1048576

# Storage locations.
metadata_dir = "/meta"
data_dir = "/data"

replication_mode = "3"

# RPC listener and shared secret; the secret is read from the Consul KV store.
rpc_bind_addr = "[::]:3901"
rpc_secret = "{{ key "secrets/garage/rpc_secret" | trimSpace }}"

# 536870912 bytes = 512 MiB
sled_cache_capacity = 536870912
sled_sync_interval_ms = 10000

# S3 API listener.
[s3_api]
s3_region = "garage"
api_bind_addr = "[::]:3900"
root_domain = ".garage.deuxfleurs.fr"

# Static website listener.
[s3_web]
bind_addr = "[::]:3902"
root_domain = ".web.deuxfleurs.fr"

# Admin API, bound to the IPv6 loopback address only.
[admin]
api_bind_addr = "[::1]:3903"
|
|
@ -1,131 +0,0 @@
|
|||
# System job: one Garage server per eligible amd64 client, listening on the
# static ports 3900 (S3 API), 3901 (RPC) and 3902 (web).
job "garage" {
  type        = "system"
  priority    = 80
  datacenters = ["dc1", "saturne", "neptune"]

  # Only place the job on amd64 nodes.
  constraint {
    attribute = "${attr.cpu.arch}"
    value     = "amd64"
  }

  group "garage" {
    network {
      port "s3"  { static = 3900 }
      port "rpc" { static = 3901 }
      port "web" { static = 3902 }
    }

    # Update one allocation at a time.
    update {
      max_parallel     = 1
      min_healthy_time = "30s"
      healthy_deadline = "5m"
    }

    task "server" {
      driver = "docker"

      kill_signal  = "SIGINT"
      kill_timeout = "20s"

      config {
        image                  = "dxflrs/amd64_garage:v0.7.1"
        command                = "/garage"
        args                   = [ "server" ]
        network_mode           = "host"
        advertise_ipv6_address = true
        # Host data and metadata directories, plus the rendered config file.
        volumes = [
          "/mnt/storage/garage/data:/data",
          "/mnt/ssd/garage/meta:/meta",
          "secrets/garage.toml:/etc/garage.toml",
        ]
        logging {
          type = "journald"
        }
      }

      template {
        destination = "secrets/garage.toml"
        data        = file("../config/garage.toml")
      }

      resources {
        cpu    = 1000
        memory = 1500
      }

      # S3 API endpoint.
      service {
        name         = "garage-api"
        address_mode = "driver"
        port         = 3900
        tags = [
          "garage_api",
          "tricot garage.deuxfleurs.fr",
          "tricot *.garage.deuxfleurs.fr",
        ]

        check {
          type         = "tcp"
          address_mode = "driver"
          port         = 3900
          interval     = "60s"
          timeout      = "5s"

          check_restart {
            limit           = 3
            grace           = "90s"
            ignore_warnings = false
          }
        }
      }

      # RPC endpoint.
      service {
        name         = "garage-rpc"
        address_mode = "driver"
        port         = 3901
        tags         = ["garage-rpc"]

        check {
          type         = "tcp"
          address_mode = "driver"
          port         = 3901
          interval     = "60s"
          timeout      = "5s"

          check_restart {
            limit           = 3
            grace           = "90s"
            ignore_warnings = false
          }
        }
      }

      # Web endpoint; the tricot-add-header tags carry HTTP response headers.
      service {
        name         = "garage-web"
        address_mode = "driver"
        port         = 3902
        tags = [
          "garage-web",
          "tricot * 1",
          "tricot-add-header Content-Security-Policy default-src 'self' 'unsafe-inline'; script-src 'self' 'unsafe-inline' https://code.jquery.com/; frame-ancestors 'self'",
          "tricot-add-header Strict-Transport-Security max-age=63072000; includeSubDomains; preload",
          "tricot-add-header X-Frame-Options SAMEORIGIN",
          "tricot-add-header X-XSS-Protection 1; mode=block",
        ]

        check {
          type         = "tcp"
          address_mode = "driver"
          port         = 3902
          interval     = "60s"
          timeout      = "5s"

          check_restart {
            limit           = 3
            grace           = "90s"
            ignore_warnings = false
          }
        }
      }

      # Up to 10 restarts per 30 minutes, 15s apart; mode "delay" keeps
      # retrying after the limit instead of failing the task.
      restart {
        mode     = "delay"
        delay    = "15s"
        attempts = 10
        interval = "30m"
      }
    }
  }
}
|
Loading…
Reference in a new issue