prod garage: add health check using admin api's '/health'

This commit is contained in:
Alex 2023-08-27 13:56:51 +02:00
parent 8e304e8f5f
commit ecb4cabcf0

View file

@ -14,7 +14,7 @@ job "garage" {
port "rpc" { static = 3901 }
port "web" { static = 3902 }
port "admin" { static = 3903 }
port "k2v" { static = 3904 }
port "k2v" { static = 3904 }
}
update {
@ -26,7 +26,6 @@ job "garage" {
task "server" {
driver = "docker"
config {
advertise_ipv6_address = true
image = "dxflrs/garage:v0.8.2"
command = "/garage"
args = [ "server" ]
@ -70,20 +69,22 @@ job "garage" {
kill_timeout = "20s"
restart {
interval = "30m"
attempts = 10
delay = "15s"
mode = "delay"
}
#### Configuration for service ports: admin port (internal use only)
service {
tags = [
"garage_api",
"tricot garage.deuxfleurs.fr",
"tricot *.garage.deuxfleurs.fr",
"tricot-site-lb",
]
port = 3900
address_mode = "driver"
name = "garage-api"
port = "admin"
address_mode = "host"
name = "garage-admin"
# Check that Garage is alive and answering TCP connections
check {
type = "tcp"
port = 3900
address_mode = "driver"
interval = "60s"
timeout = "5s"
check_restart {
@ -94,6 +95,41 @@ job "garage" {
}
}
#### Configuration for service ports: externally available ports (API, web)
# Service registration for the S3 API endpoint: published under the name
# "garage-api" on the port labelled "s3", using the host address.
# Two checks are attached: a TCP liveness check that can trigger a task
# restart, and an HTTP health check against the admin port that only reports
# status (it carries no check_restart block).
service {
# Routing tags attached to the registration.
# NOTE(review): the "tricot ..." tags are presumably read by the Tricot
# reverse proxy to route garage.deuxfleurs.fr and its subdomains — confirm.
tags = [
"garage_api",
"tricot garage.deuxfleurs.fr",
"tricot *.garage.deuxfleurs.fr",
"tricot-site-lb",
]
port = "s3"
address_mode = "host"
name = "garage-api"
# Check 1: Garage is alive and answering TCP connections
# No port is set on this check, so it is expected to probe the service's own
# port ("s3") — NOTE(review): relies on Nomad's default check port; confirm.
check {
name = "garage-api-live"
type = "tcp"
interval = "60s"
timeout = "5s"
# Restart policy tied to this check: limit of 3 failures, with a 90s grace
# window; ignore_warnings = false means warning results are not ignored.
check_restart {
limit = 3
grace = "90s"
ignore_warnings = false
}
}
# Check 2: Garage is in a healthy state and requests should be routed here
# Probes "/health" over HTTP on the "admin" port rather than the service port.
# There is deliberately no check_restart here in the committed spec, so a
# failing health result does not by itself restart the task.
check {
name = "garage-api-healthy"
port = "admin"
type = "http"
path = "/health"
interval = "60s"
timeout = "5s"
}
}
service {
tags = [
"garage-web",
@ -105,13 +141,13 @@ job "garage" {
"tricot-add-header X-Content-Type-Options nosniff",
"tricot-site-lb",
]
port = 3902
address_mode = "driver"
port = "web"
address_mode = "host"
name = "garage-web"
# Check 1: Garage is alive and answering TCP connections
check {
name = "garage-web-live"
type = "tcp"
port = 3902
address_mode = "driver"
interval = "60s"
timeout = "5s"
check_restart {
@ -120,23 +156,14 @@ job "garage" {
ignore_warnings = false
}
}
}
service {
port = 3903
address_mode = "driver"
name = "garage-admin"
# Check 2: Garage is in a healthy state and requests should be routed here
check {
type = "tcp"
port = 3903
address_mode = "driver"
name = "garage-web-healthy"
port = "admin"
type = "http"
path = "/health"
interval = "60s"
timeout = "5s"
check_restart {
limit = 3
grace = "90s"
ignore_warnings = false
}
}
}
@ -146,13 +173,13 @@ job "garage" {
"tricot k2v.deuxfleurs.fr",
"tricot-site-lb",
]
port = 3904
address_mode = "driver"
port = "k2v"
address_mode = "host"
name = "garage-k2v"
# Check 1: Garage is alive and answering TCP connections
check {
name = "garage-k2v-live"
type = "tcp"
port = 3904
address_mode = "driver"
interval = "60s"
timeout = "5s"
check_restart {
@ -161,13 +188,15 @@ job "garage" {
ignore_warnings = false
}
}
}
restart {
interval = "30m"
attempts = 10
delay = "15s"
mode = "delay"
# Check 2: Garage is in a healthy state and requests should be routed here
check {
name = "garage-k2v-healthy"
port = "admin"
type = "http"
path = "/health"
interval = "60s"
timeout = "5s"
}
}
}
}