prod garage: add health check using the admin API's '/health' endpoint

This commit is contained in:
Alex 2023-08-27 13:56:51 +02:00
parent 8e304e8f5f
commit ecb4cabcf0

View file

@@ -14,7 +14,7 @@ job "garage" {
port "rpc" { static = 3901 } port "rpc" { static = 3901 }
port "web" { static = 3902 } port "web" { static = 3902 }
port "admin" { static = 3903 } port "admin" { static = 3903 }
port "k2v" { static = 3904 } port "k2v" { static = 3904 }
} }
update { update {
@@ -26,7 +26,6 @@ job "garage" {
task "server" { task "server" {
driver = "docker" driver = "docker"
config { config {
advertise_ipv6_address = true
image = "dxflrs/garage:v0.8.2" image = "dxflrs/garage:v0.8.2"
command = "/garage" command = "/garage"
args = [ "server" ] args = [ "server" ]
@@ -70,20 +69,22 @@ job "garage" {
kill_timeout = "20s" kill_timeout = "20s"
restart {
interval = "30m"
attempts = 10
delay = "15s"
mode = "delay"
}
#### Configuration for service ports: admin port (internal use only)
service { service {
tags = [ port = "admin"
"garage_api", address_mode = "host"
"tricot garage.deuxfleurs.fr", name = "garage-admin"
"tricot *.garage.deuxfleurs.fr", # Check that Garage is alive and answering TCP connections
"tricot-site-lb",
]
port = 3900
address_mode = "driver"
name = "garage-api"
check { check {
type = "tcp" type = "tcp"
port = 3900
address_mode = "driver"
interval = "60s" interval = "60s"
timeout = "5s" timeout = "5s"
check_restart { check_restart {
@@ -94,6 +95,41 @@ job "garage" {
} }
} }
#### Configuration for service ports: externally available ports (API, web)
service {
tags = [
"garage_api",
"tricot garage.deuxfleurs.fr",
"tricot *.garage.deuxfleurs.fr",
"tricot-site-lb",
]
port = "s3"
address_mode = "host"
name = "garage-api"
# Check 1: Garage is alive and answering TCP connections
check {
name = "garage-api-live"
type = "tcp"
interval = "60s"
timeout = "5s"
check_restart {
limit = 3
grace = "90s"
ignore_warnings = false
}
}
# Check 2: Garage is in a healthy state and requests should be routed here
check {
name = "garage-api-healthy"
port = "admin"
type = "http"
path = "/health"
interval = "60s"
timeout = "5s"
}
}
service { service {
tags = [ tags = [
"garage-web", "garage-web",
@@ -105,13 +141,13 @@ job "garage" {
"tricot-add-header X-Content-Type-Options nosniff", "tricot-add-header X-Content-Type-Options nosniff",
"tricot-site-lb", "tricot-site-lb",
] ]
port = 3902 port = "web"
address_mode = "driver" address_mode = "host"
name = "garage-web" name = "garage-web"
# Check 1: Garage is alive and answering TCP connections
check { check {
name = "garage-web-live"
type = "tcp" type = "tcp"
port = 3902
address_mode = "driver"
interval = "60s" interval = "60s"
timeout = "5s" timeout = "5s"
check_restart { check_restart {
@@ -120,23 +156,14 @@ job "garage" {
ignore_warnings = false ignore_warnings = false
} }
} }
} # Check 2: Garage is in a healthy state and requests should be routed here
service {
port = 3903
address_mode = "driver"
name = "garage-admin"
check { check {
type = "tcp" name = "garage-web-healthy"
port = 3903 port = "admin"
address_mode = "driver" type = "http"
path = "/health"
interval = "60s" interval = "60s"
timeout = "5s" timeout = "5s"
check_restart {
limit = 3
grace = "90s"
ignore_warnings = false
}
} }
} }
@@ -146,13 +173,13 @@ job "garage" {
"tricot k2v.deuxfleurs.fr", "tricot k2v.deuxfleurs.fr",
"tricot-site-lb", "tricot-site-lb",
] ]
port = 3904 port = "k2v"
address_mode = "driver" address_mode = "host"
name = "garage-k2v" name = "garage-k2v"
# Check 1: Garage is alive and answering TCP connections
check { check {
name = "garage-k2v-live"
type = "tcp" type = "tcp"
port = 3904
address_mode = "driver"
interval = "60s" interval = "60s"
timeout = "5s" timeout = "5s"
check_restart { check_restart {
@@ -161,13 +188,15 @@ job "garage" {
ignore_warnings = false ignore_warnings = false
} }
} }
} # Check 2: Garage is in a healthy state and requests should be routed here
check {
restart { name = "garage-k2v-healthy"
interval = "30m" port = "admin"
attempts = 10 type = "http"
delay = "15s" path = "/health"
mode = "delay" interval = "60s"
timeout = "5s"
}
} }
} }
} }