garage: harmonize staging and prod (checks, services)

This commit is contained in:
Baptiste Jonglez 2024-06-08 16:42:49 +02:00
parent cbb0093f2c
commit 9fc22d72d4
2 changed files with 108 additions and 75 deletions

View file

@ -80,9 +80,9 @@ job "garage" {
#### Configuration for service ports: admin port (internal use only) #### Configuration for service ports: admin port (internal use only)
service { service {
name = "garage-admin"
port = "admin" port = "admin"
address_mode = "host" address_mode = "host"
name = "garage-admin"
# Check that Garage is alive and answering TCP connections # Check that Garage is alive and answering TCP connections
check { check {
type = "tcp" type = "tcp"
@ -96,9 +96,10 @@ job "garage" {
} }
} }
#### Configuration for service ports: externally available ports (API, web) #### Configuration for service ports: externally available ports (S3 API, K2V, web)
service { service {
name = "garage-api"
tags = [ tags = [
"garage_api", "garage_api",
"tricot garage.deuxfleurs.fr", "tricot garage.deuxfleurs.fr",
@ -107,7 +108,6 @@ job "garage" {
] ]
port = "s3" port = "s3"
address_mode = "host" address_mode = "host"
name = "garage-api"
# Check 1: Garage is alive and answering TCP connections # Check 1: Garage is alive and answering TCP connections
check { check {
name = "garage-api-live" name = "garage-api-live"
@ -132,6 +132,39 @@ job "garage" {
} }
service { service {
name = "garage-k2v"
tags = [
"garage_k2v",
"tricot k2v.deuxfleurs.fr",
"tricot-site-lb",
]
port = "k2v"
address_mode = "host"
# Check 1: Garage is alive and answering TCP connections
check {
name = "garage-k2v-live"
type = "tcp"
interval = "60s"
timeout = "5s"
check_restart {
limit = 3
grace = "90s"
ignore_warnings = false
}
}
# Check 2: Garage is in a healthy state and requests should be routed here
check {
name = "garage-k2v-healthy"
port = "admin"
type = "http"
path = "/health"
interval = "60s"
timeout = "5s"
}
}
service {
name = "garage-web"
tags = [ tags = [
"garage-web", "garage-web",
"tricot * 1", "tricot * 1",
@ -144,7 +177,6 @@ job "garage" {
] ]
port = "web" port = "web"
address_mode = "host" address_mode = "host"
name = "garage-web"
# Check 1: Garage is alive and answering TCP connections # Check 1: Garage is alive and answering TCP connections
check { check {
name = "garage-web-live" name = "garage-web-live"
@ -183,39 +215,6 @@ job "garage" {
port = "web" port = "web"
on_update = "ignore" on_update = "ignore"
} }
service {
tags = [
"garage_k2v",
"tricot k2v.deuxfleurs.fr",
"tricot-site-lb",
]
port = "k2v"
address_mode = "host"
name = "garage-k2v"
# Check 1: Garage is alive and answering TCP connections
check {
name = "garage-k2v-live"
type = "tcp"
interval = "60s"
timeout = "5s"
check_restart {
limit = 3
grace = "90s"
ignore_warnings = false
}
}
# Check 2: Garage is in a healthy state and requests should be routed here
check {
name = "garage-k2v-healthy"
port = "admin"
type = "http"
path = "/health"
interval = "60s"
timeout = "5s"
}
}
} }
} }
} }

View file

@ -73,12 +73,42 @@ job "garage-staging" {
kill_signal = "SIGINT" kill_signal = "SIGINT"
kill_timeout = "20s" kill_timeout = "20s"
restart {
interval = "5m"
attempts = 10
delay = "1m"
mode = "delay"
}
service { service {
name = "garage-staging-rpc" name = "garage-staging-rpc"
tags = ["garage-staging-rpc"] tags = ["garage-staging-rpc"]
port = "rpc" port = "rpc"
} }
#### Configuration for service ports: admin port (internal use only)
service {
name = "garage-staging-admin"
tags = [
"garage-staging-admin",
]
port = "admin"
check {
name = "garage-tcp-liveness-check"
type = "tcp"
interval = "60s"
timeout = "5s"
check_restart {
limit = 3
grace = "90s"
ignore_warnings = false
}
}
}
#### Configuration for service ports: externally available ports (S3 API, K2V, web)
service { service {
name = "garage-staging-s3-api" name = "garage-staging-s3-api"
tags = [ tags = [
@ -90,7 +120,21 @@ job "garage-staging" {
"tricot-site-lb", "tricot-site-lb",
] ]
port = "s3" port = "s3"
# Check 1: Garage is alive and answering TCP connections
check { check {
name = "garage-staging-api-live"
type = "tcp"
interval = "60s"
timeout = "5s"
check_restart {
limit = 3
grace = "90s"
ignore_warnings = false
}
}
# Check 2: Garage is in a healthy state and requests should be routed here
check {
name = "garage-staging-api-healthy"
port = "admin" port = "admin"
type = "http" type = "http"
path = "/health" path = "/health"
@ -108,7 +152,21 @@ job "garage-staging" {
"tricot-site-lb", "tricot-site-lb",
] ]
port = "k2v" port = "k2v"
# Check 1: Garage is alive and answering TCP connections
check { check {
name = "garage-staging-k2v-live"
type = "tcp"
interval = "60s"
timeout = "5s"
check_restart {
limit = 3
grace = "90s"
ignore_warnings = false
}
}
# Check 2: Garage is in a healthy state and requests should be routed here
check {
name = "garage-staging-k2v-healthy"
port = "admin" port = "admin"
type = "http" type = "http"
path = "/health" path = "/health"
@ -134,51 +192,27 @@ job "garage-staging" {
"tricot-site-lb", "tricot-site-lb",
] ]
port = "web" port = "web"
# Check 1: Garage is alive and answering TCP connections
check { check {
port = "admin" name = "garage-staging-web-live"
type = "http"
path = "/health"
interval = "60s"
timeout = "5s"
}
}
service {
name = "garage-staging-admin"
tags = [
"garage-staging-admin",
]
port = "admin"
check {
name = "garage-admin-health-check"
type = "http"
path = "/health"
interval = "60s"
timeout = "5s"
check_restart {
limit = 10
grace = "90s"
ignore_warnings = true
}
}
check {
name = "garage-tcp-liveness-check"
type = "tcp" type = "tcp"
interval = "60s" interval = "60s"
timeout = "5s" timeout = "5s"
check_restart { check_restart {
limit = 3 limit = 3
grace = "90s" grace = "90s"
ignore_warnings = true ignore_warnings = false
} }
} }
# Check 2: Garage is in a healthy state and requests should be routed here
check {
name = "garage-staging-web-healthy"
port = "admin"
type = "http"
path = "/health"
interval = "60s"
timeout = "5s"
} }
restart {
interval = "5m"
attempts = 10
delay = "1m"
mode = "delay"
} }
} }
} }