From 9fc22d72d44e3c987bb9fa589fde8af1626cb98f Mon Sep 17 00:00:00 2001 From: Baptiste Jonglez Date: Sat, 8 Jun 2024 16:42:49 +0200 Subject: [PATCH] garage: harmonize staging and prod (checks, services) --- cluster/prod/app/garage/deploy/garage.hcl | 73 ++++++------ cluster/staging/app/garage/deploy/garage.hcl | 110 ++++++++++++------- 2 files changed, 108 insertions(+), 75 deletions(-) diff --git a/cluster/prod/app/garage/deploy/garage.hcl b/cluster/prod/app/garage/deploy/garage.hcl index b6ea7f6..aad5c9b 100644 --- a/cluster/prod/app/garage/deploy/garage.hcl +++ b/cluster/prod/app/garage/deploy/garage.hcl @@ -80,9 +80,9 @@ job "garage" { #### Configuration for service ports: admin port (internal use only) service { + name = "garage-admin" port = "admin" address_mode = "host" - name = "garage-admin" # Check that Garage is alive and answering TCP connections check { type = "tcp" @@ -96,9 +96,10 @@ job "garage" { } } - #### Configuration for service ports: externally available ports (API, web) + #### Configuration for service ports: externally available ports (S3 API, K2V, web) service { + name = "garage-api" tags = [ "garage_api", "tricot garage.deuxfleurs.fr", @@ -107,7 +108,6 @@ job "garage" { ] port = "s3" address_mode = "host" - name = "garage-api" # Check 1: Garage is alive and answering TCP connections check { name = "garage-api-live" @@ -132,6 +132,39 @@ job "garage" { } service { + name = "garage-k2v" + tags = [ + "garage_k2v", + "tricot k2v.deuxfleurs.fr", + "tricot-site-lb", + ] + port = "k2v" + address_mode = "host" + # Check 1: Garage is alive and answering TCP connections + check { + name = "garage-k2v-live" + type = "tcp" + interval = "60s" + timeout = "5s" + check_restart { + limit = 3 + grace = "90s" + ignore_warnings = false + } + } + # Check 2: Garage is in a healthy state and requests should be routed here + check { + name = "garage-k2v-healthy" + port = "admin" + type = "http" + path = "/health" + interval = "60s" + timeout = "5s" + } + } + + service { + name = "garage-web" tags = [ "garage-web", "tricot * 1", @@ -144,7 +177,6 @@ job "garage" { ] port = "web" address_mode = "host" - name = "garage-web" # Check 1: Garage is alive and answering TCP connections check { name = "garage-web-live" @@ -183,39 +215,6 @@ job "garage" { port = "web" on_update = "ignore" } - - - service { - tags = [ - "garage_k2v", - "tricot k2v.deuxfleurs.fr", - "tricot-site-lb", - ] - port = "k2v" - address_mode = "host" - name = "garage-k2v" - # Check 1: Garage is alive and answering TCP connections - check { - name = "garage-k2v-live" - type = "tcp" - interval = "60s" - timeout = "5s" - check_restart { - limit = 3 - grace = "90s" - ignore_warnings = false - } - } - # Check 2: Garage is in a healthy state and requests should be routed here - check { - name = "garage-k2v-healthy" - port = "admin" - type = "http" - path = "/health" - interval = "60s" - timeout = "5s" - } - } } } } diff --git a/cluster/staging/app/garage/deploy/garage.hcl b/cluster/staging/app/garage/deploy/garage.hcl index 7a7b44b..b8e7227 100644 --- a/cluster/staging/app/garage/deploy/garage.hcl +++ b/cluster/staging/app/garage/deploy/garage.hcl @@ -73,12 +73,42 @@ job "garage-staging" { kill_signal = "SIGINT" kill_timeout = "20s" + restart { + interval = "5m" + attempts = 10 + delay = "1m" + mode = "delay" + } + service { name = "garage-staging-rpc" tags = ["garage-staging-rpc"] port = "rpc" } + #### Configuration for service ports: admin port (internal use only) + + service { + name = "garage-staging-admin" + tags = [ + "garage-staging-admin", + ] + port = "admin" + check { + name = "garage-tcp-liveness-check" + type = "tcp" + interval = "60s" + timeout = "5s" + check_restart { + limit = 3 + grace = "90s" + ignore_warnings = false + } + } + } + + #### Configuration for service ports: externally available ports (S3 API, K2V, web) + service { name = "garage-staging-s3-api" tags = [ @@ -90,7 +120,21 @@ job "garage-staging" { "tricot-site-lb", ] port = "s3" + # Check 1: Garage is alive and answering TCP connections check { + name = "garage-staging-api-live" + type = "tcp" + interval = "60s" + timeout = "5s" + check_restart { + limit = 3 + grace = "90s" + ignore_warnings = false + } + } + # Check 2: Garage is in a healthy state and requests should be routed here + check { + name = "garage-staging-api-healthy" port = "admin" type = "http" path = "/health" @@ -108,7 +152,21 @@ job "garage-staging" { "tricot-site-lb", ] port = "k2v" + # Check 1: Garage is alive and answering TCP connections check { + name = "garage-staging-k2v-live" + type = "tcp" + interval = "60s" + timeout = "5s" + check_restart { + limit = 3 + grace = "90s" + ignore_warnings = false + } + } + # Check 2: Garage is in a healthy state and requests should be routed here + check { + name = "garage-staging-k2v-healthy" port = "admin" type = "http" path = "/health" @@ -134,7 +192,21 @@ job "garage-staging" { "tricot-site-lb", ] port = "web" + # Check 1: Garage is alive and answering TCP connections check { + name = "garage-staging-web-live" + type = "tcp" + interval = "60s" + timeout = "5s" + check_restart { + limit = 3 + grace = "90s" + ignore_warnings = false + } + } + # Check 2: Garage is in a healthy state and requests should be routed here + check { + name = "garage-staging-web-healthy" port = "admin" type = "http" path = "/health" @@ -142,44 +214,6 @@ job "garage-staging" { timeout = "5s" } } - - service { - name = "garage-staging-admin" - tags = [ - "garage-staging-admin", - ] - port = "admin" - check { - name = "garage-admin-health-check" - type = "http" - path = "/health" - interval = "60s" - timeout = "5s" - check_restart { - limit = 10 - grace = "90s" - ignore_warnings = true - } - } - check { - name = "garage-tcp-liveness-check" - type = "tcp" - interval = "60s" - timeout = "5s" - check_restart { - limit = 3 - grace = "90s" - ignore_warnings = true - } - } - } - - restart { - interval = "5m" - attempts = 10 - delay = "1m" - mode = "delay" - } } } }