From 050b7d4846e8769f3cdd84c8f5d407ea2e5ced43 Mon Sep 17 00:00:00 2001 From: Quentin Dufour Date: Thu, 11 Aug 2022 22:36:48 +0200 Subject: [PATCH] Add Prometheus --- example/deploy_garage.sh | 5 +++++ liveness.md | 40 ++++++++++++++++++++++++++++++++++++++++ 2 files changed, 45 insertions(+) diff --git a/example/deploy_garage.sh b/example/deploy_garage.sh index 4e661d4..b9941aa 100755 --- a/example/deploy_garage.sh +++ b/example/deploy_garage.sh @@ -41,6 +41,11 @@ root_domain = ".s3.garage" bind_addr = "[::]:3902" root_domain = ".web.garage" index = "index.html" + +[admin] +api_bind_addr = "[::]:3903" +admin_token = "ae8cb40ea7368bbdbb6430af11cca7da833d3458a5f52086f4e805a570fb5c2a" +trace_sink = "http://[fc00:9a7a:9e:ffff:ffff:ffff:ffff:ffff]:4317" EOF RUST_LOG=garage=debug ${GARAGE_PATH} server 2>> ${NODE_STORAGE_PATH}/logs & disown diff --git a/liveness.md b/liveness.md index c33bd05..17fa3f9 100644 --- a/liveness.md +++ b/liveness.md @@ -69,3 +69,43 @@ $ ./s3concurrent 2022/08/11 20:37:51 done, 3 coroutines returned 2022/08/11 20:37:51 start concurrent loop with 4 coroutines ``` + +## Overview of available tools to observe Garage internals + +Even if I have some theory on what is going wrong, I want to collect as much information as possible before making hypotheses, +adding specific debug hooks, and so on. 
Fortunately, we have at least 3 different tools in Garage to gather information about its internals: + - Prometheus telemetry + - OpenTelemetry traces + - Tokio traces (not sure) + +### Prometheus + +Nothing very interesting is returned by Prometheus telemetry, except that we returned 503 and 500 errors: + +``` +# HELP api_s3_error_counter Number of API calls to the various S3 API endpoints that resulted in errors +# TYPE api_s3_error_counter counter +api_s3_error_counter{api_endpoint="PutObject",status_code="500"} 3 +api_s3_error_counter{api_endpoint="PutObject",status_code="503"} 2 +# HELP api_s3_request_counter Number of API calls to the various S3 API endpoints +# TYPE api_s3_request_counter counter +api_s3_request_counter{api_endpoint="CreateBucket"} 1 +api_s3_request_counter{api_endpoint="PutObject"} 6 +# HELP api_s3_request_duration Duration of API calls to the various S3 API endpoints +# TYPE api_s3_request_duration histogram +api_s3_request_duration_bucket{api_endpoint="CreateBucket",le="0.5"} 1 +api_s3_request_duration_bucket{api_endpoint="CreateBucket",le="0.9"} 1 +api_s3_request_duration_bucket{api_endpoint="CreateBucket",le="0.99"} 1 +api_s3_request_duration_bucket{api_endpoint="CreateBucket",le="+Inf"} 1 +api_s3_request_duration_sum{api_endpoint="CreateBucket"} 0.109302301 +api_s3_request_duration_count{api_endpoint="CreateBucket"} 1 +api_s3_request_duration_bucket{api_endpoint="PutObject",le="0.5"} 0 +api_s3_request_duration_bucket{api_endpoint="PutObject",le="0.9"} 0 +api_s3_request_duration_bucket{api_endpoint="PutObject",le="0.99"} 0 +api_s3_request_duration_bucket{api_endpoint="PutObject",le="+Inf"} 6 +api_s3_request_duration_sum{api_endpoint="PutObject"} 147.68400154399998 +api_s3_request_duration_count{api_endpoint="PutObject"} 6 +``` + + +