New way to deploy Garage

This commit is contained in:
Quentin 2022-09-14 17:25:16 +02:00
parent 30643ca50d
commit e3409ce6b7
Signed by untrusted user: quentin
GPG key ID: E9602264D639FF68
15 changed files with 390 additions and 61 deletions

8
benchmarks/clean Executable file
View file

@ -0,0 +1,8 @@
#!/usr/bin/env python3
"""Tear down what the Garage fragment set up for this node."""
import os
from fragments import garage, shared


def main():
    """Run garage.destroy() and log that the cleanup finished."""
    garage.destroy()
    shared.log("clean done")


if __name__ == "__main__":
    main()

Binary file not shown.

View file

@ -0,0 +1,202 @@
import glob, json, requests, time, garage_admin_sdk
from os.path import exists
from os import environ as env
from pathlib import Path
from fragments import shared
from garage_admin_sdk.api import nodes_api, layout_api, key_api
from garage_admin_sdk.model.node_cluster_info import NodeClusterInfo
from garage_admin_sdk.model.layout_version import LayoutVersion
from garage_admin_sdk.model.add_key_request import AddKeyRequest
from garage_admin_sdk.model.update_key_request import UpdateKeyRequest
from garage_admin_sdk.model.update_key_request_allow import UpdateKeyRequestAllow
# Placeholder only: replaced below once the node's environment has been read.
storage_path = "./i/am/not/defined"

# Fixed tokens written into every node's garage.toml by daemon().
rpc_secret = "3e9abff5f9e480afbadb46a77b7a26fe0e404258f0dc3fd5386b0ba8e0ad2fba"
metrics = "cacce0b2de4bc2d9f5b5fdff551e01ac1496055aed248202d415398987e35f81"
admin = "ae8cb40ea7368bbdbb6430af11cca7da833d3458a5f52086f4e805a570fb5c2a"

# `path` is set by from_local() / from_ci() to the Garage binary to run.
path = None
access_key = None
secret_key = None

# Admin API client for the daemon on localhost, authenticated with `admin`.
configuration = garage_admin_sdk.Configuration(host="http://localhost:3903/v0", access_token=admin)
api = garage_admin_sdk.ApiClient(configuration)
nodes = nodes_api.NodesApi(api)
layout = layout_api.LayoutApi(api)
keys = key_api.KeyApi(api)

# Setup, launch on import
# Per-node directory: <shared.storage_path>/garage/[<ZONE>/]<HOST>
if 'ZONE' in env:
    storage_path = Path(shared.storage_path) / "garage" / env['ZONE'] / env['HOST']
else:
    storage_path = Path(shared.storage_path) / "garage" / env['HOST']
config = storage_path / "garage.toml"
env['GARAGE_CONFIG_FILE'] = str(config)
def deploy_coord(version=None, target=None):
    """Deploy this node as the cluster coordinator.

    Wipes any previous deployment, selects the Garage binary through
    from_ci(version, target), then starts the daemon, connects the nodes,
    builds the layout and creates the access key, logging before each step.
    """
    destroy()
    from_ci(version, target)

    steps = (
        ("start daemon", daemon),
        ("discover nodes", connect),
        ("build layout", create_layout),
        ("create key", create_key),
    )
    for message, step in steps:
        shared.log(message)
        step()

    shared.log("ready")
def deploy_follow(version=None, target=None):
    """Deploy this node as a follower.

    Wipes any previous deployment, selects the Garage binary through
    from_ci(version, target), starts the daemon and then blocks in
    sync_on_key_up() until the "mknet" key can be found.
    """
    destroy()
    from_ci(version, target)

    for message, step in (("start daemon", daemon), ("wait for coord", sync_on_key_up)):
        shared.log(message)
        step()

    shared.log("ready")
def from_local(p):
    """Use the Garage binary located at `p`.

    Stores `p` in the module-level `path`, then runs `<p> --version` through
    shared.exec, which raises if the command exits with a non-zero status.
    """
    global path
    path = p
    version_cmd = f"{path} --version"
    shared.exec(version_cmd)
def from_ci(version=None, target=None):
    """Select a released Garage binary, downloading it when needed.

    `version` defaults to "v0.7.3" and `target` to
    "x86_64-unknown-linux-musl". The module-level `path` is always set to
    <shared.binary_path>/garage-<target>-<version>. Only the node whose
    shared.id() is 1 goes further: it fetches the binary with wget if the
    file is missing and then runs `<path> --version`.
    """
    global path
    if not version:
        version = "v0.7.3"
    if not target:
        target = "x86_64-unknown-linux-musl"

    path = Path(shared.binary_path) / f"garage-{target}-{version}"

    if shared.id() == 1:
        if not exists(path):
            url = f"https://garagehq.deuxfleurs.fr/_releases/{version}/{target}/garage"
            shared.exec(f"mkdir -p {shared.binary_path}")
            shared.exec(f"wget {url} -O {path}")
            shared.exec(f"chmod +x {path}")
        shared.exec(f"{path} --version")
def daemon():
    """Write garage.toml, start the Garage server and publish this node's info.

    Steps:
      1. create the node storage directory and write the configuration file;
      2. launch `<path> server` in the background, appending its output to
         logs.stdout / logs.stderr and saving its PID in daemon.pid;
      3. after a one second pause, ask the local admin API for the node id and
         write it, with the node address, zone and host, to `node_info`.

    Raises:
        KeyError: if IP or HOST is missing from the environment.
        Exception: if one of the shell commands fails (see shared.exec).
    """
    shared.exec(f"mkdir -p {storage_path}")
    with open(config, 'w+') as f:
        f.write(f"""
metadata_dir = "{storage_path}/meta"
data_dir = "{storage_path}/data"
replication_mode = "3"
rpc_bind_addr = "[::]:3901"
rpc_public_addr = "[{env['IP']}]:3901"
rpc_secret = "{rpc_secret}"
bootstrap_peers=[]
[s3_api]
s3_region = "garage"
api_bind_addr = "[::]:3900"
root_domain = ".s3.garage"
[s3_web]
bind_addr = "[::]:3902"
root_domain = ".web.garage"
index = "index.html"
[admin]
api_bind_addr = "0.0.0.0:3903"
metrics_token = "{metrics}"
admin_token = "{admin}"
""")

    shared.exec(f"{path} server 2>> {storage_path}/logs.stderr 1>> {storage_path}/logs.stdout & echo $! > {storage_path}/daemon.pid")
    time.sleep(1)

    node_id = nodes.get_nodes().node

    # ZONE is optional (the storage path setup only uses it when present), so
    # do not crash here when it is unset. The fallback name is the one the
    # former shell deploy script used (`unzonned-<HOST>`).
    zone = env.get('ZONE', f"unzonned-{env['HOST']}")

    payload = json.dumps({
        "node_addr": f"{node_id}@{env['IP']}:3901",
        "node_id": node_id,
        "zone": zone,
        "host": env['HOST'],
    })

    # cluster_info() polls for files named exactly `node_info` and parses them
    # as soon as enough exist: write to a temporary name and rename, so a
    # reader can never observe an empty or partial file.
    node_info = storage_path / "node_info"
    tmp_node_info = storage_path / "node_info.tmp"
    tmp_node_info.write_text(payload)
    tmp_node_info.replace(node_info)
def destroy():
    """Stop the local Garage daemon (if any) and delete the node storage.

    If a daemon.pid file exists, the recorded process is killed with SIGKILL
    and the pid file is removed. The whole `storage_path` is then deleted.

    Raises:
        Exception: if `storage_path` is suspiciously short (safe guard against
            wiping the filesystem root) or if a removal command fails.
    """
    dpid = Path(storage_path) / "daemon.pid"
    if exists(dpid):
        try:
            shared.exec(f"kill -9 $(cat {dpid})")
        except Exception:
            # shared.exec raises a plain Exception on any non-zero exit. A stale
            # pid file (daemon crashed or already stopped) makes `kill` fail;
            # that must not prevent the cleanup below from running.
            shared.log(f"could not kill the daemon recorded in {dpid}, assuming it is already stopped")
        shared.exec(f"rm -f {dpid}")

    if len(str(storage_path)) < 8:  # arbitrary, stupid safe guard
        print(storage_path)
        raise Exception("You tried to clean a storage path that might be the root of your FS, panicking...")

    shared.exec(f"rm -fr {storage_path}")
# this function is ugly, sorry :s
# Cache for cluster_info(): filled on the first successful call.
_cluster_info = None


def cluster_info():
    """Return the parsed `node_info` files of all nodes, waiting if needed.

    Polls once per second (sleeping before each look-up) for `node_info` files
    anywhere under shared.storage_path until exactly shared.count() of them
    exist, then parses each as JSON. The result is cached in `_cluster_info`
    and returned directly on later calls.
    """
    global _cluster_info
    if _cluster_info is None:
        found = None
        while found is None:
            time.sleep(1)
            candidates = glob.glob(f"{shared.storage_path}/**/node_info", recursive=True)
            if len(candidates) == shared.count():
                found = candidates
        _cluster_info = [json.loads(Path(name).read_text()) for name in found]
    return _cluster_info
def connect():
    """Ask the local daemon to connect to every node listed by cluster_info().

    Raises:
        Exception: if any entry of the admin API response reports a failure;
            the full response is attached to the exception.
    """
    addresses = [node['node_addr'] for node in cluster_info()]
    ret = nodes.add_node(addresses)
    if any(not status.success for status in ret):
        raise Exception("Node connect failed", ret)
    shared.log("all nodes connected")
def create_layout():
    """Stage and apply a layout containing every node from cluster_info().

    Each node gets capacity 1, its recorded zone and a single tag holding its
    host name. The layout is applied as the current layout version plus one.
    """
    current_version = layout.get_layout().version
    staged = {
        node['node_id']: NodeClusterInfo(
            zone=node['zone'],
            capacity=1,
            tags=[node['host']],
        )
        for node in cluster_info()
    }
    layout.add_layout(staged)
    layout.apply_layout(LayoutVersion(version=current_version + 1))
def create_key():
    """Create the "mknet" key, allow it to create buckets and remember it.

    The key creation is retried through shared.fn_retry. The module-level
    `key` is only set once the permission update has gone through.
    """
    global key
    created = shared.fn_retry(lambda: keys.add_key(AddKeyRequest(name="mknet")))
    request = UpdateKeyRequest(allow=UpdateKeyRequestAllow(create_bucket=True))
    keys.update_key(created.access_key_id, request)
    key = created
def delete_key():
    """Delete the key stored in the module-level `key` and forget it.

    The deletion goes through the admin API client (`keys`). The previous code
    called this very function with an argument, which raised a TypeError
    instead of deleting anything.
    """
    global key
    keys.delete_key(key.access_key_id)
    key = None
def sync_on_key_up():
    """Block until the "mknet" key can be found, then store and return it.

    The admin API is queried once per second. Any regular error is taken to
    mean "not available yet" and triggers another attempt.

    Returns:
        The key information returned by the admin API, also stored in the
        module-level `key`.
    """
    global key
    while True:
        try:
            key = keys.search_key("mknet")
            return key
        except Exception:
            # Deliberately broad, but not a bare `except`: KeyboardInterrupt
            # and SystemExit must still be able to stop this endless loop.
            pass
        time.sleep(1)
def sync_on_key_down():
    """Block until looking up the "mknet" key fails.

    The key is searched once per second; the function returns as soon as the
    search raises.
    """
    key_present = True
    while key_present:
        try:
            keys.search_key("mknet")
        except:
            # NOTE(review): bare except, so *anything* raised here (including
            # an interruption or an unrelated API error) counts as "key gone".
            key_present = False
        else:
            time.sleep(1)

View file

View file

@ -0,0 +1,18 @@
# Shell snippet for a MinIO run: waits 180 s, registers the `minio-bench`
# alias with the minioadmin credentials, creates buckets bench1..bench10 and
# pipes the output of `s3lat` into 50ms.minio.csv.
# NOTE(review): nothing visible in this module reads `a`.
a = """
echo "sleep 3 min to wait for minio bootstrap"
sleep 180
export ENDPOINT=localhost:9000
export AWS_ACCESS_KEY_ID=minioadmin
export AWS_SECRET_ACCESS_KEY=minioadmin
mc alias set minio-bench http://$ENDPOINT $AWS_ACCESS_KEY_ID $AWS_SECRET_ACCESS_KEY
for i in $(seq 1 10); do
mc mb minio-bench/bench$i
done
s3lat | tee 50ms.minio.csv
"""
def on_garage():
    """Run the s3lat benchmark against Garage (placeholder).

    Raises:
        NotImplementedError: always, the benchmark is not written yet. This is
            a subclass of Exception, so callers catching Exception still work.
    """
    raise NotImplementedError("Not yet implemented")

View file

@ -0,0 +1,28 @@
import os, time
# Directory where the Garage fragment stores the binaries it downloads.
binary_path = "/tmp/mknet-bin"
# Root directory under which the Garage fragment creates its per-node directories.
storage_path = "/tmp/mknet-store"
def exec(s):
    """Run the shell command `s` with os.system.

    Raises:
        Exception: if os.system reports a non-zero status.
    """
    status = os.system(s)
    if status == 0:
        return
    raise Exception("Command terminated with an error")
def exec_retry(s, cnt=16):
    """Run the shell command `s` until it succeeds, at most `cnt` times.

    The command is printed once. Attempt number i (starting at 0) is preceded
    by a sleep of i seconds, so the first attempt runs immediately.

    Raises:
        Exception: if none of the `cnt` attempts exits with status 0.
    """
    print(s)
    for delay in range(cnt):
        # this is expected to sleep before running the command to reduce the noise
        time.sleep(delay)
        status = os.system(s)
        if status == 0:
            return
    raise Exception("Command terminated with an error too many times")
def fn_retry(f, cnt=5):
    """Call `f()` until it succeeds, at most `cnt` times, and return its result.

    After failed attempt number i (starting at 0) a message is logged and the
    function sleeps i seconds before trying again. The exception raised by the
    last attempt is re-raised. With `cnt` equal to 0, `f` is never called and
    None is returned.
    """
    final_attempt = cnt - 1
    for attempt in range(cnt):
        try:
            return f()
        except Exception as err:
            if attempt == final_attempt:
                raise err
            log(f"failed call, retry in {attempt} sec")
            time.sleep(attempt)
def id():
    """Return the ID environment variable as an integer."""
    raw = os.environ['ID']
    return int(raw)
def count():
    """Return the SERVER_COUNT environment variable as an integer."""
    raw = os.environ['SERVER_COUNT']
    return int(raw)
def log(*args):
    """Print `args` prefixed with "[<id>/<count> - <HOST>]"."""
    prefix = f"[{id()}/{count()} - {os.environ['HOST']}]"
    print(prefix, *args)

12
benchmarks/garage-s3lat Executable file
View file

@ -0,0 +1,12 @@
#!/usr/bin/env python3
"""Run the s3lat benchmark on a Garage cluster.

The node whose shared.id() is 1 coordinates: it deploys the cluster, runs the
benchmark, then deletes the key, which the other nodes wait for before
tearing themselves down.
"""
from fragments import garage, s3lat, shared

if shared.id() == 1:
    garage.deploy_coord()
    try:
        s3lat.on_garage()
    finally:
        # Always clean up, even if the benchmark raises: otherwise the key is
        # never deleted (the followers keep waiting in sync_on_key_down) and
        # the local daemon keeps running.
        try:
            garage.delete_key()
        finally:
            garage.destroy()
else:
    garage.deploy_follow()
    try:
        garage.sync_on_key_down()
    finally:
        # Do not leave the daemon and its storage behind if the wait fails.
        garage.destroy()

View file

@ -0,0 +1 @@
git+https://git.deuxfleurs.fr/quentin/garage-admin-sdk@7b1c1faf7a#egg=garage-admin-sdk&subdirectory=python

View file

@ -1,61 +0,0 @@
#!/bin/bash
# Deploy one Garage node of the test network: write its garage.toml, start the
# server in the background, then connect it and assign it in the layout through
# the first garage.toml found under STORAGE_PATH. The node owning that first
# configuration file also applies the layout.
# Reads HOST and IP from the environment; ZONE is optional.
set -euo pipefail
IFS=$'\n\t'

GARAGE_PATH=garage
STORAGE_PATH=/tmp/garage-testnet
export RPC_SECRET=3e9abff5f9e480afbadb46a77b7a26fe0e404258f0dc3fd5386b0ba8e0ad2fba

# ZONE may be unset: expand it with an empty default, otherwise `set -u`
# aborts the script before the test can even run.
if [ -z "${ZONE:-}" ]; then
  NODE_STORAGE_PATH=${STORAGE_PATH}/${HOST}
else
  NODE_STORAGE_PATH=${STORAGE_PATH}/${ZONE}/${HOST}
fi
BOOTSTRAP_FILE=${STORAGE_PATH}/bootstrap_peer
export GARAGE_CONFIG_FILE=${NODE_STORAGE_PATH}/garage.toml

mkdir -p "${NODE_STORAGE_PATH}"
cd "${NODE_STORAGE_PATH}"
rm "${BOOTSTRAP_FILE}" 2>/dev/null || true

cat > "${GARAGE_CONFIG_FILE}" << EOF
metadata_dir = "${NODE_STORAGE_PATH}/meta"
data_dir = "${NODE_STORAGE_PATH}/data"
replication_mode = "3"
rpc_bind_addr = "[::]:3901"
rpc_public_addr = "[${IP}]:3901"
rpc_secret = "${RPC_SECRET}"
bootstrap_peers=[]
[s3_api]
s3_region = "garage"
api_bind_addr = "[::]:3900"
root_domain = ".s3.garage"
[s3_web]
bind_addr = "[::]:3902"
root_domain = ".web.garage"
index = "index.html"
EOF

RUST_LOG=garage=debug ${GARAGE_PATH} server 2>> "${NODE_STORAGE_PATH}/logs" & disown
sleep 2

# First configuration file found under the storage root: every node sends its
# connect / layout commands through that node.
CONFIG_NODE_FPATH=$(find "${STORAGE_PATH}/" -maxdepth 3 -name garage.toml | head -n 1)
SELF_ID=$(${GARAGE_PATH} node id 2>/dev/null)
SHORT_ID=$(echo "${SELF_ID}" | cut -c-64)

${GARAGE_PATH} -c "${CONFIG_NODE_FPATH}" node connect "${SELF_ID}"
${GARAGE_PATH} -c "${CONFIG_NODE_FPATH}" layout assign "${SHORT_ID}" -z "${ZONE:-unzonned-${HOST}}" -c 1 -t "${HOST}"

# Only the node owning that first configuration file applies the layout.
if [ "${CONFIG_NODE_FPATH}" == "${GARAGE_CONFIG_FILE}" ]; then
  sleep 2
  ${GARAGE_PATH} layout show
  ${GARAGE_PATH} layout apply --version 1
fi

View file

@ -3,4 +3,8 @@ setup(name='mknet',
version='1.0',
scripts=['mknet'],
py_modules=['net'],
install_requires=[
'PyYAML',
'requests'
],
)

12
shell.nix Normal file
View file

@ -0,0 +1,12 @@
# Development shell: Python 3.10 with the PyYAML and requests packages, plus
# pip and setuptools to try out the pip-based installation.
{ pkgs ? import <nixpkgs> {} }:

let
  py = pkgs.python310Packages;
in
pkgs.mkShell {
  nativeBuildInputs = [
    pkgs.python310
    py.pyyaml
    py.requests

    # to test the pip setup
    py.pip
    py.setuptools
  ];
}

46
topo/multi-dc.yml Normal file
View file

@ -0,0 +1,46 @@
links:
- &100
bandwidth: 100M
latency: 500us
- &wan
bandwidth: 100M
latency: 50ms
jitter: 10ms
zones:
- &dc1
name: dc1
internal: *100
external: *wan
- &dc2
name: dc2
internal: *100
external: *wan
- &dc3
name: dc3
internal: *100
external: *wan
servers:
- name: dc1s1
zone: *dc1
- name: dc1s2
zone: *dc1
- name: dc2s1
zone: *dc2
- name: dc2s2
zone: *dc2
- name: dc3s1
zone: *dc3
- name: dc3s2
zone: *dc3
global:
subnet:
base: 'fc00:9a7a:9e::'
local: 64
zone: 16
latency-offset: 3ms
upstream:
ip: fc00:9a7a:9e:ffff:ffff:ffff:ffff:ffff
conn: *wan

34
topo/single-dc.yml Normal file
View file

@ -0,0 +1,34 @@
links:
- &fiber
bandwidth: 100M
latency: 50ms
jitter: 10ms
zones:
- &dc1
name: dc1
internal: *fiber
external: *fiber
servers:
- name: dc1s1
zone: *dc1
- name: dc1s2
zone: *dc1
- name: dc1s3
zone: *dc1
- name: dc1s4
zone: *dc1
- name: dc1s5
zone: *dc1
global:
subnet:
base: 'fc00:9a7a:9e::'
local: 64
zone: 16
latency-offset: 3ms
upstream:
ip: fc00:9a7a:9e:ffff:ffff:ffff:ffff:ffff
conn: *fiber

25
topo/slow-net.yml Normal file
View file

@ -0,0 +1,25 @@
links:
- &slow
bandwidth: 1M
latency: 500us
- &1000
bandwidth: 1000M
latency: 100us
servers:
- name: node1
<<: *slow
- name: node2
<<: *slow
- name: node3
<<: *slow
global:
subnet:
base: 'fc00:9a7a:9e::'
local: 64
zone: 16
latency-offset: 3ms
upstream:
ip: fc00:9a7a:9e:ffff:ffff:ffff:ffff:ffff
conn: *1000