Restart backups
This commit is contained in:
parent
1749a98e86
commit
02c65de5fe
27 changed files with 496 additions and 1 deletions
28
cluster/prod/app/backup/build/backup-consul/Dockerfile
Normal file
28
cluster/prod/app/backup/build/backup-consul/Dockerfile
Normal file
|
@ -0,0 +1,28 @@
|
|||
FROM golang:buster as builder
|
||||
|
||||
WORKDIR /root
|
||||
RUN git clone https://filippo.io/age && cd age/cmd/age && go build -o age .
|
||||
|
||||
FROM amd64/debian:buster
|
||||
|
||||
COPY --from=builder /root/age/cmd/age/age /usr/local/bin/age
|
||||
|
||||
RUN apt-get update && \
|
||||
apt-get -qq -y full-upgrade && \
|
||||
apt-get install -y rsync wget openssh-client unzip && \
|
||||
apt-get clean && \
|
||||
rm -f /var/lib/apt/lists/*_*
|
||||
|
||||
RUN mkdir -p /root/.ssh
|
||||
WORKDIR /root
|
||||
|
||||
RUN wget https://releases.hashicorp.com/consul/1.8.5/consul_1.8.5_linux_amd64.zip && \
|
||||
unzip consul_1.8.5_linux_amd64.zip && \
|
||||
chmod +x consul && \
|
||||
mv consul /usr/local/bin && \
|
||||
rm consul_1.8.5_linux_amd64.zip
|
||||
|
||||
COPY do_backup.sh /root/do_backup.sh
|
||||
|
||||
CMD "/root/do_backup.sh"
|
||||
|
20
cluster/prod/app/backup/build/backup-consul/do_backup.sh
Executable file
20
cluster/prod/app/backup/build/backup-consul/do_backup.sh
Executable file
|
@ -0,0 +1,20 @@
|
|||
#!/bin/sh
|
||||
|
||||
set -x -e
|
||||
|
||||
cd /root
|
||||
|
||||
chmod 0600 .ssh/id_ed25519
|
||||
|
||||
cat > .ssh/config <<EOF
|
||||
Host backuphost
|
||||
HostName $TARGET_SSH_HOST
|
||||
Port $TARGET_SSH_PORT
|
||||
User $TARGET_SSH_USER
|
||||
EOF
|
||||
|
||||
consul kv export | \
|
||||
gzip | \
|
||||
age -r "$(cat /root/.ssh/id_ed25519.pub)" | \
|
||||
ssh backuphost "cat > $TARGET_SSH_DIR/consul/$(date --iso-8601=minute)_consul_kv_export.gz.age"
|
||||
|
1
cluster/prod/app/backup/build/backup-psql/.gitignore
vendored
Normal file
1
cluster/prod/app/backup/build/backup-psql/.gitignore
vendored
Normal file
|
@ -0,0 +1 @@
|
|||
result
|
8
cluster/prod/app/backup/build/backup-psql/README.md
Normal file
8
cluster/prod/app/backup/build/backup-psql/README.md
Normal file
|
@ -0,0 +1,8 @@
|
|||
## Build
|
||||
|
||||
```bash
|
||||
docker load < $(nix-build docker.nix)
|
||||
docker push superboum/backup-psql:???
|
||||
```
|
||||
|
||||
|
106
cluster/prod/app/backup/build/backup-psql/backup-psql.py
Executable file
106
cluster/prod/app/backup/build/backup-psql/backup-psql.py
Executable file
|
@ -0,0 +1,106 @@
|
|||
#!/usr/bin/env python3
|
||||
import shutil,sys,os,datetime,minio,subprocess
|
||||
|
||||
working_directory = "."
|
||||
if 'CACHE_DIR' in os.environ: working_directory = os.environ['CACHE_DIR']
|
||||
required_space_in_bytes = 20 * 1024 * 1024 * 1024
|
||||
bucket = os.environ['AWS_BUCKET']
|
||||
key = os.environ['AWS_ACCESS_KEY_ID']
|
||||
secret = os.environ['AWS_SECRET_ACCESS_KEY']
|
||||
endpoint = os.environ['AWS_ENDPOINT']
|
||||
pubkey = os.environ['CRYPT_PUBLIC_KEY']
|
||||
psql_host = os.environ['PSQL_HOST']
|
||||
psql_user = os.environ['PSQL_USER']
|
||||
s3_prefix = str(datetime.datetime.now())
|
||||
files = [ "backup_manifest", "base.tar.gz", "pg_wal.tar.gz" ]
|
||||
clear_paths = [ os.path.join(working_directory, f) for f in files ]
|
||||
crypt_paths = [ os.path.join(working_directory, f) + ".age" for f in files ]
|
||||
s3_keys = [ s3_prefix + "/" + f for f in files ]
|
||||
|
||||
def abort(msg):
|
||||
for p in clear_paths + crypt_paths:
|
||||
if os.path.exists(p):
|
||||
print(f"Remove {p}")
|
||||
os.remove(p)
|
||||
|
||||
if msg: sys.exit(msg)
|
||||
else: print("success")
|
||||
|
||||
# Check we have enough space on disk
|
||||
if shutil.disk_usage(working_directory).free < required_space_in_bytes:
|
||||
abort(f"Not enough space on disk at path {working_directory} to perform a backup, aborting")
|
||||
|
||||
# Check postgres password is set
|
||||
if 'PGPASSWORD' not in os.environ:
|
||||
abort(f"You must pass postgres' password through the environment variable PGPASSWORD")
|
||||
|
||||
# Check our working directory is empty
|
||||
if len(os.listdir(working_directory)) != 0:
|
||||
abort(f"Working directory {working_directory} is not empty, aborting")
|
||||
|
||||
# Check Minio
|
||||
client = minio.Minio(endpoint, key, secret)
|
||||
if not client.bucket_exists(bucket):
|
||||
abort(f"Bucket {bucket} does not exist or its access is forbidden, aborting")
|
||||
|
||||
# Perform the backup locally
|
||||
try:
|
||||
ret = subprocess.run(["pg_basebackup",
|
||||
f"--host={psql_host}",
|
||||
f"--username={psql_user}",
|
||||
f"--pgdata={working_directory}",
|
||||
f"--format=tar",
|
||||
"--wal-method=stream",
|
||||
"--gzip",
|
||||
"--compress=6",
|
||||
"--progress",
|
||||
"--max-rate=5M",
|
||||
])
|
||||
if ret.returncode != 0:
|
||||
abort(f"pg_basebackup exited, expected return code 0, got {ret.returncode}. aborting")
|
||||
except Exception as e:
|
||||
abort(f"pg_basebackup raised exception {e}. aborting")
|
||||
|
||||
# Check that the expected files are here
|
||||
for p in clear_paths:
|
||||
print(f"Checking that {p} exists locally")
|
||||
if not os.path.exists(p):
|
||||
abort(f"File {p} expected but not found, aborting")
|
||||
|
||||
# Cipher them
|
||||
for c, e in zip(clear_paths, crypt_paths):
|
||||
print(f"Ciphering {c} to {e}")
|
||||
try:
|
||||
ret = subprocess.run(["age", "-r", pubkey, "-o", e, c])
|
||||
if ret.returncode != 0:
|
||||
abort(f"age exit code is {ret}, 0 expected. aborting")
|
||||
except Exception as e:
|
||||
abort(f"aged raised an exception. {e}. aborting")
|
||||
|
||||
# Upload the backup to S3
|
||||
for p, k in zip(crypt_paths, s3_keys):
|
||||
try:
|
||||
print(f"Uploading {p} to {k}")
|
||||
result = client.fput_object(bucket, k, p)
|
||||
print(
|
||||
"created {0} object; etag: {1}, version-id: {2}".format(
|
||||
result.object_name, result.etag, result.version_id,
|
||||
),
|
||||
)
|
||||
except Exception as e:
|
||||
abort(f"Exception {e} occured while upload {p}. aborting")
|
||||
|
||||
# Check that the files have been uploaded
|
||||
for k in s3_keys:
|
||||
try:
|
||||
print(f"Checking that {k} exists remotely")
|
||||
result = client.stat_object(bucket, k)
|
||||
print(
|
||||
"last-modified: {0}, size: {1}".format(
|
||||
result.last_modified, result.size,
|
||||
),
|
||||
)
|
||||
except Exception as e:
|
||||
abort(f"{k} not found on S3. {e}. aborting")
|
||||
|
||||
abort(None)
|
8
cluster/prod/app/backup/build/backup-psql/common.nix
Normal file
8
cluster/prod/app/backup/build/backup-psql/common.nix
Normal file
|
@ -0,0 +1,8 @@
|
|||
{
|
||||
pkgsSrc = fetchTarball {
|
||||
# Latest commit on https://github.com/NixOS/nixpkgs/tree/nixos-21.11
|
||||
# As of 2022-04-15
|
||||
url ="https://github.com/NixOS/nixpkgs/archive/2f06b87f64bc06229e05045853e0876666e1b023.tar.gz";
|
||||
sha256 = "sha256:1d7zg96xw4qsqh7c89pgha9wkq3rbi9as3k3d88jlxy2z0ns0cy2";
|
||||
};
|
||||
}
|
37
cluster/prod/app/backup/build/backup-psql/default.nix
Normal file
37
cluster/prod/app/backup/build/backup-psql/default.nix
Normal file
|
@ -0,0 +1,37 @@
|
|||
let
|
||||
common = import ./common.nix;
|
||||
pkgs = import common.pkgsSrc {};
|
||||
python-with-my-packages = pkgs.python3.withPackages (p: with p; [
|
||||
minio
|
||||
]);
|
||||
in
|
||||
pkgs.stdenv.mkDerivation {
|
||||
name = "backup-psql";
|
||||
src = pkgs.lib.sourceFilesBySuffices ./. [ ".py" ];
|
||||
|
||||
buildInputs = [
|
||||
python-with-my-packages
|
||||
pkgs.age
|
||||
pkgs.postgresql_14
|
||||
];
|
||||
|
||||
buildPhase = ''
|
||||
cat > backup-psql <<EOF
|
||||
#!${pkgs.bash}/bin/bash
|
||||
|
||||
export PYTHONPATH=${python-with-my-packages}/${python-with-my-packages.sitePackages}
|
||||
export PATH=${python-with-my-packages}/bin:${pkgs.age}/bin:${pkgs.postgresql_14}/bin
|
||||
|
||||
${python-with-my-packages}/bin/python3 $out/lib/backup-psql.py
|
||||
EOF
|
||||
|
||||
chmod +x backup-psql
|
||||
'';
|
||||
|
||||
installPhase = ''
|
||||
mkdir -p $out/{bin,lib}
|
||||
cp *.py $out/lib/backup-psql.py
|
||||
cp backup-psql $out/bin/backup-psql
|
||||
'';
|
||||
}
|
||||
|
11
cluster/prod/app/backup/build/backup-psql/docker.nix
Normal file
11
cluster/prod/app/backup/build/backup-psql/docker.nix
Normal file
|
@ -0,0 +1,11 @@
|
|||
let
|
||||
common = import ./common.nix;
|
||||
app = import ./default.nix;
|
||||
pkgs = import common.pkgsSrc {};
|
||||
in
|
||||
pkgs.dockerTools.buildImage {
|
||||
name = "superboum/backup-psql-docker";
|
||||
config = {
|
||||
Cmd = [ "${app}/bin/backup-psql" ];
|
||||
};
|
||||
}
|
196
cluster/prod/app/backup/deploy/backup-daily.hcl
Normal file
196
cluster/prod/app/backup/deploy/backup-daily.hcl
Normal file
|
@ -0,0 +1,196 @@
|
|||
job "backup_daily" {
|
||||
datacenters = ["orion", "neptune"]
|
||||
type = "batch"
|
||||
|
||||
priority = "60"
|
||||
|
||||
periodic {
|
||||
cron = "@daily"
|
||||
// Do not allow overlapping runs.
|
||||
prohibit_overlap = true
|
||||
}
|
||||
|
||||
group "backup-dovecot" {
|
||||
constraint {
|
||||
attribute = "${attr.unique.hostname}"
|
||||
operator = "="
|
||||
value = "doradille"
|
||||
}
|
||||
|
||||
task "main" {
|
||||
driver = "docker"
|
||||
|
||||
config {
|
||||
image = "restic/restic:0.14.0"
|
||||
entrypoint = [ "/bin/sh", "-c" ]
|
||||
args = [ "restic backup /mail && restic forget --keep-within 1m1d --keep-within-weekly 3m --keep-within-monthly 1y && restic prune --max-unused 50% --max-repack-size 2G && restic check" ]
|
||||
volumes = [
|
||||
"/mnt/ssd/mail:/mail"
|
||||
]
|
||||
}
|
||||
|
||||
template {
|
||||
data = <<EOH
|
||||
AWS_ACCESS_KEY_ID={{ key "secrets/email/dovecot/backup_aws_access_key_id" }}
|
||||
AWS_SECRET_ACCESS_KEY={{ key "secrets/email/dovecot/backup_aws_secret_access_key" }}
|
||||
RESTIC_REPOSITORY={{ key "secrets/email/dovecot/backup_restic_repository" }}
|
||||
RESTIC_PASSWORD={{ key "secrets/email/dovecot/backup_restic_password" }}
|
||||
EOH
|
||||
|
||||
destination = "secrets/env_vars"
|
||||
env = true
|
||||
}
|
||||
|
||||
resources {
|
||||
cpu = 500
|
||||
memory = 100
|
||||
memory_max = 300
|
||||
}
|
||||
|
||||
restart {
|
||||
attempts = 2
|
||||
interval = "30m"
|
||||
delay = "15s"
|
||||
mode = "fail"
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
group "backup-plume" {
|
||||
constraint {
|
||||
attribute = "${attr.unique.hostname}"
|
||||
operator = "="
|
||||
value = "dahlia"
|
||||
}
|
||||
|
||||
task "main" {
|
||||
driver = "docker"
|
||||
|
||||
config {
|
||||
image = "restic/restic:0.14.0"
|
||||
entrypoint = [ "/bin/sh", "-c" ]
|
||||
args = [ "restic backup /plume && restic forget --keep-within 1m1d --keep-within-weekly 3m --keep-within-monthly 1y && restic prune --max-unused 50% --max-repack-size 2G && restic check" ]
|
||||
volumes = [
|
||||
"/mnt/ssd/plume/media:/plume"
|
||||
]
|
||||
}
|
||||
|
||||
template {
|
||||
data = <<EOH
|
||||
AWS_ACCESS_KEY_ID={{ key "secrets/plume/backup_aws_access_key_id" }}
|
||||
AWS_SECRET_ACCESS_KEY={{ key "secrets/plume/backup_aws_secret_access_key" }}
|
||||
RESTIC_REPOSITORY={{ key "secrets/plume/backup_restic_repository" }}
|
||||
RESTIC_PASSWORD={{ key "secrets/plume/backup_restic_password" }}
|
||||
EOH
|
||||
|
||||
destination = "secrets/env_vars"
|
||||
env = true
|
||||
}
|
||||
|
||||
resources {
|
||||
cpu = 500
|
||||
memory = 100
|
||||
memory_max = 300
|
||||
}
|
||||
|
||||
restart {
|
||||
attempts = 2
|
||||
interval = "30m"
|
||||
delay = "15s"
|
||||
mode = "fail"
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
group "backup-consul" {
|
||||
task "consul-kv-export" {
|
||||
driver = "docker"
|
||||
|
||||
lifecycle {
|
||||
hook = "prestart"
|
||||
sidecar = false
|
||||
}
|
||||
|
||||
config {
|
||||
image = "consul:1.13.1"
|
||||
network_mode = "host"
|
||||
entrypoint = [ "/bin/sh", "-c" ]
|
||||
args = [ "/bin/consul kv export > $NOMAD_ALLOC_DIR/consul.json" ]
|
||||
volumes = [
|
||||
"secrets:/etc/consul",
|
||||
]
|
||||
}
|
||||
|
||||
env {
|
||||
CONSUL_HTTP_ADDR = "https://consul.service.prod.consul:8501"
|
||||
CONSUL_HTTP_SSL = "true"
|
||||
CONSUL_CACERT = "/etc/consul/consul.crt"
|
||||
CONSUL_CLIENT_CERT = "/etc/consul/consul-client.crt"
|
||||
CONSUL_CLIENT_KEY = "/etc/consul/consul-client.key"
|
||||
}
|
||||
|
||||
resources {
|
||||
cpu = 200
|
||||
memory = 200
|
||||
}
|
||||
|
||||
|
||||
template {
|
||||
data = "{{ key \"secrets/consul/consul.crt\" }}"
|
||||
destination = "secrets/consul.crt"
|
||||
}
|
||||
|
||||
template {
|
||||
data = "{{ key \"secrets/consul/consul-client.crt\" }}"
|
||||
destination = "secrets/consul-client.crt"
|
||||
}
|
||||
|
||||
template {
|
||||
data = "{{ key \"secrets/consul/consul-client.key\" }}"
|
||||
destination = "secrets/consul-client.key"
|
||||
}
|
||||
|
||||
restart {
|
||||
attempts = 2
|
||||
interval = "30m"
|
||||
delay = "15s"
|
||||
mode = "fail"
|
||||
}
|
||||
}
|
||||
|
||||
task "restic-backup" {
|
||||
driver = "docker"
|
||||
|
||||
config {
|
||||
image = "restic/restic:0.12.1"
|
||||
entrypoint = [ "/bin/sh", "-c" ]
|
||||
args = [ "restic backup $NOMAD_ALLOC_DIR/consul.json && restic forget --keep-within 1m1d --keep-within-weekly 3m --keep-within-monthly 1y && restic prune --max-unused 50% --max-repack-size 2G && restic check" ]
|
||||
}
|
||||
|
||||
|
||||
template {
|
||||
data = <<EOH
|
||||
AWS_ACCESS_KEY_ID={{ key "secrets/backup/consul/backup_aws_access_key_id" }}
|
||||
AWS_SECRET_ACCESS_KEY={{ key "secrets/backup/consul/backup_aws_secret_access_key" }}
|
||||
RESTIC_REPOSITORY={{ key "secrets/backup/consul/backup_restic_repository" }}
|
||||
RESTIC_PASSWORD={{ key "secrets/backup/consul/backup_restic_password" }}
|
||||
EOH
|
||||
|
||||
destination = "secrets/env_vars"
|
||||
env = true
|
||||
}
|
||||
|
||||
resources {
|
||||
cpu = 200
|
||||
memory = 200
|
||||
}
|
||||
|
||||
restart {
|
||||
attempts = 2
|
||||
interval = "30m"
|
||||
delay = "15s"
|
||||
mode = "fail"
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
55
cluster/prod/app/backup/deploy/backup-weekly.hcl
Normal file
55
cluster/prod/app/backup/deploy/backup-weekly.hcl
Normal file
|
@ -0,0 +1,55 @@
|
|||
job "backup_weekly" {
|
||||
datacenters = ["orion"]
|
||||
type = "batch"
|
||||
|
||||
priority = "60"
|
||||
|
||||
periodic {
|
||||
cron = "@weekly"
|
||||
// Do not allow overlapping runs.
|
||||
prohibit_overlap = true
|
||||
}
|
||||
|
||||
group "backup-psql" {
|
||||
task "main" {
|
||||
driver = "docker"
|
||||
|
||||
config {
|
||||
image = "superboum/backup-psql-docker:gyr3aqgmhs0hxj0j9hkrdmm1m07i8za2"
|
||||
volumes = [
|
||||
// Mount a cache on the hard disk to avoid filling up the SSD
|
||||
"/mnt/storage/tmp_bckp_psql:/mnt/cache"
|
||||
]
|
||||
}
|
||||
|
||||
template {
|
||||
data = <<EOH
|
||||
CACHE_DIR=/mnt/cache
|
||||
AWS_BUCKET=backups-pgbasebackup
|
||||
AWS_ENDPOINT=s3.deuxfleurs.shirokumo.net
|
||||
AWS_ACCESS_KEY_ID={{ key "secrets/postgres/backup/aws_access_key_id" }}
|
||||
AWS_SECRET_ACCESS_KEY={{ key "secrets/postgres/backup/aws_secret_access_key" }}
|
||||
CRYPT_PUBLIC_KEY={{ key "secrets/postgres/backup/crypt_public_key" }}
|
||||
PSQL_HOST=psql-proxy.service.prod.consul
|
||||
PSQL_USER={{ key "secrets/postgres/keeper/pg_repl_username" }}
|
||||
PGPASSWORD={{ key "secrets/postgres/keeper/pg_repl_pwd" }}
|
||||
EOH
|
||||
|
||||
destination = "secrets/env_vars"
|
||||
env = true
|
||||
}
|
||||
|
||||
resources {
|
||||
cpu = 200
|
||||
memory = 200
|
||||
}
|
||||
|
||||
restart {
|
||||
attempts = 2
|
||||
interval = "30m"
|
||||
delay = "15s"
|
||||
mode = "fail"
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
|
@ -0,0 +1 @@
|
|||
USER Backup AWS access key ID
|
|
@ -0,0 +1 @@
|
|||
USER Backup AWS secret access key
|
|
@ -0,0 +1 @@
|
|||
USER Restic password to encrypt backups
|
|
@ -0,0 +1 @@
|
|||
USER Restic repository, eg. s3:https://s3.garage.tld
|
1
cluster/prod/app/backup/secrets/backup/id_ed25519
Normal file
1
cluster/prod/app/backup/secrets/backup/id_ed25519
Normal file
|
@ -0,0 +1 @@
|
|||
USER_LONG Private ed25519 key of the container doing the backup
|
1
cluster/prod/app/backup/secrets/backup/id_ed25519.pub
Normal file
1
cluster/prod/app/backup/secrets/backup/id_ed25519.pub
Normal file
|
@ -0,0 +1 @@
|
|||
USER Public ed25519 key of the container doing the backup (this key must be in authorized_keys on the backup target host)
|
|
@ -0,0 +1 @@
|
|||
USER Minio access key
|
|
@ -0,0 +1 @@
|
|||
USER Minio secret key
|
|
@ -0,0 +1 @@
|
|||
USER a private key to decript backups from age
|
|
@ -0,0 +1 @@
|
|||
USER A public key to encypt backups with age
|
1
cluster/prod/app/backup/secrets/backup/target_ssh_dir
Normal file
1
cluster/prod/app/backup/secrets/backup/target_ssh_dir
Normal file
|
@ -0,0 +1 @@
|
|||
USER Directory where to store backups on target host
|
|
@ -0,0 +1 @@
|
|||
USER SSH fingerprint of the target machine (format: copy here the corresponding line from your known_hosts file)
|
1
cluster/prod/app/backup/secrets/backup/target_ssh_host
Normal file
1
cluster/prod/app/backup/secrets/backup/target_ssh_host
Normal file
|
@ -0,0 +1 @@
|
|||
USER Hostname of the backup target host
|
1
cluster/prod/app/backup/secrets/backup/target_ssh_port
Normal file
1
cluster/prod/app/backup/secrets/backup/target_ssh_port
Normal file
|
@ -0,0 +1 @@
|
|||
USER SSH port number to connect to the target host
|
1
cluster/prod/app/backup/secrets/backup/target_ssh_user
Normal file
1
cluster/prod/app/backup/secrets/backup/target_ssh_user
Normal file
|
@ -0,0 +1 @@
|
|||
USER SSH username to log in as on the target host
|
|
@ -475,7 +475,8 @@ job "email" {
|
|||
|
||||
resources {
|
||||
cpu = 200
|
||||
memory = 1000
|
||||
memory = 500
|
||||
memory_max = 1000
|
||||
}
|
||||
|
||||
service {
|
||||
|
|
9
restic-summary
Executable file
9
restic-summary
Executable file
|
@ -0,0 +1,9 @@
|
|||
#!/usr/bin/env bash
|
||||
for svc in dovecot consul plume; do
|
||||
export RESTIC_REPOSITORY=`pass deuxfleurs/backups/$svc/restic_repository`
|
||||
export RESTIC_PASSWORD=`pass deuxfleurs/backups/$svc/restic_password`
|
||||
export AWS_ACCESS_KEY_ID=`pass deuxfleurs/backups/$svc/aws_s3_access_key`
|
||||
export AWS_SECRET_ACCESS_KEY=`pass deuxfleurs/backups/$svc/aws_s3_secret_key`
|
||||
restic unlock
|
||||
restic snapshots
|
||||
done
|
Loading…
Reference in a new issue