Restart backups

This commit is contained in:
Quentin 2022-09-01 18:05:50 +02:00
parent 1749a98e86
commit 02c65de5fe
Signed by untrusted user: quentin
GPG key ID: E9602264D639FF68
27 changed files with 496 additions and 1 deletions

View file

@ -0,0 +1,28 @@
FROM golang:buster as builder
WORKDIR /root
RUN git clone https://filippo.io/age && cd age/cmd/age && go build -o age .
FROM amd64/debian:buster
COPY --from=builder /root/age/cmd/age/age /usr/local/bin/age
RUN apt-get update && \
apt-get -qq -y full-upgrade && \
apt-get install -y rsync wget openssh-client unzip && \
apt-get clean && \
rm -f /var/lib/apt/lists/*_*
RUN mkdir -p /root/.ssh
WORKDIR /root
RUN wget https://releases.hashicorp.com/consul/1.8.5/consul_1.8.5_linux_amd64.zip && \
unzip consul_1.8.5_linux_amd64.zip && \
chmod +x consul && \
mv consul /usr/local/bin && \
rm consul_1.8.5_linux_amd64.zip
COPY do_backup.sh /root/do_backup.sh
CMD "/root/do_backup.sh"

View file

@ -0,0 +1,20 @@
#!/bin/sh
set -x -e
cd /root
chmod 0600 .ssh/id_ed25519
cat > .ssh/config <<EOF
Host backuphost
HostName $TARGET_SSH_HOST
Port $TARGET_SSH_PORT
User $TARGET_SSH_USER
EOF
consul kv export | \
gzip | \
age -r "$(cat /root/.ssh/id_ed25519.pub)" | \
ssh backuphost "cat > $TARGET_SSH_DIR/consul/$(date --iso-8601=minute)_consul_kv_export.gz.age"

View file

@ -0,0 +1 @@
result

View file

@ -0,0 +1,8 @@
## Build
```bash
docker load < $(nix-build docker.nix)
docker push superboum/backup-psql:???
```

View file

@ -0,0 +1,106 @@
#!/usr/bin/env python3
import shutil,sys,os,datetime,minio,subprocess
working_directory = "."
if 'CACHE_DIR' in os.environ: working_directory = os.environ['CACHE_DIR']
required_space_in_bytes = 20 * 1024 * 1024 * 1024
bucket = os.environ['AWS_BUCKET']
key = os.environ['AWS_ACCESS_KEY_ID']
secret = os.environ['AWS_SECRET_ACCESS_KEY']
endpoint = os.environ['AWS_ENDPOINT']
pubkey = os.environ['CRYPT_PUBLIC_KEY']
psql_host = os.environ['PSQL_HOST']
psql_user = os.environ['PSQL_USER']
s3_prefix = str(datetime.datetime.now())
files = [ "backup_manifest", "base.tar.gz", "pg_wal.tar.gz" ]
clear_paths = [ os.path.join(working_directory, f) for f in files ]
crypt_paths = [ os.path.join(working_directory, f) + ".age" for f in files ]
s3_keys = [ s3_prefix + "/" + f for f in files ]
def abort(msg):
for p in clear_paths + crypt_paths:
if os.path.exists(p):
print(f"Remove {p}")
os.remove(p)
if msg: sys.exit(msg)
else: print("success")
# Check we have enough space on disk
if shutil.disk_usage(working_directory).free < required_space_in_bytes:
abort(f"Not enough space on disk at path {working_directory} to perform a backup, aborting")
# Check postgres password is set
if 'PGPASSWORD' not in os.environ:
abort(f"You must pass postgres' password through the environment variable PGPASSWORD")
# Check our working directory is empty
if len(os.listdir(working_directory)) != 0:
abort(f"Working directory {working_directory} is not empty, aborting")
# Check Minio
client = minio.Minio(endpoint, key, secret)
if not client.bucket_exists(bucket):
abort(f"Bucket {bucket} does not exist or its access is forbidden, aborting")
# Perform the backup locally
try:
ret = subprocess.run(["pg_basebackup",
f"--host={psql_host}",
f"--username={psql_user}",
f"--pgdata={working_directory}",
f"--format=tar",
"--wal-method=stream",
"--gzip",
"--compress=6",
"--progress",
"--max-rate=5M",
])
if ret.returncode != 0:
abort(f"pg_basebackup exited, expected return code 0, got {ret.returncode}. aborting")
except Exception as e:
abort(f"pg_basebackup raised exception {e}. aborting")
# Check that the expected files are here
for p in clear_paths:
print(f"Checking that {p} exists locally")
if not os.path.exists(p):
abort(f"File {p} expected but not found, aborting")
# Cipher them
for c, e in zip(clear_paths, crypt_paths):
print(f"Ciphering {c} to {e}")
try:
ret = subprocess.run(["age", "-r", pubkey, "-o", e, c])
if ret.returncode != 0:
abort(f"age exit code is {ret}, 0 expected. aborting")
except Exception as e:
abort(f"aged raised an exception. {e}. aborting")
# Upload the backup to S3
for p, k in zip(crypt_paths, s3_keys):
try:
print(f"Uploading {p} to {k}")
result = client.fput_object(bucket, k, p)
print(
"created {0} object; etag: {1}, version-id: {2}".format(
result.object_name, result.etag, result.version_id,
),
)
except Exception as e:
abort(f"Exception {e} occured while upload {p}. aborting")
# Check that the files have been uploaded
for k in s3_keys:
try:
print(f"Checking that {k} exists remotely")
result = client.stat_object(bucket, k)
print(
"last-modified: {0}, size: {1}".format(
result.last_modified, result.size,
),
)
except Exception as e:
abort(f"{k} not found on S3. {e}. aborting")
abort(None)

View file

@ -0,0 +1,8 @@
{
pkgsSrc = fetchTarball {
# Latest commit on https://github.com/NixOS/nixpkgs/tree/nixos-21.11
# As of 2022-04-15
url ="https://github.com/NixOS/nixpkgs/archive/2f06b87f64bc06229e05045853e0876666e1b023.tar.gz";
sha256 = "sha256:1d7zg96xw4qsqh7c89pgha9wkq3rbi9as3k3d88jlxy2z0ns0cy2";
};
}

View file

@ -0,0 +1,37 @@
let
common = import ./common.nix;
pkgs = import common.pkgsSrc {};
python-with-my-packages = pkgs.python3.withPackages (p: with p; [
minio
]);
in
pkgs.stdenv.mkDerivation {
name = "backup-psql";
src = pkgs.lib.sourceFilesBySuffices ./. [ ".py" ];
buildInputs = [
python-with-my-packages
pkgs.age
pkgs.postgresql_14
];
buildPhase = ''
cat > backup-psql <<EOF
#!${pkgs.bash}/bin/bash
export PYTHONPATH=${python-with-my-packages}/${python-with-my-packages.sitePackages}
export PATH=${python-with-my-packages}/bin:${pkgs.age}/bin:${pkgs.postgresql_14}/bin
${python-with-my-packages}/bin/python3 $out/lib/backup-psql.py
EOF
chmod +x backup-psql
'';
installPhase = ''
mkdir -p $out/{bin,lib}
cp *.py $out/lib/backup-psql.py
cp backup-psql $out/bin/backup-psql
'';
}

View file

@ -0,0 +1,11 @@
let
common = import ./common.nix;
app = import ./default.nix;
pkgs = import common.pkgsSrc {};
in
pkgs.dockerTools.buildImage {
name = "superboum/backup-psql-docker";
config = {
Cmd = [ "${app}/bin/backup-psql" ];
};
}

View file

@ -0,0 +1,196 @@
job "backup_daily" {
datacenters = ["orion", "neptune"]
type = "batch"
priority = "60"
periodic {
cron = "@daily"
// Do not allow overlapping runs.
prohibit_overlap = true
}
group "backup-dovecot" {
constraint {
attribute = "${attr.unique.hostname}"
operator = "="
value = "doradille"
}
task "main" {
driver = "docker"
config {
image = "restic/restic:0.14.0"
entrypoint = [ "/bin/sh", "-c" ]
args = [ "restic backup /mail && restic forget --keep-within 1m1d --keep-within-weekly 3m --keep-within-monthly 1y && restic prune --max-unused 50% --max-repack-size 2G && restic check" ]
volumes = [
"/mnt/ssd/mail:/mail"
]
}
template {
data = <<EOH
AWS_ACCESS_KEY_ID={{ key "secrets/email/dovecot/backup_aws_access_key_id" }}
AWS_SECRET_ACCESS_KEY={{ key "secrets/email/dovecot/backup_aws_secret_access_key" }}
RESTIC_REPOSITORY={{ key "secrets/email/dovecot/backup_restic_repository" }}
RESTIC_PASSWORD={{ key "secrets/email/dovecot/backup_restic_password" }}
EOH
destination = "secrets/env_vars"
env = true
}
resources {
cpu = 500
memory = 100
memory_max = 300
}
restart {
attempts = 2
interval = "30m"
delay = "15s"
mode = "fail"
}
}
}
group "backup-plume" {
constraint {
attribute = "${attr.unique.hostname}"
operator = "="
value = "dahlia"
}
task "main" {
driver = "docker"
config {
image = "restic/restic:0.14.0"
entrypoint = [ "/bin/sh", "-c" ]
args = [ "restic backup /plume && restic forget --keep-within 1m1d --keep-within-weekly 3m --keep-within-monthly 1y && restic prune --max-unused 50% --max-repack-size 2G && restic check" ]
volumes = [
"/mnt/ssd/plume/media:/plume"
]
}
template {
data = <<EOH
AWS_ACCESS_KEY_ID={{ key "secrets/plume/backup_aws_access_key_id" }}
AWS_SECRET_ACCESS_KEY={{ key "secrets/plume/backup_aws_secret_access_key" }}
RESTIC_REPOSITORY={{ key "secrets/plume/backup_restic_repository" }}
RESTIC_PASSWORD={{ key "secrets/plume/backup_restic_password" }}
EOH
destination = "secrets/env_vars"
env = true
}
resources {
cpu = 500
memory = 100
memory_max = 300
}
restart {
attempts = 2
interval = "30m"
delay = "15s"
mode = "fail"
}
}
}
group "backup-consul" {
task "consul-kv-export" {
driver = "docker"
lifecycle {
hook = "prestart"
sidecar = false
}
config {
image = "consul:1.13.1"
network_mode = "host"
entrypoint = [ "/bin/sh", "-c" ]
args = [ "/bin/consul kv export > $NOMAD_ALLOC_DIR/consul.json" ]
volumes = [
"secrets:/etc/consul",
]
}
env {
CONSUL_HTTP_ADDR = "https://consul.service.prod.consul:8501"
CONSUL_HTTP_SSL = "true"
CONSUL_CACERT = "/etc/consul/consul.crt"
CONSUL_CLIENT_CERT = "/etc/consul/consul-client.crt"
CONSUL_CLIENT_KEY = "/etc/consul/consul-client.key"
}
resources {
cpu = 200
memory = 200
}
template {
data = "{{ key \"secrets/consul/consul.crt\" }}"
destination = "secrets/consul.crt"
}
template {
data = "{{ key \"secrets/consul/consul-client.crt\" }}"
destination = "secrets/consul-client.crt"
}
template {
data = "{{ key \"secrets/consul/consul-client.key\" }}"
destination = "secrets/consul-client.key"
}
restart {
attempts = 2
interval = "30m"
delay = "15s"
mode = "fail"
}
}
task "restic-backup" {
driver = "docker"
config {
image = "restic/restic:0.12.1"
entrypoint = [ "/bin/sh", "-c" ]
args = [ "restic backup $NOMAD_ALLOC_DIR/consul.json && restic forget --keep-within 1m1d --keep-within-weekly 3m --keep-within-monthly 1y && restic prune --max-unused 50% --max-repack-size 2G && restic check" ]
}
template {
data = <<EOH
AWS_ACCESS_KEY_ID={{ key "secrets/backup/consul/backup_aws_access_key_id" }}
AWS_SECRET_ACCESS_KEY={{ key "secrets/backup/consul/backup_aws_secret_access_key" }}
RESTIC_REPOSITORY={{ key "secrets/backup/consul/backup_restic_repository" }}
RESTIC_PASSWORD={{ key "secrets/backup/consul/backup_restic_password" }}
EOH
destination = "secrets/env_vars"
env = true
}
resources {
cpu = 200
memory = 200
}
restart {
attempts = 2
interval = "30m"
delay = "15s"
mode = "fail"
}
}
}
}

View file

@ -0,0 +1,55 @@
job "backup_weekly" {
datacenters = ["orion"]
type = "batch"
priority = "60"
periodic {
cron = "@weekly"
// Do not allow overlapping runs.
prohibit_overlap = true
}
group "backup-psql" {
task "main" {
driver = "docker"
config {
image = "superboum/backup-psql-docker:gyr3aqgmhs0hxj0j9hkrdmm1m07i8za2"
volumes = [
// Mount a cache on the hard disk to avoid filling up the SSD
"/mnt/storage/tmp_bckp_psql:/mnt/cache"
]
}
template {
data = <<EOH
CACHE_DIR=/mnt/cache
AWS_BUCKET=backups-pgbasebackup
AWS_ENDPOINT=s3.deuxfleurs.shirokumo.net
AWS_ACCESS_KEY_ID={{ key "secrets/postgres/backup/aws_access_key_id" }}
AWS_SECRET_ACCESS_KEY={{ key "secrets/postgres/backup/aws_secret_access_key" }}
CRYPT_PUBLIC_KEY={{ key "secrets/postgres/backup/crypt_public_key" }}
PSQL_HOST=psql-proxy.service.prod.consul
PSQL_USER={{ key "secrets/postgres/keeper/pg_repl_username" }}
PGPASSWORD={{ key "secrets/postgres/keeper/pg_repl_pwd" }}
EOH
destination = "secrets/env_vars"
env = true
}
resources {
cpu = 200
memory = 200
}
restart {
attempts = 2
interval = "30m"
delay = "15s"
mode = "fail"
}
}
}
}

View file

@ -0,0 +1 @@
USER Backup AWS access key ID

View file

@ -0,0 +1 @@
USER Backup AWS secret access key

View file

@ -0,0 +1 @@
USER Restic password to encrypt backups

View file

@ -0,0 +1 @@
USER Restic repository, eg. s3:https://s3.garage.tld

View file

@ -0,0 +1 @@
USER_LONG Private ed25519 key of the container doing the backup

View file

@ -0,0 +1 @@
USER Public ed25519 key of the container doing the backup (this key must be in authorized_keys on the backup target host)

View file

@ -0,0 +1 @@
USER Minio access key

View file

@ -0,0 +1 @@
USER Minio secret key

View file

@ -0,0 +1 @@
USER a private key to decript backups from age

View file

@ -0,0 +1 @@
USER A public key to encypt backups with age

View file

@ -0,0 +1 @@
USER Directory where to store backups on target host

View file

@ -0,0 +1 @@
USER SSH fingerprint of the target machine (format: copy here the corresponding line from your known_hosts file)

View file

@ -0,0 +1 @@
USER Hostname of the backup target host

View file

@ -0,0 +1 @@
USER SSH port number to connect to the target host

View file

@ -0,0 +1 @@
USER SSH username to log in as on the target host

View file

@ -475,7 +475,8 @@ job "email" {
resources { resources {
cpu = 200 cpu = 200
memory = 1000 memory = 500
memory_max = 1000
} }
service { service {

9
restic-summary Executable file
View file

@ -0,0 +1,9 @@
#!/usr/bin/env bash
for svc in dovecot consul plume; do
export RESTIC_REPOSITORY=`pass deuxfleurs/backups/$svc/restic_repository`
export RESTIC_PASSWORD=`pass deuxfleurs/backups/$svc/restic_password`
export AWS_ACCESS_KEY_ID=`pass deuxfleurs/backups/$svc/aws_s3_access_key`
export AWS_SECRET_ACCESS_KEY=`pass deuxfleurs/backups/$svc/aws_s3_secret_key`
restic unlock
restic snapshots
done