Maintenance of 2022-03-09

Quentin 2022-03-09 16:54:19 +01:00
parent 1322dae8da
commit a1c6c33d73
11 changed files with 40 additions and 92 deletions

View file

@@ -93,6 +93,12 @@ job "postgres13.3" {
         "--pg-su-password", "${PG_SU_PWD}",
         "--pg-repl-username", "${PG_REPL_USER}",
         "--pg-repl-password", "${PG_REPL_PWD}",
+        /*
+          The postgres daemon accepts 0.0.0.0, ::, and * here, but Stolon does not.
+          Otherwise you will get the following errors and your cluster will be broken (no replication):
+            WARN cmd/keeper.go:1979 provided --pg-listen-address "*": is not an ip address but a hostname. This will be advertized to the other components and may have undefined behaviors if resolved differently by other hosts
+            WARN cmd/keeper.go:1984 cannot resolve provided --pg-listen-address "*": lookup *: no such host
+        */
         "--pg-listen-address", "${attr.unique.network.ip-address}",
         "--pg-port", "${NOMAD_PORT_psql_port}",
         "--pg-bin-path", "/usr/lib/postgresql/13/bin/"

View file

@@ -4,9 +4,12 @@
 For each machine, **one by one** do:
 - Check that cluster is healthy
-  - Check gluster
-    - `sudo gluster peer status`
-    - `sudo gluster volume status all` (check Online Col, only `Y` must appear)
+  - Check garage
+    - check that all nodes are online: `docker exec -ti xxx /garage status`
+    - check that tables are in sync: `docker exec -ti 63a4d7ecd795 /garage repair --yes tables`
+    - check garage logs
+      - no unknown errors, and no resync should be in progress
+      - the following line must appear: `INFO garage_util::background > Worker exited: Repair worker`
 - Check that Nomad is healthy
   - `nomad server members`
   - `nomad node status`
@@ -17,5 +20,5 @@ For each machine, **one by one** do:
 - Run `nomad node drain -enable -force -self`
 - Reboot
 - Run `nomad node drain -self -disable`
-- Check that cluster is healthy
+- Check that cluster is healthy (basically repeat the whole first point)
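
Assembled from the commands above, one node's maintenance round looks roughly like the sketch below. `GARAGE_CONTAINER` is a placeholder for the garage container name, and reading the logs through `docker logs` is an assumption (any way of reading the garage logs works):

    # One node, one round; only move on once this node is healthy again.
    GARAGE_CONTAINER=xxx                                # placeholder, as in the runbook
    docker exec -ti "$GARAGE_CONTAINER" /garage status  # all garage nodes online?
    docker logs --tail 200 "$GARAGE_CONTAINER"          # no unknown errors, "Worker exited: Repair worker" present
    nomad server members && nomad node status           # Nomad servers and clients healthy?
    nomad node drain -enable -force -self               # move workloads off this node
    sudo reboot
    # ...once the node is back:
    nomad node drain -self -disable
    # then repeat the health checks before moving to the next machine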

View file

@@ -14,6 +14,10 @@
     - role: network
       tags: net
-# UNSAFE!! This section configures glusterfs. Once done, don't run it ever again as it may break stuff.
-#    - role: storage
-#      tags: sto
+- hosts: extra_nodes
+  serial: 1
+  roles:
+    - role: common
+      tags: base
+    - role: users
+      tags: account
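
To exercise only the new play, something like the following should work; the playbook and inventory file names are assumptions, only the group and tag names come from the diff:

    # Run the common/users roles on the new extra_nodes group only
    ansible-playbook -i production.yml site.yml --limit extra_nodes --tags base,account
    # adding --check --diff first is a cheap way to preview what would change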

View file

@@ -7,7 +7,7 @@ cluster_nodes:
       ipv4: 192.168.0.2
       gatewayv4: 192.168.0.254
       ipv6: 2a01:e0a:260:b5b0::2
-      gatewayv6: 2a01:e34:ec5c:dbe0::1
+      gatewayv6: 2a01:e0a:260:b5b0::1
       interface: eno1
       dns_1: 212.27.40.240
       dns_2: 212.27.40.241
@@ -39,6 +39,8 @@ cluster_nodes:
       dns_2: 212.27.40.241
       ansible_python_interpreter: python3
+extra_nodes:
+  hosts:
     io:
       ansible_host: io.machine.deuxfleurs.fr
       ansible_port: 22
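
A quick sanity check of the reworked inventory; the inventory file name is an assumption:

    # io should now appear under extra_nodes, the three cluster machines under cluster_nodes
    ansible-inventory -i production.yml --graph
    ansible -i production.yml extra_nodes -m ping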

View file

@@ -44,6 +44,14 @@
       - pciutils
       - pv
       - zstd
+      - miniupnpc
+      - rsync
+      - ncdu
+      - smartmontools
+      - ioping
+      - lm-sensors
+      - netcat
+      - sysstat
     state: present
 - name: "Passwordless sudo"

View file

@@ -1,6 +1,6 @@
 - name: "Set consul version"
   set_fact:
-    consul_version: 1.11.2
+    consul_version: 1.11.4
 - name: "Download and install Consul for x86_64"
   unarchive:
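
After the role runs, the upgrade can be confirmed from any node; a minimal sketch:

    consul version    # local binary should report 1.11.4
    consul members    # every member should be alive before moving to the next node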

View file

@@ -10,12 +10,12 @@
 -A INPUT -p tcp --dport 22 -j ACCEPT
 # Diplonat needs everything open to communicate with IGD with the router
--A INPUT -s 192.168.1.254 -j ACCEPT
+-A INPUT -s 192.168.0.254 -j ACCEPT
 # Cluster
-{% for selected_host in groups['cluster_nodes'] %}
--A INPUT -s {{ hostvars[selected_host]['ipv4'] }} -j ACCEPT
-{% endfor %}
+-A INPUT -s 192.168.0.2 -j ACCEPT
+-A INPUT -s 192.168.0.3 -j ACCEPT
+-A INPUT -s 192.168.0.4 -j ACCEPT
 # Local
 -A INPUT -i docker0 -j ACCEPT

View file

@@ -16,9 +16,9 @@
 -A INPUT -p tcp --dport 22 -j ACCEPT
 # Cluster
-{% for selected_host in groups['cluster_nodes'] %}
--A INPUT -s {{ hostvars[selected_host]['ipv6'] }} -j ACCEPT
-{% endfor %}
+-A INPUT -s 2a01:e0a:260:b5b0::2 -j ACCEPT
+-A INPUT -s 2a01:e0a:260:b5b0::3 -j ACCEPT
+-A INPUT -s 2a01:e0a:260:b5b0::4 -j ACCEPT
 # Local
 -A INPUT -i docker0 -j ACCEPT
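
Both rendered rulesets (the IPv4 hunk above and this IPv6 one) can be parsed without being applied; a sketch assuming the templates land in the usual iptables-persistent locations (the paths are an assumption):

    sudo iptables-restore --test  < /etc/iptables/rules.v4
    sudo ip6tables-restore --test < /etc/iptables/rules.v6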

View file

@@ -1,6 +1,6 @@
 - name: "Set nomad version"
   set_fact:
-    nomad_version: 1.2.4
+    nomad_version: 1.2.6
 - name: "Download and install Nomad for x86_64"
   unarchive:
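
Same verification idea as for Consul; a minimal sketch:

    nomad version           # local binary should report 1.2.6
    nomad server members    # the Build column should show 1.2.6 on every server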

View file

@@ -1,3 +0,0 @@
----
-- name: umount gluster
-  shell: umount --force --lazy /mnt/glusterfs ; true

View file

@@ -1,72 +0,0 @@
-- name: "Add GlusterFS Repo Key"
-  apt_key:
-    url: https://download.gluster.org/pub/gluster/glusterfs/5/rsa.pub
-    state: present
-
-- name: "Add GlusterFS official repository"
-  apt_repository:
-    repo: "deb [arch=amd64] https://download.gluster.org/pub/gluster/glusterfs/5/LATEST/Debian/buster/amd64/apt buster main"
-    state: present
-    filename: gluster
-
-- name: "Install GlusterFS"
-  apt:
-    name:
-      - glusterfs-server
-      - glusterfs-client
-    state: present
-
-- name: "Ensure Gluster Daemon started and enabled"
-  service:
-    name: glusterd
-    enabled: yes
-    state: started
-
-- name: "Create directory for GlusterFS bricks"
-  file: path=/mnt/storage/glusterfs/brick1 recurse=yes state=directory
-
-- name: "Create GlusterFS volumes"
-  gluster_volume:
-    state: present
-    name: donnees
-    bricks: /mnt/storage/glusterfs/brick1/g1
-    #rebalance: yes
-    redundancies: 1
-    disperses: 3
-    #replicas: 3
-    force: yes
-    options:
-      client.event-threads: "8"
-      server.event-threads: "8"
-      performance.stat-prefetch: "on"
-      nfs.disable: "on"
-      features.cache-invalidation: "on"
-      performance.client-io-threads: "on"
-      config.transport: tcp
-      performance.quick-read: "on"
-      performance.io-cache: "on"
-      nfs.export-volumes: "off"
-      cluster.lookup-optimize: "on"
-    cluster: "{% for selected_host in groups['cluster_nodes'] %}{{ hostvars[selected_host]['ipv4'] }}{{ ',' if not loop.last else '' }}{% endfor %}"
-  run_once: true
-
-- name: "Create mountpoint"
-  file: path=/mnt/glusterfs recurse=yes state=directory
-
-- name: "Flush handlers (umount glusterfs and restart ganesha)"
-  meta: flush_handlers
-
-- name: "Add fstab entry"
-  tags: gluster-fstab
-  mount:
-    path: /mnt/glusterfs
-    src: "{{ ipv4 }}:/donnees"
-    fstype: glusterfs
-    opts: "defaults,_netdev,noauto,x-systemd.automount"
-    state: present
-
-- name: Mount everything
-  command: mount -a
-  args:
-    warn: no
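
Deleting the role does not touch nodes that were already provisioned with it. Purely as a sketch (none of this is in the commit), leftover GlusterFS state could be cleaned up manually along these lines:

    # Manual cleanup sketch for nodes that still carry the old GlusterFS setup
    sudo umount --force --lazy /mnt/glusterfs || true
    sudo sed -i '\#/mnt/glusterfs#d' /etc/fstab            # drop the fstab entry the old role added
    sudo systemctl disable --now glusterd
    sudo apt-get purge -y glusterfs-server glusterfs-client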