diff --git a/app/postgres/deploy/postgres.hcl b/app/postgres/deploy/postgres.hcl index 6672644..bf68a59 100644 --- a/app/postgres/deploy/postgres.hcl +++ b/app/postgres/deploy/postgres.hcl @@ -93,6 +93,12 @@ job "postgres13.3" { "--pg-su-password", "${PG_SU_PWD}", "--pg-repl-username", "${PG_REPL_USER}", "--pg-repl-password", "${PG_REPL_PWD}", + /* + The postgres daemon accepts 0.0.0.0, ::, and * here, but Stolon does not: it needs a concrete IP address. + With a wildcard you will get the following warnings and your cluster will be broken (no replication): + WARN cmd/keeper.go:1979 provided --pg-listen-address "*": is not an ip address but a hostname. This will be advertized to the other components and may have undefined behaviors if resolved differently by other hosts + WARN cmd/keeper.go:1984 cannot resolve provided --pg-listen-address "*": lookup *: no such host + */ "--pg-listen-address", "${attr.unique.network.ip-address}", "--pg-port", "${NOMAD_PORT_psql_port}", "--pg-bin-path", "/usr/lib/postgresql/13/bin/" diff --git a/os/config/README.md b/os/config/README.md index a1078df..81fe9c9 100644 --- a/os/config/README.md +++ b/os/config/README.md @@ -4,9 +4,12 @@ For each machine, **one by one** do: - Check that cluster is healthy - - Check gluster - - `sudo gluster peer status` - - `sudo gluster volume status all` (check Online Col, only `Y` must appear) + - Check garage + - check that all nodes are online `docker exec -ti xxx /garage status` + - check that tables are in sync `docker exec -ti xxx /garage repair --yes tables` + - check garage logs + - there must be no unknown errors and no resync in progress + - the following line must appear `INFO garage_util::background > Worker exited: Repair worker` - Check that Nomad is healthy - `nomad server members` - `nomad node status` @@ -17,5 +20,5 @@ For each machine, **one by one** do: - Run `nomad node drain -enable -force -self` - Reboot - Run `nomad node drain -self -disable` - - Check that cluster is healthy + - Check that cluster is healthy (basically the 
whole first point) diff --git a/os/config/cluster_nodes.yml b/os/config/cluster_nodes.yml index ea58630..61d540d 100644 --- a/os/config/cluster_nodes.yml +++ b/os/config/cluster_nodes.yml @@ -14,6 +14,10 @@ - role: network tags: net -# UNSAFE!! This section configures glusterfs. Once done, don't run it ever again as it may break stuff. -# - role: storage -# tags: sto +- hosts: extra_nodes + serial: 1 + roles: + - role: common + tags: base + - role: users + tags: account diff --git a/os/config/production.yml b/os/config/production.yml index d59e153..446dd40 100644 --- a/os/config/production.yml +++ b/os/config/production.yml @@ -7,7 +7,7 @@ cluster_nodes: ipv4: 192.168.0.2 gatewayv4: 192.168.0.254 ipv6: 2a01:e0a:260:b5b0::2 - gatewayv6: 2a01:e34:ec5c:dbe0::1 + gatewayv6: 2a01:e0a:260:b5b0::1 interface: eno1 dns_1: 212.27.40.240 dns_2: 212.27.40.241 @@ -39,6 +39,8 @@ cluster_nodes: dns_2: 212.27.40.241 ansible_python_interpreter: python3 +extra_nodes: + hosts: io: ansible_host: io.machine.deuxfleurs.fr ansible_port: 22 diff --git a/os/config/roles/common/tasks/main.yml b/os/config/roles/common/tasks/main.yml index c75ae81..5f46835 100644 --- a/os/config/roles/common/tasks/main.yml +++ b/os/config/roles/common/tasks/main.yml @@ -44,6 +44,14 @@ - pciutils - pv - zstd + - miniupnpc + - rsync + - ncdu + - smartmontools + - ioping + - lm-sensors + - netcat + - sysstat state: present - name: "Passwordless sudo" diff --git a/os/config/roles/consul/tasks/main.yml b/os/config/roles/consul/tasks/main.yml index e0d3b0a..6bc100b 100644 --- a/os/config/roles/consul/tasks/main.yml +++ b/os/config/roles/consul/tasks/main.yml @@ -1,6 +1,6 @@ - name: "Set consul version" set_fact: - consul_version: 1.11.2 + consul_version: 1.11.4 - name: "Download and install Consul for x86_64" unarchive: diff --git a/os/config/roles/network/templates/rules.v4 b/os/config/roles/network/templates/rules.v4 index a5f138b..89d9ebf 100644 --- a/os/config/roles/network/templates/rules.v4 +++ 
b/os/config/roles/network/templates/rules.v4 @@ -10,12 +10,12 @@ -A INPUT -p tcp --dport 22 -j ACCEPT # Diplonat needs everything open to communicate with IGD with the router --A INPUT -s 192.168.1.254 -j ACCEPT +-A INPUT -s 192.168.0.254 -j ACCEPT # Cluster -{% for selected_host in groups['cluster_nodes'] %} --A INPUT -s {{ hostvars[selected_host]['ipv4'] }} -j ACCEPT -{% endfor %} +-A INPUT -s 192.168.0.2 -j ACCEPT +-A INPUT -s 192.168.0.3 -j ACCEPT +-A INPUT -s 192.168.0.4 -j ACCEPT # Local -A INPUT -i docker0 -j ACCEPT diff --git a/os/config/roles/network/templates/rules.v6 b/os/config/roles/network/templates/rules.v6 index ef3de43..35bcb0d 100644 --- a/os/config/roles/network/templates/rules.v6 +++ b/os/config/roles/network/templates/rules.v6 @@ -16,9 +16,9 @@ -A INPUT -p tcp --dport 22 -j ACCEPT # Cluster -{% for selected_host in groups['cluster_nodes'] %} --A INPUT -s {{ hostvars[selected_host]['ipv6'] }} -j ACCEPT -{% endfor %} +-A INPUT -s 2a01:e0a:260:b5b0::2 -j ACCEPT +-A INPUT -s 2a01:e0a:260:b5b0::3 -j ACCEPT +-A INPUT -s 2a01:e0a:260:b5b0::4 -j ACCEPT # Local -A INPUT -i docker0 -j ACCEPT diff --git a/os/config/roles/nomad/tasks/main.yml b/os/config/roles/nomad/tasks/main.yml index 7c90a86..a6f36b1 100644 --- a/os/config/roles/nomad/tasks/main.yml +++ b/os/config/roles/nomad/tasks/main.yml @@ -1,6 +1,6 @@ - name: "Set nomad version" set_fact: - nomad_version: 1.2.4 + nomad_version: 1.2.6 - name: "Download and install Nomad for x86_64" unarchive: diff --git a/os/config/roles/storage/handlers/main.yml b/os/config/roles/storage/handlers/main.yml deleted file mode 100644 index a395c93..0000000 --- a/os/config/roles/storage/handlers/main.yml +++ /dev/null @@ -1,3 +0,0 @@ ---- -- name: umount gluster - shell: umount --force --lazy /mnt/glusterfs ; true diff --git a/os/config/roles/storage/tasks/main.yml b/os/config/roles/storage/tasks/main.yml deleted file mode 100644 index d66011b..0000000 --- a/os/config/roles/storage/tasks/main.yml +++ /dev/null @@ -1,72 
+0,0 @@ -- name: "Add GlusterFS Repo Key" - apt_key: - url: https://download.gluster.org/pub/gluster/glusterfs/5/rsa.pub - state: present - -- name: "Add GlusterFS official repository" - apt_repository: - repo: "deb [arch=amd64] https://download.gluster.org/pub/gluster/glusterfs/5/LATEST/Debian/buster/amd64/apt buster main" - state: present - filename: gluster - -- name: "Install GlusterFS" - apt: - name: - - glusterfs-server - - glusterfs-client - state: present - -- name: "Ensure Gluster Daemon started and enabled" - service: - name: glusterd - enabled: yes - state: started - -- name: "Create directory for GlusterFS bricks" - file: path=/mnt/storage/glusterfs/brick1 recurse=yes state=directory - -- name: "Create GlusterFS volumes" - gluster_volume: - state: present - name: donnees - bricks: /mnt/storage/glusterfs/brick1/g1 - #rebalance: yes - redundancies: 1 - disperses: 3 - #replicas: 3 - force: yes - options: - client.event-threads: "8" - server.event-threads: "8" - performance.stat-prefetch: "on" - nfs.disable: "on" - features.cache-invalidation: "on" - performance.client-io-threads: "on" - config.transport: tcp - performance.quick-read: "on" - performance.io-cache: "on" - nfs.export-volumes: "off" - cluster.lookup-optimize: "on" - - cluster: "{% for selected_host in groups['cluster_nodes'] %}{{ hostvars[selected_host]['ipv4'] }}{{ ',' if not loop.last else '' }}{% endfor %}" - run_once: true - -- name: "Create mountpoint" - file: path=/mnt/glusterfs recurse=yes state=directory - -- name: "Flush handlers (umount glusterfs and restart ganesha)" - meta: flush_handlers - -- name: "Add fstab entry" - tags: gluster-fstab - mount: - path: /mnt/glusterfs - src: "{{ ipv4 }}:/donnees" - fstype: glusterfs - opts: "defaults,_netdev,noauto,x-systemd.automount" - state: present - -- name: Mount everything - command: mount -a - args: - warn: no