diff --git a/ansible/README.md b/ansible/README.md index 0d0c607..db8d960 100644 --- a/ansible/README.md +++ b/ansible/README.md @@ -1,52 +1,15 @@ +# ANSIBLE -## Provisionning +## How to proceed - 1. Need a public IP address - 2. Deploy Debian sid/buster - 3. Add a DNS entry like xxxx.machine.deuxfleurs.fr A 0.0.0.0 in Cloudflare + Havelock - 4. Setup the fqdn in /etc/hosts (127.0.1.1 xxxx.machine.deuxfleurs.fr) - 5. Switch the SSH port to the port 110 - 6. Add the server to the ./production file - 7. Reboot machine - 8. Deploy Ansible - 9. Check that everything works as intended - 10. Update NS 1.cluster.deuxfleurs.fr - -## Useful commands - -Show every variables collected by Ansible for a given host: - -``` -ansible -i production villequin.machine.deuxfleurs.fr -m setup -``` - -Run playbook for only one host: - -``` -ansible-playbook -i production --limit villequin.machine.deuxfleurs.fr site.yml -``` - -Dump hostvars: - -``` -ansible -m debug villequin.machine.deuxfleurs.fr -i ./production -a "var=hostvars" -``` - -Deploy only one tag: - -``` -ansible-playbook -i production site.yml --tags "container" -``` - -Redeploy everything: - -``` -ansible-playbook -i production site.yml -``` - -Upgrade packages and force overwirte to fix bad packing done by GlusterFS: - -``` -apt-get -o Dpkg::Options::="--force-overwrite" dist-upgrade -y -``` +For each machine, **one by one** do: + - Check that cluster is healthy + - `sudo gluster peer status` + - `sudo gluster volume status all` (check the Online column; only `Y` must appear) + - Check that Nomad is healthy + - Check that Consul is healthy + - Check that Postgres is healthy + - Run `ansible-playbook -i production --limit <this machine> site.yml` + - Reboot + - Check that cluster is healthy diff --git a/ansible/README.more.md b/ansible/README.more.md new file mode 100644 index 0000000..0d0c607 --- /dev/null +++ b/ansible/README.more.md @@ -0,0 +1,52 @@ + +## Provisioning + + 1. Need a public IP address + 2. Deploy Debian sid/buster + 3. 
Add a DNS entry like xxxx.machine.deuxfleurs.fr A 0.0.0.0 in Cloudflare + Havelock + 4. Setup the fqdn in /etc/hosts (127.0.1.1 xxxx.machine.deuxfleurs.fr) + 5. Switch the SSH port to the port 110 + 6. Add the server to the ./production file + 7. Reboot machine + 8. Deploy Ansible + 9. Check that everything works as intended + 10. Update NS 1.cluster.deuxfleurs.fr + +## Useful commands + +Show all variables collected by Ansible for a given host: + +``` +ansible -i production villequin.machine.deuxfleurs.fr -m setup +``` + +Run playbook for only one host: + +``` +ansible-playbook -i production --limit villequin.machine.deuxfleurs.fr site.yml +``` + +Dump hostvars: + +``` +ansible -m debug villequin.machine.deuxfleurs.fr -i ./production -a "var=hostvars" +``` + +Deploy only one tag: + +``` +ansible-playbook -i production site.yml --tags "container" +``` + +Redeploy everything: + +``` +ansible-playbook -i production site.yml +``` + +Upgrade packages and force overwrite to fix bad packing done by GlusterFS: + +``` +apt-get -o Dpkg::Options::="--force-overwrite" dist-upgrade -y +``` + diff --git a/ansible/cluster_nodes.yml b/ansible/cluster_nodes.yml index df73e79..ea58630 100644 --- a/ansible/cluster_nodes.yml +++ b/ansible/cluster_nodes.yml @@ -11,17 +11,8 @@ tags: kv - role: nomad tags: orchestrator - -# UNSAFE!! This section is disabled by default, to run it the flags -t net should be added -# to the ansible playbook command line. -# Reason: when rules.{v4,v6} are changed, the whole iptables configuration is reloaded. -# This creates issues with Docker, which injects its own configuration in iptables when it starts. -# In practice, most (all?) containers will break if rules.{v4,v6} are changed, -# and docker will have to be restared. -- hosts: cluster_nodes - roles: - - role: network - tags: [ net, never ] + - role: network + tags: net # UNSAFE!! This section configures glusterfs. Once done, don't run it ever again as it may break stuff. 
# - role: storage diff --git a/ansible/production b/ansible/production index 6266502..c8f08f2 100644 --- a/ansible/production +++ b/ansible/production @@ -1,4 +1,4 @@ [cluster_nodes] -veterini ansible_host=fbx-rennes2.machine.deuxfleurs.fr ansible_port=110 ansible_user=root public_ip=192.168.1.2 private_ip=192.168.1.2 interface=eno1 -silicareux ansible_host=fbx-rennes2.machine.deuxfleurs.fr ansible_port=111 ansible_user=root public_ip=192.168.1.3 private_ip=192.168.1.3 interface=eno1 -wonse ansible_host=fbx-rennes2.machine.deuxfleurs.fr ansible_port=112 ansible_user=root public_ip=192.168.1.4 private_ip=192.168.1.4 interface=eno1 +veterini ansible_host=fbx-rennes2.machine.deuxfleurs.fr ansible_port=110 ansible_user=root public_ip=192.168.1.2 private_ip=192.168.1.2 interface=eno1 dns_server=80.67.169.40 +silicareux ansible_host=fbx-rennes2.machine.deuxfleurs.fr ansible_port=111 ansible_user=root public_ip=192.168.1.3 private_ip=192.168.1.3 interface=eno1 dns_server=80.67.169.40 +wonse ansible_host=fbx-rennes2.machine.deuxfleurs.fr ansible_port=112 ansible_user=root public_ip=192.168.1.4 private_ip=192.168.1.4 interface=eno1 dns_server=80.67.169.40 diff --git a/ansible/roles/common/tasks/main.yml b/ansible/roles/common/tasks/main.yml index b4d00bb..0b15790 100644 --- a/ansible/roles/common/tasks/main.yml +++ b/ansible/roles/common/tasks/main.yml @@ -12,6 +12,12 @@ autoclean: yes autoremove: yes +- name: "Remove base tools" + apt: + name: + - systemd-resolved + state: absent + - name: "Install base tools" apt: name: diff --git a/ansible/roles/consul/handlers/main.yml b/ansible/roles/consul/handlers/main.yml deleted file mode 100644 index e8cd4a4..0000000 --- a/ansible/roles/consul/handlers/main.yml +++ /dev/null @@ -1,4 +0,0 @@ ---- - -- name: restart consul - service: name=consul state=restarted diff --git a/ansible/roles/consul/tasks/main.yml b/ansible/roles/consul/tasks/main.yml index bb1d9ef..2b77080 100644 --- a/ansible/roles/consul/tasks/main.yml +++ 
b/ansible/roles/consul/tasks/main.yml @@ -1,16 +1,6 @@ - name: "Set consul version" set_fact: - consul_version: 1.7.4 - -- name: "Download and install Consul for armv7l" - unarchive: - src: "https://releases.hashicorp.com/consul/{{ consul_version }}/consul_{{ consul_version }}_linux_arm.zip" - dest: /usr/local/bin - remote_src: yes - when: - - "ansible_architecture == 'armv7l'" - notify: - - restart consul + consul_version: 1.8.0 - name: "Download and install Consul for x86_64" unarchive: @@ -19,31 +9,18 @@ remote_src: yes when: - "ansible_architecture == 'x86_64'" - notify: - - restart consul - -- name: "Download and install Consul for arm64" - unarchive: - src: "https://releases.hashicorp.com/consul/{{ consul_version }}/consul_{{ consul_version }}_linux_arm64.zip" - dest: /usr/local/bin - remote_src: yes - when: - - "ansible_architecture == 'aarch64'" - notify: - - restart consul - name: "Create consul configuration directory" file: path=/etc/consul/ state=directory - name: "Deploy consul configuration" template: src=consul.json.j2 dest=/etc/consul/consul.json - notify: - - restart consul - name: "Deploy consul systemd service" copy: src=consul.service dest=/etc/systemd/system/consul.service - notify: - - restart consul - name: "Enable consul systemd service at boot" service: name=consul state=started enabled=yes daemon_reload=yes + +- name: "Deploy resolv.conf to use Consul" + template: src=resolv.conf.j2 dest=/etc/resolv.conf diff --git a/ansible/roles/consul/templates/consul.json.j2 b/ansible/roles/consul/templates/consul.json.j2 index d1bd2d8..b6c86aa 100644 --- a/ansible/roles/consul/templates/consul.json.j2 +++ b/ansible/roles/consul/templates/consul.json.j2 @@ -17,6 +17,9 @@ "ports": { "dns": 53 }, + "recursors": [ + "{{ dns_server }}" + ], "encrypt": "{{ consul_gossip_encrypt }}", "domain": "2.cluster.deuxfleurs.fr", "performance": { diff --git a/ansible/roles/consul/templates/resolv.conf.j2 b/ansible/roles/consul/templates/resolv.conf.j2 new file mode 
100644 index 0000000..2404034 --- /dev/null +++ b/ansible/roles/consul/templates/resolv.conf.j2 @@ -0,0 +1,2 @@ +nameserver {{ private_ip }} +nameserver {{ dns_server }} diff --git a/ansible/roles/network/files/nsswitch.conf b/ansible/roles/network/files/nsswitch.conf index f4c3149..a84e024 100644 --- a/ansible/roles/network/files/nsswitch.conf +++ b/ansible/roles/network/files/nsswitch.conf @@ -9,8 +9,7 @@ group: files systemd shadow: files gshadow: files -#hosts: files dns -hosts: files mymachines resolve [!UNAVAIL=return] dns myhostname +hosts: files dns networks: files protocols: db files diff --git a/ansible/roles/network/files/systemd-resolve-no-listen.conf b/ansible/roles/network/files/systemd-resolve-no-listen.conf deleted file mode 100644 index 6e95967..0000000 --- a/ansible/roles/network/files/systemd-resolve-no-listen.conf +++ /dev/null @@ -1,2 +0,0 @@ -[Resolve] -DNSStubListener=no diff --git a/ansible/roles/network/handlers/main.yml b/ansible/roles/network/handlers/main.yml deleted file mode 100644 index 3454894..0000000 --- a/ansible/roles/network/handlers/main.yml +++ /dev/null @@ -1,12 +0,0 @@ ---- -- name: reload iptables - shell: iptables-restore < /etc/iptables/rules.v4 && systemctl restart docker && ifdown nomad1 || true && ifup nomad1 || true - -- name: reload ip6tables - shell: ip6tables-restore < /etc/iptables/rules.v6 - -- name: reload nomad interface - shell: ifdown nomad1 || true ; ifup nomad1 - -- name: reload systemd-resolved - service: name=systemd-resolved state=restarted diff --git a/ansible/roles/network/tasks/main.yml b/ansible/roles/network/tasks/main.yml index 7f95b0f..2087765 100644 --- a/ansible/roles/network/tasks/main.yml +++ b/ansible/roles/network/tasks/main.yml @@ -1,42 +1,23 @@ -- name: "Add dummy interface to handle Nomad NAT restriction nomad#2770" - template: src=nomad-interface.j2 dest=/etc/network/interfaces.d/nomad.cfg - when: public_ip != private_ip - notify: - - reload nomad interface - - name: "Deploy iptablesv4 
configuration" template: src=rules.v4.j2 dest=/etc/iptables/rules.v4 - notify: - - reload iptables - name: "Deploy iptablesv6 configuration" copy: src=rules.v6 dest=/etc/iptables/rules.v6 - notify: - - reload ip6tables - name: "Activate IP forwarding" sysctl: name: net.ipv4.ip_forward - value: 1 + value: "1" sysctl_set: yes -- name: "Create systemd-resolved override directory" - file: path=/etc/systemd/resolved.conf.d/ state=directory - -- name: "Prevent systemd-resolved from listening on port 53 (DNS)" - copy: src=systemd-resolve-no-listen.conf dest=/etc/systemd/resolved.conf.d/systemd-resolve-no-listen.conf - notify: reload systemd-resolved - -- name: "Use systemd-resolved as a source for /etc/resolv.conf" - file: - src: "/run/systemd/resolve/resolv.conf" - dest: "/etc/resolv.conf" - state: link - force: yes - notify: reload systemd-resolved - -- name: "Update nsswitch.conf to use systemd-resolved" +# These two lines are used to undo previous config, remove them once it is done +- name: "Update nsswitch.conf to not use systemd-resolved" copy: src=nsswitch.conf dest=/etc/nsswitch.conf -- name: "Flush handlers" - meta: flush_handlers +- name: "Disable systemd-resolved" + systemd: + name: systemd-resolved + state: stopped + enabled: false + + diff --git a/ansible/roles/network/templates/nomad-interface.j2 b/ansible/roles/network/templates/nomad-interface.j2 deleted file mode 100644 index 74e9cd4..0000000 --- a/ansible/roles/network/templates/nomad-interface.j2 +++ /dev/null @@ -1,8 +0,0 @@ -auto nomad1 -iface nomad1 inet manual - pre-up /sbin/ip link add nomad1 type dummy - up /sbin/ip addr add {{ public_ip }} dev nomad1 - up /sbin/iptables -t nat -A PREROUTING -d {{ private_ip }}/32 -j NETMAP --to {{ public_ip }}/32 - down /sbin/iptables -t nat -D PREROUTING -d {{ private_ip }}/32 -j NETMAP --to {{ public_ip }}/32 - post-down /sbin/ip link del nomad1 - diff --git a/ansible/roles/nomad/handlers/main.yml b/ansible/roles/nomad/handlers/main.yml deleted file mode 
100644 index 0274673..0000000 --- a/ansible/roles/nomad/handlers/main.yml +++ /dev/null @@ -1,5 +0,0 @@ ---- - -- name: restart nomad - service: name=nomad state=restarted - diff --git a/ansible/roles/nomad/tasks/main.yml b/ansible/roles/nomad/tasks/main.yml index 0b7b65c..7c73362 100644 --- a/ansible/roles/nomad/tasks/main.yml +++ b/ansible/roles/nomad/tasks/main.yml @@ -1,16 +1,6 @@ - name: "Set nomad version" set_fact: - nomad_version: 0.11.3 - -- name: "Download and install Nomad for armv7l" - unarchive: - src: "https://releases.hashicorp.com/nomad/{{ nomad_version }}/nomad_{{ nomad_version }}_linux_arm.zip" - dest: /usr/local/bin - remote_src: yes - when: - - "ansible_architecture == 'armv7l'" - notify: - - restart nomad + nomad_version: 0.12.0-beta2 - name: "Download and install Nomad for x86_64" unarchive: @@ -19,31 +9,15 @@ remote_src: yes when: - "ansible_architecture == 'x86_64'" - notify: - - restart nomad - -- name: "Download and install Nomad for arm64" - unarchive: - src: "https://releases.hashicorp.com/nomad/{{ nomad_version }}/nomad_{{ nomad_version }}_linux_arm64.zip" - dest: /usr/local/bin - remote_src: yes - when: - - "ansible_architecture == 'aarch64'" - notify: - - restart nomad - name: "Create Nomad configuration directory" file: path=/etc/nomad/ state=directory - name: "Deploy Nomad configuration" template: src=nomad.hcl.j2 dest=/etc/nomad/nomad.hcl - notify: - - restart nomad - name: "Deploy Nomad systemd service" copy: src=nomad.service dest=/etc/systemd/system/nomad.service - notify: - - restart nomad - name: "Enable Nomad systemd service at boot" service: name=nomad state=started enabled=yes daemon_reload=yes diff --git a/ansible/roles/nomad/templates/nomad.hcl.j2 b/ansible/roles/nomad/templates/nomad.hcl.j2 index 8107410..b0be6a8 100644 --- a/ansible/roles/nomad/templates/nomad.hcl.j2 +++ b/ansible/roles/nomad/templates/nomad.hcl.j2 @@ -26,5 +26,9 @@ client { #cpu_total_compute = 4000 servers = ["127.0.0.1:4648"] network_interface = "{{ 
interface }}" + options { + docker.privileged.enabled = "true" + docker.volumes.enabled = "true" + } } diff --git a/nomad/seafile.hcl b/nomad/seafile.hcl index f118999..9c26df2 100644 --- a/nomad/seafile.hcl +++ b/nomad/seafile.hcl @@ -24,8 +24,15 @@ job "seafile" { seafhttp_port = 8082 } + mounts = [ + { + type = "bind" + source = "/mnt/glusterfs/seafile" + target = "/mnt/seafile-data" + } + ] + volumes = [ - "/mnt/glusterfs/seafile:/mnt/seafile-data", "secrets/conf:/srv/webstore/conf", "secrets/ccnet:/srv/webstore/ccnet" ]