Replace the static wg-quick mesh with wgautomesh.

Drops the per-node lan_endpoint settings, adds the nix/wgautomesh.nix module,
and wires it into nix/deuxfleurs.nix and deploy_nixos.

NOTE(review): this patch was reconstructed from a whitespace-collapsed copy.
Hunk line counts were re-checked, but indentation and the position of blank
context lines are best-effort -- verify against the tree before applying.
The "index" hashes of nix/deuxfleurs.nix and nix/wgautomesh.nix predate the
review fixes made to those two files.

diff --git a/cluster/prod/cluster.nix b/cluster/prod/cluster.nix
index 802d084..44c376e 100644
--- a/cluster/prod/cluster.nix
+++ b/cluster/prod/cluster.nix
@@ -13,7 +13,6 @@
       site_name = "neptune";
       publicKey = "VvXT0fPDfWsHxumZqVShpS33dJQAdpJ1E79ZbCBJP34=";
       IP = "10.83.1.1";
-      lan_endpoint = "192.168.1.31:33799";
       endpoint = "77.207.15.215:33731";
     }
     {
@@ -21,7 +20,6 @@
       site_name = "neptune";
       publicKey = "goTkBJGmzrGDOAjUcdH9G0JekipqSMoaYQdB6IHnzi0=";
       IP = "10.83.1.2";
-      lan_endpoint = "192.168.1.32:33799";
       endpoint = "77.207.15.215:33732";
     }
     {
@@ -29,7 +27,6 @@
       site_name = "neptune";
       publicKey = "oZDAb8LoLW87ktUHyFFec0VaIar97bqq47mGbdVqJ0U=";
       IP = "10.83.1.3";
-      lan_endpoint = "192.168.1.33:33799";
       endpoint = "77.207.15.215:33733";
     }
     {
@@ -37,7 +34,6 @@
       site_name = "orion";
       publicKey = "EtRoWBYCdjqgXX0L+uWLg8KxNfIK8k9OTh30tL19bXU=";
       IP = "10.83.2.1";
-      lan_endpoint = "192.168.1.11:33799";
       endpoint = "82.66.80.201:33731";
     }
     {
@@ -45,7 +41,6 @@
       site_name = "orion";
       publicKey = "HbLC938mysadMSOxWgq8+qrv+dBKzPP/43OMJp/3phA=";
       IP = "10.83.2.2";
-      lan_endpoint = "192.168.1.12:33799";
       endpoint = "82.66.80.201:33732";
     }
     {
@@ -53,7 +48,6 @@
       site_name = "orion";
       publicKey = "e1C8jgTj9eD20ywG08G1FQZ+Js3wMK/msDUE1wO3l1Y=";
       IP = "10.83.2.3";
-      lan_endpoint = "192.168.1.13:33799";
       endpoint = "82.66.80.201:33733";
     }
     {
@@ -61,7 +55,6 @@
       site_name = "bespin";
       publicKey = "bIjxey/VhBgVrLa0FxN/KISOt2XFmQeSh1MPivUq9gg=";
       IP = "10.83.3.1";
-      lan_endpoint = "192.168.5.117:33799";
       endpoint = "109.136.55.235:33731";
     }
     {
@@ -69,7 +62,6 @@
       site_name = "bespin";
       publicKey = "pUIKv8UBl586O7DBrHBsb9BgNU7WlYQ2r2RSNkD+JAQ=";
       IP = "10.83.3.2";
-      lan_endpoint = "192.168.5.134:33799";
       endpoint = "109.136.55.235:33732";
     }
     {
@@ -77,7 +69,6 @@
       site_name = "bespin";
       publicKey = "VBmpo15iIJP7250NAsF+ryhZc3j+8TZFnE1Djvn5TXI=";
       IP = "10.83.3.3";
-      lan_endpoint = "192.168.5.116:33799";
       endpoint = "109.136.55.235:33733";
     }
     {
@@ -85,7 +76,6 @@
       site_name = "scorpio";
       publicKey = "Sm9cmNZ/BfWVPFflMO+fuyiera4r203b/dKhHTQmBFg=";
       IP = "10.83.4.1";
-      lan_endpoint = "192.168.1.41:33799";
       endpoint = "82.65.41.110:33741";
     }
   ];
diff --git a/cluster/staging/cluster.nix b/cluster/staging/cluster.nix
index 94ea43b..cf30d6e 100644
--- a/cluster/staging/cluster.nix
+++ b/cluster/staging/cluster.nix
@@ -13,7 +13,6 @@
       site_name = "neptune";
       publicKey = "7Nm7pMmyS7Nts1MB+loyD8u84ODxHPTkDu+uqQR6yDk=";
       IP = "10.14.1.2";
-      lan_endpoint = "192.168.1.22:33799";
       endpoint = "77.207.15.215:33722";
     }
     {
@@ -21,7 +20,6 @@
       site_name = "neptune";
       publicKey = "lABn/axzD1jkFulX8c+K3B3CbKXORlIMDDoe8sQVxhs=";
       IP = "10.14.1.3";
-      lan_endpoint = "192.168.1.23:33799";
       endpoint = "77.207.15.215:33723";
     }
     {
@@ -29,7 +27,6 @@
       site_name = "jupiter";
       publicKey = "smBQYUS60JDkNoqkTT7TgbpqFiM43005fcrT6472llI=";
       IP = "10.14.2.33";
-      lan_endpoint = "192.168.1.33:33799";
       endpoint = "82.64.238.84:33733";
     }
     {
@@ -37,18 +34,17 @@
       site_name = "corrin";
       publicKey = "m9rLf+233X1VColmeVrM/xfDGro5W6Gk5N0zqcf32WY=";
       IP = "10.14.3.1";
-      lan_endpoint = "192.168.1.25:33799";
-      endpoint = "82.120.233.78:33721";
+      #endpoint = "82.120.233.78:33721";
     }
     {
       hostname = "df-pw5";
       site_name = "bespin";
       publicKey = "XLOYoMXF+PO4jcgfSVAk+thh4VmWx0wzWnb0xs08G1s=";
       IP = "10.14.4.1";
-      lan_endpoint = "192.168.5.130:33799";
       endpoint = "bitfrost.fiber.shirokumo.net:33734";
     }
   ];
+  services.wgautomesh.logLevel = "debug";
 
   # Bootstrap IPs for Consul cluster,
   # these are IPs on the Wireguard overlay
diff --git a/cluster/staging/known_hosts b/cluster/staging/known_hosts
index 5f0a144..0cb04f2 100644
--- a/cluster/staging/known_hosts
+++ b/cluster/staging/known_hosts
@@ -8,3 +8,4 @@ piranha.polyno.me,2a01:cb05:8984:3c00:223:24ff:feb0:ea82 ssh-ed25519 AAAAC3NzaC1
 2001:910:1204:1::21 ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAIPXTUrXRFhudJBESCqjHCOttzqYPyIzpPOMkI8+SwLRx
 2a01:e0a:5e4:1d0:223:24ff:feaf:fdec ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAIAsZas74RT6lCZwuUOPR23nPdbSdpWORyAmRgjoiMVHK
 df-pw5.machine.deuxfleurs.fr ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAIK/dJIxioCkfeehxeGiZR7qquYGoqEH/YrRJ/ukEcaLH
+10.14.3.1 ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAIJnpO6zpLWsyyugOoOj+2bUow9TUrcWgURFGGaoyu+co
diff --git a/cluster/staging/ssh_config b/cluster/staging/ssh_config
index b1a02e8..3043207 100644
--- a/cluster/staging/ssh_config
+++ b/cluster/staging/ssh_config
@@ -10,7 +10,9 @@ Host origan
     HostName origan.df.trinity.fr.eu.org
 
 Host piranha
-    HostName piranha.polyno.me
+    ProxyJump caribou.machine.deuxfleurs.fr
+    HostName 10.14.3.1
+    #HostName piranha.polyno.me
 
 Host df-pw5
     HostName df-pw5.machine.deuxfleurs.fr
diff --git a/deploy_nixos b/deploy_nixos
index 2b4235a..b716993 100755
--- a/deploy_nixos
+++ b/deploy_nixos
@@ -7,6 +7,10 @@ copy cluster/$CLUSTER/cluster.nix /etc/nixos/cluster.nix
 copy cluster/$CLUSTER/node/$NIXHOST.nix /etc/nixos/node.nix
 copy cluster/$CLUSTER/node/$NIXHOST.site.nix /etc/nixos/site.nix
 
+cmd mkdir -p /var/lib/wgautomesh
+write_pass deuxfleurs/cluster/$CLUSTER/wgautomesh_gossip_secret /var/lib/wgautomesh/gossip_secret
+copy nix/wgautomesh.nix /etc/nixos/wgautomesh.nix
+
 if [ "$CLUSTER" = "staging" ]; then
     copy nix/nomad-driver-nix2.nix /etc/nixos/nomad-driver-nix2.nix
 fi
@@ -14,6 +18,9 @@ fi
 if [ "$CLUSTER" = "prod" ]; then
     cmd nixos-rebuild boot
     message "-------------------------------------------------------------------------------------"
+    message "NIXOS CHANGES THAT WOULD BE APPLIED UPON SWITCH OR REBOOT:"
+    cmd nixos-rebuild dry-activate
+    message "-------------------------------------------------------------------------------------"
     message "New NixOS configuration hasn't been applied, to avoid disturbing production services."
     message "Please apply the following procedure to node '$NIXHOST':"
     message "1. Drain node in Nomad so that all jobs are relocated elsewhere"
diff --git a/doc/ports b/doc/ports
index c8c88dd..73fdb48 100644
--- a/doc/ports
+++ b/doc/ports
@@ -16,6 +16,7 @@ ports so that we can avoid conflicts when adding services.
 587 postfix submission
 993 dovecot imaps
 1337 dovecot zauthoentication
+1666 wgautomesh
 1728 nix-serve
 3719 grafana
 3900 garage S3 (prod)
diff --git a/nix/configuration.nix b/nix/configuration.nix
index 0b07056..9d3169b 100644
--- a/nix/configuration.nix
+++ b/nix/configuration.nix
@@ -84,5 +84,8 @@ SystemMaxUse=1G
         dns = [ "172.17.0.1" ];
       })}";
   };
+
+  nix.gc.automatic = true;
+  nix.gc.options = "--delete-older-than 30d";
 }
 
diff --git a/nix/deuxfleurs.nix b/nix/deuxfleurs.nix
index 3a94860..f7b70d7 100644
--- a/nix/deuxfleurs.nix
+++ b/nix/deuxfleurs.nix
@@ -28,12 +28,8 @@ in
         };
         endpoint = mkOption {
           type = nullOr str;
-          description = "Wireguard endpoint on the public Internet";
-        };
-        lan_endpoint = mkOption {
-          type = nullOr str;
-          description = "Wireguard endpoint for nodes in the same site";
           default = null;
+          description = "Wireguard endpoint on the public Internet";
         };
       };
     };
@@ -134,6 +130,10 @@ in
     };
   };
 
+  imports = [
+    ./wgautomesh.nix
+  ];
+
   config =
     let node_meta = {
       "site" = cfg.site_name;
@@ -147,6 +147,7 @@ in
       else {});
     in
   {
+
     # Configure admin accounts on all nodes
     users.users = builtins.mapAttrs (name: publicKeys: {
       isNormalUser = true;
@@ -233,20 +234,45 @@ in
     services.resolved.enable = false;
 
     # Configure Wireguard VPN between all nodes
-    systemd.services."wg-quick-wg0".after = [ "unbound.service" ];
-    networking.wg-quick.interfaces.wg0 = {
-      address = [ "${cfg.cluster_ip}/16" ];
+    networking.wireguard.interfaces.wg0 = {
+      ips = [ "${cfg.cluster_ip}/16" ];
       listenPort = cfg.wireguard_port;
       privateKeyFile = "/var/lib/deuxfleurs/wireguard-keys/private";
       mtu = 1420;
-      peers = map ({ publicKey, endpoint, IP, site_name, lan_endpoint, ... }: {
-        publicKey = publicKey;
-        allowedIPs = [ "${IP}/32" ];
-        endpoint = if site_name != null && site_name == cfg.site_name && lan_endpoint != null
-                   then lan_endpoint else endpoint;
-        persistentKeepalive = 25;
+    };
+    services.wgautomesh = {
+      enable = true;
+      interface = "wg0";
+      gossipPort = 1666;
+      gossipSecretFile = "/var/lib/wgautomesh/gossip_secret";
+      persistFile = "/var/lib/wgautomesh/state";
+      # Forward the port of this node's own public endpoint (text after the last ":"), if it has one.
+      upnpForwardPublicPort =
+        let
+          us = filter ({ hostname, ...}: hostname == config.networking.hostName) cfg.cluster_nodes;
+        in
+        if length us > 0 && (head us).endpoint != null then
+          strings.toInt (lists.last (split ":" (head us).endpoint))
+        else null;
+      peers = map ({ publicKey, endpoint, IP, ... }: {
+        address = IP;
+        pubkey = publicKey;
+        endpoint = endpoint;
       }) cfg.cluster_nodes;
     };
+    # Old code for wg-quick, we can use this as a fallback if we fail to make wgautomesh work
+    # systemd.services."wg-quick-wg0".after = [ "unbound.service" ];
+    # networking.wg-quick.interfaces.wg0 = {
+    #   address = [ "${cfg.cluster_ip}/16" ];
+    #   listenPort = cfg.wireguard_port;
+    #   privateKeyFile = "/var/lib/deuxfleurs/wireguard-keys/private";
+    #   mtu = 1420;
+    #   peers = map ({ publicKey, endpoint, IP, ... }: {
+    #     inherit publicKey endpoint;
+    #     allowedIPs = [ "${IP}/32" ];
+    #     persistentKeepalive = 25;
+    #   }) cfg.cluster_nodes;
+    # };
 
     system.activationScripts.generate_df_wg_key = ''
       if [ ! -f /var/lib/deuxfleurs/wireguard-keys/private ]; then
@@ -303,7 +329,7 @@ in
 
     services.nomad.enable = true;
-    systemd.services.nomad.after = [ "wg-quick-wg0.service" ];
-    services.nomad.package = pkgs.nomad_1_3;
+    systemd.services.nomad.after = [ "wireguard-wg0.service" ];
+    services.nomad.package = pkgs.nomad_1_4;
     services.nomad.extraPackages = [
       pkgs.glibc
       pkgs.zstd
diff --git a/nix/wgautomesh.nix b/nix/wgautomesh.nix
new file mode 100644
index 0000000..55aa73f
--- /dev/null
+++ b/nix/wgautomesh.nix
@@ -0,0 +1,138 @@
+let
+  # Tarball fetched at evaluation time; ExecStart below runs bin/wgautomesh from it.
+  wgautomesh = builtins.fetchTarball {
+    url = "https://git.deuxfleurs.fr/attachments/ce203833-1ae7-43d4-9bf4-b49b560c9f4b";
+    sha256 = "sha256:1kc990s7xkwff53vs6c3slg7ljsyr9xz1i13j61ivfj6djyh8rmj";
+  };
+in
+
+{ lib, config, pkgs, ... }:
+with lib;
+let
+  cfg = config.services.wgautomesh;
+in
+  with builtins;
+  {
+    options.services.wgautomesh = {
+      enable = mkEnableOption "wgautomesh";
+      logLevel = mkOption {
+        type = types.enum [ "trace" "debug" "info" "warn" "error" ];
+        default = "info";
+        description = "wgautomesh log level (trace/debug/info/warn/error)";
+      };
+      interface = mkOption {
+        type = types.str;
+        description = "Wireguard interface to manage";
+      };
+      gossipPort = mkOption {
+        type = types.port;
+        description = "wgautomesh gossip port";
+      };
+      gossipSecretFile = mkOption {
+        type = types.nullOr types.str;
+        default = null;
+        description = "File containing the gossip secret encryption key";
+      };
+      persistFile = mkOption {
+        type = types.nullOr types.str;
+        default = null;
+        description = "Path where to persist known peer addresses";
+      };
+      lanDiscovery = mkOption {
+        type = types.bool;
+        default = true;
+        description = "Enable discovery using LAN broadcast";
+      };
+      openFirewall = mkOption {
+        type = types.bool;
+        default = true;
+        description = "Automatically open gossip port in firewall";
+      };
+      upnpForwardPublicPort = mkOption {
+        type = types.nullOr types.port;
+        default = null;
+        description = "Public port number to try to redirect to this machine using UPnP IGD";
+      };
+      peers = mkOption {
+        type = types.listOf (types.submodule {
+          options = {
+            pubkey = mkOption {
+              type = types.str;
+              description = "Wireguard public key";
+            };
+            address = mkOption {
+              type = types.str;
+              description = "Wireguard peer address";
+            };
+            endpoint = mkOption {
+              type = types.nullOr types.str;
+              default = null;
+              description = "bootstrap endpoint";
+            };
+          };
+        });
+        description = "wgautomesh peer list";
+      };
+    };
+
+    config = mkIf cfg.enable (
+    let
+      # One TOML [[peers]] table per configured peer; the endpoint line is left empty when unset.
+      peerDefs = map (peer:
+        let endpointDef = if peer.endpoint == null then ""
+          else ''endpoint = "${peer.endpoint}"'';
+        in
+        ''
+        [[peers]]
+        pubkey = "${peer.pubkey}"
+        address = "${peer.address}"
+        ${endpointDef}
+        '') cfg.peers;
+      # Optional top-level TOML keys, emitted only when the matching option is set.
+      extraDefs = (if cfg.lanDiscovery then ["lan_discovery = true"] else [])
+        ++ (if (cfg.gossipSecretFile != null)
+          then [''gossip_secret_file = "${cfg.gossipSecretFile}"''] else [])
+        ++ (if (cfg.persistFile != null)
+          then [''persist_file = "${cfg.persistFile}"''] else [])
+        ++ (if (cfg.upnpForwardPublicPort != null)
+          then [''upnp_forward_external_port = ${toString cfg.upnpForwardPublicPort}''] else []);
+      configfile = pkgs.writeText "wgautomesh.toml" ''
+        interface = "${cfg.interface}"
+        gossip_port = ${toString cfg.gossipPort}
+        ${concatStringsSep "\n" extraDefs}
+
+        ${concatStringsSep "\n" peerDefs}
+        '';
+    in {
+      systemd.services.wgautomesh = {
+        enable = true;
+        path = [ pkgs.wireguard-tools ];
+        environment = {
+          RUST_LOG = "wgautomesh=${cfg.logLevel}";
+        };
+        description = "wgautomesh";
+        serviceConfig = {
+          Type = "simple";
+
+          ExecStart = "${wgautomesh}/bin/wgautomesh ${configfile}";
+          Restart = "always";
+          RestartSec = "30";
+
+          # The "+" prefix runs chown with full privileges, handing the secret to the service user.
+          # Follows cfg.gossipSecretFile and is skipped entirely when no secret is configured.
+          ExecStartPre = optional (cfg.gossipSecretFile != null)
+            "+${pkgs.coreutils}/bin/chown wgautomesh ${cfg.gossipSecretFile}";
+
+          DynamicUser = true;
+          User = "wgautomesh";
+          StateDirectory = "wgautomesh";
+          StateDirectoryMode = "0700";
+          AmbientCapabilities = "CAP_NET_ADMIN";
+          CapabilityBoundingSet = "CAP_NET_ADMIN";
+        };
+        wantedBy = [ "multi-user.target" ];
+      };
+      networking.firewall.allowedUDPPorts = mkIf cfg.openFirewall [ cfg.gossipPort ];
+    });
+  }
+