From c55f465ba1f369852ab4122a9fa42c85b4a571de Mon Sep 17 00:00:00 2001 From: oddlama Date: Thu, 11 Jan 2024 02:56:19 +0100 Subject: [PATCH] feat: add unified microvm & container definition; add net, misc, disko lib extension --- README.md | 3 + flake.lock | 14 + flake.nix | 10 +- lib/default.nix | 17 ++ lib/disko.nix | 89 ++++++ lib/misc.nix | 84 ++++++ lib/net.nix | 361 +++++++++++++++++++++++++ lib/types.nix | 52 ++++ modules/default.nix | 12 +- modules/guests/common-guest-config.nix | 27 ++ modules/guests/container.nix | 64 +++++ modules/guests/default.nix | 281 +++++++++++++++++++ modules/guests/microvm.nix | 82 ++++++ 13 files changed, 1093 insertions(+), 3 deletions(-) create mode 100644 lib/default.nix create mode 100644 lib/disko.nix create mode 100644 lib/misc.nix create mode 100644 lib/net.nix create mode 100644 lib/types.nix create mode 100644 modules/guests/common-guest-config.nix create mode 100644 modules/guests/container.nix create mode 100644 modules/guests/default.nix create mode 100644 modules/guests/microvm.nix diff --git a/README.md b/README.md index 5bd5ae0..420406a 100644 --- a/README.md +++ b/README.md @@ -35,3 +35,6 @@ Certain modules may require the use of additional flakes. In particular you migh - [impermanence](https://github.com/nix-community/impermanence) - [agenix](https://github.com/ryantm/agenix) - [agenix-rekey](https://github.com/oddlama/agenix-rekey) +- [microvm.nix](https://github.com/astro/microvm.nix) + +You also must have a `specialArgs.inputs` that refers to all of your flake's inputs. 
diff --git a/flake.lock b/flake.lock index b076c63..c6bfe05 100644 --- a/flake.lock +++ b/flake.lock @@ -76,6 +76,19 @@ "type": "github" } }, + "lib-net": { + "flake": false, + "locked": { + "lastModified": 1596309860, + "narHash": "sha256-izAzepR/6cDvnRfaa2ceSolMLMwqzQB5x9q62aR5J2g=", + "type": "tarball", + "url": "https://gist.github.com/duairc/5c9bb3c922e5d501a1edb9e7b3b845ba/archive/3885f7cd9ed0a746a9d675da6f265d41e9fd6704.tar.gz" + }, + "original": { + "type": "tarball", + "url": "https://gist.github.com/duairc/5c9bb3c922e5d501a1edb9e7b3b845ba/archive/3885f7cd9ed0a746a9d675da6f265d41e9fd6704.tar.gz" + } + }, "nixpkgs": { "locked": { "lastModified": 1703013332, @@ -138,6 +151,7 @@ "inputs": { "devshell": "devshell", "flake-utils": "flake-utils", + "lib-net": "lib-net", "nixpkgs": "nixpkgs", "pre-commit-hooks": "pre-commit-hooks" } diff --git a/flake.nix b/flake.nix index c79bbba..257bd02 100644 --- a/flake.nix +++ b/flake.nix @@ -6,6 +6,12 @@ }; flake-utils.url = "github:numtide/flake-utils"; + + lib-net = { + url = "https://gist.github.com/duairc/5c9bb3c922e5d501a1edb9e7b3b845ba/archive/3885f7cd9ed0a746a9d675da6f265d41e9fd6704.tar.gz"; + flake = false; + }; + nixpkgs.url = "github:NixOS/nixpkgs/nixos-unstable"; pre-commit-hooks = { @@ -23,12 +29,14 @@ devshell, pre-commit-hooks, ... 
- }: + } @ inputs: { nixosModules.extra-modules = import ./modules; nixosModules.default = self.nixosModules.extra-modules; homeManagerModules.extra-modules = import ./hm-modules; homeManagerModules.default = self.homeManagerModules.extra-modules; + overlays.extra-modules = import ./lib inputs; + overlays.default = self.overlays.extra-modules; } // flake-utils.lib.eachDefaultSystem (system: rec { pkgs = import nixpkgs { diff --git a/lib/default.nix b/lib/default.nix new file mode 100644 index 0000000..9b4de49 --- /dev/null +++ b/lib/default.nix @@ -0,0 +1,17 @@ +inputs: final: prev: +prev.lib.composeManyExtensions ( + # Order is important to allow using prev instead of final in more places to + # speed up evaluation. + map (x: import x inputs) [ + # No dependencies + ./types.nix + # No dependencies + ./misc.nix + # No dependencies + ./disko.nix + # Requires misc + ./net.nix + ] +) +final +prev diff --git a/lib/disko.nix b/lib/disko.nix new file mode 100644 index 0000000..91fc188 --- /dev/null +++ b/lib/disko.nix @@ -0,0 +1,89 @@ +_inputs: final: prev: { + lib = + prev.lib + // { + disko = { + content = { + luksZfs = luksName: pool: { + type = "luks"; + name = "${pool}_${luksName}"; + settings.allowDiscards = true; + content = { + type = "zfs"; + inherit pool; + }; + }; + }; + gpt = { + partGrub = name: start: end: { + inherit name start end; + part-type = "primary"; + flags = ["bios_grub"]; + }; + partEfi = name: start: end: { + inherit name start end; + fs-type = "fat32"; + bootable = true; + content = { + type = "filesystem"; + format = "vfat"; + mountpoint = "/boot"; + }; + }; + partSwap = name: start: end: { + inherit name start end; + fs-type = "linux-swap"; + content = { + type = "swap"; + randomEncryption = true; + }; + }; + partLuksZfs = luksName: pool: start: end: { + inherit start end; + name = "${pool}_${luksName}"; + content = final.lib.disko.content.luksZfs luksName pool; + }; + }; + zfs = rec { + mkZpool = prev.lib.recursiveUpdate { + type = "zpool"; 
+ rootFsOptions = { + compression = "zstd"; + acltype = "posix"; + atime = "off"; + xattr = "sa"; + dnodesize = "auto"; + mountpoint = "none"; + canmount = "off"; + devices = "off"; + }; + options.ashift = "12"; + }; + + impermanenceZfsDatasets = { + "local" = unmountable; + "local/root" = + filesystem "/" + // { + postCreateHook = "zfs snapshot rpool/local/root@blank"; + }; + "local/nix" = filesystem "/nix"; + "local/state" = filesystem "/state"; + "safe" = unmountable; + "safe/persist" = filesystem "/persist"; + }; + + unmountable = {type = "zfs_fs";}; + filesystem = mountpoint: { + type = "zfs_fs"; + options = { + canmount = "noauto"; + inherit mountpoint; + }; + # Required to add dependencies for initrd + inherit mountpoint; + }; + }; + }; + }; +} diff --git a/lib/misc.nix b/lib/misc.nix new file mode 100644 index 0000000..70d667b --- /dev/null +++ b/lib/misc.nix @@ -0,0 +1,84 @@ +_inputs: _final: prev: let + inherit + (prev.lib) + filter + foldl' + genAttrs + genList + mergeAttrs + mkMerge + stringToCharacters + substring + unique + ; + + # Counts how often each element occurs in xs. + # Elements must be strings. + countOccurrences = + foldl' + (acc: x: acc // {${x} = (acc.${x} or 0) + 1;}) + {}; + + # Returns all elements in xs that occur at least twice + duplicates = xs: let + occurrences = countOccurrences xs; + in + unique (filter (x: occurrences.${x} > 1) xs); + + # Concatenates all given attrsets as if calling a // b in order. + concatAttrs = foldl' mergeAttrs {}; + + # True if the path or string starts with / + isAbsolutePath = x: substring 0 1 x == "/"; + + # Merges all given attributes from the given attrsets using mkMerge. + # Useful to merge several top-level configs in a module. 
+ mergeToplevelConfigs = keys: attrs: + genAttrs keys (attr: mkMerge (map (x: x.${attr} or {}) attrs)); + + # Calculates base^exp, but careful, this overflows for results > 2^62 + pow = base: exp: foldl' (a: x: x * a) 1 (genList (_: base) exp); + + hexLiteralValues = { + "0" = 0; + "1" = 1; + "2" = 2; + "3" = 3; + "4" = 4; + "5" = 5; + "6" = 6; + "7" = 7; + "8" = 8; + "9" = 9; + "a" = 10; + "b" = 11; + "c" = 12; + "d" = 13; + "e" = 14; + "f" = 15; + "A" = 10; + "B" = 11; + "C" = 12; + "D" = 13; + "E" = 14; + "F" = 15; + }; + + # Converts the given hex string to an integer. Only reliable for inputs in [0, 2^63), + # after that the sign bit will overflow. + hexToDec = v: foldl' (acc: x: acc * 16 + hexLiteralValues.${x}) 0 (stringToCharacters v); +in { + lib = + prev.lib + // { + inherit + concatAttrs + countOccurrences + duplicates + hexToDec + isAbsolutePath + mergeToplevelConfigs + pow + ; + }; +} diff --git a/lib/net.nix b/lib/net.nix new file mode 100644 index 0000000..3713f09 --- /dev/null +++ b/lib/net.nix @@ -0,0 +1,361 @@ +inputs: _final: prev: let + inherit + (prev.lib) + all + any + assertMsg + elem + filter + flip + foldl' + hasInfix + head + min + partition + range + recursiveUpdate + reverseList + splitString + substring + unique + warnIf + ; + + # From misc.nix + inherit + (prev.lib) + hexToDec + pow + ; + + # IP address math library + # https://gist.github.com/duairc/5c9bb3c922e5d501a1edb9e7b3b845ba + # Plus some extensions by us + libNet = + (import "${inputs.lib-net}/net.nix" { + inherit (inputs.nixpkgs) lib; + }) + .lib + .net; +in { + lib = recursiveUpdate prev.lib { + net = recursiveUpdate (removeAttrs libNet ["types"]) { + cidr = rec { + # host :: (ip | mac | integer) -> cidr -> ip + # + # Wrapper that extends the original host function to + # check whether the argument `n` is in-range for the given cidr. 
+ # + # Examples: + # + # > net.cidr.host 255 "192.168.1.0/24" + # "192.168.1.255" + # > net.cidr.host (256) "192.168.1.0/24" + # + # > net.cidr.host (-1) "192.168.1.0/24" + # "192.168.1.255" + # > net.cidr.host (-256) "192.168.1.0/24" + # "192.168.1.0" + # > net.cidr.host (-257) "192.168.1.0/24" + # + host = i: n: let + cap = libNet.cidr.capacity n; + in + assert assertMsg (i >= (-cap) && i < cap) "The host ${toString i} lies outside of ${n}"; + libNet.cidr.host i n; + # hostCidr :: (ip | mac | integer) -> cidr -> cidr + # + # Returns the nth host in the given cidr range (like cidr.host) + # but as a cidr that retains the original prefix length. + # + # Examples: + # + # > net.cidr.hostCidr 2 "192.168.1.0/24" + # "192.168.1.2/24" + hostCidr = n: x: "${libNet.cidr.host n x}/${toString (libNet.cidr.length x)}"; + # ip :: (cidr | ip) -> ip + # + # Returns just the ip part of the cidr. + # + # Examples: + # + # > net.cidr.ip "192.168.1.100/24" + # "192.168.1.100" + # > net.cidr.ip "192.168.1.100" + # "192.168.1.100" + ip = x: head (splitString "/" x); + # canonicalize :: cidr -> cidr + # + # Replaces the ip of the cidr with the canonical network address + # (first contained address in range) + # + # Examples: + # + # > net.cidr.canonicalize "192.168.1.100/24" + # "192.168.1.0/24" + canonicalize = x: libNet.cidr.make (libNet.cidr.length x) (ip x); + # mergev4 :: [cidrv4 | ipv4] -> (cidrv4 | null) + # + # Returns the smallest cidr network that includes all given networks. + # If no cidr mask is given, /32 is assumed. + # + # Examples: + # + # > net.cidr.mergev4 ["192.168.1.1/24" "192.168.6.1/32"] + # "192.168.0.0/21" + mergev4 = addrs_: let + # Append /32 if necessary + addrs = map (x: + if hasInfix "/" x + then x + else "${x}/32") + addrs_; + # The smallest occurring length is the first we need to start checking, since + # any greater cidr length represents a smaller address range which + # wouldn't contain all of the original addresses. 
+ startLength = foldl' min 32 (map libNet.cidr.length addrs); + possibleLengths = reverseList (range 0 startLength); + # The first ip address will be "expanded" in cidr length until it covers all other + # used addresses. + firstIp = ip (head addrs); + # Return the first (i.e. greatest length -> smallest prefix) cidr length + # in the list that covers all used addresses + bestLength = head (filter + # All given addresses must be contained by the generated address. + (len: + all (x: + libNet.cidr.contains + (ip x) + (libNet.cidr.make len firstIp)) + addrs) + possibleLengths); + in + assert assertMsg (!any (hasInfix ":") addrs) "mergev4 cannot operate on ipv6 addresses"; + if addrs == [] + then null + else libNet.cidr.make bestLength firstIp; + # mergev6 :: [cidrv6 | ipv6] -> (cidrv6 | null) + # + # Returns the smallest cidr network that includes all given networks. + # If no cidr mask is given, /128 is assumed. + # + # Examples: + # + # > net.cidr.mergev6 ["fd00:dead:cafe::/64" "fd00:fd12:3456:7890::/56"] + # "fd00:c000::/18" + mergev6 = addrs_: let + # Append /128 if necessary + addrs = map (x: + if hasInfix "/" x + then x + else "${x}/128") + addrs_; + # The smallest occurring length is the first we need to start checking, since + # any greater cidr length represents a smaller address range which + # wouldn't contain all of the original addresses. + startLength = foldl' min 128 (map libNet.cidr.length addrs); + possibleLengths = reverseList (range 0 startLength); + # The first ip address will be "expanded" in cidr length until it covers all other + # used addresses. + firstIp = ip (head addrs); + # Return the first (i.e. greatest length -> smallest prefix) cidr length + # in the list that covers all used addresses + bestLength = head (filter + # All given addresses must be contained by the generated address. 
+ (len: + all (x: + libNet.cidr.contains + (ip x) + (libNet.cidr.make len firstIp)) + addrs) + possibleLengths); + in + assert assertMsg (all (hasInfix ":") addrs) "mergev6 cannot operate on ipv4 addresses"; + if addrs == [] + then null + else libNet.cidr.make bestLength firstIp; + # merge :: [cidr] -> { cidrv4 = (cidrv4 | null); cidrv6 = (cidrv6 | null); } + # + # Returns the smallest cidr network that includes all given networks, + # but yields two separate results for all given ipv4 and ipv6 addresses. + # Equivalent to calling mergev4 and mergev6 on a partition individually. + merge = addrs: let + v4_and_v6 = partition (hasInfix ":") addrs; + in { + cidrv4 = mergev4 v4_and_v6.wrong; + cidrv6 = mergev6 v4_and_v6.right; + }; + # assignIps :: cidr -> [int | ip] -> [string] -> { hostname = ip; } + # + # Assigns a semi-stable ip address from the given cidr network to each hostname. + # The algorithm is based on hashing (abusing sha256) with linear probing. + # The order of hosts doesn't matter. No ip (or offset) from the reserved list + # will be assigned. The network address and broadcast address will always be reserved + # automatically. + # + # Examples: + # + # > net.cidr.assignIps "192.168.100.1/24" [] ["a" "b" "c"] + # { a = "192.168.100.202"; b = "192.168.100.74"; c = "192.168.100.226"; } + # + # > net.cidr.assignIps "192.168.100.1/24" [] ["a" "b" "c" "a-new-elem"] + # { a = "192.168.100.202"; a-new-elem = "192.168.100.88"; b = "192.168.100.74"; c = "192.168.100.226"; } + # + # > net.cidr.assignIps "192.168.100.1/24" [202 "192.168.100.74"] ["a" "b" "c"] + # { a = "192.168.100.203"; b = "192.168.100.75"; c = "192.168.100.226"; } + assignIps = net: reserved: hosts: let + cidrSize = libNet.cidr.size net; + capacity = libNet.cidr.capacity net; + # The base address of the network. Used to convert ip-based reservations to offsets + baseAddr = host 0 net; + # Reserve some values for the network, host and broadcast address. 
+ # The network and broadcast address should never be used, and we + # want to reserve the host address for the host. We also convert + # any ips to offsets here. + init = unique ( + [0 (capacity - 1)] + ++ flip map reserved (x: + if builtins.typeOf x == "int" + then x + else -(libNet.ip.diff baseAddr x)) + ); + nHosts = builtins.length hosts; + nInit = builtins.length init; + # Pre-sort all hosts, to ensure ordering invariance + sortedHosts = + warnIf + ((nInit + nHosts) > 0.3 * capacity) + "assignIps: hash stability may be degraded since utilization is >30%" + (builtins.sort builtins.lessThan hosts); + # Generates a hash (i.e. offset value) for a given hostname. Only digits 1..15 of the digest (60 bits) are converted so that hexToDec stays below 2^63 and cannot overflow; for capacities up to 2^60 this yields the same offsets as masking the first 16 digits. + hashElem = x: + builtins.bitAnd (capacity - 1) + (hexToDec (builtins.substring 1 15 (builtins.hashString "sha256" x))); + # Do linear probing. Returns the first unused value at or after the given value. + probe = avoid: value: + if elem value avoid + # TODO lib.mod + # Poor man's modulo, because nix has no modulo. Luckily we operate on a residue + # class of x modulo 2^n, so we can use bitAnd instead. + then probe avoid (builtins.bitAnd (capacity - 1) (value + 1)) + else value; + # Hash a new element and avoid assigning any existing values. + assignOne = { + assigned, + used, + }: x: let + value = probe used (hashElem x); + in { + assigned = + assigned + // { + ${x} = host value net; + }; + used = [value] ++ used; + }; + in + assert assertMsg (cidrSize >= 2 && cidrSize <= 62) + "assignIps: cidrSize=${toString cidrSize} is not in [2, 62]."; + assert assertMsg (nHosts <= capacity - nInit) + "assignIps: number of hosts (${toString nHosts}) must be <= capacity (${toString capacity}) - reserved (${toString nInit})"; + # Assign an ip in the subnet to each element, in order + (foldl' assignOne { + assigned = {}; + used = init; + } + sortedHosts) + .assigned; + }; + ip = rec { + # Checks whether the given address (with or without cidr notation) is an ipv4 address. 
+ isv4 = x: !isv6 x; + # Checks whether the given address (with or without cidr notation) is an ipv6 address. + isv6 = hasInfix ":"; + }; + mac = { + # Adds offset to the given base address and ensures the result is in + # a locally administered range by replacing the second nibble with a 2. + addPrivate = base: offset: let + added = libNet.mac.add base offset; + pre = substring 0 1 added; + suf = substring 2 (-1) added; + in "${pre}2${suf}"; + # assignMacs :: mac (base) -> int (size) -> [int | mac] (reserved) -> [string] (hosts) -> [mac] + # + # Assigns a semi-stable MAC address starting in [base, base + 2^size) to each hostname. + # The algorithm is based on hashing (abusing sha256) with linear probing. + # The order of hosts doesn't matter. No mac (or offset) from the reserved list + # will be assigned. + # + # Examples: + # + # > net.mac.assignMacs "11:22:33:00:00:00" 24 [] ["a" "b" "c"] + # { a = "11:22:33:1b:bd:ca"; b = "11:22:33:39:59:4a"; c = "11:22:33:50:7a:e2"; } + # + # > net.mac.assignMacs "11:22:33:00:00:00" 24 [] ["a" "b" "c" "a-new-elem"] + # { a = "11:22:33:1b:bd:ca"; a-new-elem = "11:22:33:d6:5d:58"; b = "11:22:33:39:59:4a"; c = "11:22:33:50:7a:e2"; } + # + # > net.mac.assignMacs "11:22:33:00:00:00" 24 ["11:22:33:1b:bd:ca"] ["a" "b" "c"] + # { a = "11:22:33:1b:bd:cb"; b = "11:22:33:39:59:4a"; c = "11:22:33:50:7a:e2"; } + assignMacs = base: size: reserved: hosts: let + capacity = pow 2 size; + baseAsInt = libNet.mac.diff base "00:00:00:00:00:00"; + init = unique ( + flip map reserved (x: + if builtins.typeOf x == "int" + then x + else libNet.mac.diff x base) + ); + nHosts = builtins.length hosts; + nInit = builtins.length init; + # Pre-sort all hosts, to ensure ordering invariance + sortedHosts = + warnIf + ((nInit + nHosts) > 0.3 * capacity) + "assignMacs: hash stability may be degraded since utilization is >30%" + (builtins.sort builtins.lessThan hosts); + # Generates a hash (i.e. 
offset value) for a given hostname. Only digits 1..15 of the digest (60 bits) are converted so that hexToDec stays below 2^63 and cannot overflow; for capacities up to 2^60 this yields the same offsets as masking the first 16 digits. + hashElem = x: + builtins.bitAnd (capacity - 1) + (hexToDec (substring 1 15 (builtins.hashString "sha256" x))); + # Do linear probing. Returns the first unused value at or after the given value. + probe = avoid: value: + if elem value avoid + # TODO lib.mod + # Poor man's modulo, because nix has no modulo. Luckily we operate on a residue + # class of x modulo 2^n, so we can use bitAnd instead. + then probe avoid (builtins.bitAnd (capacity - 1) (value + 1)) + else value; + # Hash a new element and avoid assigning any existing values. + assignOne = { + assigned, + used, + }: x: let + value = probe used (hashElem x); + in { + assigned = + assigned + // { + ${x} = libNet.mac.add value base; + }; + used = [value] ++ used; + }; + in + assert assertMsg (size >= 2 && size <= 62) + "assignMacs: size=${toString size} is not in [2, 62]."; + assert assertMsg (builtins.bitAnd (capacity - 1) baseAsInt == 0) + "assignMacs: the size=${toString size} least significant bits of the base mac address must be 0."; + assert assertMsg (nHosts <= capacity - nInit) + "assignMacs: number of hosts (${toString nHosts}) must be <= capacity (${toString capacity}) - reserved (${toString nInit})"; + # Assign a mac in the range to each element, in order + (foldl' assignOne { + assigned = {}; + used = init; + } + sortedHosts) + .assigned; + }; + }; + types.net = libNet.types; + }; +} diff --git a/lib/types.nix b/lib/types.nix new file mode 100644 index 0000000..326fcbb --- /dev/null +++ b/lib/types.nix @@ -0,0 +1,52 @@ +_inputs: _final: prev: let + inherit + (prev.lib) + all + assertMsg + isAttrs + mkOptionType + recursiveUpdate + showOption + types + ; + + # Checks whether the value is a lazy value without causing + # its value to be evaluated + isLazyValue = x: isAttrs x && x ? _lazyValue; + # Constructs a lazy value holding the given value. 
+ lazyValue = value: {_lazyValue = value;}; + + # Represents a lazy value of the given type, which + # holds the actual value as an attrset like { _lazyValue = <value>; }. + # This allows the option to be defined and filtered from a definition + # list without evaluating the value. + lazyValueOf = type: + mkOptionType rec { + name = "lazyValueOf ${type.name}"; + inherit (type) description descriptionClass emptyValue getSubOptions getSubModules; + check = isLazyValue; + merge = loc: defs: + assert assertMsg + (all (x: type.check x.value._lazyValue) defs) + "The option `${showOption loc}` is defined with a lazy value holding an invalid type"; + types.mergeOneOption loc defs; + substSubModules = m: types.uniq (type.substSubModules m); + functor = (types.defaultFunctor name) // {wrapped = type;}; + nestedTypes.elemType = type; + }; + + # Represents a value or lazy value of the given type that will + # automatically be coerced to the given type when merged. + lazyOf = type: types.coercedTo (lazyValueOf type) (x: x._lazyValue) type; +in { + lib = recursiveUpdate prev.lib { + types = { + inherit + isLazyValue + lazyValue + lazyValueOf + lazyOf + ; + }; + }; +} diff --git a/modules/default.nix b/modules/default.nix index 8e40808..00ae9c2 100644 --- a/modules/default.nix +++ b/modules/default.nix @@ -1,7 +1,15 @@ -{ +{inputs, ...}: { imports = [ - ./interface-naming.nix + inputs.microvm.nixosModules.host + ./boot.nix + ./guests/default.nix + ./interface-naming.nix ./nginx.nix + ./node.nix + ]; + + nixpkgs.overlays = [ + inputs.microvm.overlay ]; } diff --git a/modules/guests/common-guest-config.nix b/modules/guests/common-guest-config.nix new file mode 100644 index 0000000..4c52c57 --- /dev/null +++ b/modules/guests/common-guest-config.nix @@ -0,0 +1,27 @@ +_guestName: guestCfg: {lib, ...}: let + inherit (lib) mkForce; +in { + node.name = guestCfg.nodeName; + node.type = guestCfg.backend; + + nix = { + settings.auto-optimise-store = mkForce false; + optimise.automatic = mkForce false; + 
gc.automatic = mkForce false; + }; + + systemd.network.networks."10-${guestCfg.networking.mainLinkName}" = { + matchConfig.Name = guestCfg.networking.mainLinkName; + DHCP = "yes"; + # XXX: Do we really want this? + dhcpV4Config.UseDNS = false; + dhcpV6Config.UseDNS = false; + ipv6AcceptRAConfig.UseDNS = false; + networkConfig = { + IPv6PrivacyExtensions = "yes"; + MulticastDNS = true; + IPv6AcceptRA = true; + }; + linkConfig.RequiredForOnline = "routable"; + }; +} diff --git a/modules/guests/container.nix b/modules/guests/container.nix new file mode 100644 index 0000000..e088b5d --- /dev/null +++ b/modules/guests/container.nix @@ -0,0 +1,64 @@ +guestName: guestCfg: { + config, + inputs, + lib, + pkgs, + ... +}: let + inherit + (lib) + flip + mapAttrs' + nameValuePair + ; +in { + ephemeral = true; + privateNetwork = true; + autoStart = guestCfg.autostart; + macvlans = ["${guestCfg.container.macvlan}:${guestCfg.networking.mainLinkName}"]; + extraFlags = [ + "--uuid=${builtins.substring 0 32 (builtins.hashString "sha256" guestName)}" + ]; + bindMounts = flip mapAttrs' guestCfg.zfs ( + _: zfsCfg: + nameValuePair zfsCfg.guestMountpoint { + hostPath = zfsCfg.hostMountpoint; + isReadOnly = false; + } + ); + nixosConfiguration = inputs.nixpkgs.lib.nixosSystem { + specialArgs = guestCfg.extraSpecialArgs; + prefix = ["nodes" "${config.node.name}-${guestName}" "config"]; + system = null; + modules = + [ + { + boot.isContainer = true; + networking.useHostResolvConf = false; + + # We cannot force the package set via nixpkgs.pkgs and + # inputs.nixpkgs.nixosModules.readOnlyPkgs, since some nixosModules + # like nixseparatedebuginfod depend on adding packages via nixpkgs.overlays. + # So we just mimic the options and overlays defined by the passed pkgs set. + nixpkgs.hostPlatform = config.nixpkgs.hostPlatform.system; + nixpkgs.overlays = pkgs.overlays; + nixpkgs.config = pkgs.config; + + # Bind the /guest/* paths from above so impermanence doesn't complain. 
+ # We bind-mount stuff from the host to itself, which is perfectly defined + # and not recursive. This allows us to have a fileSystems entry for each + # bindMount which other stuff can depend upon (impermanence adds dependencies + # to the state fs). + fileSystems = flip mapAttrs' guestCfg.zfs (_: zfsCfg: + nameValuePair zfsCfg.guestMountpoint { + neededForBoot = true; + fsType = "none"; + device = zfsCfg.guestMountpoint; + options = ["bind"]; + }); + } + (import ./common-guest-config.nix guestName guestCfg) + ] + ++ guestCfg.modules; + }; +} diff --git a/modules/guests/default.nix b/modules/guests/default.nix new file mode 100644 index 0000000..56f9050 --- /dev/null +++ b/modules/guests/default.nix @@ -0,0 +1,281 @@ +{ + config, + lib, + pkgs, + utils, + ... +} @ attrs: let + inherit + (lib) + attrNames + attrValues + attrsToList + disko + escapeShellArg + flatten + flip + groupBy + hasPrefix + listToAttrs + literalExpression + makeBinPath + mapAttrs + mapAttrsToList + mergeToplevelConfigs + mkIf + mkMerge + mkOption + net + types + ; + + # All available backends + backends = ["microvm" "container"]; + + guestsByBackend = + lib.genAttrs backends (_: {}) + // mapAttrs (_: listToAttrs) (groupBy (x: x.value.backend) (attrsToList config.guests)); + + # List the necessary mount units for the given guest + fsMountUnitsFor = guestCfg: + map + (x: "${utils.escapeSystemdPath x.hostMountpoint}.mount") + (attrValues guestCfg.zfs); + + # Configuration required on the host for a specific guest + defineGuest = _guestName: guestCfg: { + # Add the required datasets to the disko configuration of the machine + disko.devices.zpool = mkMerge (flip map (attrValues guestCfg.zfs) (zfsCfg: { + ${zfsCfg.pool}.datasets.${zfsCfg.dataset} = disko.zfs.filesystem zfsCfg.hostMountpoint; + })); + + # Ensure that the zfs dataset exists before it is mounted. 
+ systemd.services = mkMerge (flip map (attrValues guestCfg.zfs) (zfsCfg: let + fsMountUnit = "${utils.escapeSystemdPath zfsCfg.hostMountpoint}.mount"; + in { + "zfs-ensure-${utils.escapeSystemdPath zfsCfg.hostMountpoint}" = { + wantedBy = [fsMountUnit]; + before = [fsMountUnit]; + after = [ + "zfs-import-${utils.escapeSystemdPath zfsCfg.pool}.service" + "zfs-mount.target" + ]; + unitConfig.DefaultDependencies = "no"; + serviceConfig.Type = "oneshot"; + script = let + poolDataset = "${zfsCfg.pool}/${zfsCfg.dataset}"; + diskoDataset = config.disko.devices.zpool.${zfsCfg.pool}.datasets.${zfsCfg.dataset}; + in '' + export PATH=${makeBinPath [pkgs.zfs]}":$PATH" + if ! zfs list -H -o type ${escapeShellArg poolDataset} &>/dev/null ; then + ${diskoDataset._create} + fi + ''; + }; + })); + }; + + defineMicrovm = guestName: guestCfg: { + # Require the guest's host mount units and order the microvm after them. + systemd.services."microvm@${guestName}" = { + requires = fsMountUnitsFor guestCfg; + after = fsMountUnitsFor guestCfg; + }; + + microvm.vms.${guestName} = import ./microvm.nix guestName guestCfg attrs; + }; + + defineContainer = guestName: guestCfg: { + # Require the guest's host mount units and order the container after them. + systemd.services."container@${guestName}" = { + requires = fsMountUnitsFor guestCfg; + after = fsMountUnitsFor guestCfg; + # Don't use the notify service type. Using exec will always consider containers + # started immediately and doesn't wait until the container is fully booted. + # Containers should behave like independent machines, and issues inside the container + # will unnecessarily lock up the service on the host otherwise. + # This causes issues on system activation or when containers take longer to start + # than TimeoutStartSec. 
+ serviceConfig.Type = lib.mkForce "exec"; + }; + + containers.${guestName} = import ./container.nix guestName guestCfg attrs; + }; +in { + imports = [ + { + # This is opt-out, so we can't put this into the mkIf below + microvm.host.enable = guestsByBackend.microvm != {}; + } + ]; + + options.node.type = mkOption { + type = types.enum (["host"] ++ backends); + description = "The type of this machine."; + default = "host"; + }; + + options.containers = mkOption { + type = types.attrsOf (types.submodule (submod: { + options.nixosConfiguration = mkOption { + type = types.unspecified; + default = null; + description = "Set this to the result of a `nixosSystem` invocation to use it as the guest system. This will set the `path` option for you."; + }; + config = mkIf (submod.config.nixosConfiguration != null) { + path = submod.config.nixosConfiguration.config.system.build.toplevel; + }; + })); + }; + + options.guests = mkOption { + default = {}; + description = "Defines the actual vms and handles the necessary base setup for them."; + type = types.attrsOf (types.submodule (submod: { + options = { + nodeName = mkOption { + type = types.str; + default = "${config.node.name}-${submod.config._module.args.name}"; + description = '' + The name of the resulting node. By default this will be a compound name + of the host's name and the guest's name to avoid name clashes. Can be + overwritten to designate special names to specific guests. + ''; + }; + + backend = mkOption { + type = types.enum backends; + description = '' + Determines how the guest will be hosted. You can currently choose + between microvm based deployment, or nixos containers. + ''; + }; + + extraSpecialArgs = mkOption { + type = types.attrs; + default = {}; + example = literalExpression "{ inherit inputs; }"; + description = '' + Extra `specialArgs` passed to each guest system definition. This + option can be used to pass additional arguments to all modules. 
+ ''; + }; + + # Options for the microvm backend + microvm = { + system = mkOption { + type = types.str; + description = "The system that this microvm should use"; + }; + + macvtap = mkOption { + type = types.str; + description = "The host interface to which the microvm should be attached via macvtap"; + }; + + baseMac = mkOption { + type = types.net.mac; + description = "The base mac address from which the guest's mac will be derived. Only the second and third byte are used, so for 02:XX:YY:ZZ:ZZ:ZZ, this specifies XX and YY, while Zs are generated automatically. Not used if the mac is set directly."; + default = "02:01:27:00:00:00"; + }; + + mac = mkOption { + type = types.net.mac; + description = "The MAC address for the guest's macvtap interface"; + default = let + base = "02:${lib.substring 3 5 submod.config.microvm.baseMac}:00:00:00"; + in + (net.mac.assignMacs base 24 [] (attrNames config.guests)).${submod.config._module.args.name}; + }; + }; + + # Options for the container backend + container = { + macvlan = mkOption { + type = types.str; + description = "The host interface to which the container should be attached"; + }; + }; + + networking.mainLinkName = mkOption { + type = types.str; + description = "The main ethernet link name inside of the guest. 
For containers, this cannot be named similar to an existing interface on the host."; + default = + if submod.config.backend == "microvm" + then submod.config.microvm.macvtap + else if submod.config.backend == "container" + then "mv-${submod.config.container.macvlan}" + else throw "Invalid backend"; + }; + + zfs = mkOption { + description = "zfs datasets to mount into the guest"; + default = {}; + type = types.attrsOf (types.submodule (zfsSubmod: { + options = { + pool = mkOption { + type = types.str; + description = "The host's zfs pool on which the dataset resides"; + }; + + dataset = mkOption { + type = types.str; + example = "safe/guests/mycontainer"; + description = "The host's dataset that should be used for this mountpoint (will automatically be created, including parent datasets)"; + }; + + hostMountpoint = mkOption { + type = types.path; + default = "/guests/${submod.config._module.args.name}${zfsSubmod.config.guestMountpoint}"; + example = "/guests/mycontainer/persist"; + description = "The host's mountpoint for the guest's dataset"; + }; + + guestMountpoint = mkOption { + type = types.path; + default = zfsSubmod.config._module.args.name; + example = "/persist"; + description = "The mountpoint inside the guest."; + }; + }; + })); + }; + + autostart = mkOption { + type = types.bool; + default = false; + description = "Whether this guest should be started automatically with the host"; + }; + + modules = mkOption { + type = types.listOf types.unspecified; + default = []; + description = "Additional modules to load"; + }; + }; + })); + }; + + config = mkIf (config.guests != {}) ( + mkMerge [ + { + systemd.tmpfiles.rules = [ + "d /guests 0700 root root -" + ]; + + assertions = flatten (flip mapAttrsToList config.guests ( + guestName: guestCfg: + flip mapAttrsToList guestCfg.zfs ( + zfsName: zfsCfg: { + assertion = hasPrefix "/" zfsCfg.guestMountpoint; + message = "guest ${guestName}: zfs ${zfsName}: the guestMountpoint must be an absolute path."; + } + ) + )); 
+ } + (mergeToplevelConfigs ["disko" "systemd"] (mapAttrsToList defineGuest config.guests)) + (mergeToplevelConfigs ["containers" "systemd"] (mapAttrsToList defineContainer guestsByBackend.container)) + (mergeToplevelConfigs ["microvm" "systemd"] (mapAttrsToList defineMicrovm guestsByBackend.microvm)) + ] + ); +} diff --git a/modules/guests/microvm.nix b/modules/guests/microvm.nix new file mode 100644 index 0000000..51126d3 --- /dev/null +++ b/modules/guests/microvm.nix @@ -0,0 +1,82 @@ +guestName: guestCfg: { + config, + inputs, + lib, + pkgs, + ... +}: let + inherit + (lib) + flip + mapAttrsToList + mkDefault + mkForce + ; +in { + specialArgs = guestCfg.extraSpecialArgs; + pkgs = inputs.self.pkgs.${guestCfg.microvm.system}; + inherit (guestCfg) autostart; + config = { + imports = + guestCfg.modules + ++ [ + (import ./common-guest-config.nix guestName guestCfg) + ({config, ...}: { + # Set early hostname too, so we can associate those logs to this host and don't get "localhost" entries in loki + boot.kernelParams = ["systemd.hostname=${config.networking.hostName}"]; + }) + ]; + + # TODO needed because of https://github.com/NixOS/nixpkgs/issues/102137 + environment.noXlibs = mkForce false; + lib.microvm.mac = guestCfg.microvm.mac; + + microvm = { + hypervisor = mkDefault "qemu"; + + # Give them some juice by default + mem = mkDefault (1024 + 2048); + + # Add a writable store overlay, but since this is always ephemeral + # disable any store optimization from nix. 
+ writableStoreOverlay = "/nix/.rw-store"; + + # MACVTAP bridge to the host's network + interfaces = [ + { + type = "macvtap"; + id = "vm-${guestName}"; + inherit (guestCfg.microvm) mac; + macvtap = { + link = guestCfg.microvm.macvtap; + mode = "bridge"; + }; + } + ]; + + shares = + [ + # Share the nix-store of the host + { + source = "/nix/store"; + mountPoint = "/nix/.ro-store"; + tag = "ro-store"; + proto = "virtiofs"; + } + ] + ++ flip mapAttrsToList guestCfg.zfs ( + _: zfsCfg: { + source = zfsCfg.hostMountpoint; + mountPoint = zfsCfg.guestMountpoint; + tag = builtins.substring 0 16 (builtins.hashString "sha256" zfsCfg.hostMountpoint); + proto = "virtiofs"; + } + ); + }; + + networking.renameInterfacesByMac.${guestCfg.networking.mainLinkName} = guestCfg.microvm.mac; + systemd.network.networks."10-${guestCfg.networking.mainLinkName}".matchConfig = mkForce { + MACAddress = guestCfg.microvm.mac; + }; + }; +}