# NixOS module `meta.telegraf`.
#
# Runs a telegraf agent on this node that pushes host metrics to an InfluxDB v2
# instance. It additionally:
#   - declares an api token secret and mirrors it onto the node hosting influxdb,
#     where a matching auth entry is provisioned,
#   - derives ping/http/dns/tcp probes from `globals.monitoring` for every entry
#     whose network is listed in `availableMonitoringNetworks`,
#   - substitutes secrets into the generated config in a pre-start step.
{
  config,
  globals,
  lib,
  minimal,
  nodes,
  pkgs,
  ...
}:
let
  inherit (lib)
    concatLines
    concatLists
    elem
    escapeShellArg
    flip
    forEach
    getExe
    mapAttrsToList
    mkAfter
    mkEnableOption
    mkIf
    mkOption
    optional
    optionalAttrs
    optionals
    toList
    types
    ;

  cfg = config.meta.telegraf;

  # Only define the value when the list is non-empty, so that an input
  # without any targets is left out of the configuration entirely.
  mkIfNotEmpty = xs: mkIf (xs != [ ]) xs;
in
{
  options.meta.telegraf = {
    # No trailing period: mkEnableOption appends one itself.
    enable = mkEnableOption "telegraf to push metrics to influx";

    scrapeSensors = mkOption {
      type = types.bool;
      default = true;
      description = "Scrape sensors with lm_sensors. You should disable this for virtualized hosts.";
    };

    secrets = mkOption {
      type = types.attrsOf types.path;
      default = { };
      example = {
        "@INFLUX_TOKEN@" = "/run/agenix/influx-token";
      };
      description = "Additional secrets to replace in pre-start. The attr name will be searched and replaced in the config with the value read from the given file.";
    };

    availableMonitoringNetworks = mkOption {
      type = types.listOf types.str;
      example = [ "internet" ];
      description = ''
        Any of the global monitoring definitions which has a network from this list
        will automatically be monitored via telegraf. Set this to any networks that
        can be reached from this node. This includes `local-<node name>` by default.
      '';
    };

    influxdb2 = {
      domain = mkOption {
        type = types.str;
        example = "influxdb.example.com";
        description = "The influxdb v2 database to push to. https will be enforced.";
      };

      organization = mkOption {
        type = types.str;
        description = "The organization to push to.";
      };

      bucket = mkOption {
        type = types.str;
        description = "The bucket to push to.";
      };

      # NOTE(review): this option is not read anywhere in this file; confirm
      # whether it is consumed elsewhere.
      user = mkOption {
        type = types.str;
        default = "admin";
        description = "The user for which the api key should be created.";
      };

      node = mkOption {
        type = types.str;
        description = "The node which hosts the influxdb service (used to provision an api token).";
      };
    };
  };

  config = mkIf (!minimal && cfg.enable) {
    # Monitor anything that can only be monitored from this node
    meta.telegraf.availableMonitoringNetworks = [ "local-${config.node.name}" ];

    assertions = [
      {
        assertion = !config.boot.isContainer;
        message = "Containers don't support telegraf because memlock is not enabled.";
      }
    ];

    # Configuration contributed to the node that hosts influxdb.
    nodes.${cfg.influxdb2.node} = {
      # Mirror the original secret on the influx host
      age.secrets."telegraf-influxdb-token-${config.node.name}" = {
        inherit (config.age.secrets.telegraf-influxdb-token) rekeyFile;
        mode = "440";
        group = "influxdb2";
      };

      # NOTE(review): organization ("machines") and bucket ("telegraf") are
      # hard-coded here, while the telegraf output below uses
      # cfg.influxdb2.organization / cfg.influxdb2.bucket. Confirm that these
      # are meant to stay in sync manually.
      services.influxdb2.provision.organizations.machines.auths."telegraf (${config.node.name})" = {
        readBuckets = [ "telegraf" ];
        writeBuckets = [ "telegraf" ];
        tokenFile =
          nodes.${cfg.influxdb2.node}.config.age.secrets."telegraf-influxdb-token-${config.node.name}".path;
      };
    };

    # The api token used by this node's telegraf instance.
    age.secrets.telegraf-influxdb-token = {
      generator.script = "alnum";
      mode = "440";
      group = "telegraf";
    };

    # The placeholder in the output section below is replaced by this file's
    # content in the pre-start script.
    meta.telegraf.secrets."@INFLUX_TOKEN@" = config.age.secrets.telegraf-influxdb-token.path;

    # Wrappers that let the telegraf user run the respective tools as root.
    security.elewrap.telegraf-sensors = mkIf cfg.scrapeSensors {
      command = [
        "${pkgs.lm_sensors}/bin/sensors"
        "-A"
        "-u"
      ];
      targetUser = "root";
      allowedUsers = [ "telegraf" ];
    };

    security.elewrap.telegraf-nvme = mkIf config.services.smartd.enable {
      command = [ "${pkgs.nvme-cli}/bin/nvme" ];
      targetUser = "root";
      allowedUsers = [ "telegraf" ];
      passArguments = true;
    };

    security.elewrap.telegraf-smartctl = mkIf config.services.smartd.enable {
      command = [ "${pkgs.smartmontools}/bin/smartctl" ];
      targetUser = "root";
      allowedUsers = [ "telegraf" ];
      passArguments = true;
    };

    services.telegraf = {
      enable = true;
      environmentFiles = [ "/dev/null" ]; # Needed so the config file is copied to /run/telegraf
      extraConfig = {
        agent = {
          interval = "10s";
          round_interval = true; # Always collect on :00,:10,...
          metric_batch_size = 5000;
          metric_buffer_limit = 50000;
          collection_jitter = "0s";
          flush_interval = "20s";
          flush_jitter = "5s";
          precision = "1ms";
          hostname = config.node.name;
          omit_hostname = false;
        };

        outputs = {
          influxdb_v2 = {
            urls = [ "https://${cfg.influxdb2.domain}" ];
            token = "@INFLUX_TOKEN@";
            inherit (cfg.influxdb2) organization bucket;
          };
        };

        inputs =
          {
            conntrack = { };
            cpu = { };
            disk = { };
            diskio = { };
            internal = { };
            interrupts = { };
            kernel = { };
            kernel_vmstat = { };
            linux_sysctl_fs = { };
            mem = { };
            net = {
              ignore_protocol_stats = true;
            };
            netstat = { };
            nstat = { };
            processes = { };
            swap = { };
            system = { };
            systemd_units = {
              unittype = "service";
            };
            temp = { };
            wireguard = { };

            # One ping input per reachable monitoring entry and per address
            # family (hostv4 / hostv6) that is set on that entry.
            ping = mkIfNotEmpty (
              concatLists (
                flip mapAttrsToList globals.monitoring.ping (
                  name: pingCfg:
                  optionals (elem pingCfg.network cfg.availableMonitoringNetworks) (
                    concatLists (
                      forEach
                        [
                          "hostv4"
                          "hostv6"
                        ]
                        (
                          attr:
                          optional (pingCfg.${attr} != null) {
                            interval = "1m";
                            method = "native";
                            urls = [ pingCfg.${attr} ];
                            ipv4 = attr == "hostv4";
                            ipv6 = attr == "hostv6";
                            tags = {
                              inherit name;
                              inherit (pingCfg) network;
                              ip_version = if attr == "hostv4" then "v4" else "v6";
                            };
                            fieldinclude = [
                              "percent_packet_loss"
                              "average_response_ms"
                            ];
                          }
                        )
                    )
                  )
                )
              )
            );

            # One http_response input per reachable monitoring entry.
            http_response = mkIfNotEmpty (
              concatLists (
                flip mapAttrsToList globals.monitoring.http (
                  name: httpCfg:
                  optional (elem httpCfg.network cfg.availableMonitoringNetworks) {
                    interval = "1m";
                    urls = toList httpCfg.url;
                    method = "GET";
                    response_status_code = httpCfg.expectedStatus;
                    # Only set when a body regex was given for this entry.
                    response_string_match = mkIf (httpCfg.expectedBodyRegex != null) httpCfg.expectedBodyRegex;
                    insecure_skip_verify = httpCfg.skipTlsVerification;
                    follow_redirects = true;
                    tags = {
                      inherit name;
                      inherit (httpCfg) network;
                    };
                  }
                )
              )
            );

            # One dns_query input per reachable monitoring entry.
            dns_query = mkIfNotEmpty (
              concatLists (
                flip mapAttrsToList globals.monitoring.dns (
                  name: dnsCfg:
                  optional (elem dnsCfg.network cfg.availableMonitoringNetworks) {
                    interval = "1m";
                    servers = [ dnsCfg.server ];
                    domains = [ dnsCfg.domain ];
                    record_type = dnsCfg.record-type;
                    tags = {
                      inherit name;
                      inherit (dnsCfg) network;
                    };
                  }
                )
              )
            );

            # One net_response (tcp) input per reachable monitoring entry.
            net_response = mkIfNotEmpty (
              concatLists (
                flip mapAttrsToList globals.monitoring.tcp (
                  name: tcpCfg:
                  optional (elem tcpCfg.network cfg.availableMonitoringNetworks) {
                    interval = "1m";
                    address = "${tcpCfg.host}:${toString tcpCfg.port}";
                    protocol = "tcp";
                    tags = {
                      inherit name;
                      inherit (tcpCfg) network;
                    };
                    fieldexclude = [
                      "result_type"
                      "string_found"
                    ];
                  }
                )
              )
            );
          }
          # Gated on scrapeSensors (not smartd), matching the elewrap wrapper
          # and the `sensors` shim on the service PATH, which only exist when
          # scrapeSensors is enabled.
          // optionalAttrs cfg.scrapeSensors {
            sensors = { };
          }
          // optionalAttrs config.services.smartd.enable {
            smart = {
              attributes = true;
              path_nvme = config.security.elewrap.telegraf-nvme.path;
              path_smartctl = config.security.elewrap.telegraf-smartctl.path;
              use_sudo = false;
            };
          }
          // optionalAttrs config.services.nginx.enable {
            nginx.urls = [ "http://localhost/nginx_status" ];
          }
          // optionalAttrs (config.networking.wireless.enable || config.networking.wireless.iwd.enable) {
            wireless = { };
          };
      };
    };

    # Status endpoint scraped by the nginx input above; restricted to loopback.
    services.nginx.virtualHosts = mkIf config.services.nginx.enable {
      localhost.listenAddresses = [
        "127.0.0.1"
        "[::1]"
      ];
      localhost.locations."= /nginx_status".extraConfig = ''
        allow 127.0.0.0/8;
        allow ::1;
        deny all;
        stub_status;
        access_log off;
      '';
    };

    environment.persistence."/persist".directories = [
      {
        directory = "/var/lib/telegraf";
        user = "telegraf";
        group = "telegraf";
        mode = "0700";
      }
    ];

    systemd.services.telegraf = {
      path = [
        # Make sensors refer to the correct wrapper
        (mkIf cfg.scrapeSensors (
          pkgs.writeShellScriptBin "sensors" config.security.elewrap.telegraf-sensors.path
        ))
      ];

      serviceConfig = {
        # Replace every placeholder from `meta.telegraf.secrets` in the config
        # file with the content of the associated file. Keys and paths are
        # shell-escaped.
        ExecStartPre = mkAfter [
          (pkgs.writeShellScript "pre-start-token" (
            concatLines (
              flip mapAttrsToList cfg.secrets (
                key: secret: ''
                  ${getExe pkgs.replace-secret} \
                    ${escapeShellArg key} \
                    ${escapeShellArg secret} \
                    /var/run/telegraf/config.toml
                ''
              )
            )
          ))
        ];

        # For wireguard statistics
        AmbientCapabilities = [ "CAP_NET_ADMIN" ];
        RestartSec = "60"; # Retry every minute
      };
    };
  };
}