# Mirror of https://github.com/oddlama/nix-config.git
# Synced 2025-10-10 14:50:40 +02:00 · 363 lines · 11 KiB · Nix
{
|
|
config,
|
|
globals,
|
|
lib,
|
|
minimal,
|
|
nodes,
|
|
pkgs,
|
|
...
|
|
}:
|
|
let
|
|
inherit (lib)
|
|
concatLists
|
|
elem
|
|
flip
|
|
forEach
|
|
mapAttrsToList
|
|
mkAfter
|
|
mkEnableOption
|
|
mkIf
|
|
mkOption
|
|
optional
|
|
optionalAttrs
|
|
optionals
|
|
toList
|
|
types
|
|
;
|
|
|
|
# Shorthand for this module's own option values.
cfg = config.meta.telegraf;

# Wraps a list in `mkIf` so that the definition is only emitted when the list
# has at least one element.
mkIfNotEmpty = entries: mkIf (entries != [ ]) entries;
|
|
in
|
|
{
|
|
options.meta.telegraf = {
|
|
# Master switch for this module; the whole `config` section is gated on it.
# `mkEnableOption` renders its argument as "Whether to enable <text>." and adds
# the final period itself, so the text passed here must not end with one.
enable = mkEnableOption "telegraf to push metrics to influx";
|
|
|
|
# Controls whether the privileged `sensors` wrapper is created and placed on
# the telegraf unit's PATH.
scrapeSensors = mkOption {
  description = "Scrape sensors with lm_sensors. You should disable this for virtualized hosts.";
  type = types.bool;
  default = true;
};
|
|
|
|
# Placeholder -> secret-file map. Each entry is substituted into the generated
# telegraf config by the unit's pre-start script.
secrets = mkOption {
  description = "Additional secrets to replace in pre-start. The attr name will be searched and replaced in the config with the value read from the given file.";
  type = types.attrsOf types.path;
  default = { };
  example = {
    "@INFLUX_TOKEN@" = "/run/agenix/influx-token";
  };
};
|
|
|
|
# Networks reachable from this node. Global monitoring definitions whose
# `network` is in this list become telegraf inputs. No default is declared
# here; the module's config section contributes the node-local entry.
availableMonitoringNetworks = mkOption {
  description = ''
    Any of the global monitoring definitions which has a network from this list
    will automatically be monitored via telegraf. Set this to any networks that
    can be reached from this node. This includes `local-<node.name>` by default.
  '';
  type = types.listOf types.str;
  example = [ "internet" ];
};
|
|
|
|
# Connection and provisioning parameters for the InfluxDB v2 target.
influxdb2 = {
  # Host part of the output URL; the scheme is always https.
  domain = mkOption {
    description = "The influxdb v2 database to push to. https will be enforced.";
    type = types.str;
    example = "influxdb.example.com";
  };

  # Passed through to the influxdb_v2 output.
  organization = mkOption {
    description = "The organization to push to.";
    type = types.str;
  };

  # Passed through to the influxdb_v2 output.
  bucket = mkOption {
    description = "The bucket to push to.";
    type = types.str;
  };

  # NOTE(review): not referenced anywhere else in the visible part of this
  # module; confirm whether it is still needed.
  user = mkOption {
    description = "The user for which the api key should be created.";
    type = types.str;
    default = "admin";
  };

  # Name of the node that receives the mirrored token secret and the
  # provisioning entry.
  node = mkOption {
    description = "The node which hosts the influxdb service (used to provision an api token).";
    type = types.str;
  };
};
|
|
};
|
|
|
|
config = mkIf (!minimal && cfg.enable) {
|
|
# Always treat this node's own `local-<name>` network as reachable, so probes
# that can only be run from this node are picked up.
meta.telegraf.availableMonitoringNetworks = [ "local-${config.node.name}" ];

# Refuse to evaluate when this module is enabled inside a container.
assertions = [
  {
    message = "Containers don't support telegraf because memlock is not enabled.";
    assertion = !config.boot.isContainer;
  }
];
|
|
|
|
# Configuration contributed to the node that hosts InfluxDB
# (`cfg.influxdb2.node`): a copy of this node's token secret plus a
# provisioning entry that registers the token.
nodes.${cfg.influxdb2.node} =
  let
    # Secret name on the influx host; includes this node's name so that
    # several pushing nodes do not collide.
    mirroredSecret = "telegraf-influxdb-token-${config.node.name}";
  in
  {
    # Mirror the original secret on the influx host
    age.secrets.${mirroredSecret} = {
      inherit (config.age.secrets.telegraf-influxdb-token) rekeyFile;
      mode = "440";
      group = "influxdb2";
    };

    # Provision the token for exactly the organization and bucket that the
    # telegraf output pushes to. These used to be hard-coded to "machines" /
    # "telegraf", which silently produced a token without access whenever the
    # options were set to anything else.
    services.influxdb2.provision.organizations.${cfg.influxdb2.organization}.auths."telegraf (${config.node.name})" =
      {
        readBuckets = [ cfg.influxdb2.bucket ];
        writeBuckets = [ cfg.influxdb2.bucket ];
        # Path of the mirrored secret as seen on the influx host.
        tokenFile = nodes.${cfg.influxdb2.node}.config.age.secrets.${mirroredSecret}.path;
      };
  };
|
|
|
|
# Generated API token for this node, readable by the telegraf group.
age.secrets.telegraf-influxdb-token = {
  group = "telegraf";
  mode = "440";
  generator.script = "alnum";
};

# Register the token so the pre-start script substitutes it for the
# "@INFLUX_TOKEN@" placeholder used in the influxdb_v2 output.
meta.telegraf.secrets = {
  "@INFLUX_TOKEN@" = config.age.secrets.telegraf-influxdb-token.path;
};
|
|
|
|
# Privilege-elevation wrappers that let the telegraf user run selected
# commands as root.
security.elewrap = {
  # Fixed command line (`sensors -A -u`); no caller-supplied arguments.
  telegraf-sensors = mkIf cfg.scrapeSensors {
    allowedUsers = [ "telegraf" ];
    targetUser = "root";
    command = [
      "${pkgs.lm_sensors}/bin/sensors"
      "-A"
      "-u"
    ];
  };

  # The two SMART helpers exist only when smartd is enabled and forward the
  # arguments they are called with.
  telegraf-nvme = mkIf config.services.smartd.enable {
    allowedUsers = [ "telegraf" ];
    targetUser = "root";
    passArguments = true;
    command = [ "${pkgs.nvme-cli}/bin/nvme" ];
  };

  telegraf-smartctl = mkIf config.services.smartd.enable {
    allowedUsers = [ "telegraf" ];
    targetUser = "root";
    passArguments = true;
    command = [ "${pkgs.smartmontools}/bin/smartctl" ];
  };
};
|
|
|
|
# Telegraf agent configuration: agent settings, the InfluxDB v2 output, a fixed
# set of host inputs, and probe inputs generated from `globals.monitoring`.
services.telegraf =
  let
    # True when the probe targets a network this node can reach.
    reachable = probe: elem probe.network cfg.availableMonitoringNetworks;

    # Tags shared by every generated probe input.
    probeTags = name: probe: {
      inherit name;
      inherit (probe) network;
    };

    # Turns one family of global monitoring definitions into a list of input
    # tables. `toInputs name probe` returns the tables for a single reachable
    # definition; the result is omitted entirely when nothing is reachable.
    collectProbes =
      definitions: toInputs:
      mkIfNotEmpty (
        concatLists (
          mapAttrsToList (
            name: probe: optionals (reachable probe) (toInputs name probe)
          ) definitions
        )
      );
  in
  {
    enable = true;
    environmentFiles = [ "/dev/null" ]; # Needed so the config file is copied to /run/telegraf
    extraConfig = {
      agent = {
        interval = "10s";
        round_interval = true; # Always collect on :00,:10,...
        metric_batch_size = 5000;
        metric_buffer_limit = 50000;
        collection_jitter = "0s";
        flush_interval = "20s";
        flush_jitter = "5s";
        precision = "1ms";
        hostname = config.node.name;
        omit_hostname = false;
      };

      outputs.influxdb_v2 = {
        urls = [ "https://${cfg.influxdb2.domain}" ];
        # Placeholder; replaced with the real token by the unit's pre-start script.
        token = "@INFLUX_TOKEN@";
        inherit (cfg.influxdb2) organization bucket;
      };

      inputs =
        {
          conntrack = { };
          cpu = { };
          disk = { };
          diskio = { };
          internal = { };
          interrupts = { };
          kernel = { };
          kernel_vmstat = { };
          linux_sysctl_fs = { };
          mem = { };
          net = {
            ignore_protocol_stats = true;
          };
          netstat = { };
          nstat = { };
          processes = { };
          swap = { };
          system = { };
          systemd_units = {
            unittype = "service";
          };
          temp = { };
          wireguard = { };

          # One ping input per configured address family of each definition.
          ping = collectProbes globals.monitoring.ping (
            name: probe:
            concatLists (
              forEach
                [
                  "hostv4"
                  "hostv6"
                ]
                (
                  attr:
                  optional (probe.${attr} != null) {
                    interval = "1m";
                    method = "native";
                    urls = [ probe.${attr} ];
                    ipv4 = attr == "hostv4";
                    ipv6 = attr == "hostv6";
                    tags = probeTags name probe // {
                      ip_version = if attr == "hostv4" then "v4" else "v6";
                    };
                    fieldinclude = [
                      "percent_packet_loss"
                      "average_response_ms"
                    ];
                  }
                )
            )
          );

          http_response = collectProbes globals.monitoring.http (
            name: probe: [
              {
                interval = "1m";
                urls = toList probe.url;
                method = "GET";
                response_status_code = probe.expectedStatus;
                # Only set when a body regex is configured.
                response_string_match = mkIf (probe.expectedBodyRegex != null) probe.expectedBodyRegex;
                insecure_skip_verify = probe.skipTlsVerification;
                follow_redirects = true;
                tags = probeTags name probe;
              }
            ]
          );

          dns_query = collectProbes globals.monitoring.dns (
            name: probe: [
              {
                interval = "1m";
                servers = [ probe.server ];
                domains = [ probe.domain ];
                record_type = probe.record-type;
                tags = probeTags name probe;
              }
            ]
          );

          net_response = collectProbes globals.monitoring.tcp (
            name: probe: [
              {
                interval = "1m";
                address = "${probe.host}:${toString probe.port}";
                protocol = "tcp";
                tags = probeTags name probe;
                fieldexclude = [
                  "result_type"
                  "string_found"
                ];
              }
            ]
          );
        }
        # The sensors input follows `scrapeSensors`, the same switch that
        # creates the `sensors` wrapper and puts it on the unit's PATH. It was
        # previously tied to smartd, so the input and its binary could be
        # enabled independently of each other.
        // optionalAttrs cfg.scrapeSensors {
          sensors = { };
        }
        // optionalAttrs config.services.smartd.enable {
          smart = {
            attributes = true;
            path_nvme = config.security.elewrap.telegraf-nvme.path;
            path_smartctl = config.security.elewrap.telegraf-smartctl.path;
            # Elevation is handled by the wrappers above, not by sudo.
            use_sudo = false;
          };
        }
        // optionalAttrs config.services.nginx.enable {
          nginx.urls = [ "http://localhost/nginx_status" ];
        }
        // optionalAttrs (config.networking.wireless.enable || config.networking.wireless.iwd.enable) {
          wireless = { };
        };
    };
  };
|
|
|
|
# Loopback-only status endpoint, defined only when nginx is enabled. It serves
# the URL that the telegraf nginx input scrapes
# (http://localhost/nginx_status).
services.nginx.virtualHosts = mkIf config.services.nginx.enable {
  localhost = {
    listenAddresses = [
      "127.0.0.1"
      "[::1]"
    ];
    locations."= /nginx_status".extraConfig = ''
      allow 127.0.0.0/8;
      allow ::1;
      deny all;
      stub_status;
      access_log off;
    '';
  };
};
|
|
|
|
# Keep telegraf's state directory under /persist, owned by and private to the
# telegraf user.
environment.persistence."/persist".directories = [
  {
    directory = "/var/lib/telegraf";
    mode = "0700";
    user = "telegraf";
    group = "telegraf";
  }
];
|
|
|
|
# Unit-level additions for telegraf: a `sensors` shim on PATH, secret
# substitution before start, an extra capability and the restart back-off.
systemd.services.telegraf = {
  path = [
    # Make sensors refer to the correct wrapper
    (mkIf cfg.scrapeSensors (
      pkgs.writeShellScriptBin "sensors" config.security.elewrap.telegraf-sensors.path
    ))
  ];
  serviceConfig = {
    # `mkAfter` orders this behind other ExecStartPre definitions; presumably
    # that includes the step that copies the config to /run/telegraf (see
    # `environmentFiles`) - TODO confirm against the telegraf NixOS module.
    ExecStartPre = mkAfter [
      (pkgs.writeShellScript "pre-start-token" (
        lib.concatLines (
          # Abort on the first failing substitution. Without this, only the
          # last command's exit status counts and telegraf could start with
          # an unreplaced placeholder.
          [ "set -e" ]
          ++ mapAttrsToList (placeholder: secretFile: ''
            ${lib.getExe pkgs.replace-secret} \
            ${lib.escapeShellArg placeholder} \
            ${lib.escapeShellArg secretFile} \
            /var/run/telegraf/config.toml
          '') cfg.secrets
        )
      ))
    ];
    # For wireguard statistics
    AmbientCapabilities = [ "CAP_NET_ADMIN" ];
    RestartSec = "60"; # Retry every minute
  };
};
|
|
};
|
|
}
|