feat: enable telegraf on all server nodes; add 10-minute autorestart

This commit is contained in:
oddlama 2023-06-25 02:34:05 +02:00
parent b8f647fb4a
commit f606e6e554
No known key found for this signature in database
GPG key ID: 14EFE510775FE39A
25 changed files with 228 additions and 41 deletions

View file

@ -37,7 +37,7 @@ in {
allNodes = attrNames colmenaNodes;
isColmenaNode = elem nodeName allNodes;
foreignConfigs = concatMap (n: colmenaNodes.${n}.config.nodes.${nodeName} or []) allNodes;
toplevelAttrs = ["age" "proxiedDomains" "networking" "systemd" "services"];
toplevelAttrs = ["age" "providedDomains" "networking" "systemd" "services"];
in
optionalAttrs isColmenaNode (mergeToplevelConfigs toplevelAttrs (
foreignConfigs

View file

@ -125,6 +125,7 @@ in {
RuntimeDirectory = "oauth2_proxy";
RuntimeDirectoryMode = "0750";
UMask = "007"; # TODO remove once https://github.com/oauth2-proxy/oauth2-proxy/issues/2141 is fixed
RestartSec = "600"; # Retry every 10 minutes
};
users.groups.oauth2_proxy.members = ["nginx"];

View file

@ -50,7 +50,7 @@ in {
{
basic_auth.username = "${nodeName}+promtail-loki-basic-auth-password";
basic_auth.password_file = config.age.secrets.promtail-loki-basic-auth-password.path;
url = "https://${nodes.${cfg.proxy}.config.proxiedDomains.loki}/loki/api/v1/push";
url = "https://${nodes.${cfg.proxy}.config.providedDomains.loki}/loki/api/v1/push";
}
];
@ -147,5 +147,7 @@ in {
];
};
};
systemd.services.promtail.serviceConfig.RestartSec = "600"; # Retry every 10 minutes
};
}

View file

@ -0,0 +1,7 @@
# NixOS module declaring the top-level `providedDomains` option: an attribute
# set of strings that defaults to empty. Other nodes read entries from it to
# find the domain a host serves.
{lib, ...}: let
  inherit (lib) mkOption types;
in {
  options = {
    providedDomains = mkOption {
      description = "Registry of domains that this host 'provides' (that refer to this host with some functionality). For easy cross-node referencing.";
      # Attribute name -> domain string.
      type = types.attrsOf types.str;
      # A host that provides nothing leaves the registry empty.
      default = {};
    };
  };
}

View file

@ -1,7 +0,0 @@
# NixOS module declaring the top-level `proxiedDomains` option: an attribute
# set of strings that defaults to empty, used as a registry of proxied domains
# that other nodes can look up.
{lib, ...}: let
  inherit (lib) mkOption types;
in {
  options = {
    proxiedDomains = mkOption {
      description = "Registry of proxied domains for easy cross-node referencing.";
      # Attribute name -> domain string.
      type = types.attrsOf types.str;
      # No proxied domains unless a host registers some.
      default = {};
    };
  };
}

View file

@ -21,16 +21,26 @@
in {
options.extra.telegraf = {
enable = mkEnableOption (mdDoc "telegraf to push metrics to influx.");
proxy = mkOption {
type = types.str;
description = mdDoc "The node name of the proxy server which provides the influx api endpoint.";
influxdb2 = {
url = mkOption {
type = types.str;
example = "https://influxdb.example.com";
description = mdDoc "The influxdb v2 database url to push to.";
};
organization = mkOption {
type = types.str;
description = mdDoc "The organization to push to.";
};
bucket = mkOption {
type = types.str;
description = mdDoc "The bucket to push to.";
};
};
};
config = mkIf cfg.enable {
# Connect safely via wireguard to skip authentication
networking.hosts.${nodes.${cfg.proxy}.config.extra.wireguard."proxy-${cfg.proxy}".ipv4} = [nodes.${cfg.proxy}.config.proxiedDomains.influxdb];
age.secrets.telegraf-influxdb-token = {
rekeyFile = nodePath + "/secrets/telegraf-influxdb-token.age";
mode = "440";
@ -55,10 +65,9 @@ in {
};
outputs = {
influxdb_v2 = {
urls = ["https://${nodes.${cfg.proxy}.config.proxiedDomains.influxdb}"];
urls = [cfg.influxdb2.url];
token = "$INFLUX_TOKEN";
organization = "servers";
bucket = "telegraf";
inherit (cfg.influxdb2) organization bucket;
};
};
inputs =
@ -103,8 +112,11 @@ in {
"/run/wrappers"
pkgs.lm_sensors
];
# For wireguard statistics
serviceConfig.AmbientCapabilities = ["CAP_NET_ADMIN"];
serviceConfig = {
# For wireguard statistics
AmbientCapabilities = ["CAP_NET_ADMIN"];
RestartSec = "600"; # Retry every 10 minutes
};
};
};
}