From 240b4f8e9f33086ce8cfa2734b5c55a1484d5419 Mon Sep 17 00:00:00 2001
From: Tobias Wiese
Date: Wed, 13 May 2020 16:53:00 +0200
Subject: service/cluster: monitor health of agents

When an agent is unhealthy only notify about that, and suppress
notifications about other services on that agent not running.

Signed-off-by: Tobias Wiese
---
 zones.d/master/cluster-health.conf | 20 ++++++++++++++++++++
 zones.d/master/hosts.conf          |  1 +
 2 files changed, 21 insertions(+)
 create mode 100644 zones.d/master/cluster-health.conf

diff --git a/zones.d/master/cluster-health.conf b/zones.d/master/cluster-health.conf
new file mode 100644
index 0000000..c1d8956
--- /dev/null
+++ b/zones.d/master/cluster-health.conf
@@ -0,0 +1,20 @@
+apply Service "agent-health" { // Adds an "agent-health" service to every matching host
+  import "generic-service"
+  zone = "master"
+
+  check_command = "cluster-zone" // NOTE(review): presumably checks the zone named after the host (no cluster_zone override set here) - confirm
+
+  assign where host.vars.os
+  ignore where host.vars.cluster_zone == "master" // Skip hosts flagged as belonging to the master zone
+}
+
+apply Dependency "agent-health-check" to Service { // Makes each matching service depend on its host's "agent-health" service
+  parent_service_name = "agent-health"
+
+  states = [ OK ] // Parent states in which the dependency holds; any other parent state fails it
+  disable_notifications = true // Suppress notifications of dependent services while the agent is unhealthy
+
+  assign where host.vars.os
+  ignore where host.vars.cluster_zone == "master" // Same exclusion as the "agent-health" service rule
+  ignore where service.name == "agent-health" // Avoid self dependency
+}
diff --git a/zones.d/master/hosts.conf b/zones.d/master/hosts.conf
index b6ecb28..f306dde 100644
--- a/zones.d/master/hosts.conf
+++ b/zones.d/master/hosts.conf
@@ -4,6 +4,7 @@ object Host "new-babbage.server.tobiaswiese.net" {
   address = "185.244.192.195"
   address6 = "2a03:4000:27:5fc::1"
 
+  vars.cluster_zone = "master" // Excludes this host from the "agent-health" service and dependency rules
   vars.os = "debian"
   vars.os_family = "debian"
   vars.kernel = "linux"
-- 
cgit v1.2.3