summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author Tobias Wiese <tobias@tobiaswiese.com> 2020-05-13 16:53:00 +0200
committer Tobias Wiese <tobias@tobiaswiese.com> 2020-05-14 12:15:16 +0200
commit 240b4f8e9f33086ce8cfa2734b5c55a1484d5419 (patch)
tree 1495c7f39144eaf5cca350c56d1331dc278bcbb3
parent ab656fb6bc13de4d263ca3d05979f0d2bebf3d50 (diff)
service/cluster: monitor health of agents
When an agent is unhealthy, only notify about that, and suppress notifications about other services on that agent not running.

Signed-off-by: Tobias Wiese <tobias@tobiaswiese.com>
-rw-r--r-- zones.d/master/cluster-health.conf 20
-rw-r--r-- zones.d/master/hosts.conf 1
2 files changed, 21 insertions, 0 deletions
diff --git a/zones.d/master/cluster-health.conf b/zones.d/master/cluster-health.conf
new file mode 100644
index 0000000..c1d8956
--- /dev/null
+++ b/zones.d/master/cluster-health.conf
@@ -0,0 +1,20 @@
+apply Service "agent-health" { // Per the commit message: monitors the health of agents; one "agent-health" service per matching host.
+ import "generic-service"
+ zone = "master" // Places the service in the "master" zone. NOTE(review): presumably so the check runs from the master and still works when the agent is unreachable - confirm.
+
+ check_command = "cluster-zone" // NOTE(review): no vars.cluster_zone is set in this rule; presumably the command falls back to the host name as the zone to check - confirm against the ITL.
+
+ assign where host.vars.os // Matches every host that defines vars.os ...
+ ignore where host.vars.cluster_zone == "master" // ... except hosts whose vars.cluster_zone is "master".
+}
+
+apply Dependency "agent-health-check" to Service { // Makes the matching services depend on the "agent-health" service named below.
+ parent_service_name = "agent-health"
+
+ states = [ OK ] // NOTE(review): presumably the dependency only counts as satisfied while the parent service is OK - confirm against the Dependency object docs.
+ disable_notifications = true // Per the commit message: suppress notifications about other services on an unhealthy agent.
+
+ assign where host.vars.os
+ ignore where host.vars.cluster_zone == "master" // Same host selection as the "agent-health" service rule.
+ ignore where service.name == "agent-health" // Avoid self dependency
+}
diff --git a/zones.d/master/hosts.conf b/zones.d/master/hosts.conf
index b6ecb28..f306dde 100644
--- a/zones.d/master/hosts.conf
+++ b/zones.d/master/hosts.conf
@@ -4,6 +4,7 @@ object Host "new-babbage.server.tobiaswiese.net" {
address = "185.244.192.195"
address6 = "2a03:4000:27:5fc::1"
+ vars.cluster_zone = "master"
vars.os = "debian"
vars.os_family = "debian"
vars.kernel = "linux"