summaryrefslogtreecommitdiffstats
path: root/Prometheus/alerts
diff options
context:
space:
mode:
author Georg 2021-08-06 17:42:45 +0200
committer Georg 2021-08-06 17:42:45 +0200
commit 4bd9e36ff45b2001b048a23a218e26605deac7d0 (patch)
tree 4b592921aa4cbf98d75059f9223712b6ddcc1799 /Prometheus/alerts
parent 1791f4374b72e5bd45dfbe6d2e543d0537f711e0 (diff)
download monitoring-4bd9e36ff45b2001b048a23a218e26605deac7d0.tar.gz
monitoring-4bd9e36ff45b2001b048a23a218e26605deac7d0.tar.bz2
monitoring-4bd9e36ff45b2001b048a23a218e26605deac7d0.zip
Mass Commit 06/08/2021
Signed-off-by: Georg <georg@lysergic.dev>
Diffstat (limited to 'Prometheus/alerts')
-rw-r--r-- Prometheus/alerts/lysergic/blackbox.yml 94
-rw-r--r-- Prometheus/alerts/lysergic/nginx_exporters.yml 13
-rw-r--r-- Prometheus/alerts/lysergic/node_exporters.yml 13
-rw-r--r-- Prometheus/alerts/lysergic/wireguard_exporters.yml 23
-rw-r--r-- Prometheus/alerts/tripsit/blackbox.yml 21
-rw-r--r-- Prometheus/alerts/tripsit/node_exporters.yml 13
6 files changed, 177 insertions, 0 deletions
diff --git a/Prometheus/alerts/lysergic/blackbox.yml b/Prometheus/alerts/lysergic/blackbox.yml
new file mode 100644
index 0000000..49a553d
--- /dev/null
+++ b/Prometheus/alerts/lysergic/blackbox.yml
@@ -0,0 +1,94 @@
+# Availability alerts for LYSERGIC web front ends, based on probe_success.
+# Each rule fires when probe_success is 0 for a listed probe target during
+# the whole `for` window. The instance matchers are regular expressions.
+groups:
+- name: lysergic-blackbox_exporters
+  rules:
+  - alert: LIBERTACASA-WEB-DOWN
+    expr: probe_success{instance=~"https://liberta.casa|https://www.lysergic.dev"} == 0
+    for: 30s
+    labels:
+      severity: warning
+      project: LYSERGIC
+    annotations:
+      title: Node {{ $labels.instance }} is down
+      description: Failed to scrape {{ $labels.job }} on {{ $labels.instance}} for more than 30 seconds. Node seems down.
+  - alert: XKEK-WEB-DOWN
+    expr: probe_success{instance=~"https://xkek.net|https://kekx.net"} == 0
+    for: 30s
+    labels:
+      severity: warning
+      project: LYSERGIC
+    annotations:
+      title: Node {{ $labels.instance }} is down
+      description: Failed to scrape {{ $labels.job }} on {{ $labels.instance}} for more than 30 seconds. Node seems down.
+  - alert: JITSI-WEB-DOWN
+    expr: probe_success{instance=~"https://meet.lysergic.dev"} == 0
+    for: 30s
+    labels:
+      severity: warning
+      project: LYSERGIC
+    annotations:
+      title: Node {{ $labels.instance }} is down
+      description: Failed to scrape {{ $labels.job }} on {{ $labels.instance}} for more than 30 seconds. Node seems down.
+  - alert: PASTA-WEB-DOWN
+    expr: probe_success{instance=~"https://pasta.lysergic.dev"} == 0
+    for: 30s
+    labels:
+      severity: warning
+      project: LYSERGIC
+    annotations:
+      title: Node {{ $labels.instance }} is down
+      description: Failed to scrape {{ $labels.job }} on {{ $labels.instance}} for more than 30 seconds. Node seems down.
+  - alert: GITEA-WEB-DOWN
+    expr: probe_success{instance=~"https://git.com.de"} == 0
+    for: 30s
+    labels:
+      severity: warning
+      project: LYSERGIC
+    annotations:
+      title: Node {{ $labels.instance }} is down
+      description: Failed to scrape {{ $labels.job }} on {{ $labels.instance}} for more than 30 seconds. Node seems down.
+  - alert: CGIT-WEB-DOWN
+    expr: probe_success{instance=~"https://git.casa"} == 0
+    for: 30s
+    labels:
+      severity: warning
+      project: LYSERGIC
+    annotations:
+      title: Node {{ $labels.instance }} is down
+      description: Failed to scrape {{ $labels.job }} on {{ $labels.instance}} for more than 30 seconds. Node seems down.
+  - alert: ELEMENT-WEB-DOWN
+    expr: probe_success{instance=~"https://element.liberta.casa"} == 0
+    for: 30s
+    labels:
+      severity: warning
+      project: LYSERGIC
+    annotations:
+      title: Node {{ $labels.instance }} is down
+      description: Failed to scrape {{ $labels.job }} on {{ $labels.instance}} for more than 30 seconds. Node seems down.
+  - alert: ETHERPAD-WEB-DOWN
+    expr: probe_success{instance=~"https://pad.hugz.io"} == 0
+    for: 30s
+    labels:
+      severity: warning
+      project: LYSERGIC
+    annotations:
+      title: Node {{ $labels.instance }} is down
+      description: Failed to scrape {{ $labels.job }} on {{ $labels.instance}} for more than 30 seconds. Node seems down.
+  # This rule uses a longer hold time (120s) than its siblings; the
+  # description states the same window so responders are not misled.
+  - alert: SEARX-WEB-DOWN
+    expr: probe_success{instance=~"https://searx.xkek.net"} == 0
+    for: 120s
+    labels:
+      severity: warning
+      project: LYSERGIC
+    annotations:
+      title: Node {{ $labels.instance }} is down
+      description: Failed to scrape {{ $labels.job }} on {{ $labels.instance}} for more than 2 minutes. Node seems down.
+  - alert: YACY-WEB-DOWN
+    expr: probe_success{instance=~"https://yacy.xkek.net"} == 0
+    for: 30s
+    labels:
+      severity: warning
+      project: LYSERGIC
+    annotations:
+      title: Node {{ $labels.instance }} is down
+      description: Failed to scrape {{ $labels.job }} on {{ $labels.instance}} for more than 30 seconds. Node seems down.
diff --git a/Prometheus/alerts/lysergic/nginx_exporters.yml b/Prometheus/alerts/lysergic/nginx_exporters.yml
new file mode 100644
index 0000000..8a3777d
--- /dev/null
+++ b/Prometheus/alerts/lysergic/nginx_exporters.yml
@@ -0,0 +1,13 @@
+# nginx liveness alert for the LYSERGIC project, based on nginx_up.
+groups:
+- name: lysergic-nginx_exporters
+  rules:
+  # Fires when nginx_up has been 0 for a full minute on a matching target.
+  - alert: lysergic-nginx-down
+    expr: nginx_up{project="LYSERGIC", job="nginx_exporters"} == 0
+    for: 1m
+    labels:
+      job: nginx_exporters
+      # Lowercase to agree with the `warning` value used by the blackbox and
+      # node_exporters rules; label matching is case-sensitive.
+      # NOTE(review): confirm no Alertmanager matcher relies on "WARNING".
+      severity: warning
+      project: LYSERGIC
+    annotations:
+      title: nginx {{ $labels.instance }} is down
+      description: Failed to scrape {{ $labels.job }} on {{ $labels.instance}} for more than 1 minute. nginx seems down.
diff --git a/Prometheus/alerts/lysergic/node_exporters.yml b/Prometheus/alerts/lysergic/node_exporters.yml
new file mode 100644
index 0000000..392b3b7
--- /dev/null
+++ b/Prometheus/alerts/lysergic/node_exporters.yml
@@ -0,0 +1,13 @@
+# Target availability alert for LYSERGIC node exporters, based on `up`.
+groups:
+- name: lysergic-node_exporters
+  rules:
+  # Fires once `up` has been 0 for one minute on a matching target.
+  - alert: lysergic-node-down
+    expr: 'up{project="LYSERGIC", job="node_exporters"} == 0'
+    for: 1m
+    labels:
+      project: LYSERGIC
+      severity: warning
+      job: node_exporters
+    annotations:
+      title: 'Node {{ $labels.instance }} is down'
+      description: 'Failed to scrape {{ $labels.job }} on {{ $labels.instance}} for more than 1 minute. Node seems down.'
diff --git a/Prometheus/alerts/lysergic/wireguard_exporters.yml b/Prometheus/alerts/lysergic/wireguard_exporters.yml
new file mode 100644
index 0000000..87fecb6
--- /dev/null
+++ b/Prometheus/alerts/lysergic/wireguard_exporters.yml
@@ -0,0 +1,23 @@
+# WireGuard handshake-age alerts for the LYSERGIC project.
+# Both expressions compute (last handshake timestamp - now), which is a
+# negative number of seconds; a rule matches when that value drops below the
+# negative threshold, i.e. the last handshake is older than the threshold.
+groups:
+- name: lysergic-wireguard_exporters
+  rules:
+  # Single peer on wg0 of the wireguard-mercury job: last handshake older
+  # than 125 seconds, held for 10 seconds.
+  # NOTE(review): "$WG0_PUBKEY" looks like a placeholder; confirm it is
+  # replaced with the real peer key before this file is loaded.
+  - alert: mercury-wireguard-down
+    expr: wireguard_peer_last_handshake_seconds{device="wg0", instance="localhost:9586", job="wireguard-mercury", public_key="$WG0_PUBKEY"} - time() < -125
+    for: 10s
+    labels:
+      job: wireguard-mercury
+      # Lowercase to agree with the `critical`/`warning` values used by the
+      # other rule files; label matching is case-sensitive.
+      # NOTE(review): confirm no Alertmanager matcher relies on "CRITICAL".
+      severity: critical
+      project: LYSERGIC
+    annotations:
+      title: WireGuard {{ $labels.instance }} is down
+      description: 'Failed to scrape {{ $labels.job }} on {{ $labels.instance}} for more than 2 minutes. Uplink to Psyched seems down. Affected services: Global infrastructure monitoring. Other outages might no longer be tracked. Urgent investigation is advised.'
+  # Every LYSERGIC peer except those on wg1: last handshake older than
+  # 130 seconds, held for 10 seconds.
+  - alert: lysergic-wireguard-down
+    expr: wireguard_peer_last_handshake_seconds{project="LYSERGIC",device!="wg1"} - time() < -130
+    for: 10s
+    labels:
+      job: wireguard-lysergic
+      # See the note on the rule above: lowercased for consistency.
+      severity: critical
+      project: LYSERGIC
+    annotations:
+      title: WireGuard Incident
+      description: 'No handshakes for {{ $labels.device }} in {{ $labels.job }} on {{ $labels.instance}} have been exchanged for an extended amount of time. Critical services could be affected.'
diff --git a/Prometheus/alerts/tripsit/blackbox.yml b/Prometheus/alerts/tripsit/blackbox.yml
new file mode 100644
index 0000000..0390718
--- /dev/null
+++ b/Prometheus/alerts/tripsit/blackbox.yml
@@ -0,0 +1,21 @@
+# Availability alerts for TRIPSIT web front ends, based on probe_success.
+# The instance matchers are regular expressions listing the probed URLs.
+groups:
+- name: tripsit-blackbox_exporters
+  rules:
+  # tripsit.me sites: critical after 15 seconds of failed probes.
+  - alert: TRIPSIT.ME-WEB-INCIDENT
+    expr: 'probe_success{instance=~"https://tripsit.me|https://chat.tripsit.me|https://chat.tripsit.me/chat|https://drugs.tripsit.me|https://benzo.tripsit.me|https://dxm.tripsit.me|https://combo.tripsit.me|https://wiki.tripsit.me|https://tripbot.tripsit.me"} == 0'
+    for: 15s
+    labels:
+      project: TRIPSIT
+      severity: critical
+    annotations:
+      title: 'Node {{ $labels.instance }} is down'
+      description: 'Failed to scrape {{ $labels.job }} on {{ $labels.instance}} for more than 15 seconds. Node seems down.'
+  # tripsit.dev hosts: warning after one minute of failed probes.
+  - alert: TRIPSIT.DEV-WEB-INCIDENT
+    expr: 'probe_success{instance=~"https://mail.tripsit.dev|https://dopamine.tripsit.dev"} == 0'
+    for: 1m
+    labels:
+      project: TRIPSIT
+      severity: warning
+    annotations:
+      title: 'Node {{ $labels.instance }} is down'
+      description: 'Failed to scrape {{ $labels.job }} on {{ $labels.instance}} for more than 1 minute. Node seems down.'
diff --git a/Prometheus/alerts/tripsit/node_exporters.yml b/Prometheus/alerts/tripsit/node_exporters.yml
new file mode 100644
index 0000000..ee82367
--- /dev/null
+++ b/Prometheus/alerts/tripsit/node_exporters.yml
@@ -0,0 +1,13 @@
+# Target availability alert for TRIPSIT node exporters, based on `up`.
+groups:
+- name: tripsit-node_exporters
+  rules:
+  # Fires once `up` has been 0 for 30 seconds on a matching target; the
+  # description states the same window as `for`.
+  - alert: TRIPSIT-NODE-INCIDENT
+    expr: up{project="TRIPSIT", job="node_exporters"} == 0
+    for: 30s
+    labels:
+      severity: warning
+      job: node_exporters
+      project: TRIPSIT
+    annotations:
+      title: Node {{ $labels.instance }} is down
+      description: Failed to scrape {{ $labels.job }} on {{ $labels.instance}} for more than 30 seconds. Node seems down.