diff options
| author | Georg | 2021-08-06 17:42:45 +0200 | 
|---|---|---|
| committer | Georg | 2021-08-06 17:42:45 +0200 | 
| commit | 4bd9e36ff45b2001b048a23a218e26605deac7d0 (patch) | |
| tree | 4b592921aa4cbf98d75059f9223712b6ddcc1799 /Prometheus/alerts/lysergic | |
| parent | 1791f4374b72e5bd45dfbe6d2e543d0537f711e0 (diff) | |
| download | monitoring-4bd9e36ff45b2001b048a23a218e26605deac7d0.tar.gz monitoring-4bd9e36ff45b2001b048a23a218e26605deac7d0.tar.bz2 monitoring-4bd9e36ff45b2001b048a23a218e26605deac7d0.zip  | |
Mass Commit 06/08/2021
Signed-off-by: Georg <georg@lysergic.dev>
Diffstat (limited to 'Prometheus/alerts/lysergic')
| -rw-r--r-- | Prometheus/alerts/lysergic/blackbox.yml | 94 | ||||
| -rw-r--r-- | Prometheus/alerts/lysergic/nginx_exporters.yml | 13 | ||||
| -rw-r--r-- | Prometheus/alerts/lysergic/node_exporters.yml | 13 | ||||
| -rw-r--r-- | Prometheus/alerts/lysergic/wireguard_exporters.yml | 23 | 
4 files changed, 143 insertions, 0 deletions
diff --git a/Prometheus/alerts/lysergic/blackbox.yml b/Prometheus/alerts/lysergic/blackbox.yml
new file mode 100644
index 0000000..49a553d
--- /dev/null
+++ b/Prometheus/alerts/lysergic/blackbox.yml
@@ -0,0 +1,94 @@
+# Blackbox probe alerts: each rule fires when probe_success is 0 for its HTTPS target(s) for the `for` duration.
+groups:
+- name: lysergic-blackbox_exporters
+  rules:
+  - alert: LIBERTACASA-WEB-DOWN  # dots are written as \\. so they only match a literal "." in the instance URL
+    expr: probe_success{instance=~"https://liberta\\.casa|https://www\\.lysergic\\.dev"} == 0
+    for: 30s
+    labels:
+      severity: warning
+      project: LYSERGIC
+    annotations:
+      title: Node {{ $labels.instance }} is down
+      description: Failed to scrape {{ $labels.job }} on {{ $labels.instance}} for more than 30 seconds. Node seems down.
+  - alert: XKEK-WEB-DOWN
+    expr: probe_success{instance=~"https://xkek\\.net|https://kekx\\.net"} == 0
+    for: 30s
+    labels:
+      severity: warning
+      project: LYSERGIC
+    annotations:
+      title: Node {{ $labels.instance }} is down
+      description: Failed to scrape {{ $labels.job }} on {{ $labels.instance}} for more than 30 seconds. Node seems down.
+  - alert: JITSI-WEB-DOWN  # single target: exact match instead of a regex
+    expr: probe_success{instance="https://meet.lysergic.dev"} == 0
+    for: 30s
+    labels:
+      severity: warning
+      project: LYSERGIC
+    annotations:
+      title: Node {{ $labels.instance }} is down
+      description: Failed to scrape {{ $labels.job }} on {{ $labels.instance}} for more than 30 seconds. Node seems down.
+  - alert: PASTA-WEB-DOWN
+    expr: probe_success{instance="https://pasta.lysergic.dev"} == 0
+    for: 30s
+    labels:
+      severity: warning
+      project: LYSERGIC
+    annotations:
+      title: Node {{ $labels.instance }} is down
+      description: Failed to scrape {{ $labels.job }} on {{ $labels.instance}} for more than 30 seconds. Node seems down.
+  - alert: GITEA-WEB-DOWN
+    expr: probe_success{instance="https://git.com.de"} == 0
+    for: 30s
+    labels:
+      severity: warning
+      project: LYSERGIC
+    annotations:
+      title: Node {{ $labels.instance }} is down
+      description: Failed to scrape {{ $labels.job }} on {{ $labels.instance}} for more than 30 seconds. Node seems down.
+  - alert: CGIT-WEB-DOWN
+    expr: probe_success{instance="https://git.casa"} == 0
+    for: 30s
+    labels:
+      severity: warning
+      project: LYSERGIC
+    annotations:
+      title: Node {{ $labels.instance }} is down
+      description: Failed to scrape {{ $labels.job }} on {{ $labels.instance}} for more than 30 seconds. Node seems down.
+  - alert: ELEMENT-WEB-DOWN
+    expr: probe_success{instance="https://element.liberta.casa"} == 0
+    for: 30s
+    labels:
+      severity: warning
+      project: LYSERGIC
+    annotations:
+      title: Node {{ $labels.instance }} is down
+      description: Failed to scrape {{ $labels.job }} on {{ $labels.instance}} for more than 30 seconds. Node seems down.
+  - alert: ETHERPAD-WEB-DOWN
+    expr: probe_success{instance="https://pad.hugz.io"} == 0
+    for: 30s
+    labels:
+      severity: warning
+      project: LYSERGIC
+    annotations:
+      title: Node {{ $labels.instance }} is down
+      description: Failed to scrape {{ $labels.job }} on {{ $labels.instance}} for more than 30 seconds. Node seems down.
+  - alert: SEARX-WEB-DOWN  # waits 120s (not 30s) before firing; the description states the same duration
+    expr: probe_success{instance="https://searx.xkek.net"} == 0
+    for: 120s
+    labels:
+      severity: warning
+      project: LYSERGIC
+    annotations:
+      title: Node {{ $labels.instance }} is down
+      description: Failed to scrape {{ $labels.job }} on {{ $labels.instance}} for more than 120 seconds. Node seems down.
+  - alert: YACY-WEB-DOWN
+    expr: probe_success{instance="https://yacy.xkek.net"} == 0
+    for: 30s
+    labels:
+      severity: warning
+      project: LYSERGIC
+    annotations:
+      title: Node {{ $labels.instance }} is down
+      description: Failed to scrape {{ $labels.job }} on {{ $labels.instance}} for more than 30 seconds. Node seems down.
diff --git a/Prometheus/alerts/lysergic/nginx_exporters.yml b/Prometheus/alerts/lysergic/nginx_exporters.yml
new file mode 100644
index 0000000..8a3777d
--- /dev/null
+++ b/Prometheus/alerts/lysergic/nginx_exporters.yml
@@ -0,0 +1,13 @@
+groups:
+- name: lysergic-nginx_exporters  # fires when an nginx exporter in project LYSERGIC reports nginx_up == 0
+  rules:
+  - alert: lysergic-nginx-down
+    expr: nginx_up{project="LYSERGIC", job="nginx_exporters"} == 0
+    for: 1m
+    labels:
+      job: nginx_exporters
+      severity: warning  # lowercase like the other warning rules; label values are matched case-sensitively
+      project: LYSERGIC
+    annotations:
+      title: nginx {{ $labels.instance }} is down
+      description: Failed to scrape {{ $labels.job }} on {{ $labels.instance}} for more than 1 minute. nginx seems down.
diff --git a/Prometheus/alerts/lysergic/node_exporters.yml b/Prometheus/alerts/lysergic/node_exporters.yml
new file mode 100644
index 0000000..392b3b7
--- /dev/null
+++ b/Prometheus/alerts/lysergic/node_exporters.yml
@@ -0,0 +1,13 @@
+groups:
+- name: lysergic-node_exporters  # fires when a node_exporters target in project LYSERGIC has up == 0
+  rules:
+  - alert: lysergic-node-down
+    expr: up{project="LYSERGIC", job="node_exporters"} == 0
+    for: 1m
+    labels:
+      job: node_exporters
+      severity: warning
+      project: LYSERGIC
+    annotations:
+      title: Node {{ $labels.instance }} is down
+      description: Failed to scrape {{ $labels.job }} on {{ $labels.instance}} for more than 1 minute. Node seems down.
diff --git a/Prometheus/alerts/lysergic/wireguard_exporters.yml b/Prometheus/alerts/lysergic/wireguard_exporters.yml
new file mode 100644
index 0000000..87fecb6
--- /dev/null
+++ b/Prometheus/alerts/lysergic/wireguard_exporters.yml
@@ -0,0 +1,23 @@
+groups:
+- name: lysergic-wireguard_exporters  # alerts on WireGuard peers whose last handshake is too old
+  rules:
+  - alert: mercury-wireguard-down  # NOTE(review): "$WG0_PUBKEY" in expr looks like a placeholder; rule files get no env expansion - confirm it is substituted on deploy
+    expr: time() - wireguard_peer_last_handshake_seconds{device="wg0", instance="localhost:9586", job="wireguard-mercury", public_key="$WG0_PUBKEY"} > 125
+    for: 10s  # fires once the handshake age has stayed above 125 s for a further 10 s
+    labels:
+      job: wireguard-mercury
+      severity: CRITICAL  # NOTE(review): upper-case, unlike the lower-case "warning" in sibling rule files - confirm Alertmanager routes match this spelling
+      project: LYSERGIC
+    annotations:
+      title: WireGuard {{ $labels.instance }} is down
+      description: 'No WireGuard handshake reported by {{ $labels.job }} on {{ $labels.instance}} for more than 2 minutes. Uplink to Psyched seems down. Affected services: Global infrastructure monitoring. Other outages might no longer be tracked. Urgent investigation is advised.'
+  - alert: lysergic-wireguard-down  # any LYSERGIC peer on a device other than wg1 with a handshake older than 130 s
+    expr: time() - wireguard_peer_last_handshake_seconds{project="LYSERGIC",device!="wg1"} > 130
+    for: 10s
+    labels:
+      job: wireguard-lysergic
+      severity: CRITICAL
+      project: LYSERGIC
+    annotations:
+      title: WireGuard Incident
+      description: 'No handshakes for {{ $labels.device }} in {{ $labels.job }} on {{ $labels.instance}} have been exchanged for an extended amount of time. Critical services could be affected.'
