---
# stacks/monitoring/prometheus/rules/blackbox.yml
# Higher-level SLO-ish alerts derived from blackbox probes.

# Prometheus rule groups defined by this file.
groups:
  # Group containing the blackbox probe alerts listed under `rules`.
  - name: blackbox-slo
    rules:
      # Warns when the "processing" phase of an HTTP probe stays above the
      # 1.5 threshold for the whole 10-minute hold period.
      - alert: BlackboxProbeSlowHTTP
        expr: 'probe_http_duration_seconds{phase="processing"} > 1.5'
        for: 10m
        labels:
          severity: warning
        annotations:
          summary: 'Backend processing slow for {{ $labels.instance }}'
          description: 'processing phase > 1.5s sustained.'

      # Warns when the probe's DNS lookup time stays above the 0.5 threshold
      # for the whole 15-minute hold period.
      - alert: BlackboxHighDNSLatency
        expr: 'probe_dns_lookup_time_seconds > 0.5'
        for: 15m
        labels:
          severity: warning
        annotations:
          summary: 'DNS lookup slow for {{ $labels.instance }}'

      # Warns when a probe reports more than five redirects for five
      # consecutive minutes.
      - alert: BlackboxRedirectLoop
        expr: 'probe_http_redirects > 5'
        for: 5m
        labels:
          severity: warning
        annotations:
          summary: 'Too many redirects on {{ $labels.instance }}'

      # Warns when a probe has negotiated TLS 1.0 or TLS 1.1 for a full hour.
      # A single regex matcher on the `version` label selects the same series
      # as two separate selectors joined with `or`. The expression is a plain
      # YAML scalar, so PromQL sees `\\.` and unescapes it to a literal dot.
      - alert: BlackboxTLSVersionOld
        expr: probe_tls_version_info{version=~"TLS 1\\.[01]"} == 1
        for: 1h
        labels:
          severity: warning
        annotations:
          summary: "Obsolete TLS version on {{ $labels.instance }}"
          # Names the negotiated version so responders need not run a query.
          description: >-
            Probe negotiated {{ $labels.version }}; TLS 1.0 and 1.1 are
            deprecated (RFC 8996).

      # Critical alert when the recorded 5-minute HTTP success ratio stays
      # below 0.95 for 15 minutes.
      # NOTE(review): the `job:` level prefix of the recording rule suggests
      # it is aggregated per job, in which case the series carries no
      # `instance` label and a summary using only `$labels.instance` renders
      # with a blank target. The template therefore falls back to the `job`
      # label when `instance` is empty - confirm against the recording rule.
      - alert: BlackboxAvailabilityLow
        expr: job:blackbox_http_success:ratio5m < 0.95
        for: 15m
        labels:
          severity: critical
        annotations:
          summary: "Availability < 95% for {{ or $labels.instance $labels.job }}"
          description: "Current success ratio: {{ $value | humanizePercentage }}."