---

# stacks/monitoring/prometheus.yml
# Scrape config for the homelab. One global interval, then per-job overrides
# when something is chatty or slow. Alert rules live in ./alerts/*.yml
# (loaded through rule_files) so I can version them separately.

# Server-wide defaults; individual scrape jobs may override the interval.
global:
  # Labels attached to series and alerts when they leave this server
  # (Alertmanager, federation, remote storage).
  external_labels:
    origin: homelab
  # Rules are evaluated at the same cadence targets are scraped.
  evaluation_interval: 30s
  scrape_interval: 30s

# Rule files are picked up by glob from the alerts directory.
rule_files:
  - '/etc/prometheus/alerts/*.yml'

# Firing alerts are delivered to one statically configured Alertmanager.
alerting:
  alertmanagers:
    - static_configs:
        - targets:
            - 'alertmanager:9093'

scrape_configs:
  # Prometheus scraping itself over localhost.
  - job_name: prometheus
    static_configs:
      - targets:
          - 'localhost:9090'

  # node-exporter target, carrying a static `host` label.
  - job_name: node
    static_configs:
      - labels:
          host: homelab-1
        targets:
          - 'node-exporter:9100'

  # cAdvisor target; two unused series families are discarded after the scrape.
  - job_name: cadvisor
    metric_relabel_configs:
      # container_fs_inodes_free and container_tasks_state are not rendered
      # in Grafana, so they are dropped before being stored.
      - action: drop
        regex: 'container_(fs_inodes_free|tasks_state)'
        source_labels:
          - __name__
    static_configs:
      - targets:
          - 'cadvisor:8080'

  # Caddy metrics, scraped from caddy:2019 at /metrics.
  - job_name: caddy
    static_configs:
      - targets:
          - 'caddy:2019'
    metrics_path: /metrics

  # Loki metrics, scraped from loki:3100 at /metrics.
  - job_name: loki
    static_configs:
      - targets:
          - 'loki:3100'
    metrics_path: /metrics

  # HTTP probes through blackbox-exporter. The listed URLs are not scraped
  # directly: each one is passed to the exporter's /probe endpoint as the
  # `target` query parameter and checked with the http_2xx module.
  - job_name: blackbox_http
    static_configs:
      - targets:
          - 'https://jellyfin.home.arpa'
          - 'https://gitea.home.arpa'
          - 'https://immich.home.arpa'
    params:
      module:
        - http_2xx
    metrics_path: /probe
    relabel_configs:
      # Rules run top to bottom; the order below must be kept.
      # 1. Copy the URL into the `target` query parameter.
      - target_label: __param_target
        source_labels:
          - __address__
      # 2. Expose the probed URL as the `instance` label.
      - target_label: instance
        source_labels:
          - __param_target
      # 3. Send the actual scrape request to the exporter.
      - replacement: 'blackbox-exporter:9115'
        target_label: __address__