# File: stacks/monitoring/compose.yml

# stacks/monitoring/compose.yml
# Prometheus + Grafana + Loki + Promtail. Deployed with:
#   docker compose -f stacks/monitoring/compose.yml up -d
# See mercemay.top/src/homelab-compose/ for how this slots behind Caddy.

# Networks used by this stack.
networks:
  # Stack-private bridge network; every service in this file joins it.
  monitoring:
    driver: bridge
  # Pre-existing network called homelab_edge. `external: true` means Compose
  # expects it to exist already and will neither create nor remove it.
  # NOTE(review): presumably the network the Caddy reverse proxy sits on --
  # confirm against the proxy stack.
  edge:
    name: homelab_edge
    external: true

# Named volumes, all with default driver settings (listed alphabetically).
volumes:
  alertmanager_data:  # mounted at /alertmanager in the alertmanager service
  grafana_data:  # mounted at /var/lib/grafana in the grafana service
  loki_data:  # mounted at /loki in the loki service
  prometheus_data:  # mounted at /prometheus in the prometheus service

services:
  # Prometheus server. Configuration and the ./alerts directory are
  # bind-mounted read-only; the named volume is mounted at /prometheus.
  prometheus:
    image: prom/prometheus:v2.54.1
    restart: unless-stopped
    networks:
      - monitoring
      - edge
    volumes:
      - prometheus_data:/prometheus
      - ./prometheus.yml:/etc/prometheus/prometheus.yml:ro
      # NOTE(review): presumably referenced via rule_files in prometheus.yml.
      - ./alerts:/etc/prometheus/alerts:ro
    # Setting `command` replaces the image's default arguments, so every flag
    # the server needs has to be listed here.
    # NOTE(review): no --storage.tsdb.path is passed; confirm the default data
    # directory ends up inside the /prometheus volume before changing this.
    command:
      - --config.file=/etc/prometheus/prometheus.yml
      # Retention is capped both by age and by on-disk size.
      - --storage.tsdb.retention.time=90d
      - --storage.tsdb.retention.size=20GB
      # Enables the HTTP reload/quit endpoints.
      # NOTE(review): this service is also on the edge network -- confirm the
      # proxy does not expose those endpoints.
      - --web.enable-lifecycle
    # Readiness probe against Prometheus' own /-/ready endpoint.
    healthcheck:
      interval: 30s
      timeout: 5s
      retries: 3
      test:
        - "CMD"
        - "wget"
        - "-q"
        - "--spider"
        - "http://localhost:9090/-/ready"

  # Grafana UI. Provisioning files and dashboards are bind-mounted read-only;
  # Grafana's own state lives in the grafana_data volume.
  grafana:
    image: grafana/grafana:11.1.4
    restart: unless-stopped
    # Additional container environment is read from .env next to this file.
    env_file: .env
    environment:
      GF_USERS_ALLOW_SIGN_UP: "false"
      # DOMAIN is interpolated by Compose when this file is parsed. The `:?`
      # form aborts with the given message if DOMAIN is unset or empty, rather
      # than silently producing the root URL "https://grafana.".
      GF_SERVER_ROOT_URL: "https://grafana.${DOMAIN:?DOMAIN must be set}"
      # Intentionally empty: no extra plugins are installed at start-up.
      GF_INSTALL_PLUGINS: ""
    volumes:
      - grafana_data:/var/lib/grafana
      - ./grafana/provisioning:/etc/grafana/provisioning:ro
      - ./grafana/dashboards:/var/lib/grafana/dashboards:ro
    # Start-order only: these containers are started first, but Compose does
    # not wait for their healthchecks to pass with this short form.
    depends_on:
      - prometheus
      - loki
    networks: [monitoring, edge]

  # Loki. Configuration is bind-mounted read-only; the loki_data volume is
  # mounted at /loki.
  loki:
    image: grafana/loki:3.1.1
    restart: unless-stopped
    # NOTE(review): Loki is attached to the edge network as well as the
    # private one -- confirm that exposure to the proxy network is intended.
    networks:
      - monitoring
      - edge
    volumes:
      - loki_data:/loki
      - ./loki.yaml:/etc/loki/loki.yaml:ro
    command:
      - -config.file=/etc/loki/loki.yaml
    # Readiness probe against Loki's /ready endpoint on port 3100.
    healthcheck:
      interval: 30s
      timeout: 5s
      retries: 5
      test:
        - "CMD"
        - "wget"
        - "-q"
        - "--spider"
        - "http://localhost:3100/ready"
  # Promtail. Gets read-only access to the host's /var/log and to Docker's
  # per-container directories; only joins the private monitoring network.
  promtail:
    image: grafana/promtail:3.1.1
    restart: unless-stopped
    networks:
      - monitoring
    # Start-order only; does not wait for Loki's healthcheck to pass.
    depends_on:
      - loki
    # NOTE(review): no writable volume is mounted here. If promtail.yaml keeps
    # its positions file inside the container, read offsets are lost whenever
    # the container is recreated -- confirm against promtail.yaml.
    volumes:
      - ./promtail.yaml:/etc/promtail/promtail.yaml:ro
      - /var/log:/var/log:ro
      - /var/lib/docker/containers:/var/lib/docker/containers:ro
    command:
      - -config.file=/etc/promtail/promtail.yaml

  # Node exporter. Shares the host PID namespace and sees the host root
  # filesystem read-only at /host.
  node-exporter:
    image: prom/node-exporter:v1.8.2
    restart: unless-stopped
    pid: host
    networks:
      - monitoring
    volumes:
      - /:/host:ro,rslave
    command:
      # Tell the exporter where the host root filesystem is mounted.
      - --path.rootfs=/host
      # `$$` is Compose's escape for a literal `$`, so the exporter receives
      # the regex ending in `($|/)`.
      - --collector.filesystem.mount-points-exclude=^/(sys|proc|dev|host|etc)($$|/)

  # cAdvisor. Runs privileged with read-only views of the host root, /var/run,
  # /sys and Docker's data directory.
  cadvisor:
    image: gcr.io/cadvisor/cadvisor:v0.49.1
    restart: unless-stopped
    networks:
      - monitoring
    # Full privileges plus the kernel message device.
    privileged: true
    devices:
      - /dev/kmsg
    volumes:
      - /:/rootfs:ro
      - /sys:/sys:ro
      - /var/run:/var/run:ro
      - /var/lib/docker/:/var/lib/docker:ro

  # Alertmanager. Configuration is bind-mounted read-only; its state directory
  # is the alertmanager_data volume at /alertmanager.
  alertmanager:
    image: prom/alertmanager:v0.27.0
    restart: unless-stopped
    networks:
      - monitoring
      - edge
    volumes:
      - alertmanager_data:/alertmanager
      - ./alertmanager.yml:/etc/alertmanager/alertmanager.yml:ro
    # Setting `command` replaces the image's default arguments, so both the
    # config path and the storage path are spelled out.
    command:
      - --config.file=/etc/alertmanager/alertmanager.yml
      - --storage.path=/alertmanager