#!/usr/bin/env bash
# scripts/health-check-all.sh
# Iterate every stack and probe each service it exposes. Exits 0 on OK,
# 1 on any failure, 2 on warnings only (so cron emails but does not
# alert-page).
#
# Docs: mercemay.top/src/homelab-compose/
# Strict mode: abort on a failing command, on use of an unset variable, and
# when any stage of a pipeline fails.
set -o errexit -o nounset -o pipefail

# Absolute path of the directory that contains this script.
HERE="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"

# Load lib/log.sh from beside this script (presumably where the log_* helpers
# used further down are defined).
# shellcheck source=/dev/null
source "${HERE}/lib/log.sh"
# Service name -> URL that is requested to judge whether the service is up.
declare -A TARGETS=(
  [auth]="https://auth.home.arpa"
  [jellyfin]="https://jellyfin.home.arpa/System/Info/Public"
  [gitea]="https://gitea.home.arpa/api/v1/version"
  [grafana]="https://grafana.home.arpa/api/health"
  [prometheus]="https://prometheus.home.arpa/-/healthy"
  [paperless]="https://paperless.home.arpa/api/"
  [immich]="https://immich.home.arpa/api/server-info/ping"
  [sonarr]="https://sonarr.home.arpa/ping"
  [radarr]="https://radarr.home.arpa/ping"
  [prowlarr]="https://prowlarr.home.arpa/ping"
  [bazarr]="https://bazarr.home.arpa/api/system/status"
)

# Running tallies of probe outcomes: incremented by check() and read at the
# end of the script to choose the exit status.
warn=0
fail=0
#######################################
# Probe one service URL and classify the HTTP status code it returns.
# Globals:   fail (incremented on no response or 5xx)
#            warn (incremented on any other unexpected status)
# Arguments: $1 - service name, used only in the log line
#            $2 - URL to request
# Outputs:   exactly one line through log_info, log_err or log_warn
# The outcome is reported through the counters, not the return status.
#######################################
check() {
  local name="$1" url="$2"
  local code

  # -k skips TLS certificate verification; --max-time bounds the whole request.
  #
  # The fallback must live OUTSIDE the command substitution: curl still emits
  # its -w value (typically "000") when the transfer fails, so appending a
  # second "000" inside $(...) produced "000000", which matched the catch-all
  # arm and downgraded an unreachable host to a warning. Whatever was captured
  # is discarded and replaced by "000" whenever curl exits non-zero.
  if ! code=$(curl -k -s -o /dev/null -w '%{http_code}' --max-time 8 "${url}"); then
    code="000"
  fi

  case "${code}" in
    # Redirects and 401 are counted as healthy alongside 200/204.
    200 | 204 | 301 | 302 | 401)
      log_info "OK ${name} ${code}"
      ;;
    # curl could not complete the request.
    000)
      log_err "FAIL ${name} no-response"
      fail=$((fail + 1))
      ;;
    # Any server-side error.
    5??)
      log_err "FAIL ${name} ${code}"
      fail=$((fail + 1))
      ;;
    # Everything else (e.g. 403, 404) is suspicious but not an outage.
    *)
      log_warn "WARN ${name} ${code}"
      warn=$((warn + 1))
      ;;
  esac
}
# Run check() once per TARGETS entry, passing the key as the service name and
# the mapped value as the URL.
for svc in "${!TARGETS[@]}"; do
  check "${svc}" "${TARGETS[${svc}]}"
done
# Informational container listing for every stack directory. It must never
# abort the run: under `set -e` an unguarded failure of cd or docker here
# would terminate the script with that command's status and skip the
# fail/warn exit-code selection that follows.
log_info "docker compose ps summary:"
for stack in /srv/homelab/stacks/*/docker-compose.yml; do
  # An unmatched glob is left as the literal pattern; skip anything that is
  # not an existing file.
  [[ -f "${stack}" ]] || continue

  # The subshell keeps the directory change from leaking into the script.
  if ! (
    cd "$(dirname "${stack}")" &&
      docker compose ps --format 'table {{.Service}}\t{{.Status}}'
  ); then
    # A summary that cannot be produced is surfaced as a warning, not a crash.
    log_warn "WARN compose-ps ${stack}"
    warn=$((warn + 1))
  fi
done
# Choose the exit status: failures take precedence over warnings.
#   1 - at least one failure was counted
#   2 - no failures, but at least one warning
#   0 - neither
(( fail <= 0 )) || exit 1
(( warn <= 0 )) || exit 2
exit 0