#!/usr/bin/env bash
# scripts/upgrade.sh -- pull new images and restart services one stack at a
# time so only a single thing is down at once.
#
# I dropped watchtower in commit 5512de8 because it started a container
# update mid-dinner and immich broke for two hours. This script is the
# boring, manual replacement.
#
# Usage:
# scripts/upgrade.sh # every stack
# scripts/upgrade.sh media # just stacks/media
# scripts/upgrade.sh --dry-run # show what would change
#
# mercemay.top/src/homelab-compose/ documents the recovery path.
# Strict mode: stop on command errors, unset variables and failed pipeline
# stages.
set -euo pipefail

# Work from the repository root so the relative stacks/ paths resolve no
# matter where the script is started; outside a git checkout the current
# directory is used instead.
ROOT=$(git rev-parse --show-toplevel 2>/dev/null || pwd)
cd "$ROOT"

DRY=0        # 1 once --dry-run was seen: report only, change nothing
SELECTED=()  # stack names, i.e. directory names directly under stacks/

while (( $# )); do
  case "$1" in
    --dry-run)
      DRY=1
      ;;
    -h|--help)
      # The usage text is the header comment of this file (lines 2-14) with
      # the leading "# " removed.
      sed -n '2,14p' "$0" | sed 's/^# \{0,1\}//'
      exit 0
      ;;
    -*)
      echo "upgrade: unknown flag $1" >&2
      exit 2
      ;;
    *)
      # Accept the tab-completed form ("stacks/media/") as well as the bare
      # stack name ("media").
      stack_arg=${1%/}
      SELECTED+=("${stack_arg#stacks/}")
      ;;
  esac
  shift
done

if [[ ${#SELECTED[@]} -eq 0 ]]; then
  # No stack named on the command line: take every directory directly under
  # stacks/, in sorted order.
  mapfile -t SELECTED < <(find stacks -mindepth 1 -maxdepth 1 -type d -printf '%f\n' | sort)
else
  # Reject an explicitly named stack that has no compose file up front, the
  # same way an unknown flag is rejected, before anything is pulled or
  # restarted.
  for stack_arg in "${SELECTED[@]}"; do
    if [[ ! -f "stacks/$stack_arg/compose.yml" ]]; then
      printf 'upgrade: unknown stack %s (no stacks/%s/compose.yml)\n' \
        "$stack_arg" "$stack_arg" >&2
      exit 2
    fi
  done
fi
# Print one progress line to stdout: "[upgrade HH:MM:SS] <message>".
# Arguments: all arguments are joined with spaces to form the message.
log() {
  local stamp message
  message=$*
  # `|| true` keeps a failing `date` from aborting the run under `set -e`.
  stamp=$(date +%H:%M:%S) || true
  printf '[upgrade %s] %s\n' "$stamp" "$message"
}
# Fetch newer images for one stack, or only preview them in dry-run mode.
# Globals:   DRY (read) - non-zero selects the preview path
# Arguments: $1 - stack name; its compose file is stacks/<name>/compose.yml
# Returns:   0 when the stack is skipped because it has no compose.yml,
#            otherwise the status of the last docker compose command run.
pull_stack() {
  local name=$1
  local compose_file="stacks/${name}/compose.yml"

  if [[ ! -f $compose_file ]]; then
    log "skip $name (no compose.yml)"
    return
  fi

  log "pulling $name"

  if (( ! DRY )); then
    docker compose -f "$compose_file" pull --quiet
    return
  fi

  # Preview: try a dry-run pull first (its error output is discarded); if
  # that fails, fall back to listing the images the stack references.
  if docker compose -f "$compose_file" pull --quiet --dry-run 2>/dev/null; then
    return 0
  fi
  docker compose -f "$compose_file" config --images
}
# Recreate the containers of one stack and wait for them to settle.
# Globals:   DRY (read) - non-zero only logs the command that would run
# Arguments: $1 - stack name; its compose file is stacks/<name>/compose.yml
# Returns:   0 when the stack is skipped (no compose.yml), in dry-run mode,
#            or when both `docker compose up` and wait_healthy succeed;
#            otherwise the status of whichever of the two failed.
restart_stack() {
  local stack=$1
  local compose="stacks/$stack/compose.yml"

  # A bare `return` here would hand back the failed test's status (1) and
  # mark the stack as failed. pull_stack treats a missing compose.yml as a
  # skip, so do the same.
  [[ -f $compose ]] || return 0

  log "recreating $stack"
  if (( DRY )); then
    log " would run: docker compose -f $compose up -d --remove-orphans"
    return 0
  fi

  # The main loop calls this function inside `if ! ...`, where `set -e` is
  # suspended, so a failing `up` has to be propagated by hand. Otherwise the
  # result would be decided by wait_healthy alone.
  docker compose -f "$compose" up -d --remove-orphans || return
  wait_healthy "$stack"
}
# Poll a stack until none of its containers reports a health status of
# "starting" or "unhealthy".
# Arguments: $1 - stack name; its compose file is stacks/<name>/compose.yml
#            $2 - optional timeout in seconds (default 120)
# Returns:   0 once no listed container is starting/unhealthy; 1 when the
#            timeout expires or the container state cannot be queried.
wait_healthy() {
  local stack=$1
  local timeout=${2:-120}
  local compose="stacks/$stack/compose.yml"
  local -r pending='unhealthy|starting'
  local deadline health
  deadline=$(( $(date +%s) + timeout ))

  # The state is polled at least once, then every 5 seconds until the
  # deadline has passed.
  while :; do
    # If the query itself fails there is nothing to base "healthy" on, so
    # report failure instead of treating the empty output as success.
    if ! health=$(docker compose -f "$compose" ps --format '{{.Health}}'); then
      log " could not query container state for $stack"
      return 1
    fi
    if [[ ! $health =~ $pending ]]; then
      log " $stack healthy"
      return 0
    fi
    (( $(date +%s) < deadline )) || break
    sleep 5
  done

  log " $stack still unhealthy after ${timeout}s, check with 'docker compose -f $compose ps'"
  return 1
}
# Nothing selected (for example no stacks/ directory was found): say so and
# fail instead of reporting a successful upgrade of zero stacks.
if [[ ${#SELECTED[@]} -eq 0 ]]; then
  log "no stacks selected, nothing to do" >&2
  exit 1
fi

# Pull everything first so one slow download does not block the first
# restart. Each pull is independent so a failure in one stack should not
# stop the rest; we do care about restart failures.
for s in "${SELECTED[@]}"; do
  pull_stack "$s" || log "pull failed for $s, continuing"
done

# Restart one stack at a time and remember every stack whose restart
# reported a failure, so the summary below can name them.
FAILED=()
for s in "${SELECTED[@]}"; do
  if ! restart_stack "$s"; then
    FAILED+=("$s")
  fi
done

if [[ ${#FAILED[@]} -gt 0 ]]; then
  log "failed: ${FAILED[*]}"
  exit 1
fi

# A dry run changed nothing, so do not claim an upgrade happened.
if (( DRY )); then
  log "dry run complete, nothing was changed"
else
  log "all selected stacks upgraded"
fi