scripts/smart-test-weekly.sh

#!/usr/bin/env bash
# scripts/smart-test-weekly.sh
# Kick off a short SMART self-test on every rotational disk. Runs
# non-blocking (`--test short`) and polls for completion up to 15 min.
#
# Intended to be launched by smart-test.timer.

# Strict mode: stop on a failing command, on expansion of an unset
# variable, and when any stage of a pipeline fails.
set -o errexit -o nounset -o pipefail

# Absolute directory of this script, so sibling files can be located no
# matter what the caller's working directory is.
self_dir="$(dirname "${BASH_SOURCE[0]}")"
HERE="$(cd "${self_dir}" && pwd)"

# Shared logging library (presumably the origin of log_info / log_warn).
# shellcheck source=/dev/null
source "${HERE}/lib/log.sh"

# Build the list of device nodes to test: every entry that lsblk reports
# with TYPE "disk" and ROTA "1", prefixed with /dev/, one per element.
mapfile -t disks < <(
    lsblk -dn -o NAME,ROTA,TYPE \
        | awk '$3=="disk" && $2=="1" {print "/dev/"$1}'
)

# With no matching disk there is nothing to test; finish successfully.
if [[ ${#disks[@]} -eq 0 ]]; then
    log_info "no rotational disks found"
    exit 0
fi

# Request a short self-test on each disk. A failure to start one is
# logged as a warning and the loop carries on with the next disk.
for disk in "${disks[@]}"; do
    log_info "starting short smart test on ${disk}"
    smartctl -t short "${disk}" >/dev/null \
        || log_warn "could not start smart test on ${disk}"
done

#######################################
# Tell whether a self-test is currently running on a disk.
#
# The capability report is captured first and matched afterwards, rather
# than piping smartctl straight into `grep -q`. Under `set -o pipefail`
# the piped form is false whenever smartctl exits non-zero (its exit
# status is a bitmask and can be non-zero even when the report is usable)
# or is killed by SIGPIPE once grep exits early on a match.
#
# Arguments: $1 - device node, e.g. /dev/sda
# Returns:   0 if the report's "Self-test execution status" line says
#            "in progress", non-zero otherwise (including when smartctl
#            produced no usable report).
#######################################
_smart_selftest_running() {
    local report
    # Only the report text matters here; the exit status is ignored on
    # purpose.
    report="$(smartctl -c "$1")" || true
    grep -q 'Self-test execution status.*in progress' <<<"${report}"
}

# Poll every 30 s, for at most 15 min (900 s), until no disk reports a
# self-test in progress.
pending=0
deadline=$(( $(date +%s) + 900 ))
while (( $(date +%s) < deadline )); do
    pending=0
    for d in "${disks[@]}"; do
        if _smart_selftest_running "${d}"; then
            pending=$((pending + 1))
        fi
    done
    if (( pending == 0 )); then
        break
    fi
    sleep 30
done

# The loop can also end because the deadline passed; make that visible
# instead of carrying on as if every test had finished.
if (( pending > 0 )); then
    log_warn "smart test still in progress on ${pending} disk(s) after 900s"
fi

# Run the monitoring stack's smart-exporter script (presumably to publish
# the fresh results -- confirm against the exporter itself). This stays
# best-effort: a failing exporter does not fail this job, but its exit
# status is logged rather than silently discarded.
exporter_rc=0
"${HERE}/../stacks/monitoring/exporters/smart-exporter.sh" || exporter_rc=$?
if (( exporter_rc != 0 )); then
    log_warn "smart-exporter.sh exited with status ${exporter_rc}"
fi
log_info "smart-test-weekly done"