#!/usr/bin/env bash
# stacks/monitoring/exporters/smart-exporter.sh
# Writes a snapshot of smartctl health per disk into a prom textfile.
# Meant to be called from smart-test.timer once an hour, not per scrape.
#
# Docs: mercemay.top/src/homelab-compose/
# Strict mode: abort on unhandled errors, unset variables, and failures in
# any stage of a pipeline.
set -euo pipefail

# Output locations. TEXTFILE_DIR may be overridden from the environment; the
# default path suggests node_exporter's textfile collector is the consumer.
TEXTFILE_DIR="${TEXTFILE_DIR:-/var/lib/node_exporter/textfile}"
OUT="${TEXTFILE_DIR}/smart.prom"
# Scratch file in the same directory as OUT; the PID suffix keeps concurrent
# runs from sharing one scratch file.
TMP="${OUT}.$$"

# Remove the scratch file on every exit path. Without this, any failure under
# `set -e` leaves a stale smart.prom.<pid> behind on each run. After the
# scratch file has been renamed into place this is a harmless no-op.
trap 'rm -f -- "${TMP}"' EXIT

mkdir -p -- "${TEXTFILE_DIR}"
# Collect every whole-disk block device as a /dev/<name> path, one array
# element per device. lsblk is asked for top-level devices only (-d) without
# a header row (-n); awk keeps the rows whose TYPE column is "disk".
disks=()
mapfile -t disks < <(
  lsblk -dn -o NAME,TYPE \
    | awk '$2=="disk"{print "/dev/"$1}'
)
# Start the scratch file from empty and write the HELP/TYPE metadata for every
# metric up front; the per-disk samples are appended after this.
printf '%s\n' \
  '# HELP smart_device_health 1 if smartctl health PASSED' \
  '# TYPE smart_device_health gauge' \
  '# HELP smart_temperature_celsius current temperature' \
  '# TYPE smart_temperature_celsius gauge' \
  '# HELP smart_power_on_hours total power on hours' \
  '# TYPE smart_power_on_hours counter' \
  '# HELP smart_reallocated_sectors reallocated sector count' \
  '# TYPE smart_reallocated_sectors gauge' \
  '# HELP smart_pending_sectors pending reallocation sectors' \
  '# TYPE smart_pending_sectors gauge' \
  > "${TMP}"
#######################################
# Escape a string for use inside a double-quoted Prometheus label value:
# backslash, double quote and newline are backslash-escaped.
# Arguments: $1 - raw label value
# Outputs:   escaped value on stdout (no trailing newline)
#######################################
prom_escape() {
  local s=$1
  s=${s//\\/\\\\}
  s=${s//\"/\\\"}
  s=${s//$'\n'/\\n}
  printf '%s' "${s}"
}

#######################################
# Extract the value of the first "Key: value" line that matches a regex.
# Arguments: $1 - captured `smartctl -i` text
#            $2 - awk regex selecting the line
# Outputs:   text after the first ':' with leading spaces removed, or nothing
#######################################
smart_info_field() {
  awk -F: -v pat="$2" \
    '$0 ~ pat {gsub(/^ +/,"",$2); print $2; exit}' <<<"$1"
}

#######################################
# Extract an integer value from the first usable line matching a regex.
# Rows of the ATA attribute table (numeric ID in column 1, at least 10
# columns) yield the leading integer of the RAW_VALUE column, so raw values
# such as "36 (0 18 0 0 0)" or "23456h+12m+01s" are read correctly. Any other
# line yields its last all-digit field, with thousands separators (",")
# stripped first.
# Arguments: $1 - captured `smartctl -A` text
#            $2 - awk regex selecting the line
# Outputs:   the integer on stdout, or nothing when no value is found
#######################################
smart_attr_value() {
  awk -v pat="$2" '
    $0 ~ pat {
      if ($1 ~ /^[0-9]+$/ && NF >= 10) {
        if (match($10, /^[0-9]+/)) {
          print substr($10, RSTART, RLENGTH)
          exit
        }
        next
      }
      for (i = NF; i >= 1; i--) {
        field = $i
        gsub(/,/, "", field)
        if (field ~ /^[0-9]+$/) {
          print field
          exit
        }
      }
    }
  ' <<<"$1"
}

#######################################
# Print one sample line for an attribute, or nothing if it is not reported.
# Arguments: $1 - metric name
#            $2 - preformatted label list (without braces)
#            $3 - captured `smartctl -A` text
#            $4 - awk regex selecting the attribute line
# Outputs:   "<metric>{<labels>} <value>" on stdout when a value is found
#######################################
print_smart_attr() {
  local value
  value=$(smart_attr_value "$3" "$4")
  if [[ -n "${value}" ]]; then
    printf '%s{%s} %s\n' "$1" "$2" "${value}"
  fi
}

# Emit the samples for every discovered disk. The ${arr[@]+...} form keeps an
# empty list from tripping `set -u` on bash releases older than 4.4.
for disk in ${disks[@]+"${disks[@]}"}; do
  # Run each smartctl report once and parse the captured text. smartctl uses
  # its exit status as a bitmask and is often non-zero even when it printed a
  # usable report, so the status is deliberately ignored here; piping it
  # straight into awk/grep under `set -e -o pipefail` would abort the script.
  info=$(smartctl -i "${disk}" 2>/dev/null) || true
  health_report=$(smartctl -H "${disk}" 2>/dev/null) || true
  attrs=$(smartctl -A "${disk}" 2>/dev/null) || true

  model=$(smart_info_field "${info}" 'Device Model|Model Number')
  serial=$(smart_info_field "${info}" 'Serial Number')
  model="${model:-unknown}"
  serial="${serial:-unknown}"

  # Spaces in model/serial become underscores; all values are escaped so a
  # stray quote or backslash cannot corrupt the exposition format.
  labels="device=\"$(prom_escape "${disk}")\""
  labels+=",model=\"$(prom_escape "${model// /_}")\""
  labels+=",serial=\"$(prom_escape "${serial// /_}")\""

  # Health stays 0 unless the -H report (and only that report) says so.
  health=0
  if grep -qE 'PASSED|OK' <<<"${health_report}"; then
    health=1
  fi

  {
    printf 'smart_device_health{%s} %s\n' "${labels}" "${health}"
    print_smart_attr smart_temperature_celsius "${labels}" "${attrs}" \
      'Temperature_Celsius|Current Drive Temperature'
    print_smart_attr smart_power_on_hours "${labels}" "${attrs}" \
      'Power_On_Hours|Power On Hours'
    print_smart_attr smart_reallocated_sectors "${labels}" "${attrs}" \
      'Reallocated_Sector_Ct|Reallocate.*Count'
    print_smart_attr smart_pending_sectors "${labels}" "${attrs}" \
      'Current_Pending_Sector'
  } >> "${TMP}"
done
# Publish the snapshot. The mode is set on the scratch file first so the
# rename exposes a world-readable file in a single step; doing the chmod after
# the rename would leave a window (under a restrictive umask) in which the
# live file exists but cannot be read by the scraping process.
chmod 0644 -- "${TMP}"
mv -f -- "${TMP}" "${OUT}"