# stacks/monitoring/prometheus/rules/recording.yml
# Pre-aggregated series so Grafana dashboards stay snappy on the NUC.
# Keep naming: <level>:<metric>:<operations> (e.g. instance:node_memory_used:ratio)
groups:
# Host-level series built from node_* metrics; the group is evaluated every 1m.
- name: node-derived
  interval: 1m
  rules:
  # Busy-CPU fraction: 1 minus the idle-mode rate, averaged with the cpu and
  # mode labels removed.
  # NOTE(review): a 1m rate window needs at least two samples inside it;
  # confirm the scrape interval for these targets is short enough.
  - record: instance:node_cpu_usage:ratio_avg1m
    expr: |
      1 - avg without (cpu,mode) (
        rate(node_cpu_seconds_total{mode="idle"}[1m])
      )
  # Used-memory fraction: 1 minus MemAvailable divided by MemTotal.
  - record: instance:node_memory_used:ratio
    expr: |
      1 - (
        node_memory_MemAvailable_bytes
        /
        node_memory_MemTotal_bytes
      )
  # Used-space fraction for filesystems whose fstype is not tmpfs, overlay or
  # squashfs. No aggregation is applied, so every matching filesystem series
  # produces its own result.
  # NOTE(review): the name says "instance" level, but per-filesystem labels
  # are kept; confirm dashboards expect that.
  - record: instance:node_filesystem_used:ratio
    expr: |
      1 - (
        node_filesystem_avail_bytes{fstype!~"tmpfs|overlay|squashfs"}
        /
        node_filesystem_size_bytes{fstype!~"tmpfs|overlay|squashfs"}
      )
  # Receive throughput in bits per second (byte rate * 8) over 5m, summed
  # with the device label removed. Devices matching lo, docker.* or veth.*
  # are excluded.
  - record: instance:node_network_receive:bps5m
    expr: |
      sum without (device) (
        rate(node_network_receive_bytes_total{device!~"lo|docker.*|veth.*"}[5m]) * 8
      )
  # Transmit throughput in bits per second; same window and device filter as
  # the receive rule.
  - record: instance:node_network_transmit:bps5m
    expr: |
      sum without (device) (
        rate(node_network_transmit_bytes_total{device!~"lo|docker.*|veth.*"}[5m]) * 8
      )
# Per-service container aggregates; the group is evaluated every 30s.
# Every selector skips series whose service label is empty.
- name: container-derived
  interval: 30s
  rules:
  # 5m rate of container CPU seconds, summed per service.
  # NOTE(review): a summed rate of CPU seconds is CPU-seconds per second and
  # is not normalised here, so it can exceed 1 despite the "ratio" in the
  # name; confirm how dashboards scale it.
  - record: service:container_cpu_usage:ratio5m
    expr: |
      sum by (service) (
        rate(container_cpu_usage_seconds_total{service!=""}[5m])
      )
  # Current container_memory_rss summed per service.
  - record: service:container_memory_rss:bytes
    expr: |
      sum by (service) (container_memory_rss{service!=""})
  # Restart count per service over the last 10m. The left side sums the
  # increase of kube_pod_container_status_restarts_total; `or` adds the
  # right side (changes in container_start_time_seconds) only for services
  # that have no result on the left.
  # NOTE(review): both sides yield a count over the window, not a per-second
  # rate, despite the "rate10m" suffix.
  - record: service:container_restarts:rate10m
    expr: |
      sum by (service) (
        increase(kube_pod_container_status_restarts_total{service!=""}[10m])
      ) or
      sum by (service) (
        changes(container_start_time_seconds{service!=""}[10m])
      )
# Summaries of series with job="blackbox_http"; the group is evaluated every 30s.
- name: http-derived
  interval: 30s
  rules:
  # Probe duration averaged over the trailing 5m, one series per instance.
  # avg_over_time supplies the 5m window that the record name promises; a
  # bare instant-vector avg would only re-emit the latest sample. The outer
  # avg by (instance) keeps the output labels limited to instance.
  - record: job:blackbox_http_duration:avg5m
    expr: |
      avg by (instance) (
        avg_over_time(probe_duration_seconds{job="blackbox_http"}[5m])
      )
  # Mean of probe_success over the trailing 5m, with the series' labels kept.
  # NOTE(review): this reads as a success fraction only if probe_success is
  # always 0 or 1; confirm against the exporter.
  - record: job:blackbox_http_success:ratio5m
    expr: avg_over_time(probe_success{job="blackbox_http"}[5m])