// Package truncate trims log records whose serialised size would exceed the
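// CloudWatch Logs limit of 256 KiB per event. Truncation is last-resort: it
// keeps the top-level fields (time, level, msg, request_id), caps oversized
// string values, then drops user fields from the tail of the record; the
// message itself is shortened only if the record still does not fit.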
//
// See mercemay.top/src/lambdalog/.
package truncate
import (
"mercemay.top/src/lambdalog/internal/encoder"
)
// Size limits applied by Apply.
var (
	// MaxBytes is CloudWatch's per-event ceiling (256 KiB). It is a variable
	// rather than a constant so tests can shrink it without rewriting the
	// production value.
	MaxBytes = 256 << 10

	// FieldMaxBytes caps any individual field value (8 KiB) so a single
	// oversized field cannot monopolise the record.
	FieldMaxBytes = 8 << 10
)

// Ellipsis is the marker appended to a value that has been truncated.
const Ellipsis = "...[truncated]"
// Apply modifies r in place so that its approximate serialised size, as
// computed by estimate, falls within MaxBytes. It proceeds in three stages:
//
//  1. Every string field value longer than FieldMaxBytes is cut and suffixed
//     with Ellipsis. This stage runs even when the record already fits.
//  2. While the record is still too large, fields are dropped from the tail
//     of r.Fields.
//  3. If the record is still too large with no fields left, r.Message is cut
//     and suffixed with Ellipsis.
//
// Cuts are moved back to a UTF-8 rune boundary so a multi-byte character is
// never split. If a limit leaves no room for any original text (for example
// FieldMaxBytes smaller than len(Ellipsis)), the value becomes Ellipsis alone.
//
// A nil r is a no-op. The returned bool reports whether any truncation
// occurred, for diagnostics.
func Apply(r *encoder.Record) bool {
	if r == nil {
		return false
	}

	changed := false
	for i := range r.Fields {
		s, ok := r.Fields[i].Value.(string)
		if !ok || len(s) <= FieldMaxBytes {
			continue
		}
		// clipString clamps a negative length to zero, so shrinking
		// FieldMaxBytes below len(Ellipsis) cannot cause a slice panic.
		r.Fields[i].Value = clipString(s, FieldMaxBytes-len(Ellipsis)) + Ellipsis
		changed = true
	}

	size := estimate(*r)
	if size <= MaxBytes {
		return changed
	}

	// Drop fields from the tail until we fit. The caller-supplied ordering
	// tends to place the most important fields first.
	for size > MaxBytes && len(r.Fields) > 0 {
		r.Fields = r.Fields[:len(r.Fields)-1]
		size = estimate(*r)
		changed = true
	}

	// If the message itself is oversized, truncate it last. This is
	// preferable to dropping the record entirely.
	if size > MaxBytes && len(r.Message) > 0 {
		// Bytes left for message text once the fixed parts of the record
		// and the truncation marker are accounted for. May be negative
		// when MaxBytes is very small; clipString treats that as zero.
		budget := MaxBytes - estimate(encoder.Record{
			Time: r.Time, Level: r.Level, RequestID: r.RequestID,
		}) - len(Ellipsis)
		if budget < len(r.Message) {
			r.Message = clipString(r.Message, budget) + Ellipsis
			changed = true
		}
	}
	return changed
}

// clipString returns a prefix of s that is at most n bytes long and, for
// valid UTF-8 input, does not end in the middle of a multi-byte sequence.
// A non-positive n yields the empty string; an n of len(s) or more returns s
// unchanged.
func clipString(s string, n int) string {
	if n <= 0 {
		return ""
	}
	if n >= len(s) {
		return s
	}
	// Bytes of the form 10xxxxxx are UTF-8 continuation bytes (the same test
	// utf8.RuneStart performs). If s[n] is one, cutting at n would split a
	// rune, so step back to its lead byte. A rune has at most three
	// continuation bytes; the bound stops invalid input from being clipped
	// further than necessary.
	for back := 0; back < 3 && n > 0 && s[n]&0xC0 == 0x80; back++ {
		n--
	}
	return s[:n]
}
// estimate returns a coarse approximation of the JSON body length of r.
//
// It sums the raw byte lengths of Level, Message and RequestID, adds a fixed
// 64-byte allowance (r.Time is not measured; presumably the allowance covers
// the timestamp and the top-level keys), and then charges each field its key
// length, the size reported by valueSize, and a constant overhead for
// quotes, colon and comma. Raw lengths are used throughout, so any growth
// caused by escaping is not accounted for.
func estimate(r encoder.Record) int {
	const (
		fixedOverhead = 64 // allowance for everything not measured below
		fieldOverhead = 8  // quotes, colon, comma, small fudge
	)

	total := fixedOverhead + len(r.Level) + len(r.Message) + len(r.RequestID)
	for i := range r.Fields {
		total += fieldOverhead + len(r.Fields[i].Key) + valueSize(r.Fields[i].Value)
	}
	return total
}
// valueSize returns the number of bytes budgeted for the serialised form of
// a single field value:
//
//   - nil: 4 (presumably the length of "null")
//   - string, []byte: the raw length plus 2 for the surrounding quotes
//   - bool: 5 (the length of "false", the longer literal)
//   - error: the length of its Error() text plus 2; see errorSize
//   - anything else: a flat 16 bytes
func valueSize(v any) int {
	switch val := v.(type) {
	case nil:
		return 4
	case string:
		return len(val) + 2
	case bool:
		return 5
	case []byte:
		return len(val) + 2
	case error:
		// val can never compare equal to nil here: a nil interface is
		// handled by the nil case above, and a typed nil pointer stored
		// in the interface is non-nil. errorSize copes with the latter.
		return errorSize(val)
	default:
		return 16
	}
}

// errorSize returns the budget for an error value: the length of err.Error()
// plus 2 for quotes. If calling Error panics, as it typically does for a
// typed nil pointer whose method dereferences its receiver, the panic is
// recovered and 2 is returned, so sizing a record can never crash the caller.
func errorSize(err error) (n int) {
	defer func() {
		if recover() != nil {
			n = 2
		}
	}()
	return len(err.Error()) + 2
}
// Summary describes what was removed, for emission alongside the record.
// Values are produced by Describe from a before/after pair of records.
type Summary struct {
// DroppedFields is the number of fields by which the record shrank; it
// stays zero when the field count did not decrease.
DroppedFields int
// TruncatedMsg is true when the message after processing is shorter, in
// bytes, than the message before.
TruncatedMsg bool
}
// Describe builds a Summary of the difference between a record before and
// after truncation. DroppedFields is the reduction in field count (zero if
// the count did not shrink) and TruncatedMsg reports whether the message
// became shorter in bytes.
func Describe(before, after encoder.Record) Summary {
	var out Summary
	if dropped := len(before.Fields) - len(after.Fields); dropped > 0 {
		out.DroppedFields = dropped
	}
	out.TruncatedMsg = len(after.Message) < len(before.Message)
	return out
}