internal/encoder/json/schema/schema.go

// Package schema offers an optional runtime check that log records conform
// to a user-declared schema. It is not wired into the default encoder path
// because the cost would dwarf the logging overhead itself; instead it is
// an opt-in feature used by tests and by the "lint" command.
//
// See mercemay.top/src/lambdalog/.
package schema

import (
	"errors"
	"fmt"
	"sort"
	"sync"
	"time"

	"mercemay.top/src/lambdalog/internal/encoder"
)

// FieldType enumerates the JSON types the schema recognises. The zero value
// is TypeString.
type FieldType int

// The supported types. Kept small on purpose: more exotic types are best
// represented as strings on the wire.
const (
	// TypeString matches Go string values.
	TypeString FieldType = iota
	// TypeInt matches integer values.
	TypeInt
	// TypeFloat matches floating-point values.
	TypeFloat
	// TypeBool matches bool values.
	TypeBool
	// TypeTime is for time-valued custom fields; formatted strings are
	// accepted.
	TypeTime
	// TypeAny matches every value, regardless of its Go type.
	TypeAny
)

// Spec declares expectations for one field.
type Spec struct {
	// Type is the type the field's value is checked against during
	// validation.
	Type FieldType
	// Required, when true, makes validation report the field as missing if
	// the record does not contain it.
	Required bool
}

// Schema is the set of field expectations. The zero value has no registered
// fields, so Validate reports every field of a record as unknown; that is
// rarely useful. Construct schemas with New and then Register each field
// explicitly.
type Schema struct {
	// mu guards fields: Register takes the write lock, Validate the read
	// lock.
	mu sync.RWMutex
	// fields maps a field key to the expectations registered for it.
	fields map[string]Spec
}

// New returns a schema with no registered fields, ready to have fields added
// to it with Register.
func New() *Schema {
	s := new(Schema)
	s.fields = make(map[string]Spec)
	return s
}

// Register adds or overwrites the spec for key.
//
// It is safe to call on a zero-value Schema: the field map is allocated on
// first use, so a Schema that was not built with New does not panic on a
// write to a nil map.
func (s *Schema) Register(key string, spec Spec) {
	s.mu.Lock()
	defer s.mu.Unlock()

	// Lazily initialise so `var s Schema; s.Register(...)` works.
	if s.fields == nil {
		s.fields = make(map[string]Spec)
	}
	s.fields[key] = spec
}

// Validate checks r against s and returns nil when the record conforms.
//
// Three kinds of problem are reported: fields that are not registered in the
// schema, registered fields whose value does not match the declared type, and
// required fields that are absent from the record. When at least one problem
// is found the result is a *ValidationError whose Problems are sorted, so
// test assertions are deterministic despite map iteration order.
func (s *Schema) Validate(r encoder.Record) error {
	s.mu.RLock()
	defer s.mu.RUnlock()

	var problems []string
	present := make(map[string]bool, len(r.Fields))

	// Pass 1: every field in the record must be known and correctly typed.
	for _, field := range r.Fields {
		want, known := s.fields[field.Key]
		if known {
			present[field.Key] = true
			if typeMatches(want.Type, field.Value) {
				continue
			}
			problems = append(problems, fmt.Sprintf("field %q: expected %s, got %T", field.Key, typeName(want.Type), field.Value))
		} else {
			problems = append(problems, fmt.Sprintf("unknown field %q", field.Key))
		}
	}

	// Pass 2: every required field must have appeared in pass 1.
	for name, want := range s.fields {
		if !want.Required || present[name] {
			continue
		}
		problems = append(problems, fmt.Sprintf("missing required field %q", name))
	}

	if len(problems) > 0 {
		sort.Strings(problems)
		return &ValidationError{Problems: problems}
	}
	return nil
}

// ValidationError carries the list of schema problems found in a record.
// Callers can use errors.As (or a type assertion) to extract the individual
// messages, or errors.Is(err, ErrInvalid) when the details do not matter.
type ValidationError struct {
	// Problems holds one human-readable message per violation.
	Problems []string
}

// Error joins the problems with "; " and prefixes the result with
// "schema: ". An empty problem list yields "schema: unknown error".
func (e *ValidationError) Error() string {
	if len(e.Problems) == 0 {
		return "schema: unknown error"
	}
	msg := "schema: " + e.Problems[0]
	for _, p := range e.Problems[1:] {
		msg += "; " + p
	}
	return msg
}

// Is lets errors.Is match a ValidationError against either the ErrInvalid
// sentinel or any *ValidationError target, regardless of the problems the
// target carries.
func (e *ValidationError) Is(target error) bool {
	// ErrInvalid is a plain errors.New value, so it must be matched
	// explicitly; a type assertion to *ValidationError never succeeds on it.
	if target == ErrInvalid {
		return true
	}
	_, ok := target.(*ValidationError)
	return ok
}

// ErrInvalid is a matcher for errors.Is callers who do not care about the
// specific problems: errors.Is(err, ErrInvalid) is true for every
// *ValidationError in err's chain.
var ErrInvalid = errors.New("schema: invalid record")

// typeMatches reports whether the dynamic type of v is acceptable for a field
// declared with type t.
//
//   - TypeAny accepts every value, including nil.
//   - TypeString accepts string.
//   - TypeInt accepts every sized and unsized signed or unsigned integer type.
//   - TypeFloat accepts float32 and float64.
//   - TypeBool accepts bool.
//   - TypeTime accepts a (pre-formatted) string or a time.Time.
//
// An unrecognised FieldType matches nothing.
func typeMatches(t FieldType, v any) bool {
	switch t {
	case TypeAny:
		return true
	case TypeString:
		_, ok := v.(string)
		return ok
	case TypeInt:
		// All integer widths serialise to a JSON number, so none of them
		// should be reported as a type mismatch.
		switch v.(type) {
		case int, int8, int16, int32, int64,
			uint, uint8, uint16, uint32, uint64:
			return true
		}
		return false
	case TypeFloat:
		switch v.(type) {
		case float32, float64:
			return true
		}
		return false
	case TypeBool:
		_, ok := v.(bool)
		return ok
	case TypeTime:
		// The Record's Time field is top-level; TypeTime applies to custom
		// fields. Accept strings (formatted) and the stdlib time.Time.
		switch v.(type) {
		case string, time.Time:
			return true
		}
		return false
	}
	return false
}

// typeName returns the human-readable name of t for use in validation
// messages. TypeAny and any unrecognised value are both rendered as "any".
func typeName(t FieldType) string {
	name := "any"
	switch t {
	case TypeString:
		name = "string"
	case TypeInt:
		name = "int"
	case TypeFloat:
		name = "float"
	case TypeBool:
		name = "bool"
	case TypeTime:
		name = "time"
	}
	return name
}