package stage
import (
"context"
"crypto/sha256"
"encoding/hex"
"fmt"
"sort"
"strings"
"mercemay.top/src/tilstream/internal/pipeline"
)
// Hash is the pipeline stage that computes a deterministic content hash for
// each post and stores it on Post.Hash. The hash is the hex-encoded SHA-256
// of the following byte stream (see ContentHash):
//
// raw markdown body || 0x00 || "k=v\n" for each front-matter key, keys sorted
//
// I use it as a cache key for the content-addressed output directory; when
// a post's hash doesn't change between builds, the write stage can skip
// it entirely.
type Hash struct {
// Short, when true, makes Run keep only the first 12 characters of the
// hex digest instead of the full hash.
Short bool
}
// Name reports the identifier of this stage, which is always "hash".
func (*Hash) Name() string {
	const stageName = "hash"
	return stageName
}
// Run computes the content hash of every post in st and stores it on that
// post's Hash field. With Short set, only the first 12 characters of the hex
// digest are stored. The context is checked before each post; once it has
// been cancelled, Run stops early and returns the context's error, leaving
// the remaining posts untouched.
func (h *Hash) Run(ctx context.Context, st *pipeline.State) error {
	for idx := range st.Posts {
		// Bail out between posts rather than mid-hash.
		if err := ctx.Err(); err != nil {
			return err
		}

		digest := ContentHash(st.Posts[idx].Raw, st.Posts[idx].Meta)
		keep := len(digest)
		if h.Short {
			keep = 12
		}
		st.Posts[idx].Hash = digest[:keep]
	}
	return nil
}
// ContentHash returns the lowercase hex SHA-256 digest of a post's content.
// The hashed byte stream is raw, then a single 0x00 separator byte, then one
// "key=value\n" record per entry of meta, with keys in sorted order so the
// result does not depend on map iteration order. It is exported so other
// callers (e.g. the dev server cache) can derive the same key.
//
// NOTE(review): records are not length-prefixed or escaped, so a key that
// contains '=' or a value that contains '\n' can produce the same stream as
// a different map. Changing the encoding would change every existing hash.
func ContentHash(raw []byte, meta map[string]string) string {
	names := make([]string, 0, len(meta))
	for name := range meta {
		names = append(names, name)
	}
	sort.Strings(names)

	// Assemble everything that follows the body in one buffer: the
	// separator byte and then the sorted key=value records.
	tail := []byte{0}
	for _, name := range names {
		tail = append(tail, name...)
		tail = append(tail, '=')
		tail = append(tail, meta[name]...)
		tail = append(tail, '\n')
	}

	digest := sha256.New()
	digest.Write(raw)
	digest.Write(tail)
	return hex.EncodeToString(digest.Sum(nil))
}
// ShortHash returns the abbreviated form of ContentHash for the same inputs:
// the first 12 characters of the hex digest.
func ShortHash(raw []byte, meta map[string]string) string {
	full := ContentHash(raw, meta)
	return full[:12]
}
// Verify recomputes the content hash of every post in st and compares it
// with the hash stored on the post. Posts with an empty stored hash are
// skipped. A stored hash of exactly 12 characters is treated as a short hash
// and compared against the first 12 characters of the recomputed digest.
// If any post differs, the returned error lists every mismatch, one per
// line; otherwise Verify returns nil. Used by the doctor command.
func Verify(st *pipeline.State) error {
	var report []string
	for _, post := range st.Posts {
		if post.Hash == "" {
			// Nothing was stored for this post, so there is nothing to check.
			continue
		}

		expected := ContentHash(post.Raw, post.Meta)
		if len(post.Hash) == 12 {
			expected = expected[:12]
		}
		if post.Hash == expected {
			continue
		}
		report = append(report, fmt.Sprintf("%s: stored=%s got=%s", post.Path, post.Hash, expected))
	}

	if len(report) == 0 {
		return nil
	}
	return fmt.Errorf("hash mismatch:\n %s", strings.Join(report, "\n "))
}
// Manifest renders one "path hash" line per post in st, sorts the lines
// lexicographically, and joins them with newlines, ending with a trailing
// newline. The result is intended to be written to an on-disk manifest file.
// With no posts the result is a single "\n".
func Manifest(st *pipeline.State) string {
	lines := make([]string, len(st.Posts))
	for i, post := range st.Posts {
		lines[i] = fmt.Sprintf("%s %s", post.Path, post.Hash)
	}
	// Sort so the manifest does not depend on the order of st.Posts.
	sort.Strings(lines)

	body := strings.Join(lines, "\n")
	return body + "\n"
}