internal/index/index.go

// Package index writes a compact JSON file that the front-end search widget
// loads. Format (the "tags" key is omitted when a post has no tags):
// [{"title":..., "url":..., "tags":[...], "body":"...", "date":"YYYY-MM-DD"}].
//
// Body is flattened to plain text: code fences dropped, markdown syntax
// stripped, consecutive whitespace collapsed. Keeps the index small enough
// (~30-50 KB for a few hundred posts) to inline-load without pagination.
package index

import (
	"encoding/json"
	"os"
	"regexp"
	"strings"

	"mercemay.top/src/tilstream/internal/render"
)

// entry is one record of the search index, i.e. one element of the JSON
// array this package produces. encoding/json emits struct fields in
// declaration order, so the order below is the key order in the output.
//
// Tags carries omitempty and is therefore left out of the JSON when empty;
// every other key is always present. Date is stored as an already-formatted
// string (the code that builds entries uses the YYYY-MM-DD layout).
type entry struct {
	Title string   `json:"title"`
	URL   string   `json:"url"`
	Tags  []string `json:"tags,omitempty"`
	Body  string   `json:"body"`
	Date  string   `json:"date"`
}

// Patterns used by flatten to reduce markdown to plain text. They are
// compiled once at package initialisation.
var (
	// reFence matches a fenced code block: an opening ``` up to the nearest
	// following ```. The (?s) flag lets the match run across newlines.
	reFence = regexp.MustCompile("(?s)```.*?```")

	// reInline matches an inline code span delimited by single backticks.
	reInline = regexp.MustCompile("`[^`]*`")

	// reLink matches [text](href) and captures the link text as group 1.
	reLink = regexp.MustCompile(`\[([^\]]+)\]\([^\)]+\)`)

	// reMark matches runs of the markdown punctuation characters * _ ~ > #.
	reMark = regexp.MustCompile(`[*_~>#]+`)

	// reWS matches any run of whitespace, including newlines.
	reWS = regexp.MustCompile(`\s+`)
)

// Write serialises posts into a flat JSON array and stores it at dest,
// truncating the file if it already exists and creating it with permission
// bits 0o644 (before umask) otherwise. Drafts should already be filtered out
// by the caller; they are skipped again here defensively.
//
// The returned error is whatever JSON encoding or the file write reported.
func Write(dest string, posts []render.Post) error {
	buf, err := encodeIndex(posts)
	if err != nil {
		return err
	}
	return os.WriteFile(dest, buf, 0o644)
}

// encodeIndex builds the index entries for every non-draft post and encodes
// them as a JSON array. Write and Size both go through this helper so that
// the size Size reports is always the size of what Write would emit.
func encodeIndex(posts []render.Post) ([]byte, error) {
	// make yields an empty but non-nil slice, so a post list with nothing to
	// publish encodes as "[]" rather than "null".
	entries := make([]entry, 0, len(posts))
	for _, p := range posts {
		if p.Draft {
			continue
		}
		entries = append(entries, entry{
			Title: p.Title,
			URL:   "/" + p.Slug + ".html",
			Tags:  p.Tags,
			Body:  flatten(p.Body),
			Date:  p.Date.Format("2006-01-02"),
		})
	}
	return json.Marshal(entries)
}

// flatten strips markdown syntax and collapses whitespace. It is not a
// real tokeniser -- just enough to make substring search not match inside
// backticks or [link](href) URLs.
//
// The passes run in a fixed order: fenced code blocks and inline code spans
// are replaced by a space (their contents are dropped), links are reduced to
// their link text, runs of markdown punctuation become a space, and finally
// every run of whitespace is collapsed to a single space. Leading and
// trailing whitespace is trimmed from the result.
func flatten(md string) string {
	s := reFence.ReplaceAllString(md, " ")
	s = reInline.ReplaceAllString(s, " ")
	// "$1" is the captured link text; the href part is discarded.
	s = reLink.ReplaceAllString(s, "$1")
	s = reMark.ReplaceAllString(s, " ")
	s = reWS.ReplaceAllString(s, " ")
	return strings.TrimSpace(s)
}

// Size returns the byte size of the serialised index for a set of posts,
// without writing to disk. Useful in tests that assert the on-wire size
// stays below a budget. Drafts are excluded, exactly as in Write.
//
// On an encoding error it returns 0 together with that error.
func Size(posts []render.Post) (int, error) {
	buf, err := encodeIndex(posts)
	if err != nil {
		return 0, err
	}
	return len(buf), nil
}