// internal/pipeline/stage/parse.go

package stage

import (
	"bytes"
	"context"
	"fmt"
	"strings"

	"gopkg.in/yaml.v3"
	"mercemay.top/src/tilstream/internal/pipeline"
)

// Parse is the pipeline stage that separates a post's raw bytes into YAML
// front matter and a body. Front-matter fields are promoted into the
// post's Meta, while the body keeps its original markdown text so the
// downstream Render stage can consume it.
type Parse struct{}

// Name reports the identifier of this stage, which is "parse".
func (*Parse) Name() string {
	return "parse"
}

// Run walks st.Posts in order and parses the front matter of each post in
// place. Before every post it checks ctx and returns the context's error
// if it has been cancelled. The first post that fails to parse aborts the
// run; its error is wrapped together with the post's Path.
func (*Parse) Run(ctx context.Context, st *pipeline.State) error {
	for idx := range st.Posts {
		// A non-nil Err means Done has been closed, so stop early.
		if cancelled := ctx.Err(); cancelled != nil {
			return cancelled
		}

		// Take the address so parsePost mutates the slice element itself.
		post := &st.Posts[idx]
		err := parsePost(post)
		if err == nil {
			continue
		}
		return fmt.Errorf("parse %s: %w", post.Path, err)
	}
	return nil
}

// parsePost splits p.Raw into an optional YAML front-matter header and the
// body that follows it.
//
// Front matter is recognised only when the very first line of p.Raw is
// exactly "---"; it extends to the next line that is exactly "---". Both LF
// and CRLF line endings are accepted, the header may be empty, and the
// closing fence may be the last line of the input without a trailing
// newline. Every top-level key of the decoded header is stored in p.Meta
// (allocated on demand) after conversion with toString, and p.Raw is
// replaced by the bytes that follow the closing fence. The new p.Raw is a
// sub-slice of the old one, not a copy.
//
// Input that does not start with a fence line is left untouched and nil is
// returned. An opening fence without a closing fence, or a header that
// yaml.Unmarshal rejects, is reported as an error; in that case p is not
// modified.
func parsePost(p *pipeline.Post) error {
	const fence = "---"

	// The opening fence must be the first line; anything else means the
	// post simply has no front matter.
	first, rest, ok := bytes.Cut(p.Raw, []byte("\n"))
	if !ok || string(bytes.TrimSuffix(first, []byte("\r"))) != fence {
		return nil
	}

	// Scan line by line for the closing fence. Working on whole lines
	// (rather than searching for "\n---\n") lets the fence sit directly
	// after the opening one, at end of input, or before a CRLF.
	var header, body []byte
	closed := false
	for off := 0; off < len(rest); {
		line, next := rest[off:], len(rest)
		if nl := bytes.IndexByte(line, '\n'); nl >= 0 {
			line, next = line[:nl], off+nl+1
		}
		if string(bytes.TrimSuffix(line, []byte("\r"))) == fence {
			header, body, closed = rest[:off], rest[next:], true
			break
		}
		off = next
	}
	if !closed {
		return fmt.Errorf("unterminated front-matter")
	}

	var fm map[string]any
	if err := yaml.Unmarshal(header, &fm); err != nil {
		return fmt.Errorf("yaml: %w", err)
	}

	// Only touch p once the header is known to be valid.
	if p.Meta == nil {
		p.Meta = make(map[string]string, len(fm))
	}
	for k, v := range fm {
		p.Meta[k] = toString(v)
	}
	p.Raw = body
	return nil
}

// toString flattens a decoded front-matter value into the string form
// stored in Meta.
//
//   - nil (a YAML null, e.g. a key with no value) becomes the empty string
//     rather than the literal text "<nil>".
//   - strings are returned unchanged.
//   - sequences are converted element by element and joined with ",".
//   - every other value is rendered with fmt's default %v formatting.
func toString(v any) string {
	switch x := v.(type) {
	case nil:
		return ""
	case string:
		return x
	case []any:
		parts := make([]string, 0, len(x))
		for _, elem := range x {
			parts = append(parts, toString(elem))
		}
		return strings.Join(parts, ",")
	default:
		return fmt.Sprintf("%v", v)
	}
}

// ParseBytes runs the front-matter parser over raw for callers that do not
// have a Post of their own (tests, the doctor command). It works on a
// throwaway Post, so no caller-owned Post is mutated.
//
// On success it returns the front-matter map produced by parsePost and the
// body bytes that remain after the front matter. On failure it returns the
// parse error with a nil map and nil body.
func ParseBytes(raw []byte) (map[string]string, []byte, error) {
	scratch := pipeline.Post{Raw: raw}
	err := parsePost(&scratch)
	if err != nil {
		return nil, nil, err
	}
	return scratch.Meta, scratch.Raw, nil
}