// internal/pipeline/stage/load.go

// Package stage contains the concrete Stage implementations used by the
// tilstream build pipeline.
package stage

import (
	"context"
	"fmt"
	"io/fs"
	"os"
	"path/filepath"
	"strings"

	"mercemay.top/src/tilstream/internal/pipeline"
)

// Load is the pipeline stage that reads markdown sources from disk: its Run
// method walks State.SourceDir and appends one raw Post per file whose name
// ends in ".md".
//
// When SkipHidden is set, files and directories whose names start with "_"
// or "." are skipped. The original author notes that this mirrors a familiar
// Hugo convention.
type Load struct {
	SkipHidden bool  // skip files and directories whose names begin with "." or "_"
	MaxBytes   int64 // per-file size limit in bytes; Run fails on larger files, values <= 0 disable the check
}

// NewLoad builds a Load configured with the package defaults: hidden
// entries are skipped and individual files are capped at 1 MiB.
func NewLoad() *Load {
	const defaultMaxBytes int64 = 1 << 20 // 1 MiB

	l := new(Load)
	l.SkipHidden = true
	l.MaxBytes = defaultMaxBytes
	return l
}

// Name reports the identifier under which this stage appears in metrics.
func (*Load) Name() string {
	const stageName = "load"
	return stageName
}

// Run walks st.SourceDir recursively and appends one pipeline.Post to
// st.Posts for every non-directory entry whose name ends in ".md"
// (case-sensitive). Each Post carries the walked path, the raw file bytes
// and an empty Meta map.
//
// When s.SkipHidden is set, files and directories whose names start with
// "." or "_" are skipped; the root directory itself is always entered.
// When s.MaxBytes is positive, a file larger than that many bytes aborts
// the walk with an error. Run also stops with ctx.Err() once ctx is done,
// and returns any error reported by the walk, os.Stat or os.ReadFile
// unchanged. Posts appended before a failure remain in st.Posts.
func (s *Load) Run(ctx context.Context, st *pipeline.State) error {
	if st.SourceDir == "" {
		return fmt.Errorf("load: SourceDir is empty")
	}

	// oversize builds the error reported for a file that breaks the limit.
	oversize := func(path string) error {
		return fmt.Errorf("load: %s exceeds max size %d", path, s.MaxBytes)
	}

	return filepath.WalkDir(st.SourceDir, func(path string, d fs.DirEntry, err error) error {
		if err != nil {
			// d may be nil when the walk reports an error, so bail out
			// before touching it.
			return err
		}
		// Non-blocking cancellation check, performed once per entry.
		select {
		case <-ctx.Done():
			return ctx.Err()
		default:
		}
		if d.IsDir() {
			// The root is exempt so that a SourceDir such as "." or
			// "_content" is still walked.
			if s.SkipHidden && isHidden(d.Name()) && path != st.SourceDir {
				return fs.SkipDir
			}
			return nil
		}
		if !strings.HasSuffix(d.Name(), ".md") {
			return nil
		}
		if s.SkipHidden && isHidden(d.Name()) {
			return nil
		}
		if s.MaxBytes > 0 {
			// d.Info describes a symlink itself rather than its target,
			// whereas os.ReadFile follows the link. Stat the path so the
			// limit is applied to the file that will actually be read.
			info, err := os.Stat(path)
			if err != nil {
				return err
			}
			if info.Size() > s.MaxBytes {
				return oversize(path)
			}
		}
		raw, err := os.ReadFile(path)
		if err != nil {
			return err
		}
		// Re-check against the bytes actually read: the file may have grown
		// since the stat, or the reported size may not reflect its content.
		if s.MaxBytes > 0 && int64(len(raw)) > s.MaxBytes {
			return oversize(path)
		}
		st.Posts = append(st.Posts, pipeline.Post{
			Path: path,
			Raw:  raw,
			Meta: make(map[string]string),
		})
		return nil
	})
}

// isHidden reports whether name begins with "." or "_". The empty string is
// never considered hidden.
func isHidden(name string) bool {
	if name == "" {
		return false
	}
	switch name[0] {
	case '.', '_':
		return true
	default:
		return false
	}
}

// CountPosts returns the number of Posts currently held in st. It is a small
// convenience used by the test suite when checking expected output.
func CountPosts(st *pipeline.State) int {
	total := len(st.Posts)
	return total
}

// MustReadAll loads every markdown file under dir into memory by running a
// default Load (see NewLoad) directly against a fresh State, without going
// through the rest of the pipeline. Intended for tests and one-off CLI
// helpers.
//
// Despite the Must prefix it does not panic: on failure it returns a nil
// slice together with the error from Load.Run.
func MustReadAll(dir string) ([]pipeline.Post, error) {
	state := pipeline.State{SourceDir: dir}
	if err := NewLoad().Run(context.Background(), &state); err != nil {
		return nil, err
	}
	return state.Posts, nil
}