// File: internal/render/markdown/parser.go

// Package markdown wraps goldmark in a small, opinionated API that exposes a
// Document type holding both the parsed AST and the original source bytes.
// The tilstream build pipeline uses Document as its central IR between the
// file-loading stage and the final HTML renderer.
//
// See mercemay.top/src/tilstream/ for the complete architecture document.
package markdown

import (
	"bytes"
	"fmt"
	"io"
	"sync"

	"github.com/yuin/goldmark"
	"github.com/yuin/goldmark/ast"
	"github.com/yuin/goldmark/extension"
	"github.com/yuin/goldmark/parser"
	"github.com/yuin/goldmark/text"
)

// Document is a parsed markdown note. I keep the raw source around because
// some callers (search index, feed summaries) need to re-read text ranges
// that the AST references by byte offset.
type Document struct {
	// Source is the markdown input the tree was parsed from. Parse stores the
	// caller's slice as-is (no copy) and leaves it nil for empty input, so the
	// slice must not be modified while the Document is in use.
	Source []byte
	// Root is the top of the parsed tree; Parse fills it with the node
	// returned by the goldmark parser, or a fresh ast.Document for empty input.
	Root   ast.Node
}

// Parser turns markdown bytes into a Document. It is safe for concurrent
// use; the embedded goldmark.Markdown is effectively immutable after setup.
//
// Build one with NewParser. Because the struct holds a sync.Once, pass it
// around by pointer rather than copying it.
type Parser struct {
	// gm is the configured goldmark engine. NewParser is the only place in
	// this file that assigns it.
	gm   goldmark.Markdown
	// once is not referenced by any code visible in this file.
	// NOTE(review): possibly reserved for lazy setup; confirm or remove.
	once sync.Once
	// opts is not populated by NewParser and is not read in this file.
	// NOTE(review): confirm whether another file in the package uses it.
	opts []Option
}

// Option configures the parser at construction time. Each Option is a
// function that mutates the private config NewParser builds before it
// creates the goldmark engine; see the With* constructors for the available
// settings.
type Option func(*config)

// config collects the feature switches that NewParser translates into
// goldmark extensions and parser options. NewParser starts from gfm,
// footnotes and autoID set to true and typographer set to false.
type config struct {
	// gfm selects extension.GFM.
	gfm         bool
	// footnotes selects extension.Footnote.
	footnotes   bool
	// typographer selects extension.Typographer.
	typographer bool
	// autoID selects parser.WithAutoHeadingID.
	autoID      bool
}

// WithGFM returns an Option that switches on GitHub-flavored markdown
// (tables, strikethrough, autolinks). NewParser already enables this by
// default, so the option only makes the choice explicit.
func WithGFM() Option {
	return func(cfg *config) {
		cfg.gfm = true
	}
}

// WithFootnotes returns an Option that switches on footnote references and
// definitions. NewParser already enables this by default, so the option only
// makes the choice explicit.
func WithFootnotes() Option {
	return func(cfg *config) {
		cfg.footnotes = true
	}
}

// WithTypographer returns an Option that switches on smart quote
// substitution via goldmark's Typographer extension. This is the only
// feature that NewParser leaves off by default.
func WithTypographer() Option {
	return func(cfg *config) {
		cfg.typographer = true
	}
}

// WithAutoHeadingID returns an Option that makes the parser attach stable
// slug IDs to headings. NewParser already enables this by default, so the
// option only makes the choice explicit.
func WithAutoHeadingID() Option {
	return func(cfg *config) {
		cfg.autoID = true
	}
}

// NewParser builds a Parser with the given options. The default set is GFM +
// footnotes + auto heading IDs because that matches how I write my own TILs.
//
// Options are applied in order on top of those defaults. Every Option in
// this file can only turn a feature on, so the defaults cannot be disabled
// through this constructor; WithTypographer is the only option that changes
// the outcome.
func NewParser(opts ...Option) *Parser {
	settings := config{gfm: true, footnotes: true, autoID: true}
	for _, apply := range opts {
		apply(&settings)
	}

	// Extensions are registered in a fixed order: GFM, footnotes, typographer.
	candidates := []struct {
		enabled bool
		ext     goldmark.Extender
	}{
		{settings.gfm, extension.GFM},
		{settings.footnotes, extension.Footnote},
		{settings.typographer, extension.Typographer},
	}
	var exts []goldmark.Extender
	for _, c := range candidates {
		if c.enabled {
			exts = append(exts, c.ext)
		}
	}

	var parserOpts []parser.Option
	if settings.autoID {
		parserOpts = append(parserOpts, parser.WithAutoHeadingID())
	}

	engine := goldmark.New(
		goldmark.WithExtensions(exts...),
		goldmark.WithParserOptions(parserOpts...),
	)
	return &Parser{gm: engine}
}

// Parse turns markdown bytes into a Document whose Root is the top-level
// node produced by the goldmark parser.
//
// Empty (or nil) input short-circuits: the result has a nil Source and a
// fresh, childless ast.Document as Root. Otherwise src is stored in the
// Document without copying. An error is returned only if goldmark hands back
// a nil root.
func (p *Parser) Parse(src []byte) (*Document, error) {
	doc := new(Document)
	if len(src) == 0 {
		doc.Root = ast.NewDocument()
		return doc, nil
	}

	parsed := p.gm.Parser().Parse(text.NewReader(src))
	if parsed == nil {
		return nil, fmt.Errorf("markdown: goldmark returned nil root")
	}

	doc.Source, doc.Root = src, parsed
	return doc, nil
}

// ParseReader is a convenience for streaming input: it reads r to EOF into
// memory and hands the bytes to Parse. A read failure is returned wrapped
// with the "markdown: read" prefix.
func (p *Parser) ParseReader(r io.Reader) (*Document, error) {
	src, err := io.ReadAll(r)
	if err != nil {
		return nil, fmt.Errorf("markdown: read: %w", err)
	}
	return p.Parse(src)
}

// Walk traverses the tree rooted at d.Root in depth-first order by
// delegating to ast.Walk. The callback steers the traversal through its
// returned status: ast.WalkStop aborts, ast.WalkSkipChildren skips the
// current subtree, and ast.WalkContinue proceeds. The error from ast.Walk is
// returned unchanged.
func (d *Document) Walk(fn func(n ast.Node, entering bool) (ast.WalkStatus, error)) error {
	visitor := ast.Walker(fn)
	return ast.Walk(d.Root, visitor)
}

// TextOf returns the plain text contained in n, resolved against d.Source.
//
// The whole subtree under n is visited, not just its direct children, so
// text wrapped in emphasis, links or code spans is included:
//
//   - *ast.Text contributes its source segment, followed by a newline when
//     the node ends in a soft or hard line break (so words on adjacent
//     source lines do not run together);
//   - *ast.String contributes its literal value;
//   - *ast.AutoLink contributes its label;
//   - a block node without children (for example a code block) contributes
//     its raw Lines(), following goldmark's convention for block content;
//   - consecutive block nodes are separated by a newline.
//
// A nil n yields the empty string. n must belong to the tree parsed from
// d.Source, because segments are byte offsets into that slice.
func (d *Document) TextOf(n ast.Node) string {
	if n == nil {
		return ""
	}

	var buf bytes.Buffer
	var collect func(node ast.Node)
	collect = func(node ast.Node) {
		switch v := node.(type) {
		case *ast.Text:
			buf.Write(v.Segment.Value(d.Source))
			if v.SoftLineBreak() || v.HardLineBreak() {
				buf.WriteByte('\n')
			}
			return
		case *ast.String:
			buf.Write(v.Value)
			return
		case *ast.AutoLink:
			buf.Write(v.Label(d.Source))
			return
		}

		if node.Type() == ast.TypeBlock {
			// Keep the text of sibling blocks from fusing into one word.
			if b := buf.Bytes(); len(b) > 0 && b[len(b)-1] != '\n' {
				buf.WriteByte('\n')
			}
			// Lines() is only valid on block nodes; inline nodes panic, hence
			// the Type check above. Childless blocks keep their content here.
			if !node.HasChildren() {
				lines := node.Lines()
				for i := 0; i < lines.Len(); i++ {
					seg := lines.At(i)
					buf.Write(seg.Value(d.Source))
				}
				return
			}
		}

		for c := node.FirstChild(); c != nil; c = c.NextSibling() {
			collect(c)
		}
	}
	collect(n)
	return buf.String()
}

// Goldmark returns the goldmark.Markdown engine held by the Parser, for
// callers that need to bypass the wrapper (e.g. running a one-shot extension
// in a test). The returned value is the same instance Parse uses, not a copy.
func (p *Parser) Goldmark() goldmark.Markdown {
	engine := p.gm
	return engine
}