// Package markdown wraps goldmark in a small, opinionated API that exposes a
// Document type holding both the parsed AST and the original source bytes.
// The tilstream build pipeline uses Document as its central IR between the
// file-loading stage and the final HTML renderer.
//
// See mercemay.top/src/tilstream/ for the complete architecture document.
package markdown
import (
"bytes"
"fmt"
"io"
"sync"
"github.com/yuin/goldmark"
"github.com/yuin/goldmark/ast"
"github.com/yuin/goldmark/extension"
"github.com/yuin/goldmark/parser"
"github.com/yuin/goldmark/text"
)
// Document is a parsed markdown note. I keep the raw source around because
// some callers (search index, feed summaries) need to re-read text ranges
// that the AST references by byte offset.
type Document struct {
// Source is the markdown input the tree was built from. Parse stores the
// caller's slice as-is (no copy) and leaves it nil for empty input.
Source []byte
// Root is the top of the parsed tree; Walk starts its traversal here.
Root ast.Node
}
// Parser turns markdown bytes into a Document. It is safe for concurrent
// use; the embedded goldmark.Markdown is effectively immutable after setup.
// NOTE(review): that guarantee rests on goldmark's own behaviour and on
// callers not reconfiguring the engine obtained via Goldmark() — confirm.
type Parser struct {
// gm is the configured goldmark engine; NewParser is the only place in
// this file that assigns it.
gm goldmark.Markdown
// once is not referenced by any code visible in this file.
// NOTE(review): presumably reserved for lazy initialisation — confirm or drop.
once sync.Once
// opts is likewise never assigned or read in the code visible here.
opts []Option
}
// Option is a functional option: a callback that adjusts the parser's
// configuration while the parser is being constructed.
type Option func(*config)

// config collects the feature switches that options can flip.
type config struct {
	gfm         bool // GitHub-flavored markdown extension
	footnotes   bool // footnote extension
	typographer bool // typographer extension
	autoID      bool // automatic heading IDs
}

// WithGFM turns on GitHub-flavored markdown (tables, strikethrough,
// autolinks).
func WithGFM() Option {
	return func(cfg *config) {
		cfg.gfm = true
	}
}

// WithFootnotes turns on footnote references and definitions.
func WithFootnotes() Option {
	return func(cfg *config) {
		cfg.footnotes = true
	}
}

// WithTypographer turns on smart quote substitution.
func WithTypographer() Option {
	return func(cfg *config) {
		cfg.typographer = true
	}
}

// WithAutoHeadingID makes the parser attach stable slug IDs to headings.
func WithAutoHeadingID() Option {
	return func(cfg *config) {
		cfg.autoID = true
	}
}
// NewParser assembles a Parser from the supplied options. Before any option
// runs, GFM, footnotes and automatic heading IDs are already switched on,
// because that matches how I write my own TILs; the typographer extension
// stays off unless WithTypographer is passed.
func NewParser(opts ...Option) *Parser {
	settings := config{gfm: true, footnotes: true, autoID: true}
	for _, apply := range opts {
		apply(&settings)
	}

	// Extensions are registered in a fixed order: GFM, footnotes, typographer.
	toggles := []struct {
		on  bool
		ext goldmark.Extender
	}{
		{settings.gfm, extension.GFM},
		{settings.footnotes, extension.Footnote},
		{settings.typographer, extension.Typographer},
	}
	var enabled []goldmark.Extender
	for _, t := range toggles {
		if t.on {
			enabled = append(enabled, t.ext)
		}
	}

	var parserOptions []parser.Option
	if settings.autoID {
		parserOptions = append(parserOptions, parser.WithAutoHeadingID())
	}

	engine := goldmark.New(
		goldmark.WithExtensions(enabled...),
		goldmark.WithParserOptions(parserOptions...),
	)
	return &Parser{gm: engine}
}
// Parse converts markdown bytes into a Document whose Root is the top-level
// ast.Document node. Zero-length input never reaches goldmark: it yields a
// fresh, empty document node with a nil Source. For any other input the
// returned Document keeps src itself (not a copy) as its Source.
func (p *Parser) Parse(src []byte) (*Document, error) {
	if len(src) == 0 {
		empty := &Document{Root: ast.NewDocument()}
		return empty, nil
	}

	input := text.NewReader(src)
	tree := p.gm.Parser().Parse(input)
	if tree == nil {
		return nil, fmt.Errorf("markdown: goldmark returned nil root")
	}

	doc := &Document{
		Source: src,
		Root:   tree,
	}
	return doc, nil
}
// ParseReader drains r into memory and hands the collected bytes to Parse.
// A failure while reading is returned wrapped with a "markdown: read:"
// prefix; otherwise the result is whatever Parse returns.
func (p *Parser) ParseReader(r io.Reader) (*Document, error) {
	collected := new(bytes.Buffer)
	_, err := io.Copy(collected, r)
	if err != nil {
		return nil, fmt.Errorf("markdown: read: %w", err)
	}
	return p.Parse(collected.Bytes())
}
// Walk visits every node under d.Root in depth-first order by delegating to
// ast.Walk. The callback returns ast.WalkStop to abort, ast.WalkSkipChildren
// to skip the subtree, or ast.WalkContinue to proceed. The error result is
// whatever ast.Walk reports.
//
// A nil Document or a Document without a Root (for example a zero-value
// Document built by hand) is treated as an empty tree: fn is never called and
// Walk returns nil.
func (d *Document) Walk(fn func(n ast.Node, entering bool) (ast.WalkStatus, error)) error {
	// Without this guard ast.Walk would pass a nil node to fn and then call
	// methods on it, which panics.
	if d == nil || d.Root == nil {
		return nil
	}
	return ast.Walk(d.Root, fn)
}
// TextOf returns the concatenated literal text found at or beneath n, read
// from d.Source in document order.
//
// Every *ast.Text node is included, however deeply it is nested, so text
// wrapped in emphasis, links or code spans is kept. If n is itself an
// *ast.Text node, its own text is returned. Only *ast.Text nodes contribute:
// nodes of other types add nothing themselves, and no separator is inserted
// between adjacent text segments. A nil n yields the empty string.
func (d *Document) TextOf(n ast.Node) string {
	if n == nil {
		return ""
	}

	var buf bytes.Buffer

	// collect appends the text of node (when it is a text leaf) or recurses
	// into its children, preserving sibling order.
	var collect func(node ast.Node)
	collect = func(node ast.Node) {
		if t, ok := node.(*ast.Text); ok {
			buf.Write(t.Segment.Value(d.Source))
			return
		}
		for c := node.FirstChild(); c != nil; c = c.NextSibling() {
			collect(c)
		}
	}
	collect(n)

	return buf.String()
}
// Goldmark hands back the goldmark engine this Parser was built around, for
// callers that need to bypass the Parser wrapper (e.g. running a one-shot
// extension in a test). The returned value is the same one Parse uses.
func (p *Parser) Goldmark() goldmark.Markdown {
	engine := p.gm
	return engine
}