// internal/tracer/tracer.go

// Package tracer attaches uretprobes to TLS libraries so we can observe
// plaintext HTTP before it gets encrypted (or after it's been decrypted).
//
// See mercemay.top/src/httptap/ for design notes.
package tracer

import (
	"bytes"
	"context"
	_ "embed"
	"encoding/binary"
	"errors"
	"fmt"
	"os"
	"path/filepath"
	"runtime"
	"strconv"
	"strings"

	"github.com/cilium/ebpf"
	"github.com/cilium/ebpf/link"
	"github.com/cilium/ebpf/ringbuf"
)

// bpfObject holds the bytes of tracer.bpf.o, embedded into the binary at
// build time. Open parses it as a BPF collection spec.
//
//go:embed tracer.bpf.o
var bpfObject []byte

// Direction records which way the captured bytes were travelling relative to
// the traced process: leaving it or arriving at it.
type Direction uint8

// Valid Direction values. Numbering starts at 1, so the zero value of
// Direction matches neither constant.
const (
	// DirWrite marks bytes on their way out of the process.
	DirWrite Direction = 1
	// DirRead marks bytes coming in to the process.
	DirRead Direction = 2
)

// Event is the decoded form of a single ring buffer sample.
type Event struct {
	// PID and TID are the two leading 32-bit identifiers of the sample, in
	// that order.
	PID, TID uint32
	// Direction is the direction byte carried by the sample.
	Direction Direction
	// Data is the captured payload.
	Data []byte
}

// ProbeSpec names one hook point: a function symbol and the library that
// exports it. Callers can supply their own through Config.ExtraProbes.
type ProbeSpec struct {
	// Symbol is the function to hook; Library is the shared object that is
	// opened to find it.
	Symbol, Library string
}

// Config is the knobs the CLI passes down.
type Config struct {
	// PIDs are the process IDs written into the "allowed_pids" BPF map
	// when the tracer is opened.
	PIDs []int
	// FollowForks is not read anywhere in this file.
	// NOTE(review): presumably consumed elsewhere — confirm.
	FollowForks bool
	// GoSymbols is not read anywhere in this file.
	// NOTE(review): presumably consumed elsewhere — confirm.
	GoSymbols string
	// ExtraProbes are additional hook points, attached after the built-in
	// defaults.
	ExtraProbes []ProbeSpec
}

// Tracer holds the loaded BPF objects plus the attached links.
type Tracer struct {
	// coll is the collection created from the embedded BPF object.
	coll *ebpf.Collection
	// links holds one entry per probe that attached successfully.
	links []link.Link
	// events reads samples from the "events" map; it stays nil until Open
	// has created the reader.
	events *ringbuf.Reader
	// cfg is the configuration Open was called with.
	cfg Config
}

// Open loads the embedded BPF object, seeds the PID allow-list from
// cfg.PIDs, attaches the probes, and opens the ring buffer reader. The
// returned Tracer is ready to stream events and must be released with Close.
//
// If any step fails, everything acquired up to that point is released and
// the error is returned with a nil Tracer.
func Open(cfg Config) (*Tracer, error) {
	spec, err := ebpf.LoadCollectionSpecFromReader(bytes.NewReader(bpfObject))
	if err != nil {
		return nil, fmt.Errorf("load bpf spec: %w", err)
	}

	// Discard any initial contents carried by the object file so the
	// allow-list starts out empty; seedPIDs fills it from cfg.PIDs below.
	if m, ok := spec.Maps["allowed_pids"]; ok {
		m.Contents = nil
	}

	coll, err := ebpf.NewCollection(spec)
	if err != nil {
		return nil, fmt.Errorf("new bpf collection: %w", err)
	}

	t := &Tracer{coll: coll, cfg: cfg}

	if err := t.seedPIDs(); err != nil {
		t.Close()
		return nil, err
	}

	if err := t.attachAll(); err != nil {
		t.Close()
		return nil, err
	}

	// Look the map up explicitly: indexing a missing key yields a nil
	// *ebpf.Map, which must not be handed to ringbuf.NewReader.
	events, ok := coll.Maps["events"]
	if !ok {
		t.Close()
		return nil, errors.New("ringbuf reader: bpf object has no events map")
	}
	rd, err := ringbuf.NewReader(events)
	if err != nil {
		t.Close()
		return nil, fmt.Errorf("ringbuf reader: %w", err)
	}
	t.events = rd

	return t, nil
}

// Close tears the tracer down in order: the ring buffer reader (if one was
// created), every attached link, and finally the BPF collection (if one was
// loaded). Errors from the reader and the links are deliberately ignored;
// there is nothing useful a caller could do with them during teardown.
func (t *Tracer) Close() {
	if rd := t.events; rd != nil {
		_ = rd.Close()
	}
	for i := range t.links {
		_ = t.links[i].Close()
	}
	if c := t.coll; c != nil {
		c.Close()
	}
}

// Events starts a goroutine that reads samples from the ring buffer, decodes
// them, and delivers them on the returned event channel. Samples that do not
// decode are dropped. A read error other than ringbuf.ErrClosed is sent on
// the error channel before the goroutine stops. Both channels are closed
// when the goroutine exits.
//
// Cancelling ctx closes the ring buffer reader, because closing is what
// interrupts a Read that is blocked waiting for samples. Events must
// therefore not be called again on the same Tracer once its ctx is done.
// Closing the Tracer ends the stream as well.
func (t *Tracer) Events(ctx context.Context) (<-chan Event, <-chan error) {
	out := make(chan Event, 128)
	errs := make(chan error, 1) // buffered so the single send below never blocks
	done := make(chan struct{})

	// Watcher: without it a cancelled ctx would only be noticed after the
	// next sample arrived, leaving the reader goroutine and both channels
	// hanging on an idle buffer.
	go func() {
		select {
		case <-ctx.Done():
			// The stream is ending either way, so a close error is
			// of no use to anyone.
			_ = t.events.Close()
		case <-done:
		}
	}()

	go func() {
		defer close(out)
		defer close(errs)
		defer close(done) // releases the watcher when the loop ends on its own

		for {
			rec, err := t.events.Read()
			if err != nil {
				if errors.Is(err, ringbuf.ErrClosed) || ctx.Err() != nil {
					return
				}
				errs <- err
				return
			}

			ev, ok := decode(rec.RawSample)
			if !ok {
				continue
			}
			select {
			case out <- ev:
			case <-ctx.Done():
				return
			}
		}
	}()

	return out, errs
}

// seedPIDs writes every PID from the configuration into the "allowed_pids"
// BPF map, keyed by the PID as a uint32 with a value of 1.
//
// It is a no-op when no PIDs are configured. It returns an error if PIDs are
// configured but the map is absent from the collection, if a PID cannot be
// represented as a uint32 key, or if a map update fails.
func (t *Tracer) seedPIDs() error {
	if len(t.cfg.PIDs) == 0 {
		return nil
	}

	// Indexing a missing key yields a nil *ebpf.Map; fail with an error
	// instead of calling Put on it.
	m, ok := t.coll.Maps["allowed_pids"]
	if !ok {
		return errors.New("seed pids: bpf object has no allowed_pids map")
	}

	const allowed = uint8(1)
	for _, pid := range t.cfg.PIDs {
		// The conversion to uint32 below would silently wrap these
		// into some unrelated key.
		if pid < 0 || int64(pid) > 0xFFFFFFFF {
			return fmt.Errorf("seed pid %d: out of range", pid)
		}
		if err := m.Put(uint32(pid), allowed); err != nil {
			return fmt.Errorf("seed pid %d: %w", pid, err)
		}
	}
	return nil
}

// attachAll tries to attach the built-in probes followed by the configured
// extra probes, recording a link for each one that succeeds.
//
// A probe that fails to attach is skipped, since a given TLS library is
// often simply not installed. Only when no link at all is held afterwards
// does attachAll fail, and the error then lists why each probe was rejected.
func (t *Tracer) attachAll() error {
	probes := append(defaultProbes(), t.cfg.ExtraProbes...)

	// Kept only for the all-failed case; individual failures are not fatal.
	var failures []string
	for _, spec := range probes {
		l, err := attachURetprobe(t.coll, spec)
		if err != nil {
			failures = append(failures, fmt.Sprintf("%s in %s: %v", spec.Symbol, spec.Library, err))
			continue
		}
		t.links = append(t.links, l)
	}
	if len(t.links) == 0 {
		return fmt.Errorf("no TLS library probes could be attached: %s", strings.Join(failures, "; "))
	}
	return nil
}

// defaultProbes returns the built-in hook points: the read and write entry
// points of libssl.so.3, libssl.so.1.1 and libgnutls.so.30, grouped by
// library in that order. A fresh slice is returned on every call.
func defaultProbes() []ProbeSpec {
	targets := []struct {
		library string
		symbols [2]string
	}{
		{"libssl.so.3", [2]string{"SSL_read", "SSL_write"}},
		{"libssl.so.1.1", [2]string{"SSL_read", "SSL_write"}},
		{"libgnutls.so.30", [2]string{"gnutls_record_recv", "gnutls_record_send"}},
	}

	probes := make([]ProbeSpec, 0, 2*len(targets))
	for _, tgt := range targets {
		for _, sym := range tgt.symbols {
			probes = append(probes, ProbeSpec{Symbol: sym, Library: tgt.library})
		}
	}
	return probes
}

// attachURetprobe attaches the BPF program named "on_<Symbol>_ret" from coll
// as a uretprobe on spec.Symbol inside spec.Library and returns the link.
//
// link.OpenExecutable takes a filesystem path, so a bare library name such
// as "libssl.so.3" is first resolved with resolveLibrary. The call fails if
// the library cannot be opened, if coll holds no program for the symbol, or
// if the attach itself fails.
func attachURetprobe(coll *ebpf.Collection, spec ProbeSpec) (link.Link, error) {
	ex, err := link.OpenExecutable(resolveLibrary(spec.Library))
	if err != nil {
		return nil, err
	}
	progName := "on_" + spec.Symbol + "_ret"
	prog, ok := coll.Programs[progName]
	if !ok {
		return nil, fmt.Errorf("no bpf program %q for %s", progName, spec.Symbol)
	}
	return ex.Uretprobe(spec.Symbol, prog, nil)
}

// resolveLibrary turns a bare shared-object name into a path by probing the
// conventional library directories, and returns the first existing match.
//
// Names that already contain a path separator, and names that exist
// relative to the working directory, are returned unchanged. If nothing is
// found the name is also returned unchanged, so the caller's open fails with
// the original name in the error.
//
// The lookup uses the filesystem as the tracer sees it. A target that loads
// its TLS library from a different mount namespace or a bundled copy needs
// an explicit path in its ProbeSpec.
func resolveLibrary(name string) string {
	if name == "" || strings.ContainsRune(name, filepath.Separator) {
		return name
	}
	if _, err := os.Stat(name); err == nil {
		return name
	}

	// Debian-style multiarch directory for the architecture this binary
	// was built for. A glob over all triplets could pick a foreign
	// architecture's copy of the library instead.
	var triplet string
	switch runtime.GOARCH {
	case "amd64":
		triplet = "x86_64-linux-gnu"
	case "386":
		triplet = "i386-linux-gnu"
	case "arm64":
		triplet = "aarch64-linux-gnu"
	case "arm":
		triplet = "arm-linux-gnueabihf"
	case "riscv64":
		triplet = "riscv64-linux-gnu"
	case "ppc64le":
		triplet = "powerpc64le-linux-gnu"
	case "s390x":
		triplet = "s390x-linux-gnu"
	}

	var dirs []string
	if triplet != "" {
		dirs = append(dirs, "/usr/lib/"+triplet, "/lib/"+triplet)
	}
	dirs = append(dirs, "/usr/lib64", "/lib64", "/usr/lib", "/lib", "/usr/local/lib")

	for _, dir := range dirs {
		candidate := filepath.Join(dir, name)
		if fi, err := os.Stat(candidate); err == nil && !fi.IsDir() {
			return candidate
		}
	}
	return name
}

// decode parses one raw ring buffer sample into an Event. The boolean is
// false when the sample is shorter than the fixed header or when the length
// field claims more payload than the sample holds.
//
// Sample layout, little-endian: bytes 0-3 pid, bytes 4-7 tid, byte 8
// direction, bytes 9-10 payload length, then the payload. Bytes beyond the
// declared payload length are ignored. The payload is copied, so the
// returned Event does not alias raw.
func decode(raw []byte) (Event, bool) {
	const (
		tidOff  = 4
		dirOff  = 8
		lenOff  = 9
		hdrSize = 11
	)

	if len(raw) < hdrSize {
		return Event{}, false
	}

	end := hdrSize + int(binary.LittleEndian.Uint16(raw[lenOff:hdrSize]))
	if end > len(raw) {
		return Event{}, false
	}

	return Event{
		PID:       binary.LittleEndian.Uint32(raw[:tidOff]),
		TID:       binary.LittleEndian.Uint32(raw[tidOff:dirOff]),
		Direction: Direction(raw[dirOff]),
		// Appending to a nil slice copies the payload and leaves Data
		// nil when the payload is empty.
		Data: append([]byte(nil), raw[hdrSize:end]...),
	}, true
}

// FindPIDByName walks /proc and returns the PID of the first process, in the
// order os.ReadDir lists the directory, whose comm matches name.
//
// The kernel truncates comm to 15 bytes, so only the first 15 bytes of name
// take part in the comparison. A longer name therefore matches any process
// sharing that prefix as its comm.
//
// It returns an error if /proc cannot be listed or if no process matches.
// Entries that are not numeric directories, or whose comm cannot be read
// (for example because the process exited meanwhile), are skipped.
func FindPIDByName(name string) (int, error) {
	// TASK_COMM_LEN is 16 including the terminating NUL byte.
	const maxComm = 15
	want := name
	if len(want) > maxComm {
		want = want[:maxComm]
	}

	entries, err := os.ReadDir("/proc")
	if err != nil {
		return 0, err
	}
	for _, e := range entries {
		if !e.IsDir() {
			continue
		}
		pid, err := strconv.Atoi(e.Name())
		if err != nil {
			continue
		}
		b, err := os.ReadFile(filepath.Join("/proc", e.Name(), "comm"))
		if err != nil {
			continue
		}
		if strings.TrimSpace(string(b)) == want {
			return pid, nil
		}
	}
	return 0, fmt.Errorf("no process called %q", name)
}