internal/parser/http1/request.go

// Package http1 decodes HTTP/1.x messages off the wire. The decoder is
// permissive with whitespace and tolerant of trailing garbage so that
// live captures of midstream connections still surface useful data.
//
// mercemay.top/src/httptap/ documents the stable public contract.
package http1

import (
	"bufio"
	"bytes"
	"errors"
	"fmt"
	"io"
	"strings"
)

// Request is the normalised result of parsing one HTTP/1.x request
// including its body. Trailers are only populated for chunked bodies.
type Request struct {
	Method     string      // request method; parseRequestLine only accepts RFC 7230 tokens here
	Target     string      // request-target as received, whitespace-trimmed but not otherwise validated
	Version    string      // protocol version text; parseRequestLine only requires the "HTTP/1." prefix
	Headers    [][2]string // header fields as returned by ReadHeaders; presumably {name, value} pairs - confirm
	Body       []byte      // body bytes as returned by ReadBody
	Trailers   [][2]string // trailer fields as returned by ReadBody
	Incomplete bool        // marks a request that was only partially received (see ReadRequest's contract)
}

// Sentinel errors produced while parsing the request line. Most call sites
// wrap them with the offending text, so match with errors.Is rather than ==.
var (
	// errShortLine reports a request line with fewer than three components.
	errShortLine = errors.New("http1: request line too short")

	// errBadVersion reports a version component that does not start with
	// "HTTP/1.".
	errBadVersion = errors.New("http1: malformed HTTP version")

	// errBadMethod reports a method that is empty or is not an RFC 7230
	// token.
	errBadMethod = errors.New("http1: empty or non-token method")
)

// ReadRequest parses exactly one request from r: the request line, then the
// headers, then the body and any trailers.
//
// It returns io.EOF only when r is completely drained, i.e. the stream ended
// before a single byte of the request line could be read. When the stream
// ends part-way through a request, ReadRequest returns whatever was decoded
// up to that point with Incomplete=true and a nil error. Every other failure
// (for example a malformed request line) is returned unchanged together with
// a nil *Request.
func ReadRequest(r *bufio.Reader) (*Request, error) {
	// truncated reports whether err means "the stream ended early" rather
	// than "the data is malformed".
	// NOTE(review): assumes ReadHeaders and ReadBody surface end-of-stream as
	// io.EOF or io.ErrUnexpectedEOF (possibly wrapped) - confirm.
	truncated := func(err error) bool {
		return errors.Is(err, io.EOF) || errors.Is(err, io.ErrUnexpectedEOF)
	}

	line, err := readCRLF(r)
	if err != nil {
		if len(line) == 0 || !truncated(err) {
			// Nothing was read (clean end of stream) or a real I/O failure.
			return nil, err
		}
		// The stream stopped inside the request line. Surface the pieces
		// that still parse; a cut-off line that does not parse is reported
		// as an otherwise empty, incomplete request.
		req, perr := parseRequestLine(line)
		if perr != nil {
			req = &Request{}
		}
		req.Incomplete = true
		return req, nil
	}

	req, err := parseRequestLine(line)
	if err != nil {
		return nil, err
	}

	h, err := ReadHeaders(r)
	if err != nil {
		if truncated(err) {
			// Keep whatever ReadHeaders handed back (possibly nil).
			req.Headers = h
			req.Incomplete = true
			return req, nil
		}
		return nil, err
	}
	req.Headers = h

	body, trailers, err := ReadBody(r, h, false)
	if err != nil {
		if truncated(err) {
			// Keep whatever ReadBody handed back (possibly nil).
			req.Body = body
			req.Trailers = trailers
			req.Incomplete = true
			return req, nil
		}
		return nil, err
	}
	req.Body = body
	req.Trailers = trailers
	return req, nil
}

// parseRequestLine splits "METHOD SP REQUEST-TARGET SP HTTP/1.1" into a
// Request that carries only Method, Target and Version.
//
// Extra spaces are tolerated because some embedded clients produce them:
// runs of SP before the method and between the three components are
// skipped. Only SP (0x20) separates components; everything after the target
// is kept, whitespace-trimmed, as the version and must start with "HTTP/1.".
//
// It returns errShortLine (wrapped with the line) when fewer than three
// components are present, errBadMethod when the method is empty or not an
// RFC 7230 token, and errBadVersion (wrapped with the version text) when the
// version prefix is wrong.
func parseRequestLine(line string) (*Request, error) {
	line = strings.TrimRight(line, "\r\n")

	// nextField skips leading spaces and cuts s at the following space.
	// ok is false when no separator follows the field.
	nextField := func(s string) (field, rest string, ok bool) {
		s = strings.TrimLeft(s, " ")
		i := strings.IndexByte(s, ' ')
		if i < 0 {
			return s, "", false
		}
		return s[:i], s[i+1:], true
	}

	rawMethod, rest, ok := nextField(line)
	if !ok {
		return nil, fmt.Errorf("%w: %q", errShortLine, line)
	}
	rawTarget, rest, ok := nextField(rest)
	if !ok {
		return nil, fmt.Errorf("%w: %q", errShortLine, line)
	}

	// TrimSpace also drops stray tabs or CRs that cling to a component.
	method := strings.TrimSpace(rawMethod)
	target := strings.TrimSpace(rawTarget)
	version := strings.TrimSpace(rest)

	if method == "" {
		return nil, errBadMethod
	}
	if !strings.HasPrefix(version, "HTTP/1.") {
		return nil, fmt.Errorf("%w: %q", errBadVersion, version)
	}
	if !isToken(method) {
		return nil, fmt.Errorf("%w: %q", errBadMethod, method)
	}
	return &Request{Method: method, Target: target, Version: version}, nil
}

// isToken reports whether s is a non-empty RFC 7230 token: every byte must
// be a visible ASCII character (0x21-0x7e) that is not one of the separator
// characters listed below.
func isToken(s string) bool {
	const separators = "()<>@,;:\\\"/[]?={}"

	if len(s) == 0 {
		return false
	}
	for _, ch := range []byte(s) {
		switch {
		case ch < 0x21, ch > 0x7e:
			// Control characters, space, DEL and non-ASCII bytes.
			return false
		case bytes.IndexByte([]byte(separators), ch) != -1:
			return false
		}
	}
	return true
}

// readCRLF reads one line from r, up to and including the terminating '\n',
// and returns it with the terminator still attached. Only '\n' is searched
// for, so both CRLF and bare LF line endings are accepted.
//
// Lines longer than r's internal buffer are accumulated across several
// reads instead of failing with bufio.ErrBufferFull. The accumulated line is
// not capped in size.
//
// If the underlying read fails before a '\n' is seen, readCRLF returns the
// bytes read so far together with that error (io.EOF at end of stream).
func readCRLF(r *bufio.Reader) (string, error) {
	var out bytes.Buffer
	for {
		chunk, err := r.ReadSlice('\n')
		// chunk aliases the reader's buffer and is only valid until the
		// next read, so copy it out before doing anything else.
		out.Write(chunk)
		if errors.Is(err, bufio.ErrBufferFull) {
			// The buffer filled up without a newline; the reader has
			// handed over its contents, so keep going for the rest.
			continue
		}
		// A nil error means chunk ended in '\n'; anything else is final.
		return out.String(), err
	}
}