// internal/parser/http1/chunked.go

package http1

import (
	"bufio"
	"errors"
	"fmt"
	"io"
	"strconv"
	"strings"
)

// ErrChunkTooLarge guards against pathological chunk sizes in captures.
// readChunked wraps it with the offending size; match with errors.Is.
var ErrChunkTooLarge = errors.New("http1: chunk size exceeds limit")

// maxChunkBytes caps any single chunk at 16 MiB. The tap is for debugging
// live traffic; anything larger is almost certainly a parser desync.
// Note: this bounds one chunk, not the accumulated body across chunks.
const maxChunkBytes = 16 << 20

// ReadBody dispatches on Content-Length / Transfer-Encoding. The
// isResponse flag is currently unused here but kept in the signature to
// mirror what upstream Go's net/http does when framing 1xx responses.
// ReadBody dispatches on Content-Length / Transfer-Encoding and returns
// (body, trailers, error). Trailers are non-nil only for chunked bodies
// that carried trailer fields. Per RFC 7230 section 3.3.3,
// Transfer-Encoding takes precedence over Content-Length. The
// isResponse flag is currently unused here but kept in the signature to
// mirror what upstream Go's net/http does when framing 1xx responses.
func ReadBody(r *bufio.Reader, headers [][2]string, isResponse bool) ([]byte, [][2]string, error) {
	if te := Get(headers, "Transfer-Encoding"); te != "" {
		// Match "chunked" as a comma-separated list element, not a raw
		// substring: a coding named "x-chunked" must not trigger
		// chunked framing.
		for _, coding := range strings.Split(te, ",") {
			if strings.EqualFold(strings.TrimSpace(coding), "chunked") {
				return readChunked(r)
			}
		}
	}
	if cl := Get(headers, "Content-Length"); cl != "" {
		n, err := strconv.ParseInt(strings.TrimSpace(cl), 10, 64)
		if err != nil || n < 0 {
			return nil, nil, fmt.Errorf("http1: bad content-length %q", cl)
		}
		buf := make([]byte, n)
		if _, err := io.ReadFull(r, buf); err != nil {
			// Return what was read so callers can inspect a truncated
			// capture alongside the error.
			return buf, nil, err
		}
		return buf, nil, nil
	}
	// Neither framing header present: no body.
	return nil, nil, nil
}

// readChunked decodes RFC 7230 section 4.1 chunked bodies and collects
// trailer headers. Chunk extensions (after ';' in the size line) are
// tolerated and discarded. On error the body decoded so far is returned
// alongside the error so truncated captures remain inspectable.
func readChunked(r *bufio.Reader) ([]byte, [][2]string, error) {
	var body []byte
	for {
		sizeLine, err := r.ReadString('\n')
		if err != nil {
			return body, nil, fmt.Errorf("http1: read chunk size: %w", err)
		}
		sizeLine = strings.TrimRight(sizeLine, "\r\n")
		// Discard any chunk extension (";name=value").
		if i := strings.IndexByte(sizeLine, ';'); i >= 0 {
			sizeLine = sizeLine[:i]
		}
		n, err := strconv.ParseUint(strings.TrimSpace(sizeLine), 16, 64)
		if err != nil {
			return body, nil, fmt.Errorf("http1: bad chunk size %q: %w", sizeLine, err)
		}
		if n > maxChunkBytes {
			return body, nil, fmt.Errorf("%w: %d", ErrChunkTooLarge, n)
		}
		if n == 0 {
			// Last-chunk; what follows is the (possibly empty) trailer
			// section terminated by a blank line.
			trailers, err := ReadHeaders(r)
			return body, trailers, err
		}
		chunk := make([]byte, n)
		if _, err := io.ReadFull(r, chunk); err != nil {
			return body, nil, fmt.Errorf("http1: read chunk data: %w", err)
		}
		body = append(body, chunk...)
		// Trailing CRLF after chunk data. Use ReadFull so a truncated
		// stream surfaces the underlying I/O error instead of being
		// misreported as a framing problem.
		var crlf [2]byte
		if _, err := io.ReadFull(r, crlf[:]); err != nil {
			return body, nil, fmt.Errorf("http1: read chunk terminator: %w", err)
		}
		if crlf[0] != '\r' || crlf[1] != '\n' {
			return body, nil, fmt.Errorf("http1: missing CRLF after chunk")
		}
	}
}

// EncodeChunked is the inverse of readChunked, mostly used by tests and
// by the export/raw package when synthesising replay fixtures. It emits
// 4 KiB data chunks followed by the zero-length terminator chunk; no
// trailers or chunk extensions are produced.
func EncodeChunked(body []byte) []byte {
	const chunkSize = 4096
	// Worst case per chunk: hex size line + CRLF + data + CRLF is at
	// most 8 bytes of framing, plus the 5-byte "0\r\n\r\n" terminator.
	out := make([]byte, 0, len(body)+(len(body)/chunkSize+1)*8+5)
	for len(body) > 0 {
		n := len(body)
		if n > chunkSize {
			n = chunkSize
		}
		// AppendUint writes the hex size straight into out, avoiding
		// the intermediate string allocation of FormatInt.
		out = strconv.AppendUint(out, uint64(n), 16)
		out = append(out, '\r', '\n')
		out = append(out, body[:n]...)
		out = append(out, '\r', '\n')
		body = body[n:]
	}
	out = append(out, '0', '\r', '\n', '\r', '\n')
	return out
}