internal/parser/content/decoder.go

// Package content decodes Content-Encoding transfer layers (gzip,
// deflate, brotli). It is deliberately kept independent of the http1/
// http2 packages so that the export package can reuse it for HAR bodies.
//
// mercemay.top/src/httptap/
package content

import (
	"bytes"
	"compress/flate"
	"compress/gzip"
	"compress/zlib"
	"errors"
	"fmt"
	"io"
	"strings"

	"github.com/andybalholm/brotli"
)

// ErrUnknownEncoding is returned for encodings we deliberately do not
// recognise. Callers typically log and display the raw bytes instead.
//
// Decode wraps this sentinel with the offending encoding name, so match it
// with errors.Is rather than a direct comparison.
var ErrUnknownEncoding = errors.New("content: unknown encoding")

// Decode reverses the codings named in a Content-Encoding header value and
// returns the resulting payload.
//
// encoding is matched case-insensitively and may list several codings
// separated by commas. Codings are listed in the order they were applied, so
// they are undone starting from the last one. An empty or "identity" header
// returns body untouched.
//
// If a layer fails, the error is prefixed with that layer's name and the
// bytes produced by the layers that did succeed (or body itself, when the
// first layer fails) are returned alongside it.
func Decode(body []byte, encoding string) ([]byte, error) {
	header := strings.ToLower(encoding)
	header = strings.TrimSpace(header)
	switch header {
	case "", "identity":
		return body, nil
	}

	layers := strings.Split(header, ",")
	current := body
	// Walk the list back to front: the outermost coding is the last entry.
	for n := len(layers); n > 0; n-- {
		name := strings.TrimSpace(layers[n-1])
		next, err := decodeOne(current, name)
		if err != nil {
			return current, fmt.Errorf("%s: %w", name, err)
		}
		current = next
	}
	return current, nil
}

// decodeOne undoes a single content coding.
//
// enc must already be lower-cased and trimmed. The empty string and
// "identity" pass body through unchanged; "x-gzip" is treated as "gzip".
// Any other unrecognised name yields body together with ErrUnknownEncoding.
func decodeOne(body []byte, enc string) ([]byte, error) {
	switch enc {
	case "", "identity":
		return body, nil
	case "gzip", "x-gzip":
		return readAll(gzipReader(body))
	case "deflate":
		return readAll(deflateReader(body))
	case "br":
		// brotli.NewReader returns a single *brotli.Reader that has no Close
		// method and no error result, so adapt it to readAll's
		// (io.ReadCloser, error) parameters explicitly.
		return readAll(io.NopCloser(brotli.NewReader(bytes.NewReader(body))), nil)
	default:
		return body, ErrUnknownEncoding
	}
}

// gzipReader opens body as a gzip stream.
//
// The gzip header is parsed eagerly, so malformed or truncated input is
// reported here rather than on the first Read. On failure the returned
// reader is a nil interface value.
func gzipReader(body []byte) (io.ReadCloser, error) {
	zr := new(gzip.Reader)
	if err := zr.Reset(bytes.NewReader(body)); err != nil {
		// Return a literal nil so callers never see a non-nil interface
		// wrapping an unusable reader.
		return nil, err
	}
	return zr, nil
}

// deflateReader opens body for the "deflate" content coding.
//
// The coding is specified as a zlib-framed stream, but some senders emit raw
// DEFLATE data instead, so both are accepted. The first two bytes are
// checked against the zlib header rules of RFC 1950: compression method 8,
// window-size exponent of at most 7, and a header check value that makes the
// 16-bit big-endian header a multiple of 31. Any window size is recognised,
// not only the common 0x78 first byte. Input that fails the check, or is
// shorter than two bytes, is read as raw DEFLATE.
//
// A zlib header that requests a preset dictionary is still reported as an
// error by zlib.NewReader.
func deflateReader(body []byte) (io.ReadCloser, error) {
	if len(body) >= 2 {
		cmf, flg := body[0], body[1]
		if cmf&0x0f == 8 && cmf>>4 <= 7 && (uint16(cmf)<<8|uint16(flg))%31 == 0 {
			return zlib.NewReader(bytes.NewReader(body))
		}
	}
	// flate.NewReader already returns an io.ReadCloser; no adapter needed.
	return flate.NewReader(bytes.NewReader(body)), nil
}

// readAll drains r to completion and returns everything it produced.
//
// It takes a (reader, error) pair so that it can be wrapped directly around
// a constructor call: when err is non-nil it is returned as-is with a nil
// slice and r is never touched. Otherwise r is closed before returning; the
// result of Close is discarded.
func readAll(r io.ReadCloser, err error) ([]byte, error) {
	if err != nil {
		return nil, err
	}
	defer r.Close()

	data, readErr := io.ReadAll(r)
	return data, readErr
}

// SupportedEncodings is the list shown in --help output.
//
// IsSupported consults this slice. The "x-gzip" alias is accepted wherever
// "gzip" is, but is intentionally not listed here.
var SupportedEncodings = []string{"gzip", "deflate", "br", "identity"}

// IsSupported reports whether Decode will handle enc without returning
// ErrUnknownEncoding.
//
// Like Decode, it accepts a full Content-Encoding header value: matching is
// case-insensitive, surrounding whitespace is ignored, and enc may be a
// comma-separated list, in which case every element must be supported.
// Empty elements and "identity" always count as supported, and "x-gzip" is
// accepted whenever "gzip" is present in SupportedEncodings.
func IsSupported(enc string) bool {
	for _, part := range strings.Split(strings.ToLower(enc), ",") {
		tok := strings.TrimSpace(part)
		if tok == "" || tok == "identity" {
			continue
		}
		if tok == "x-gzip" {
			// Legacy alias; supported exactly when gzip is.
			tok = "gzip"
		}
		found := false
		for _, e := range SupportedEncodings {
			if e == tok {
				found = true
				break
			}
		}
		if !found {
			return false
		}
	}
	return true
}