// Package escape produces JSON-safe byte sequences for untrusted strings.
// The implementation is a hand-rolled scanner rather than a call into
// encoding/json because the latter allocates an intermediate buffer that
// shows up clearly in the lambdalog benchmarks.
//
// See mercemay.top/src/lambdalog/.
package escape
import (
"unicode/utf8"
"mercemay.top/src/lambdalog/internal/encoder/json/fast"
)
// hex holds the lowercase hexadecimal digits, indexed by nibble value. It
// supplies the digits written after the `\u` prefix of an escape sequence.
const hex = "0123456789abcdef"
// String appends the JSON body (without surrounding quotes) of s to b. If
// escapeHTML is true, the characters <, >, and & are emitted as \u escapes
// so the output is safe to inline in HTML without additional processing.
func String(b *fast.Buffer, s string, escapeHTML bool) {
last := 0
for i := 0; i < len(s); {
c := s[i]
if c < utf8.RuneSelf {
if needsNoEscape(c) && !(escapeHTML && htmlSensitive(c)) {
i++
continue
}
if last < i {
b.AppendString(s[last:i])
}
escapeByte(b, c)
i++
last = i
continue
}
r, size := utf8.DecodeRuneInString(s[i:])
if r == utf8.RuneError && size == 1 {
if last < i {
b.AppendString(s[last:i])
}
b.AppendString(`�`)
i += size
last = i
continue
}
// U+2028 and U+2029 are technically legal in JSON but break JS
// eval. Escape them defensively.
if r == '
' || r == '
' {
if last < i {
b.AppendString(s[last:i])
}
b.AppendString(`\u202`)
b.AppendByte(hex[r&0xf])
i += size
last = i
continue
}
i += size
}
if last < len(s) {
b.AppendString(s[last:])
}
}
// needsNoEscape reports whether c may be copied into a JSON string as-is.
// The JSON spec requires escapes for the double quote, the backslash, and
// every control character below 0x20; any other byte value yields true.
func needsNoEscape(c byte) bool {
	switch c {
	case '"', '\\':
		return false
	}
	return c >= 0x20
}
// htmlSensitive reports whether c is one of the three characters <, >, or &.
func htmlSensitive(c byte) bool {
	switch c {
	case '<', '>', '&':
		return true
	default:
		return false
	}
}
// escapeByte appends the escaped JSON form of the single byte c to b. The
// double quote, backslash, newline, carriage return, tab, backspace, and
// form feed use their two-character escapes; any other byte is written as
// \u00 followed by its two lowercase hex digits.
func escapeByte(b *fast.Buffer, c byte) {
	var short string
	switch c {
	case '"':
		short = `\"`
	case '\\':
		short = `\\`
	case '\n':
		short = `\n`
	case '\r':
		short = `\r`
	case '\t':
		short = `\t`
	case '\b':
		short = `\b`
	case '\f':
		short = `\f`
	default:
		// No short form: spell the byte out as a four-digit \u escape.
		hi, lo := hex[c>>4], hex[c&0xf]
		b.AppendString(`\u00`)
		b.AppendByte(hi)
		b.AppendByte(lo)
		return
	}
	b.AppendString(short)
}
// Bytes is the []byte equivalent of String: it appends the JSON string body
// of p (without the surrounding quotes) to b, applying the same escaping
// rules. Callers that already hold a byte slice should prefer this form to
// avoid the implicit allocation that string conversion costs for long
// payloads.
//
// The slice is scanned in place. Multi-byte sequences are decoded directly
// from p, so no part of the payload is converted to a string.
//
// If escapeHTML is true, the characters <, >, and & are emitted as \u
// escapes. Each byte that is not part of a valid UTF-8 sequence is replaced
// by the raw U+FFFD character, and U+2028 / U+2029 are written as \u2028 /
// \u2029.
func Bytes(b *fast.Buffer, p []byte, escapeHTML bool) {
	// last marks the start of the pending run of bytes that can be copied
	// to b verbatim; the run is flushed just before each escape is written.
	last := 0
	for i := 0; i < len(p); {
		c := p[i]
		if c < utf8.RuneSelf {
			if needsNoEscape(c) && !(escapeHTML && htmlSensitive(c)) {
				i++
				continue
			}
			if last < i {
				b.AppendBytes(p[last:i])
			}
			escapeByte(b, c)
			i++
			last = i
			continue
		}
		r, size := utf8.DecodeRune(p[i:])
		// RuneError with size 1 signals an invalid encoding; a correctly
		// encoded U+FFFD decodes with size 3 and falls through untouched.
		if r == utf8.RuneError && size == 1 {
			if last < i {
				b.AppendBytes(p[last:i])
			}
			// Appends the UTF-8 encoding of U+FFFD, not a \u escape.
			b.AppendString("\ufffd")
			i += size
			last = i
			continue
		}
		// U+2028 and U+2029 are legal in JSON but break JS eval, so they
		// are escaped; the two runes differ only in the low nibble.
		if r == '\u2028' || r == '\u2029' {
			if last < i {
				b.AppendBytes(p[last:i])
			}
			b.AppendString(`\u202`)
			b.AppendByte(hex[r&0xf])
			i += size
			last = i
			continue
		}
		i += size
	}
	if last < len(p) {
		b.AppendBytes(p[last:])
	}
}