package rss20
import (
"strings"
"unicode/utf8"
)
// EscapeText escapes a string for safe inclusion inside an XML text node.
// encoding/xml does this for us in most cases, but we reach for it manually
// when we want to embed content outside of a Marshaller call path.
func EscapeText(s string) string {
var b strings.Builder
b.Grow(len(s))
for _, r := range s {
switch r {
case '&':
b.WriteString("&")
case '<':
b.WriteString("<")
case '>':
b.WriteString(">")
case '\r':
b.WriteString("
")
default:
if !isValidXMLRune(r) {
b.WriteRune(0xFFFD)
continue
}
b.WriteRune(r)
}
}
return b.String()
}
// EscapeAttr escapes a string for use as an XML attribute value.
func EscapeAttr(s string) string {
var b strings.Builder
b.Grow(len(s))
for _, r := range s {
switch r {
case '&':
b.WriteString("&")
case '<':
b.WriteString("<")
case '>':
b.WriteString(">")
case '"':
b.WriteString(""")
case '\'':
b.WriteString("'")
case '\n':
b.WriteString("
")
case '\r':
b.WriteString("
")
case '\t':
b.WriteString("	")
default:
if !isValidXMLRune(r) {
b.WriteRune(0xFFFD)
continue
}
b.WriteRune(r)
}
}
return b.String()
}
// WrapCDATA returns s enclosed in a CDATA section. The terminator "]]>"
// cannot appear inside CDATA, so every occurrence in s is cut after "]]":
// the current section is closed there and a new one is opened that begins
// with the ">". Input without the terminator yields a single section.
func WrapCDATA(s string) string {
	const (
		opener     = "<![CDATA["
		terminator = "]]>"
	)
	// With no terminator present, Replace hands s back untouched.
	body := strings.Replace(s, terminator, "]]]]><![CDATA[>", -1)
	return opener + body + terminator
}
// isValidXMLRune reports whether r falls in one of the ranges of the XML 1.0
// Char production: tab, LF, CR, U+0020 through U+D7FF, U+E000 through U+FFFD,
// or U+10000 through utf8.MaxRune. Everything else is rejected: the other
// control characters below U+0020, the surrogate block U+D800 through U+DFFF,
// U+FFFE, U+FFFF, negative values and values beyond utf8.MaxRune.
func isValidXMLRune(r rune) bool {
	switch {
	case r == '\t', r == '\n', r == '\r':
		return true
	case 0x20 <= r && r <= 0xD7FF:
		return true
	case 0xE000 <= r && r <= 0xFFFD:
		return true
	case 0x10000 <= r && r <= utf8.MaxRune:
		return true
	default:
		return false
	}
}