github.com/linchen2chris/hugo@v0.0.0-20230307053224-cec209389705/tpl/internal/go_templates/htmltemplate/html.go (about) 1 // Copyright 2011 The Go Authors. All rights reserved. 2 // Use of this source code is governed by a BSD-style 3 // license that can be found in the LICENSE file. 4 5 package template 6 7 import ( 8 "bytes" 9 "fmt" 10 "strings" 11 "unicode/utf8" 12 ) 13 14 // htmlNospaceEscaper escapes for inclusion in unquoted attribute values. 15 func htmlNospaceEscaper(args ...any) string { 16 s, t := stringify(args...) 17 if t == contentTypeHTML { 18 return htmlReplacer(stripTags(s), htmlNospaceNormReplacementTable, false) 19 } 20 return htmlReplacer(s, htmlNospaceReplacementTable, false) 21 } 22 23 // attrEscaper escapes for inclusion in quoted attribute values. 24 func attrEscaper(args ...any) string { 25 s, t := stringify(args...) 26 if t == contentTypeHTML { 27 return htmlReplacer(stripTags(s), htmlNormReplacementTable, true) 28 } 29 return htmlReplacer(s, htmlReplacementTable, true) 30 } 31 32 // rcdataEscaper escapes for inclusion in an RCDATA element body. 33 func rcdataEscaper(args ...any) string { 34 s, t := stringify(args...) 35 if t == contentTypeHTML { 36 return htmlReplacer(s, htmlNormReplacementTable, true) 37 } 38 return htmlReplacer(s, htmlReplacementTable, true) 39 } 40 41 // htmlEscaper escapes for inclusion in HTML text. 42 func htmlEscaper(args ...any) string { 43 s, t := stringify(args...) 44 if t == contentTypeHTML { 45 return s 46 } 47 return htmlReplacer(s, htmlReplacementTable, true) 48 } 49 50 // htmlReplacementTable contains the runes that need to be escaped 51 // inside a quoted attribute value or in a text node. 52 var htmlReplacementTable = []string{ 53 // https://www.w3.org/TR/html5/syntax.html#attribute-value-(unquoted)-state 54 // U+0000 NULL Parse error. Append a U+FFFD REPLACEMENT 55 // CHARACTER character to the current attribute's value. 
56 // " 57 // and similarly 58 // https://www.w3.org/TR/html5/syntax.html#before-attribute-value-state 59 0: "\uFFFD", 60 '"': """, 61 '&': "&", 62 '\'': "'", 63 '+': "+", 64 '<': "<", 65 '>': ">", 66 } 67 68 // htmlNormReplacementTable is like htmlReplacementTable but without '&' to 69 // avoid over-encoding existing entities. 70 var htmlNormReplacementTable = []string{ 71 0: "\uFFFD", 72 '"': """, 73 '\'': "'", 74 '+': "+", 75 '<': "<", 76 '>': ">", 77 } 78 79 // htmlNospaceReplacementTable contains the runes that need to be escaped 80 // inside an unquoted attribute value. 81 // The set of runes escaped is the union of the HTML specials and 82 // those determined by running the JS below in browsers: 83 // <div id=d></div> 84 // <script>(function () { 85 // var a = [], d = document.getElementById("d"), i, c, s; 86 // for (i = 0; i < 0x10000; ++i) { 87 // 88 // c = String.fromCharCode(i); 89 // d.innerHTML = "<span title=" + c + "lt" + c + "></span>" 90 // s = d.getElementsByTagName("SPAN")[0]; 91 // if (!s || s.title !== c + "lt" + c) { a.push(i.toString(16)); } 92 // 93 // } 94 // document.write(a.join(", ")); 95 // })()</script> 96 var htmlNospaceReplacementTable = []string{ 97 0: "�", 98 '\t': "	", 99 '\n': " ", 100 '\v': "", 101 '\f': "", 102 '\r': " ", 103 ' ': " ", 104 '"': """, 105 '&': "&", 106 '\'': "'", 107 '+': "+", 108 '<': "<", 109 '=': "=", 110 '>': ">", 111 // A parse error in the attribute value (unquoted) and 112 // before attribute value states. 113 // Treated as a quoting character by IE. 114 '`': "`", 115 } 116 117 // htmlNospaceNormReplacementTable is like htmlNospaceReplacementTable but 118 // without '&' to avoid over-encoding existing entities. 
var htmlNospaceNormReplacementTable = []string{
	0:    "&#xfffd;",
	'\t': "&#9;",
	'\n': "&#10;",
	'\v': "&#11;",
	'\f': "&#12;",
	'\r': "&#13;",
	' ':  "&#32;",
	'"':  "&#34;",
	'\'': "&#39;",
	'+':  "&#43;",
	'<':  "&lt;",
	'=':  "&#61;",
	'>':  "&gt;",
	// A parse error in the attribute value (unquoted) and
	// before attribute value states.
	// Treated as a quoting character by IE.
	'`': "&#96;",
}

// htmlReplacer returns s with runes replaced according to replacementTable
// and when badRunes is true, certain bad runes are allowed through unescaped.
//
// replacementTable is indexed by rune value; a rune whose entry is missing
// or empty is copied through unchanged. When badRunes is false, runes beyond
// the table that fall in U+FDD0..U+FDEF or U+FFF0..U+FFFF are written as
// hexadecimal character references (this includes the U+FFFD that invalid
// UTF-8 bytes decode to). If nothing needs replacing, s itself is returned
// without allocating.
func htmlReplacer(s string, replacementTable []string, badRunes bool) string {
	// written is the index in s up to which input has been copied to b.
	written, b := 0, new(strings.Builder)
	r, w := rune(0), 0
	for i := 0; i < len(s); i += w {
		// Cannot use 'for range s' because we need to preserve the width
		// of the runes in the input. If we see a decoding error, the input
		// width will not be utf8.Runelen(r) and we will overrun the buffer.
		r, w = utf8.DecodeRuneInString(s[i:])
		if int(r) < len(replacementTable) {
			if repl := replacementTable[r]; len(repl) != 0 {
				if written == 0 {
					// First replacement: reserve room for at least the input.
					b.Grow(len(s))
				}
				b.WriteString(s[written:i])
				b.WriteString(repl)
				written = i + w
			}
		} else if badRunes {
			// No-op: the caller allows bad runes through unescaped.
		} else if 0xfdd0 <= r && r <= 0xfdef || 0xfff0 <= r && r <= 0xffff {
			// IE does not allow these ranges in unquoted attrs.
			if written == 0 {
				b.Grow(len(s))
			}
			fmt.Fprintf(b, "%s&#x%x;", s[written:i], r)
			written = i + w
		}
	}
	if written == 0 {
		return s
	}
	b.WriteString(s[written:])
	return b.String()
}

// stripTags takes a snippet of HTML and returns only the text content.
// For example, `<b>¡Hi!</b> <script>...</script>` -> `¡Hi! `.
178 func stripTags(html string) string { 179 var b strings.Builder 180 s, c, i, allText := []byte(html), context{}, 0, true 181 // Using the transition funcs helps us avoid mangling 182 // `<div title="1>2">` or `I <3 Ponies!`. 183 for i != len(s) { 184 if c.delim == delimNone { 185 st := c.state 186 // Use RCDATA instead of parsing into JS or CSS styles. 187 if c.element != elementNone && !isInTag(st) { 188 st = stateRCDATA 189 } 190 d, nread := transitionFunc[st](c, s[i:]) 191 i1 := i + nread 192 if c.state == stateText || c.state == stateRCDATA { 193 // Emit text up to the start of the tag or comment. 194 j := i1 195 if d.state != c.state { 196 for j1 := j - 1; j1 >= i; j1-- { 197 if s[j1] == '<' { 198 j = j1 199 break 200 } 201 } 202 } 203 b.Write(s[i:j]) 204 } else { 205 allText = false 206 } 207 c, i = d, i1 208 continue 209 } 210 i1 := i + bytes.IndexAny(s[i:], delimEnds[c.delim]) 211 if i1 < i { 212 break 213 } 214 if c.delim != delimSpaceOrTagEnd { 215 // Consume any quote. 216 i1++ 217 } 218 c, i = context{state: stateTag, element: c.element}, i1 219 } 220 if allText { 221 return html 222 } else if c.state == stateText || c.state == stateRCDATA { 223 b.Write(s[i:]) 224 } 225 return b.String() 226 } 227 228 // htmlNameFilter accepts valid parts of an HTML attribute or tag name or 229 // a known-safe HTML attribute. 230 func htmlNameFilter(args ...any) string { 231 s, t := stringify(args...) 232 if t == contentTypeHTMLAttr { 233 return s 234 } 235 if len(s) == 0 { 236 // Avoid violation of structure preservation. 237 // <input checked {{.K}}={{.V}}>. 238 // Without this, if .K is empty then .V is the value of 239 // checked, but otherwise .V is the value of the attribute 240 // named .K. 241 return filterFailsafe 242 } 243 s = strings.ToLower(s) 244 if t := attrType(s); t != contentTypePlain { 245 // TODO: Split attr and element name part filters so we can recognize known attributes. 
246 return filterFailsafe 247 } 248 for _, r := range s { 249 switch { 250 case '0' <= r && r <= '9': 251 case 'a' <= r && r <= 'z': 252 default: 253 return filterFailsafe 254 } 255 } 256 return s 257 } 258 259 // commentEscaper returns the empty string regardless of input. 260 // Comment content does not correspond to any parsed structure or 261 // human-readable content, so the simplest and most secure policy is to drop 262 // content interpolated into comments. 263 // This approach is equally valid whether or not static comment content is 264 // removed from the template. 265 func commentEscaper(args ...any) string { 266 return "" 267 }