github.com/twelsh-aw/go/src@v0.0.0-20230516233729-a56fe86a7c81/html/template/js.go (about) 1 // Copyright 2011 The Go Authors. All rights reserved. 2 // Use of this source code is governed by a BSD-style 3 // license that can be found in the LICENSE file. 4 5 package template 6 7 import ( 8 "bytes" 9 "encoding/json" 10 "fmt" 11 "reflect" 12 "strings" 13 "unicode/utf8" 14 ) 15 16 // jsWhitespace contains all of the JS whitespace characters, as defined 17 // by the \s character class. 18 // See https://developer.mozilla.org/en-US/docs/Web/JavaScript/Guide/Regular_expressions/Character_classes. 19 const jsWhitespace = "\f\n\r\t\v\u0020\u00a0\u1680\u2000\u2001\u2002\u2003\u2004\u2005\u2006\u2007\u2008\u2009\u200a\u2028\u2029\u202f\u205f\u3000\ufeff" 20 21 // nextJSCtx returns the context that determines whether a slash after the 22 // given run of tokens starts a regular expression instead of a division 23 // operator: / or /=. 24 // 25 // This assumes that the token run does not include any string tokens, comment 26 // tokens, regular expression literal tokens, or division operators. 27 // 28 // This fails on some valid but nonsensical JavaScript programs like 29 // "x = ++/foo/i" which is quite different than "x++/foo/i", but is not known to 30 // fail on any known useful programs. It is based on the draft 31 // JavaScript 2.0 lexical grammar and requires one token of lookbehind: 32 // https://www.mozilla.org/js/language/js20-2000-07/rationale/syntax.html 33 func nextJSCtx(s []byte, preceding jsCtx) jsCtx { 34 // Trim all JS whitespace characters 35 s = bytes.TrimRight(s, jsWhitespace) 36 if len(s) == 0 { 37 return preceding 38 } 39 40 // All cases below are in the single-byte UTF-8 group. 41 switch c, n := s[len(s)-1], len(s); c { 42 case '+', '-': 43 // ++ and -- are not regexp preceders, but + and - are whether 44 // they are used as infix or prefix operators. 45 start := n - 1 46 // Count the number of adjacent dashes or pluses. 
47 for start > 0 && s[start-1] == c { 48 start-- 49 } 50 if (n-start)&1 == 1 { 51 // Reached for trailing minus signs since "---" is the 52 // same as "-- -". 53 return jsCtxRegexp 54 } 55 return jsCtxDivOp 56 case '.': 57 // Handle "42." 58 if n != 1 && '0' <= s[n-2] && s[n-2] <= '9' { 59 return jsCtxDivOp 60 } 61 return jsCtxRegexp 62 // Suffixes for all punctuators from section 7.7 of the language spec 63 // that only end binary operators not handled above. 64 case ',', '<', '>', '=', '*', '%', '&', '|', '^', '?': 65 return jsCtxRegexp 66 // Suffixes for all punctuators from section 7.7 of the language spec 67 // that are prefix operators not handled above. 68 case '!', '~': 69 return jsCtxRegexp 70 // Matches all the punctuators from section 7.7 of the language spec 71 // that are open brackets not handled above. 72 case '(', '[': 73 return jsCtxRegexp 74 // Matches all the punctuators from section 7.7 of the language spec 75 // that precede expression starts. 76 case ':', ';', '{': 77 return jsCtxRegexp 78 // CAVEAT: the close punctuators ('}', ']', ')') precede div ops and 79 // are handled in the default except for '}' which can precede a 80 // division op as in 81 // ({ valueOf: function () { return 42 } } / 2 82 // which is valid, but, in practice, developers don't divide object 83 // literals, so our heuristic works well for code like 84 // function () { ... } /foo/.test(x) && sideEffect(); 85 // The ')' punctuator can precede a regular expression as in 86 // if (b) /foo/.test(x) && ... 87 // but this is much less likely than 88 // (a + b) / c 89 case '}': 90 return jsCtxRegexp 91 default: 92 // Look for an IdentifierName and see if it is a keyword that 93 // can precede a regular expression. 
94 j := n 95 for j > 0 && isJSIdentPart(rune(s[j-1])) { 96 j-- 97 } 98 if regexpPrecederKeywords[string(s[j:])] { 99 return jsCtxRegexp 100 } 101 } 102 // Otherwise is a punctuator not listed above, or 103 // a string which precedes a div op, or an identifier 104 // which precedes a div op. 105 return jsCtxDivOp 106 } 107 108 // regexpPrecederKeywords is a set of reserved JS keywords that can precede a 109 // regular expression in JS source. 110 var regexpPrecederKeywords = map[string]bool{ 111 "break": true, 112 "case": true, 113 "continue": true, 114 "delete": true, 115 "do": true, 116 "else": true, 117 "finally": true, 118 "in": true, 119 "instanceof": true, 120 "return": true, 121 "throw": true, 122 "try": true, 123 "typeof": true, 124 "void": true, 125 } 126 127 var jsonMarshalType = reflect.TypeOf((*json.Marshaler)(nil)).Elem() 128 129 // indirectToJSONMarshaler returns the value, after dereferencing as many times 130 // as necessary to reach the base type (or nil) or an implementation of json.Marshal. 131 func indirectToJSONMarshaler(a any) any { 132 // text/template now supports passing untyped nil as a func call 133 // argument, so we must support it. Otherwise we'd panic below, as one 134 // cannot call the Type or Interface methods on an invalid 135 // reflect.Value. See golang.org/issue/18716. 136 if a == nil { 137 return nil 138 } 139 140 v := reflect.ValueOf(a) 141 for !v.Type().Implements(jsonMarshalType) && v.Kind() == reflect.Pointer && !v.IsNil() { 142 v = v.Elem() 143 } 144 return v.Interface() 145 } 146 147 // jsValEscaper escapes its inputs to a JS Expression (section 11.14) that has 148 // neither side-effects nor free variables outside (NaN, Infinity). 149 func jsValEscaper(args ...any) string { 150 var a any 151 if len(args) == 1 { 152 a = indirectToJSONMarshaler(args[0]) 153 switch t := a.(type) { 154 case JS: 155 return string(t) 156 case JSStr: 157 // TODO: normalize quotes. 
158 return `"` + string(t) + `"` 159 case json.Marshaler: 160 // Do not treat as a Stringer. 161 case fmt.Stringer: 162 a = t.String() 163 } 164 } else { 165 for i, arg := range args { 166 args[i] = indirectToJSONMarshaler(arg) 167 } 168 a = fmt.Sprint(args...) 169 } 170 // TODO: detect cycles before calling Marshal which loops infinitely on 171 // cyclic data. This may be an unacceptable DoS risk. 172 b, err := json.Marshal(a) 173 if err != nil { 174 // Put a space before comment so that if it is flush against 175 // a division operator it is not turned into a line comment: 176 // x/{{y}} 177 // turning into 178 // x//* error marshaling y: 179 // second line of error message */null 180 return fmt.Sprintf(" /* %s */null ", strings.ReplaceAll(err.Error(), "*/", "* /")) 181 } 182 183 // TODO: maybe post-process output to prevent it from containing 184 // "<!--", "-->", "<![CDATA[", "]]>", or "</script" 185 // in case custom marshalers produce output containing those. 186 // Note: Do not use \x escaping to save bytes because it is not JSON compatible and this escaper 187 // supports ld+json content-type. 188 if len(b) == 0 { 189 // In, `x=y/{{.}}*z` a json.Marshaler that produces "" should 190 // not cause the output `x=y/*z`. 191 return " null " 192 } 193 first, _ := utf8.DecodeRune(b) 194 last, _ := utf8.DecodeLastRune(b) 195 var buf strings.Builder 196 // Prevent IdentifierNames and NumericLiterals from running into 197 // keywords: in, instanceof, typeof, void 198 pad := isJSIdentPart(first) || isJSIdentPart(last) 199 if pad { 200 buf.WriteByte(' ') 201 } 202 written := 0 203 // Make sure that json.Marshal escapes codepoints U+2028 & U+2029 204 // so it falls within the subset of JSON which is valid JS. 
205 for i := 0; i < len(b); { 206 rune, n := utf8.DecodeRune(b[i:]) 207 repl := "" 208 if rune == 0x2028 { 209 repl = `\u2028` 210 } else if rune == 0x2029 { 211 repl = `\u2029` 212 } 213 if repl != "" { 214 buf.Write(b[written:i]) 215 buf.WriteString(repl) 216 written = i + n 217 } 218 i += n 219 } 220 if buf.Len() != 0 { 221 buf.Write(b[written:]) 222 if pad { 223 buf.WriteByte(' ') 224 } 225 return buf.String() 226 } 227 return string(b) 228 } 229 230 // jsStrEscaper produces a string that can be included between quotes in 231 // JavaScript source, in JavaScript embedded in an HTML5 <script> element, 232 // or in an HTML5 event handler attribute such as onclick. 233 func jsStrEscaper(args ...any) string { 234 s, t := stringify(args...) 235 if t == contentTypeJSStr { 236 return replace(s, jsStrNormReplacementTable) 237 } 238 return replace(s, jsStrReplacementTable) 239 } 240 241 // jsRegexpEscaper behaves like jsStrEscaper but escapes regular expression 242 // specials so the result is treated literally when included in a regular 243 // expression literal. /foo{{.X}}bar/ matches the string "foo" followed by 244 // the literal text of {{.X}} followed by the string "bar". 245 func jsRegexpEscaper(args ...any) string { 246 s, _ := stringify(args...) 247 s = replace(s, jsRegexpReplacementTable) 248 if s == "" { 249 // /{{.X}}/ should not produce a line comment when .X == "". 250 return "(?:)" 251 } 252 return s 253 } 254 255 // replace replaces each rune r of s with replacementTable[r], provided that 256 // r < len(replacementTable). If replacementTable[r] is the empty string then 257 // no replacement is made. 258 // It also replaces runes U+2028 and U+2029 with the raw strings `\u2028` and 259 // `\u2029`. 260 func replace(s string, replacementTable []string) string { 261 var b strings.Builder 262 r, w, written := rune(0), 0, 0 263 for i := 0; i < len(s); i += w { 264 // See comment in htmlEscaper. 
265 r, w = utf8.DecodeRuneInString(s[i:]) 266 var repl string 267 switch { 268 case int(r) < len(lowUnicodeReplacementTable): 269 repl = lowUnicodeReplacementTable[r] 270 case int(r) < len(replacementTable) && replacementTable[r] != "": 271 repl = replacementTable[r] 272 case r == '\u2028': 273 repl = `\u2028` 274 case r == '\u2029': 275 repl = `\u2029` 276 default: 277 continue 278 } 279 if written == 0 { 280 b.Grow(len(s)) 281 } 282 b.WriteString(s[written:i]) 283 b.WriteString(repl) 284 written = i + w 285 } 286 if written == 0 { 287 return s 288 } 289 b.WriteString(s[written:]) 290 return b.String() 291 } 292 293 var lowUnicodeReplacementTable = []string{ 294 0: `\u0000`, 1: `\u0001`, 2: `\u0002`, 3: `\u0003`, 4: `\u0004`, 5: `\u0005`, 6: `\u0006`, 295 '\a': `\u0007`, 296 '\b': `\u0008`, 297 '\t': `\t`, 298 '\n': `\n`, 299 '\v': `\u000b`, // "\v" == "v" on IE 6. 300 '\f': `\f`, 301 '\r': `\r`, 302 0xe: `\u000e`, 0xf: `\u000f`, 0x10: `\u0010`, 0x11: `\u0011`, 0x12: `\u0012`, 0x13: `\u0013`, 303 0x14: `\u0014`, 0x15: `\u0015`, 0x16: `\u0016`, 0x17: `\u0017`, 0x18: `\u0018`, 0x19: `\u0019`, 304 0x1a: `\u001a`, 0x1b: `\u001b`, 0x1c: `\u001c`, 0x1d: `\u001d`, 0x1e: `\u001e`, 0x1f: `\u001f`, 305 } 306 307 var jsStrReplacementTable = []string{ 308 0: `\u0000`, 309 '\t': `\t`, 310 '\n': `\n`, 311 '\v': `\u000b`, // "\v" == "v" on IE 6. 312 '\f': `\f`, 313 '\r': `\r`, 314 // Encode HTML specials as hex so the output can be embedded 315 // in HTML attributes without further encoding. 316 '"': `\u0022`, 317 '`': `\u0060`, 318 '&': `\u0026`, 319 '\'': `\u0027`, 320 '+': `\u002b`, 321 '/': `\/`, 322 '<': `\u003c`, 323 '>': `\u003e`, 324 '\\': `\\`, 325 } 326 327 // jsStrNormReplacementTable is like jsStrReplacementTable but does not 328 // overencode existing escapes since this table has no entry for `\`. 329 var jsStrNormReplacementTable = []string{ 330 0: `\u0000`, 331 '\t': `\t`, 332 '\n': `\n`, 333 '\v': `\u000b`, // "\v" == "v" on IE 6. 
334 '\f': `\f`, 335 '\r': `\r`, 336 // Encode HTML specials as hex so the output can be embedded 337 // in HTML attributes without further encoding. 338 '"': `\u0022`, 339 '&': `\u0026`, 340 '\'': `\u0027`, 341 '`': `\u0060`, 342 '+': `\u002b`, 343 '/': `\/`, 344 '<': `\u003c`, 345 '>': `\u003e`, 346 } 347 var jsRegexpReplacementTable = []string{ 348 0: `\u0000`, 349 '\t': `\t`, 350 '\n': `\n`, 351 '\v': `\u000b`, // "\v" == "v" on IE 6. 352 '\f': `\f`, 353 '\r': `\r`, 354 // Encode HTML specials as hex so the output can be embedded 355 // in HTML attributes without further encoding. 356 '"': `\u0022`, 357 '$': `\$`, 358 '&': `\u0026`, 359 '\'': `\u0027`, 360 '(': `\(`, 361 ')': `\)`, 362 '*': `\*`, 363 '+': `\u002b`, 364 '-': `\-`, 365 '.': `\.`, 366 '/': `\/`, 367 '<': `\u003c`, 368 '>': `\u003e`, 369 '?': `\?`, 370 '[': `\[`, 371 '\\': `\\`, 372 ']': `\]`, 373 '^': `\^`, 374 '{': `\{`, 375 '|': `\|`, 376 '}': `\}`, 377 } 378 379 // isJSIdentPart reports whether the given rune is a JS identifier part. 380 // It does not handle all the non-Latin letters, joiners, and combining marks, 381 // but it does handle every codepoint that can occur in a numeric literal or 382 // a keyword. 383 func isJSIdentPart(r rune) bool { 384 switch { 385 case r == '$': 386 return true 387 case '0' <= r && r <= '9': 388 return true 389 case 'A' <= r && r <= 'Z': 390 return true 391 case r == '_': 392 return true 393 case 'a' <= r && r <= 'z': 394 return true 395 } 396 return false 397 } 398 399 // isJSType reports whether the given MIME type should be considered JavaScript. 400 // 401 // It is used to determine whether a script tag with a type attribute is a javascript container. 
func isJSType(mimeType string) bool {
	// Only the media type itself is compared: anything from the first ";"
	// onward (the parameters) is discarded, then the remainder is
	// lower-cased and stripped of surrounding whitespace. Per
	//   https://www.w3.org/TR/html5/scripting-1.html#attr-script-type
	//   https://tools.ietf.org/html/rfc7231#section-3.1.1
	//   https://tools.ietf.org/html/rfc4329#section-3
	//   https://www.ietf.org/rfc/rfc4627.txt
	base, _, _ := strings.Cut(mimeType, ";")
	switch strings.TrimSpace(strings.ToLower(base)) {
	case
		"application/ecmascript",
		"application/javascript",
		"application/json",
		"application/ld+json",
		"application/x-ecmascript",
		"application/x-javascript",
		"module",
		"text/ecmascript",
		"text/javascript",
		"text/javascript1.0",
		"text/javascript1.1",
		"text/javascript1.2",
		"text/javascript1.3",
		"text/javascript1.4",
		"text/javascript1.5",
		"text/jscript",
		"text/livescript",
		"text/x-ecmascript",
		"text/x-javascript":
		return true
	}
	// Anything not listed above is not treated as a JavaScript container.
	return false
}