github.com/anakojm/hugo-katex@v0.0.0-20231023141351-42d6f5de9c0b/tpl/internal/go_templates/htmltemplate/js.go (about) 1 // Copyright 2011 The Go Authors. All rights reserved. 2 // Use of this source code is governed by a BSD-style 3 // license that can be found in the LICENSE file. 4 5 package template 6 7 import ( 8 "bytes" 9 "encoding/json" 10 "fmt" 11 htmltemplate "html/template" 12 "reflect" 13 "strings" 14 "unicode/utf8" 15 ) 16 17 // jsWhitespace contains all of the JS whitespace characters, as defined 18 // by the \s character class. 19 // See https://developer.mozilla.org/en-US/docs/Web/JavaScript/Guide/Regular_expressions/Character_classes. 20 const jsWhitespace = "\f\n\r\t\v\u0020\u00a0\u1680\u2000\u2001\u2002\u2003\u2004\u2005\u2006\u2007\u2008\u2009\u200a\u2028\u2029\u202f\u205f\u3000\ufeff" 21 22 // nextJSCtx returns the context that determines whether a slash after the 23 // given run of tokens starts a regular expression instead of a division 24 // operator: / or /=. 25 // 26 // This assumes that the token run does not include any string tokens, comment 27 // tokens, regular expression literal tokens, or division operators. 28 // 29 // This fails on some valid but nonsensical JavaScript programs like 30 // "x = ++/foo/i" which is quite different than "x++/foo/i", but is not known to 31 // fail on any known useful programs. It is based on the draft 32 // JavaScript 2.0 lexical grammar and requires one token of lookbehind: 33 // https://www.mozilla.org/js/language/js20-2000-07/rationale/syntax.html 34 func nextJSCtx(s []byte, preceding jsCtx) jsCtx { 35 // Trim all JS whitespace characters 36 s = bytes.TrimRight(s, jsWhitespace) 37 if len(s) == 0 { 38 return preceding 39 } 40 41 // All cases below are in the single-byte UTF-8 group. 42 switch c, n := s[len(s)-1], len(s); c { 43 case '+', '-': 44 // ++ and -- are not regexp preceders, but + and - are whether 45 // they are used as infix or prefix operators. 
46 start := n - 1 47 // Count the number of adjacent dashes or pluses. 48 for start > 0 && s[start-1] == c { 49 start-- 50 } 51 if (n-start)&1 == 1 { 52 // Reached for trailing minus signs since "---" is the 53 // same as "-- -". 54 return jsCtxRegexp 55 } 56 return jsCtxDivOp 57 case '.': 58 // Handle "42." 59 if n != 1 && '0' <= s[n-2] && s[n-2] <= '9' { 60 return jsCtxDivOp 61 } 62 return jsCtxRegexp 63 // Suffixes for all punctuators from section 7.7 of the language spec 64 // that only end binary operators not handled above. 65 case ',', '<', '>', '=', '*', '%', '&', '|', '^', '?': 66 return jsCtxRegexp 67 // Suffixes for all punctuators from section 7.7 of the language spec 68 // that are prefix operators not handled above. 69 case '!', '~': 70 return jsCtxRegexp 71 // Matches all the punctuators from section 7.7 of the language spec 72 // that are open brackets not handled above. 73 case '(', '[': 74 return jsCtxRegexp 75 // Matches all the punctuators from section 7.7 of the language spec 76 // that precede expression starts. 77 case ':', ';', '{': 78 return jsCtxRegexp 79 // CAVEAT: the close punctuators ('}', ']', ')') precede div ops and 80 // are handled in the default except for '}' which can precede a 81 // division op as in 82 // ({ valueOf: function () { return 42 } } / 2 83 // which is valid, but, in practice, developers don't divide object 84 // literals, so our heuristic works well for code like 85 // function () { ... } /foo/.test(x) && sideEffect(); 86 // The ')' punctuator can precede a regular expression as in 87 // if (b) /foo/.test(x) && ... 88 // but this is much less likely than 89 // (a + b) / c 90 case '}': 91 return jsCtxRegexp 92 default: 93 // Look for an IdentifierName and see if it is a keyword that 94 // can precede a regular expression. 
95 j := n 96 for j > 0 && isJSIdentPart(rune(s[j-1])) { 97 j-- 98 } 99 if regexpPrecederKeywords[string(s[j:])] { 100 return jsCtxRegexp 101 } 102 } 103 // Otherwise is a punctuator not listed above, or 104 // a string which precedes a div op, or an identifier 105 // which precedes a div op. 106 return jsCtxDivOp 107 } 108 109 // regexpPrecederKeywords is a set of reserved JS keywords that can precede a 110 // regular expression in JS source. 111 var regexpPrecederKeywords = map[string]bool{ 112 "break": true, 113 "case": true, 114 "continue": true, 115 "delete": true, 116 "do": true, 117 "else": true, 118 "finally": true, 119 "in": true, 120 "instanceof": true, 121 "return": true, 122 "throw": true, 123 "try": true, 124 "typeof": true, 125 "void": true, 126 } 127 128 var jsonMarshalType = reflect.TypeOf((*json.Marshaler)(nil)).Elem() 129 130 // indirectToJSONMarshaler returns the value, after dereferencing as many times 131 // as necessary to reach the base type (or nil) or an implementation of json.Marshal. 132 func indirectToJSONMarshaler(a any) any { 133 // text/template now supports passing untyped nil as a func call 134 // argument, so we must support it. Otherwise we'd panic below, as one 135 // cannot call the Type or Interface methods on an invalid 136 // reflect.Value. See golang.org/issue/18716. 137 if a == nil { 138 return nil 139 } 140 141 v := reflect.ValueOf(a) 142 for !v.Type().Implements(jsonMarshalType) && v.Kind() == reflect.Pointer && !v.IsNil() { 143 v = v.Elem() 144 } 145 return v.Interface() 146 } 147 148 // jsValEscaper escapes its inputs to a JS Expression (section 11.14) that has 149 // neither side-effects nor free variables outside (NaN, Infinity). 150 func jsValEscaper(args ...any) string { 151 var a any 152 if len(args) == 1 { 153 a = indirectToJSONMarshaler(args[0]) 154 switch t := a.(type) { 155 case htmltemplate.JS: 156 return string(t) 157 case htmltemplate.JSStr: 158 // TODO: normalize quotes. 
159 return `"` + string(t) + `"` 160 case json.Marshaler: 161 // Do not treat as a Stringer. 162 case fmt.Stringer: 163 a = t.String() 164 } 165 } else { 166 for i, arg := range args { 167 args[i] = indirectToJSONMarshaler(arg) 168 } 169 a = fmt.Sprint(args...) 170 } 171 // TODO: detect cycles before calling Marshal which loops infinitely on 172 // cyclic data. This may be an unacceptable DoS risk. 173 b, err := json.Marshal(a) 174 if err != nil { 175 // Put a space before comment so that if it is flush against 176 // a division operator it is not turned into a line comment: 177 // x/{{y}} 178 // turning into 179 // x//* error marshaling y: 180 // second line of error message */null 181 return fmt.Sprintf(" /* %s */null ", strings.ReplaceAll(err.Error(), "*/", "* /")) 182 } 183 184 // TODO: maybe post-process output to prevent it from containing 185 // "<!--", "-->", "<![CDATA[", "]]>", or "</script" 186 // in case custom marshalers produce output containing those. 187 // Note: Do not use \x escaping to save bytes because it is not JSON compatible and this escaper 188 // supports ld+json content-type. 189 if len(b) == 0 { 190 // In, `x=y/{{.}}*z` a json.Marshaler that produces "" should 191 // not cause the output `x=y/*z`. 192 return " null " 193 } 194 first, _ := utf8.DecodeRune(b) 195 last, _ := utf8.DecodeLastRune(b) 196 var buf strings.Builder 197 // Prevent IdentifierNames and NumericLiterals from running into 198 // keywords: in, instanceof, typeof, void 199 pad := isJSIdentPart(first) || isJSIdentPart(last) 200 if pad { 201 buf.WriteByte(' ') 202 } 203 written := 0 204 // Make sure that json.Marshal escapes codepoints U+2028 & U+2029 205 // so it falls within the subset of JSON which is valid JS. 
206 for i := 0; i < len(b); { 207 rune, n := utf8.DecodeRune(b[i:]) 208 repl := "" 209 if rune == 0x2028 { 210 repl = `\u2028` 211 } else if rune == 0x2029 { 212 repl = `\u2029` 213 } 214 if repl != "" { 215 buf.Write(b[written:i]) 216 buf.WriteString(repl) 217 written = i + n 218 } 219 i += n 220 } 221 if buf.Len() != 0 { 222 buf.Write(b[written:]) 223 if pad { 224 buf.WriteByte(' ') 225 } 226 return buf.String() 227 } 228 return string(b) 229 } 230 231 // jsStrEscaper produces a string that can be included between quotes in 232 // JavaScript source, in JavaScript embedded in an HTML5 <script> element, 233 // or in an HTML5 event handler attribute such as onclick. 234 func jsStrEscaper(args ...any) string { 235 s, t := stringify(args...) 236 if t == contentTypeJSStr { 237 return replace(s, jsStrNormReplacementTable) 238 } 239 return replace(s, jsStrReplacementTable) 240 } 241 242 // jsRegexpEscaper behaves like jsStrEscaper but escapes regular expression 243 // specials so the result is treated literally when included in a regular 244 // expression literal. /foo{{.X}}bar/ matches the string "foo" followed by 245 // the literal text of {{.X}} followed by the string "bar". 246 func jsRegexpEscaper(args ...any) string { 247 s, _ := stringify(args...) 248 s = replace(s, jsRegexpReplacementTable) 249 if s == "" { 250 // /{{.X}}/ should not produce a line comment when .X == "". 251 return "(?:)" 252 } 253 return s 254 } 255 256 // replace replaces each rune r of s with replacementTable[r], provided that 257 // r < len(replacementTable). If replacementTable[r] is the empty string then 258 // no replacement is made. 259 // It also replaces runes U+2028 and U+2029 with the raw strings `\u2028` and 260 // `\u2029`. 261 func replace(s string, replacementTable []string) string { 262 var b strings.Builder 263 r, w, written := rune(0), 0, 0 264 for i := 0; i < len(s); i += w { 265 // See comment in htmlEscaper. 
266 r, w = utf8.DecodeRuneInString(s[i:]) 267 var repl string 268 switch { 269 case int(r) < len(lowUnicodeReplacementTable): 270 repl = lowUnicodeReplacementTable[r] 271 case int(r) < len(replacementTable) && replacementTable[r] != "": 272 repl = replacementTable[r] 273 case r == '\u2028': 274 repl = `\u2028` 275 case r == '\u2029': 276 repl = `\u2029` 277 default: 278 continue 279 } 280 if written == 0 { 281 b.Grow(len(s)) 282 } 283 b.WriteString(s[written:i]) 284 b.WriteString(repl) 285 written = i + w 286 } 287 if written == 0 { 288 return s 289 } 290 b.WriteString(s[written:]) 291 return b.String() 292 } 293 294 var lowUnicodeReplacementTable = []string{ 295 0: `\u0000`, 1: `\u0001`, 2: `\u0002`, 3: `\u0003`, 4: `\u0004`, 5: `\u0005`, 6: `\u0006`, 296 '\a': `\u0007`, 297 '\b': `\u0008`, 298 '\t': `\t`, 299 '\n': `\n`, 300 '\v': `\u000b`, // "\v" == "v" on IE 6. 301 '\f': `\f`, 302 '\r': `\r`, 303 0xe: `\u000e`, 0xf: `\u000f`, 0x10: `\u0010`, 0x11: `\u0011`, 0x12: `\u0012`, 0x13: `\u0013`, 304 0x14: `\u0014`, 0x15: `\u0015`, 0x16: `\u0016`, 0x17: `\u0017`, 0x18: `\u0018`, 0x19: `\u0019`, 305 0x1a: `\u001a`, 0x1b: `\u001b`, 0x1c: `\u001c`, 0x1d: `\u001d`, 0x1e: `\u001e`, 0x1f: `\u001f`, 306 } 307 308 var jsStrReplacementTable = []string{ 309 0: `\u0000`, 310 '\t': `\t`, 311 '\n': `\n`, 312 '\v': `\u000b`, // "\v" == "v" on IE 6. 313 '\f': `\f`, 314 '\r': `\r`, 315 // Encode HTML specials as hex so the output can be embedded 316 // in HTML attributes without further encoding. 317 '"': `\u0022`, 318 '`': `\u0060`, 319 '&': `\u0026`, 320 '\'': `\u0027`, 321 '+': `\u002b`, 322 '/': `\/`, 323 '<': `\u003c`, 324 '>': `\u003e`, 325 '\\': `\\`, 326 } 327 328 // jsStrNormReplacementTable is like jsStrReplacementTable but does not 329 // overencode existing escapes since this table has no entry for `\`. 330 var jsStrNormReplacementTable = []string{ 331 0: `\u0000`, 332 '\t': `\t`, 333 '\n': `\n`, 334 '\v': `\u000b`, // "\v" == "v" on IE 6. 
335 '\f': `\f`, 336 '\r': `\r`, 337 // Encode HTML specials as hex so the output can be embedded 338 // in HTML attributes without further encoding. 339 '"': `\u0022`, 340 '&': `\u0026`, 341 '\'': `\u0027`, 342 '`': `\u0060`, 343 '+': `\u002b`, 344 '/': `\/`, 345 '<': `\u003c`, 346 '>': `\u003e`, 347 } 348 var jsRegexpReplacementTable = []string{ 349 0: `\u0000`, 350 '\t': `\t`, 351 '\n': `\n`, 352 '\v': `\u000b`, // "\v" == "v" on IE 6. 353 '\f': `\f`, 354 '\r': `\r`, 355 // Encode HTML specials as hex so the output can be embedded 356 // in HTML attributes without further encoding. 357 '"': `\u0022`, 358 '$': `\$`, 359 '&': `\u0026`, 360 '\'': `\u0027`, 361 '(': `\(`, 362 ')': `\)`, 363 '*': `\*`, 364 '+': `\u002b`, 365 '-': `\-`, 366 '.': `\.`, 367 '/': `\/`, 368 '<': `\u003c`, 369 '>': `\u003e`, 370 '?': `\?`, 371 '[': `\[`, 372 '\\': `\\`, 373 ']': `\]`, 374 '^': `\^`, 375 '{': `\{`, 376 '|': `\|`, 377 '}': `\}`, 378 } 379 380 // isJSIdentPart reports whether the given rune is a JS identifier part. 381 // It does not handle all the non-Latin letters, joiners, and combining marks, 382 // but it does handle every codepoint that can occur in a numeric literal or 383 // a keyword. 384 func isJSIdentPart(r rune) bool { 385 switch { 386 case r == '$': 387 return true 388 case '0' <= r && r <= '9': 389 return true 390 case 'A' <= r && r <= 'Z': 391 return true 392 case r == '_': 393 return true 394 case 'a' <= r && r <= 'z': 395 return true 396 } 397 return false 398 } 399 400 // isJSType reports whether the given MIME type should be considered JavaScript. 401 // 402 // It is used to determine whether a script tag with a type attribute is a javascript container. 
func isJSType(mimeType string) bool {
	// per
	//   https://www.w3.org/TR/html5/scripting-1.html#attr-script-type
	//   https://tools.ietf.org/html/rfc7231#section-3.1.1
	//   https://tools.ietf.org/html/rfc4329#section-3
	//   https://www.ietf.org/rfc/rfc4627.txt

	// Discard parameters: only the text before the first ';' is compared.
	mimeType, _, _ = strings.Cut(mimeType, ";")
	// Compare case-insensitively and ignore surrounding whitespace.
	mimeType = strings.ToLower(mimeType)
	mimeType = strings.TrimSpace(mimeType)
	switch mimeType {
	case
		"application/ecmascript",
		"application/javascript",
		"application/json",
		"application/ld+json",
		"application/x-ecmascript",
		"application/x-javascript",
		"module",
		"text/ecmascript",
		"text/javascript",
		"text/javascript1.0",
		"text/javascript1.1",
		"text/javascript1.2",
		"text/javascript1.3",
		"text/javascript1.4",
		"text/javascript1.5",
		"text/jscript",
		"text/livescript",
		"text/x-ecmascript",
		"text/x-javascript":
		return true
	default:
		return false
	}
}