github.com/kovansky/hugo@v0.92.3-0.20220224232819-63076e4ff19f/tpl/internal/go_templates/htmltemplate/js.go (about)

     1  // Copyright 2011 The Go Authors. All rights reserved.
     2  // Use of this source code is governed by a BSD-style
     3  // license that can be found in the LICENSE file.
     4  
     5  package template
     6  
     7  import (
     8  	"bytes"
     9  	"encoding/json"
    10  	"fmt"
    11  	htmltemplate "html/template"
    12  	"reflect"
    13  	"strings"
    14  	"unicode/utf8"
    15  )
    16  
    17  // nextJSCtx returns the context that determines whether a slash after the
    18  // given run of tokens starts a regular expression instead of a division
    19  // operator: / or /=.
    20  //
    21  // This assumes that the token run does not include any string tokens, comment
    22  // tokens, regular expression literal tokens, or division operators.
    23  //
    24  // This fails on some valid but nonsensical JavaScript programs like
    25  // "x = ++/foo/i" which is quite different than "x++/foo/i", but is not known to
    26  // fail on any known useful programs. It is based on the draft
    27  // JavaScript 2.0 lexical grammar and requires one token of lookbehind:
    28  // https://www.mozilla.org/js/language/js20-2000-07/rationale/syntax.html
    29  func nextJSCtx(s []byte, preceding jsCtx) jsCtx {
    30  	s = bytes.TrimRight(s, "\t\n\f\r \u2028\u2029")
    31  	if len(s) == 0 {
    32  		return preceding
    33  	}
    34  
    35  	// All cases below are in the single-byte UTF-8 group.
    36  	switch c, n := s[len(s)-1], len(s); c {
    37  	case '+', '-':
    38  		// ++ and -- are not regexp preceders, but + and - are whether
    39  		// they are used as infix or prefix operators.
    40  		start := n - 1
    41  		// Count the number of adjacent dashes or pluses.
    42  		for start > 0 && s[start-1] == c {
    43  			start--
    44  		}
    45  		if (n-start)&1 == 1 {
    46  			// Reached for trailing minus signs since "---" is the
    47  			// same as "-- -".
    48  			return jsCtxRegexp
    49  		}
    50  		return jsCtxDivOp
    51  	case '.':
    52  		// Handle "42."
    53  		if n != 1 && '0' <= s[n-2] && s[n-2] <= '9' {
    54  			return jsCtxDivOp
    55  		}
    56  		return jsCtxRegexp
    57  	// Suffixes for all punctuators from section 7.7 of the language spec
    58  	// that only end binary operators not handled above.
    59  	case ',', '<', '>', '=', '*', '%', '&', '|', '^', '?':
    60  		return jsCtxRegexp
    61  	// Suffixes for all punctuators from section 7.7 of the language spec
    62  	// that are prefix operators not handled above.
    63  	case '!', '~':
    64  		return jsCtxRegexp
    65  	// Matches all the punctuators from section 7.7 of the language spec
    66  	// that are open brackets not handled above.
    67  	case '(', '[':
    68  		return jsCtxRegexp
    69  	// Matches all the punctuators from section 7.7 of the language spec
    70  	// that precede expression starts.
    71  	case ':', ';', '{':
    72  		return jsCtxRegexp
    73  	// CAVEAT: the close punctuators ('}', ']', ')') precede div ops and
    74  	// are handled in the default except for '}' which can precede a
    75  	// division op as in
    76  	//    ({ valueOf: function () { return 42 } } / 2
    77  	// which is valid, but, in practice, developers don't divide object
    78  	// literals, so our heuristic works well for code like
    79  	//    function () { ... }  /foo/.test(x) && sideEffect();
    80  	// The ')' punctuator can precede a regular expression as in
    81  	//     if (b) /foo/.test(x) && ...
    82  	// but this is much less likely than
    83  	//     (a + b) / c
    84  	case '}':
    85  		return jsCtxRegexp
    86  	default:
    87  		// Look for an IdentifierName and see if it is a keyword that
    88  		// can precede a regular expression.
    89  		j := n
    90  		for j > 0 && isJSIdentPart(rune(s[j-1])) {
    91  			j--
    92  		}
    93  		if regexpPrecederKeywords[string(s[j:])] {
    94  			return jsCtxRegexp
    95  		}
    96  	}
    97  	// Otherwise is a punctuator not listed above, or
    98  	// a string which precedes a div op, or an identifier
    99  	// which precedes a div op.
   100  	return jsCtxDivOp
   101  }
   102  
// regexpPrecederKeywords is a set of reserved JS keywords that can precede a
// regular expression in JS source.
//
// nextJSCtx looks up the identifier that ends a token run in this map; a hit
// means a following slash is taken to start a regular expression rather than
// a division operator. Words that are absent yield false from the lookup.
var regexpPrecederKeywords = map[string]bool{
	"break":      true,
	"case":       true,
	"continue":   true,
	"delete":     true,
	"do":         true,
	"else":       true,
	"finally":    true,
	"in":         true,
	"instanceof": true,
	"return":     true,
	"throw":      true,
	"try":        true,
	"typeof":     true,
	"void":       true,
}
   121  
   122  var jsonMarshalType = reflect.TypeOf((*json.Marshaler)(nil)).Elem()
   123  
   124  // indirectToJSONMarshaler returns the value, after dereferencing as many times
   125  // as necessary to reach the base type (or nil) or an implementation of json.Marshal.
   126  func indirectToJSONMarshaler(a interface{}) interface{} {
   127  	// text/template now supports passing untyped nil as a func call
   128  	// argument, so we must support it. Otherwise we'd panic below, as one
   129  	// cannot call the Type or Interface methods on an invalid
   130  	// reflect.Value. See golang.org/issue/18716.
   131  	if a == nil {
   132  		return nil
   133  	}
   134  
   135  	v := reflect.ValueOf(a)
   136  	for !v.Type().Implements(jsonMarshalType) && v.Kind() == reflect.Ptr && !v.IsNil() {
   137  		v = v.Elem()
   138  	}
   139  	return v.Interface()
   140  }
   141  
   142  // jsValEscaper escapes its inputs to a JS Expression (section 11.14) that has
   143  // neither side-effects nor free variables outside (NaN, Infinity).
   144  func jsValEscaper(args ...interface{}) string {
   145  	var a interface{}
   146  	if len(args) == 1 {
   147  		a = indirectToJSONMarshaler(args[0])
   148  		switch t := a.(type) {
   149  		case htmltemplate.JS:
   150  			return string(t)
   151  		case htmltemplate.JSStr:
   152  			// TODO: normalize quotes.
   153  			return `"` + string(t) + `"`
   154  		case json.Marshaler:
   155  			// Do not treat as a Stringer.
   156  		case fmt.Stringer:
   157  			a = t.String()
   158  		}
   159  	} else {
   160  		for i, arg := range args {
   161  			args[i] = indirectToJSONMarshaler(arg)
   162  		}
   163  		a = fmt.Sprint(args...)
   164  	}
   165  	// TODO: detect cycles before calling Marshal which loops infinitely on
   166  	// cyclic data. This may be an unacceptable DoS risk.
   167  	b, err := json.Marshal(a)
   168  	if err != nil {
   169  		// Put a space before comment so that if it is flush against
   170  		// a division operator it is not turned into a line comment:
   171  		//     x/{{y}}
   172  		// turning into
   173  		//     x//* error marshaling y:
   174  		//          second line of error message */null
   175  		return fmt.Sprintf(" /* %s */null ", strings.ReplaceAll(err.Error(), "*/", "* /"))
   176  	}
   177  
   178  	// TODO: maybe post-process output to prevent it from containing
   179  	// "<!--", "-->", "<![CDATA[", "]]>", or "</script"
   180  	// in case custom marshalers produce output containing those.
   181  	// Note: Do not use \x escaping to save bytes because it is not JSON compatible and this escaper
   182  	// supports ld+json content-type.
   183  	if len(b) == 0 {
   184  		// In, `x=y/{{.}}*z` a json.Marshaler that produces "" should
   185  		// not cause the output `x=y/*z`.
   186  		return " null "
   187  	}
   188  	first, _ := utf8.DecodeRune(b)
   189  	last, _ := utf8.DecodeLastRune(b)
   190  	var buf strings.Builder
   191  	// Prevent IdentifierNames and NumericLiterals from running into
   192  	// keywords: in, instanceof, typeof, void
   193  	pad := isJSIdentPart(first) || isJSIdentPart(last)
   194  	if pad {
   195  		buf.WriteByte(' ')
   196  	}
   197  	written := 0
   198  	// Make sure that json.Marshal escapes codepoints U+2028 & U+2029
   199  	// so it falls within the subset of JSON which is valid JS.
   200  	for i := 0; i < len(b); {
   201  		rune, n := utf8.DecodeRune(b[i:])
   202  		repl := ""
   203  		if rune == 0x2028 {
   204  			repl = `\u2028`
   205  		} else if rune == 0x2029 {
   206  			repl = `\u2029`
   207  		}
   208  		if repl != "" {
   209  			buf.Write(b[written:i])
   210  			buf.WriteString(repl)
   211  			written = i + n
   212  		}
   213  		i += n
   214  	}
   215  	if buf.Len() != 0 {
   216  		buf.Write(b[written:])
   217  		if pad {
   218  			buf.WriteByte(' ')
   219  		}
   220  		return buf.String()
   221  	}
   222  	return string(b)
   223  }
   224  
   225  // jsStrEscaper produces a string that can be included between quotes in
   226  // JavaScript source, in JavaScript embedded in an HTML5 <script> element,
   227  // or in an HTML5 event handler attribute such as onclick.
   228  func jsStrEscaper(args ...interface{}) string {
   229  	s, t := stringify(args...)
   230  	if t == contentTypeJSStr {
   231  		return replace(s, jsStrNormReplacementTable)
   232  	}
   233  	return replace(s, jsStrReplacementTable)
   234  }
   235  
   236  // jsRegexpEscaper behaves like jsStrEscaper but escapes regular expression
   237  // specials so the result is treated literally when included in a regular
   238  // expression literal. /foo{{.X}}bar/ matches the string "foo" followed by
   239  // the literal text of {{.X}} followed by the string "bar".
   240  func jsRegexpEscaper(args ...interface{}) string {
   241  	s, _ := stringify(args...)
   242  	s = replace(s, jsRegexpReplacementTable)
   243  	if s == "" {
   244  		// /{{.X}}/ should not produce a line comment when .X == "".
   245  		return "(?:)"
   246  	}
   247  	return s
   248  }
   249  
   250  // replace replaces each rune r of s with replacementTable[r], provided that
   251  // r < len(replacementTable). If replacementTable[r] is the empty string then
   252  // no replacement is made.
   253  // It also replaces runes U+2028 and U+2029 with the raw strings `\u2028` and
   254  // `\u2029`.
   255  func replace(s string, replacementTable []string) string {
   256  	var b strings.Builder
   257  	r, w, written := rune(0), 0, 0
   258  	for i := 0; i < len(s); i += w {
   259  		// See comment in htmlEscaper.
   260  		r, w = utf8.DecodeRuneInString(s[i:])
   261  		var repl string
   262  		switch {
   263  		case int(r) < len(lowUnicodeReplacementTable):
   264  			repl = lowUnicodeReplacementTable[r]
   265  		case int(r) < len(replacementTable) && replacementTable[r] != "":
   266  			repl = replacementTable[r]
   267  		case r == '\u2028':
   268  			repl = `\u2028`
   269  		case r == '\u2029':
   270  			repl = `\u2029`
   271  		default:
   272  			continue
   273  		}
   274  		if written == 0 {
   275  			b.Grow(len(s))
   276  		}
   277  		b.WriteString(s[written:i])
   278  		b.WriteString(repl)
   279  		written = i + w
   280  	}
   281  	if written == 0 {
   282  		return s
   283  	}
   284  	b.WriteString(s[written:])
   285  	return b.String()
   286  }
   287  
   288  var lowUnicodeReplacementTable = []string{
   289  	0: `\u0000`, 1: `\u0001`, 2: `\u0002`, 3: `\u0003`, 4: `\u0004`, 5: `\u0005`, 6: `\u0006`,
   290  	'\a': `\u0007`,
   291  	'\b': `\u0008`,
   292  	'\t': `\t`,
   293  	'\n': `\n`,
   294  	'\v': `\u000b`, // "\v" == "v" on IE 6.
   295  	'\f': `\f`,
   296  	'\r': `\r`,
   297  	0xe:  `\u000e`, 0xf: `\u000f`, 0x10: `\u0010`, 0x11: `\u0011`, 0x12: `\u0012`, 0x13: `\u0013`,
   298  	0x14: `\u0014`, 0x15: `\u0015`, 0x16: `\u0016`, 0x17: `\u0017`, 0x18: `\u0018`, 0x19: `\u0019`,
   299  	0x1a: `\u001a`, 0x1b: `\u001b`, 0x1c: `\u001c`, 0x1d: `\u001d`, 0x1e: `\u001e`, 0x1f: `\u001f`,
   300  }
   301  
// jsStrReplacementTable maps a rune, used as the slice index, to the text that
// replaces it inside a JavaScript string. Indices without an entry hold the
// empty string, which replace treats as "leave the rune unchanged".
//
// jsStrEscaper passes this table to replace for input that is not
// contentTypeJSStr. Unlike jsStrNormReplacementTable it has an entry for `\`.
// replace consults lowUnicodeReplacementTable first for runes below that
// table's length, so the control-character entries here repeat values found
// there.
var jsStrReplacementTable = []string{
	0:    `\u0000`,
	'\t': `\t`,
	'\n': `\n`,
	'\v': `\u000b`, // "\v" == "v" on IE 6.
	'\f': `\f`,
	'\r': `\r`,
	// Encode HTML specials as hex so the output can be embedded
	// in HTML attributes without further encoding.
	'"':  `\u0022`,
	'&':  `\u0026`,
	'\'': `\u0027`,
	'+':  `\u002b`,
	'/':  `\/`,
	'<':  `\u003c`,
	'>':  `\u003e`,
	'\\': `\\`,
}
   320  
// jsStrNormReplacementTable is like jsStrReplacementTable but does not
// overencode existing escapes since this table has no entry for `\`.
//
// jsStrEscaper passes this table to replace when stringify reports the input
// as contentTypeJSStr. As in the other tables, the slice index is the rune
// and an empty string means the rune is left unchanged.
var jsStrNormReplacementTable = []string{
	0:    `\u0000`,
	'\t': `\t`,
	'\n': `\n`,
	'\v': `\u000b`, // "\v" == "v" on IE 6.
	'\f': `\f`,
	'\r': `\r`,
	// Encode HTML specials as hex so the output can be embedded
	// in HTML attributes without further encoding.
	'"':  `\u0022`,
	'&':  `\u0026`,
	'\'': `\u0027`,
	'+':  `\u002b`,
	'/':  `\/`,
	'<':  `\u003c`,
	'>':  `\u003e`,
}
// jsRegexpReplacementTable is the replacement table that jsRegexpEscaper
// passes to replace. It carries the same entries as jsStrReplacementTable and
// additionally puts a backslash in front of the regular expression specials
// $ ( ) * - . ? [ ] ^ { | }. The slice index is the rune; an empty string
// means the rune is left unchanged.
var jsRegexpReplacementTable = []string{
	0:    `\u0000`,
	'\t': `\t`,
	'\n': `\n`,
	'\v': `\u000b`, // "\v" == "v" on IE 6.
	'\f': `\f`,
	'\r': `\r`,
	// Encode HTML specials as hex so the output can be embedded
	// in HTML attributes without further encoding.
	'"':  `\u0022`,
	'$':  `\$`,
	'&':  `\u0026`,
	'\'': `\u0027`,
	'(':  `\(`,
	')':  `\)`,
	'*':  `\*`,
	'+':  `\u002b`,
	'-':  `\-`,
	'.':  `\.`,
	'/':  `\/`,
	'<':  `\u003c`,
	'>':  `\u003e`,
	'?':  `\?`,
	'[':  `\[`,
	'\\': `\\`,
	']':  `\]`,
	'^':  `\^`,
	'{':  `\{`,
	'|':  `\|`,
	'}':  `\}`,
}
   371  
   372  // isJSIdentPart reports whether the given rune is a JS identifier part.
   373  // It does not handle all the non-Latin letters, joiners, and combining marks,
   374  // but it does handle every codepoint that can occur in a numeric literal or
   375  // a keyword.
   376  func isJSIdentPart(r rune) bool {
   377  	switch {
   378  	case r == '$':
   379  		return true
   380  	case '0' <= r && r <= '9':
   381  		return true
   382  	case 'A' <= r && r <= 'Z':
   383  		return true
   384  	case r == '_':
   385  		return true
   386  	case 'a' <= r && r <= 'z':
   387  		return true
   388  	}
   389  	return false
   390  }
   391  
   392  // isJSType reports whether the given MIME type should be considered JavaScript.
   393  //
   394  // It is used to determine whether a script tag with a type attribute is a javascript container.
   395  func isJSType(mimeType string) bool {
   396  	// per
   397  	//   https://www.w3.org/TR/html5/scripting-1.html#attr-script-type
   398  	//   https://tools.ietf.org/html/rfc7231#section-3.1.1
   399  	//   https://tools.ietf.org/html/rfc4329#section-3
   400  	//   https://www.ietf.org/rfc/rfc4627.txt
   401  	// discard parameters
   402  	if i := strings.Index(mimeType, ";"); i >= 0 {
   403  		mimeType = mimeType[:i]
   404  	}
   405  	mimeType = strings.ToLower(mimeType)
   406  	mimeType = strings.TrimSpace(mimeType)
   407  	switch mimeType {
   408  	case
   409  		"application/ecmascript",
   410  		"application/javascript",
   411  		"application/json",
   412  		"application/ld+json",
   413  		"application/x-ecmascript",
   414  		"application/x-javascript",
   415  		"module",
   416  		"text/ecmascript",
   417  		"text/javascript",
   418  		"text/javascript1.0",
   419  		"text/javascript1.1",
   420  		"text/javascript1.2",
   421  		"text/javascript1.3",
   422  		"text/javascript1.4",
   423  		"text/javascript1.5",
   424  		"text/jscript",
   425  		"text/livescript",
   426  		"text/x-ecmascript",
   427  		"text/x-javascript":
   428  		return true
   429  	default:
   430  		return false
   431  	}
   432  }