github.com/epfl-dcsl/gotee@v0.0.0-20200909122901-014b35f5e5e9/src/html/template/js.go

github.com/epfl-dcsl/gotee@v0.0.0-20200909122901-014b35f5e5e9/src/html/template/js.go (about)

     1  // Copyright 2011 The Go Authors. All rights reserved.
     2  // Use of this source code is governed by a BSD-style
     3  // license that can be found in the LICENSE file.
     4  
     5  package template
     6  
     7  import (
     8  	"bytes"
     9  	"encoding/json"
    10  	"fmt"
    11  	"reflect"
    12  	"strings"
    13  	"unicode/utf8"
    14  )
    15  
    16  // nextJSCtx returns the context that determines whether a slash after the
    17  // given run of tokens starts a regular expression instead of a division
    18  // operator: / or /=.
    19  //
    20  // This assumes that the token run does not include any string tokens, comment
    21  // tokens, regular expression literal tokens, or division operators.
    22  //
    23  // This fails on some valid but nonsensical JavaScript programs like
    24  // "x = ++/foo/i" which is quite different than "x++/foo/i", but is not known to
    25  // fail on any known useful programs. It is based on the draft
    26  // JavaScript 2.0 lexical grammar and requires one token of lookbehind:
    27  // http://www.mozilla.org/js/language/js20-2000-07/rationale/syntax.html
    28  func nextJSCtx(s []byte, preceding jsCtx) jsCtx {
    29  	s = bytes.TrimRight(s, "\t\n\f\r \u2028\u2029")
    30  	if len(s) == 0 {
    31  		return preceding
    32  	}
    33  
    34  	// All cases below are in the single-byte UTF-8 group.
    35  	switch c, n := s[len(s)-1], len(s); c {
    36  	case '+', '-':
    37  		// ++ and -- are not regexp preceders, but + and - are whether
    38  		// they are used as infix or prefix operators.
    39  		start := n - 1
    40  		// Count the number of adjacent dashes or pluses.
    41  		for start > 0 && s[start-1] == c {
    42  			start--
    43  		}
    44  		if (n-start)&1 == 1 {
    45  			// Reached for trailing minus signs since "---" is the
    46  			// same as "-- -".
    47  			return jsCtxRegexp
    48  		}
    49  		return jsCtxDivOp
    50  	case '.':
    51  		// Handle "42."
    52  		if n != 1 && '0' <= s[n-2] && s[n-2] <= '9' {
    53  			return jsCtxDivOp
    54  		}
    55  		return jsCtxRegexp
    56  	// Suffixes for all punctuators from section 7.7 of the language spec
    57  	// that only end binary operators not handled above.
    58  	case ',', '<', '>', '=', '*', '%', '&', '|', '^', '?':
    59  		return jsCtxRegexp
    60  	// Suffixes for all punctuators from section 7.7 of the language spec
    61  	// that are prefix operators not handled above.
    62  	case '!', '~':
    63  		return jsCtxRegexp
    64  	// Matches all the punctuators from section 7.7 of the language spec
    65  	// that are open brackets not handled above.
    66  	case '(', '[':
    67  		return jsCtxRegexp
    68  	// Matches all the punctuators from section 7.7 of the language spec
    69  	// that precede expression starts.
    70  	case ':', ';', '{':
    71  		return jsCtxRegexp
    72  	// CAVEAT: the close punctuators ('}', ']', ')') precede div ops and
    73  	// are handled in the default except for '}' which can precede a
    74  	// division op as in
    75  	//    ({ valueOf: function () { return 42 } } / 2
    76  	// which is valid, but, in practice, developers don't divide object
    77  	// literals, so our heuristic works well for code like
    78  	//    function () { ... }  /foo/.test(x) && sideEffect();
    79  	// The ')' punctuator can precede a regular expression as in
    80  	//     if (b) /foo/.test(x) && ...
    81  	// but this is much less likely than
    82  	//     (a + b) / c
    83  	case '}':
    84  		return jsCtxRegexp
    85  	default:
    86  		// Look for an IdentifierName and see if it is a keyword that
    87  		// can precede a regular expression.
    88  		j := n
    89  		for j > 0 && isJSIdentPart(rune(s[j-1])) {
    90  			j--
    91  		}
    92  		if regexpPrecederKeywords[string(s[j:])] {
    93  			return jsCtxRegexp
    94  		}
    95  	}
    96  	// Otherwise is a punctuator not listed above, or
    97  	// a string which precedes a div op, or an identifier
    98  	// which precedes a div op.
    99  	return jsCtxDivOp
   100  }
   101  
   102  // regexpPrecederKeywords is a set of reserved JS keywords that can precede a
   103  // regular expression in JS source.
   104  var regexpPrecederKeywords = map[string]bool{
   105  	"break":      true,
   106  	"case":       true,
   107  	"continue":   true,
   108  	"delete":     true,
   109  	"do":         true,
   110  	"else":       true,
   111  	"finally":    true,
   112  	"in":         true,
   113  	"instanceof": true,
   114  	"return":     true,
   115  	"throw":      true,
   116  	"try":        true,
   117  	"typeof":     true,
   118  	"void":       true,
   119  }
   120  
   121  var jsonMarshalType = reflect.TypeOf((*json.Marshaler)(nil)).Elem()
   122  
   123  // indirectToJSONMarshaler returns the value, after dereferencing as many times
   124  // as necessary to reach the base type (or nil) or an implementation of json.Marshal.
   125  func indirectToJSONMarshaler(a interface{}) interface{} {
   126  	v := reflect.ValueOf(a)
   127  	for !v.Type().Implements(jsonMarshalType) && v.Kind() == reflect.Ptr && !v.IsNil() {
   128  		v = v.Elem()
   129  	}
   130  	return v.Interface()
   131  }
   132  
   133  // jsValEscaper escapes its inputs to a JS Expression (section 11.14) that has
   134  // neither side-effects nor free variables outside (NaN, Infinity).
   135  func jsValEscaper(args ...interface{}) string {
   136  	var a interface{}
   137  	if len(args) == 1 {
   138  		a = indirectToJSONMarshaler(args[0])
   139  		switch t := a.(type) {
   140  		case JS:
   141  			return string(t)
   142  		case JSStr:
   143  			// TODO: normalize quotes.
   144  			return `"` + string(t) + `"`
   145  		case json.Marshaler:
   146  			// Do not treat as a Stringer.
   147  		case fmt.Stringer:
   148  			a = t.String()
   149  		}
   150  	} else {
   151  		for i, arg := range args {
   152  			args[i] = indirectToJSONMarshaler(arg)
   153  		}
   154  		a = fmt.Sprint(args...)
   155  	}
   156  	// TODO: detect cycles before calling Marshal which loops infinitely on
   157  	// cyclic data. This may be an unacceptable DoS risk.
   158  
   159  	b, err := json.Marshal(a)
   160  	if err != nil {
   161  		// Put a space before comment so that if it is flush against
   162  		// a division operator it is not turned into a line comment:
   163  		//     x/{{y}}
   164  		// turning into
   165  		//     x//* error marshaling y:
   166  		//          second line of error message */null
   167  		return fmt.Sprintf(" /* %s */null ", strings.Replace(err.Error(), "*/", "* /", -1))
   168  	}
   169  
   170  	// TODO: maybe post-process output to prevent it from containing
   171  	// "<!--", "-->", "<![CDATA[", "]]>", or "</script"
   172  	// in case custom marshalers produce output containing those.
   173  
   174  	// TODO: Maybe abbreviate \u00ab to \xab to produce more compact output.
   175  	if len(b) == 0 {
   176  		// In, `x=y/{{.}}*z` a json.Marshaler that produces "" should
   177  		// not cause the output `x=y/*z`.
   178  		return " null "
   179  	}
   180  	first, _ := utf8.DecodeRune(b)
   181  	last, _ := utf8.DecodeLastRune(b)
   182  	var buf bytes.Buffer
   183  	// Prevent IdentifierNames and NumericLiterals from running into
   184  	// keywords: in, instanceof, typeof, void
   185  	pad := isJSIdentPart(first) || isJSIdentPart(last)
   186  	if pad {
   187  		buf.WriteByte(' ')
   188  	}
   189  	written := 0
   190  	// Make sure that json.Marshal escapes codepoints U+2028 & U+2029
   191  	// so it falls within the subset of JSON which is valid JS.
   192  	for i := 0; i < len(b); {
   193  		rune, n := utf8.DecodeRune(b[i:])
   194  		repl := ""
   195  		if rune == 0x2028 {
   196  			repl = `\u2028`
   197  		} else if rune == 0x2029 {
   198  			repl = `\u2029`
   199  		}
   200  		if repl != "" {
   201  			buf.Write(b[written:i])
   202  			buf.WriteString(repl)
   203  			written = i + n
   204  		}
   205  		i += n
   206  	}
   207  	if buf.Len() != 0 {
   208  		buf.Write(b[written:])
   209  		if pad {
   210  			buf.WriteByte(' ')
   211  		}
   212  		b = buf.Bytes()
   213  	}
   214  	return string(b)
   215  }
   216  
   217  // jsStrEscaper produces a string that can be included between quotes in
   218  // JavaScript source, in JavaScript embedded in an HTML5 <script> element,
   219  // or in an HTML5 event handler attribute such as onclick.
   220  func jsStrEscaper(args ...interface{}) string {
   221  	s, t := stringify(args...)
   222  	if t == contentTypeJSStr {
   223  		return replace(s, jsStrNormReplacementTable)
   224  	}
   225  	return replace(s, jsStrReplacementTable)
   226  }
   227  
   228  // jsRegexpEscaper behaves like jsStrEscaper but escapes regular expression
   229  // specials so the result is treated literally when included in a regular
   230  // expression literal. /foo{{.X}}bar/ matches the string "foo" followed by
   231  // the literal text of {{.X}} followed by the string "bar".
   232  func jsRegexpEscaper(args ...interface{}) string {
   233  	s, _ := stringify(args...)
   234  	s = replace(s, jsRegexpReplacementTable)
   235  	if s == "" {
   236  		// /{{.X}}/ should not produce a line comment when .X == "".
   237  		return "(?:)"
   238  	}
   239  	return s
   240  }
   241  
   242  // replace replaces each rune r of s with replacementTable[r], provided that
   243  // r < len(replacementTable). If replacementTable[r] is the empty string then
   244  // no replacement is made.
   245  // It also replaces runes U+2028 and U+2029 with the raw strings `\u2028` and
   246  // `\u2029`.
   247  func replace(s string, replacementTable []string) string {
   248  	var b bytes.Buffer
   249  	r, w, written := rune(0), 0, 0
   250  	for i := 0; i < len(s); i += w {
   251  		// See comment in htmlEscaper.
   252  		r, w = utf8.DecodeRuneInString(s[i:])
   253  		var repl string
   254  		switch {
   255  		case int(r) < len(replacementTable) && replacementTable[r] != "":
   256  			repl = replacementTable[r]
   257  		case r == '\u2028':
   258  			repl = `\u2028`
   259  		case r == '\u2029':
   260  			repl = `\u2029`
   261  		default:
   262  			continue
   263  		}
   264  		b.WriteString(s[written:i])
   265  		b.WriteString(repl)
   266  		written = i + w
   267  	}
   268  	if written == 0 {
   269  		return s
   270  	}
   271  	b.WriteString(s[written:])
   272  	return b.String()
   273  }
   274  
   275  var jsStrReplacementTable = []string{
   276  	0:    `\0`,
   277  	'\t': `\t`,
   278  	'\n': `\n`,
   279  	'\v': `\x0b`, // "\v" == "v" on IE 6.
   280  	'\f': `\f`,
   281  	'\r': `\r`,
   282  	// Encode HTML specials as hex so the output can be embedded
   283  	// in HTML attributes without further encoding.
   284  	'"':  `\x22`,
   285  	'&':  `\x26`,
   286  	'\'': `\x27`,
   287  	'+':  `\x2b`,
   288  	'/':  `\/`,
   289  	'<':  `\x3c`,
   290  	'>':  `\x3e`,
   291  	'\\': `\\`,
   292  }
   293  
   294  // jsStrNormReplacementTable is like jsStrReplacementTable but does not
   295  // overencode existing escapes since this table has no entry for `\`.
   296  var jsStrNormReplacementTable = []string{
   297  	0:    `\0`,
   298  	'\t': `\t`,
   299  	'\n': `\n`,
   300  	'\v': `\x0b`, // "\v" == "v" on IE 6.
   301  	'\f': `\f`,
   302  	'\r': `\r`,
   303  	// Encode HTML specials as hex so the output can be embedded
   304  	// in HTML attributes without further encoding.
   305  	'"':  `\x22`,
   306  	'&':  `\x26`,
   307  	'\'': `\x27`,
   308  	'+':  `\x2b`,
   309  	'/':  `\/`,
   310  	'<':  `\x3c`,
   311  	'>':  `\x3e`,
   312  }
   313  
   314  var jsRegexpReplacementTable = []string{
   315  	0:    `\0`,
   316  	'\t': `\t`,
   317  	'\n': `\n`,
   318  	'\v': `\x0b`, // "\v" == "v" on IE 6.
   319  	'\f': `\f`,
   320  	'\r': `\r`,
   321  	// Encode HTML specials as hex so the output can be embedded
   322  	// in HTML attributes without further encoding.
   323  	'"':  `\x22`,
   324  	'$':  `\$`,
   325  	'&':  `\x26`,
   326  	'\'': `\x27`,
   327  	'(':  `\(`,
   328  	')':  `\)`,
   329  	'*':  `\*`,
   330  	'+':  `\x2b`,
   331  	'-':  `\-`,
   332  	'.':  `\.`,
   333  	'/':  `\/`,
   334  	'<':  `\x3c`,
   335  	'>':  `\x3e`,
   336  	'?':  `\?`,
   337  	'[':  `\[`,
   338  	'\\': `\\`,
   339  	']':  `\]`,
   340  	'^':  `\^`,
   341  	'{':  `\{`,
   342  	'|':  `\|`,
   343  	'}':  `\}`,
   344  }
   345  
   346  // isJSIdentPart reports whether the given rune is a JS identifier part.
   347  // It does not handle all the non-Latin letters, joiners, and combining marks,
   348  // but it does handle every codepoint that can occur in a numeric literal or
   349  // a keyword.
   350  func isJSIdentPart(r rune) bool {
   351  	switch {
   352  	case r == '$':
   353  		return true
   354  	case '0' <= r && r <= '9':
   355  		return true
   356  	case 'A' <= r && r <= 'Z':
   357  		return true
   358  	case r == '_':
   359  		return true
   360  	case 'a' <= r && r <= 'z':
   361  		return true
   362  	}
   363  	return false
   364  }
   365  
   366  // isJSType returns true if the given MIME type should be considered JavaScript.
   367  //
   368  // It is used to determine whether a script tag with a type attribute is a javascript container.
   369  func isJSType(mimeType string) bool {
   370  	// per
   371  	//   https://www.w3.org/TR/html5/scripting-1.html#attr-script-type
   372  	//   https://tools.ietf.org/html/rfc7231#section-3.1.1
   373  	//   https://tools.ietf.org/html/rfc4329#section-3
   374  	//   https://www.ietf.org/rfc/rfc4627.txt
   375  	mimeType = strings.ToLower(mimeType)
   376  	// discard parameters
   377  	if i := strings.Index(mimeType, ";"); i >= 0 {
   378  		mimeType = mimeType[:i]
   379  	}
   380  	mimeType = strings.TrimSpace(mimeType)
   381  	switch mimeType {
   382  	case
   383  		"application/ecmascript",
   384  		"application/javascript",
   385  		"application/json",
   386  		"application/x-ecmascript",
   387  		"application/x-javascript",
   388  		"text/ecmascript",
   389  		"text/javascript",
   390  		"text/javascript1.0",
   391  		"text/javascript1.1",
   392  		"text/javascript1.2",
   393  		"text/javascript1.3",
   394  		"text/javascript1.4",
   395  		"text/javascript1.5",
   396  		"text/jscript",
   397  		"text/livescript",
   398  		"text/x-ecmascript",
   399  		"text/x-javascript":
   400  		return true
   401  	default:
   402  		return false
   403  	}
   404  }