github.com/filosottile/go@v0.0.0-20170906193555-dbed9972d994/src/html/template/css.go (about)

     1  // Copyright 2011 The Go Authors. All rights reserved.
     2  // Use of this source code is governed by a BSD-style
     3  // license that can be found in the LICENSE file.
     4  
     5  package template
     6  
     7  import (
     8  	"bytes"
     9  	"fmt"
    10  	"unicode"
    11  	"unicode/utf8"
    12  )
    13  
    14  // endsWithCSSKeyword reports whether b ends with an ident that
    15  // case-insensitively matches the lower-case kw.
    16  func endsWithCSSKeyword(b []byte, kw string) bool {
    17  	i := len(b) - len(kw)
    18  	if i < 0 {
    19  		// Too short.
    20  		return false
    21  	}
    22  	if i != 0 {
    23  		r, _ := utf8.DecodeLastRune(b[:i])
    24  		if isCSSNmchar(r) {
    25  			// Too long.
    26  			return false
    27  		}
    28  	}
    29  	// Many CSS keywords, such as "!important" can have characters encoded,
    30  	// but the URI production does not allow that according to
    31  	// http://www.w3.org/TR/css3-syntax/#TOK-URI
    32  	// This does not attempt to recognize encoded keywords. For example,
    33  	// given "\75\72\6c" and "url" this return false.
    34  	return string(bytes.ToLower(b[i:])) == kw
    35  }
    36  
    37  // isCSSNmchar reports whether rune is allowed anywhere in a CSS identifier.
    38  func isCSSNmchar(r rune) bool {
    39  	// Based on the CSS3 nmchar production but ignores multi-rune escape
    40  	// sequences.
    41  	// http://www.w3.org/TR/css3-syntax/#SUBTOK-nmchar
    42  	return 'a' <= r && r <= 'z' ||
    43  		'A' <= r && r <= 'Z' ||
    44  		'0' <= r && r <= '9' ||
    45  		r == '-' ||
    46  		r == '_' ||
    47  		// Non-ASCII cases below.
    48  		0x80 <= r && r <= 0xd7ff ||
    49  		0xe000 <= r && r <= 0xfffd ||
    50  		0x10000 <= r && r <= 0x10ffff
    51  }
    52  
    53  // decodeCSS decodes CSS3 escapes given a sequence of stringchars.
    54  // If there is no change, it returns the input, otherwise it returns a slice
    55  // backed by a new array.
    56  // http://www.w3.org/TR/css3-syntax/#SUBTOK-stringchar defines stringchar.
    57  func decodeCSS(s []byte) []byte {
    58  	i := bytes.IndexByte(s, '\\')
    59  	if i == -1 {
    60  		return s
    61  	}
    62  	// The UTF-8 sequence for a codepoint is never longer than 1 + the
    63  	// number hex digits need to represent that codepoint, so len(s) is an
    64  	// upper bound on the output length.
    65  	b := make([]byte, 0, len(s))
    66  	for len(s) != 0 {
    67  		i := bytes.IndexByte(s, '\\')
    68  		if i == -1 {
    69  			i = len(s)
    70  		}
    71  		b, s = append(b, s[:i]...), s[i:]
    72  		if len(s) < 2 {
    73  			break
    74  		}
    75  		// http://www.w3.org/TR/css3-syntax/#SUBTOK-escape
    76  		// escape ::= unicode | '\' [#x20-#x7E#x80-#xD7FF#xE000-#xFFFD#x10000-#x10FFFF]
    77  		if isHex(s[1]) {
    78  			// http://www.w3.org/TR/css3-syntax/#SUBTOK-unicode
    79  			//   unicode ::= '\' [0-9a-fA-F]{1,6} wc?
    80  			j := 2
    81  			for j < len(s) && j < 7 && isHex(s[j]) {
    82  				j++
    83  			}
    84  			r := hexDecode(s[1:j])
    85  			if r > unicode.MaxRune {
    86  				r, j = r/16, j-1
    87  			}
    88  			n := utf8.EncodeRune(b[len(b):cap(b)], r)
    89  			// The optional space at the end allows a hex
    90  			// sequence to be followed by a literal hex.
    91  			// string(decodeCSS([]byte(`\A B`))) == "\nB"
    92  			b, s = b[:len(b)+n], skipCSSSpace(s[j:])
    93  		} else {
    94  			// `\\` decodes to `\` and `\"` to `"`.
    95  			_, n := utf8.DecodeRune(s[1:])
    96  			b, s = append(b, s[1:1+n]...), s[1+n:]
    97  		}
    98  	}
    99  	return b
   100  }
   101  
   102  // isHex reports whether the given character is a hex digit.
   103  func isHex(c byte) bool {
   104  	return '0' <= c && c <= '9' || 'a' <= c && c <= 'f' || 'A' <= c && c <= 'F'
   105  }
   106  
   107  // hexDecode decodes a short hex digit sequence: "10" -> 16.
   108  func hexDecode(s []byte) rune {
   109  	n := '\x00'
   110  	for _, c := range s {
   111  		n <<= 4
   112  		switch {
   113  		case '0' <= c && c <= '9':
   114  			n |= rune(c - '0')
   115  		case 'a' <= c && c <= 'f':
   116  			n |= rune(c-'a') + 10
   117  		case 'A' <= c && c <= 'F':
   118  			n |= rune(c-'A') + 10
   119  		default:
   120  			panic(fmt.Sprintf("Bad hex digit in %q", s))
   121  		}
   122  	}
   123  	return n
   124  }
   125  
   126  // skipCSSSpace returns a suffix of c, skipping over a single space.
   127  func skipCSSSpace(c []byte) []byte {
   128  	if len(c) == 0 {
   129  		return c
   130  	}
   131  	// wc ::= #x9 | #xA | #xC | #xD | #x20
   132  	switch c[0] {
   133  	case '\t', '\n', '\f', ' ':
   134  		return c[1:]
   135  	case '\r':
   136  		// This differs from CSS3's wc production because it contains a
   137  		// probable spec error whereby wc contains all the single byte
   138  		// sequences in nl (newline) but not CRLF.
   139  		if len(c) >= 2 && c[1] == '\n' {
   140  			return c[2:]
   141  		}
   142  		return c[1:]
   143  	}
   144  	return c
   145  }
   146  
   147  // isCSSSpace reports whether b is a CSS space char as defined in wc.
   148  func isCSSSpace(b byte) bool {
   149  	switch b {
   150  	case '\t', '\n', '\f', '\r', ' ':
   151  		return true
   152  	}
   153  	return false
   154  }
   155  
   156  // cssEscaper escapes HTML and CSS special characters using \<hex>+ escapes.
   157  func cssEscaper(args ...interface{}) string {
   158  	s, _ := stringify(args...)
   159  	var b bytes.Buffer
   160  	r, w, written := rune(0), 0, 0
   161  	for i := 0; i < len(s); i += w {
   162  		// See comment in htmlEscaper.
   163  		r, w = utf8.DecodeRuneInString(s[i:])
   164  		var repl string
   165  		switch {
   166  		case int(r) < len(cssReplacementTable) && cssReplacementTable[r] != "":
   167  			repl = cssReplacementTable[r]
   168  		default:
   169  			continue
   170  		}
   171  		b.WriteString(s[written:i])
   172  		b.WriteString(repl)
   173  		written = i + w
   174  		if repl != `\\` && (written == len(s) || isHex(s[written]) || isCSSSpace(s[written])) {
   175  			b.WriteByte(' ')
   176  		}
   177  	}
   178  	if written == 0 {
   179  		return s
   180  	}
   181  	b.WriteString(s[written:])
   182  	return b.String()
   183  }
   184  
   185  var cssReplacementTable = []string{
   186  	0:    `\0`,
   187  	'\t': `\9`,
   188  	'\n': `\a`,
   189  	'\f': `\c`,
   190  	'\r': `\d`,
   191  	// Encode HTML specials as hex so the output can be embedded
   192  	// in HTML attributes without further encoding.
   193  	'"':  `\22`,
   194  	'&':  `\26`,
   195  	'\'': `\27`,
   196  	'(':  `\28`,
   197  	')':  `\29`,
   198  	'+':  `\2b`,
   199  	'/':  `\2f`,
   200  	':':  `\3a`,
   201  	';':  `\3b`,
   202  	'<':  `\3c`,
   203  	'>':  `\3e`,
   204  	'\\': `\\`,
   205  	'{':  `\7b`,
   206  	'}':  `\7d`,
   207  }
   208  
   209  var expressionBytes = []byte("expression")
   210  var mozBindingBytes = []byte("mozbinding")
   211  
   212  // cssValueFilter allows innocuous CSS values in the output including CSS
   213  // quantities (10px or 25%), ID or class literals (#foo, .bar), keyword values
   214  // (inherit, blue), and colors (#888).
   215  // It filters out unsafe values, such as those that affect token boundaries,
   216  // and anything that might execute scripts.
   217  func cssValueFilter(args ...interface{}) string {
   218  	s, t := stringify(args...)
   219  	if t == contentTypeCSS {
   220  		return s
   221  	}
   222  	b, id := decodeCSS([]byte(s)), make([]byte, 0, 64)
   223  
   224  	// CSS3 error handling is specified as honoring string boundaries per
   225  	// http://www.w3.org/TR/css3-syntax/#error-handling :
   226  	//     Malformed declarations. User agents must handle unexpected
   227  	//     tokens encountered while parsing a declaration by reading until
   228  	//     the end of the declaration, while observing the rules for
   229  	//     matching pairs of (), [], {}, "", and '', and correctly handling
   230  	//     escapes. For example, a malformed declaration may be missing a
   231  	//     property, colon (:) or value.
   232  	// So we need to make sure that values do not have mismatched bracket
   233  	// or quote characters to prevent the browser from restarting parsing
   234  	// inside a string that might embed JavaScript source.
   235  	for i, c := range b {
   236  		switch c {
   237  		case 0, '"', '\'', '(', ')', '/', ';', '@', '[', '\\', ']', '`', '{', '}':
   238  			return filterFailsafe
   239  		case '-':
   240  			// Disallow <!-- or -->.
   241  			// -- should not appear in valid identifiers.
   242  			if i != 0 && b[i-1] == '-' {
   243  				return filterFailsafe
   244  			}
   245  		default:
   246  			if c < utf8.RuneSelf && isCSSNmchar(rune(c)) {
   247  				id = append(id, c)
   248  			}
   249  		}
   250  	}
   251  	id = bytes.ToLower(id)
   252  	if bytes.Contains(id, expressionBytes) || bytes.Contains(id, mozBindingBytes) {
   253  		return filterFailsafe
   254  	}
   255  	return string(b)
   256  }