github.com/zach-klippenstein/go@v0.0.0-20150108044943-fcfbeb3adf58/src/html/template/css.go

github.com/zach-klippenstein/go@v0.0.0-20150108044943-fcfbeb3adf58/src/html/template/css.go (about)

     1  // Copyright 2011 The Go Authors. All rights reserved.
     2  // Use of this source code is governed by a BSD-style
     3  // license that can be found in the LICENSE file.
     4  
     5  package template
     6  
     7  import (
     8  	"bytes"
     9  	"fmt"
    10  	"unicode"
    11  	"unicode/utf8"
    12  )
    13  
    14  // endsWithCSSKeyword reports whether b ends with an ident that
    15  // case-insensitively matches the lower-case kw.
    16  func endsWithCSSKeyword(b []byte, kw string) bool {
    17  	i := len(b) - len(kw)
    18  	if i < 0 {
    19  		// Too short.
    20  		return false
    21  	}
    22  	if i != 0 {
    23  		r, _ := utf8.DecodeLastRune(b[:i])
    24  		if isCSSNmchar(r) {
    25  			// Too long.
    26  			return false
    27  		}
    28  	}
    29  	// Many CSS keywords, such as "!important" can have characters encoded,
    30  	// but the URI production does not allow that according to
    31  	// http://www.w3.org/TR/css3-syntax/#TOK-URI
    32  	// This does not attempt to recognize encoded keywords. For example,
    33  	// given "\75\72\6c" and "url" this return false.
    34  	return string(bytes.ToLower(b[i:])) == kw
    35  }
    36  
    37  // isCSSNmchar reports whether rune is allowed anywhere in a CSS identifier.
    38  func isCSSNmchar(r rune) bool {
    39  	// Based on the CSS3 nmchar production but ignores multi-rune escape
    40  	// sequences.
    41  	// http://www.w3.org/TR/css3-syntax/#SUBTOK-nmchar
    42  	return 'a' <= r && r <= 'z' ||
    43  		'A' <= r && r <= 'Z' ||
    44  		'0' <= r && r <= '9' ||
    45  		r == '-' ||
    46  		r == '_' ||
    47  		// Non-ASCII cases below.
    48  		0x80 <= r && r <= 0xd7ff ||
    49  		0xe000 <= r && r <= 0xfffd ||
    50  		0x10000 <= r && r <= 0x10ffff
    51  }
    52  
    53  // decodeCSS decodes CSS3 escapes given a sequence of stringchars.
    54  // If there is no change, it returns the input, otherwise it returns a slice
    55  // backed by a new array.
    56  // http://www.w3.org/TR/css3-syntax/#SUBTOK-stringchar defines stringchar.
    57  func decodeCSS(s []byte) []byte {
    58  	i := bytes.IndexByte(s, '\\')
    59  	if i == -1 {
    60  		return s
    61  	}
    62  	// The UTF-8 sequence for a codepoint is never longer than 1 + the
    63  	// number hex digits need to represent that codepoint, so len(s) is an
    64  	// upper bound on the output length.
    65  	b := make([]byte, 0, len(s))
    66  	for len(s) != 0 {
    67  		i := bytes.IndexByte(s, '\\')
    68  		if i == -1 {
    69  			i = len(s)
    70  		}
    71  		b, s = append(b, s[:i]...), s[i:]
    72  		if len(s) < 2 {
    73  			break
    74  		}
    75  		// http://www.w3.org/TR/css3-syntax/#SUBTOK-escape
    76  		// escape ::= unicode | '\' [#x20-#x7E#x80-#xD7FF#xE000-#xFFFD#x10000-#x10FFFF]
    77  		if isHex(s[1]) {
    78  			// http://www.w3.org/TR/css3-syntax/#SUBTOK-unicode
    79  			//   unicode ::= '\' [0-9a-fA-F]{1,6} wc?
    80  			j := 2
    81  			for j < len(s) && j < 7 && isHex(s[j]) {
    82  				j++
    83  			}
    84  			r := hexDecode(s[1:j])
    85  			if r > unicode.MaxRune {
    86  				r, j = r/16, j-1
    87  			}
    88  			n := utf8.EncodeRune(b[len(b):cap(b)], r)
    89  			// The optional space at the end allows a hex
    90  			// sequence to be followed by a literal hex.
    91  			// string(decodeCSS([]byte(`\A B`))) == "\nB"
    92  			b, s = b[:len(b)+n], skipCSSSpace(s[j:])
    93  		} else {
    94  			// `\\` decodes to `\` and `\"` to `"`.
    95  			_, n := utf8.DecodeRune(s[1:])
    96  			b, s = append(b, s[1:1+n]...), s[1+n:]
    97  		}
    98  	}
    99  	return b
   100  }
   101  
   102  // isHex reports whether the given character is a hex digit.
   103  func isHex(c byte) bool {
   104  	return '0' <= c && c <= '9' || 'a' <= c && c <= 'f' || 'A' <= c && c <= 'F'
   105  }
   106  
   107  // hexDecode decodes a short hex digit sequence: "10" -> 16.
   108  func hexDecode(s []byte) rune {
   109  	n := '\x00'
   110  	for _, c := range s {
   111  		n <<= 4
   112  		switch {
   113  		case '0' <= c && c <= '9':
   114  			n |= rune(c - '0')
   115  		case 'a' <= c && c <= 'f':
   116  			n |= rune(c-'a') + 10
   117  		case 'A' <= c && c <= 'F':
   118  			n |= rune(c-'A') + 10
   119  		default:
   120  			panic(fmt.Sprintf("Bad hex digit in %q", s))
   121  		}
   122  	}
   123  	return n
   124  }
   125  
   126  // skipCSSSpace returns a suffix of c, skipping over a single space.
   127  func skipCSSSpace(c []byte) []byte {
   128  	if len(c) == 0 {
   129  		return c
   130  	}
   131  	// wc ::= #x9 | #xA | #xC | #xD | #x20
   132  	switch c[0] {
   133  	case '\t', '\n', '\f', ' ':
   134  		return c[1:]
   135  	case '\r':
   136  		// This differs from CSS3's wc production because it contains a
   137  		// probable spec error whereby wc contains all the single byte
   138  		// sequences in nl (newline) but not CRLF.
   139  		if len(c) >= 2 && c[1] == '\n' {
   140  			return c[2:]
   141  		}
   142  		return c[1:]
   143  	}
   144  	return c
   145  }
   146  
   147  // isCSSSpace reports whether b is a CSS space char as defined in wc.
   148  func isCSSSpace(b byte) bool {
   149  	switch b {
   150  	case '\t', '\n', '\f', '\r', ' ':
   151  		return true
   152  	}
   153  	return false
   154  }
   155  
   156  // cssEscaper escapes HTML and CSS special characters using \<hex>+ escapes.
   157  func cssEscaper(args ...interface{}) string {
   158  	s, _ := stringify(args...)
   159  	var b bytes.Buffer
   160  	written := 0
   161  	for i, r := range s {
   162  		var repl string
   163  		switch r {
   164  		case 0:
   165  			repl = `\0`
   166  		case '\t':
   167  			repl = `\9`
   168  		case '\n':
   169  			repl = `\a`
   170  		case '\f':
   171  			repl = `\c`
   172  		case '\r':
   173  			repl = `\d`
   174  		// Encode HTML specials as hex so the output can be embedded
   175  		// in HTML attributes without further encoding.
   176  		case '"':
   177  			repl = `\22`
   178  		case '&':
   179  			repl = `\26`
   180  		case '\'':
   181  			repl = `\27`
   182  		case '(':
   183  			repl = `\28`
   184  		case ')':
   185  			repl = `\29`
   186  		case '+':
   187  			repl = `\2b`
   188  		case '/':
   189  			repl = `\2f`
   190  		case ':':
   191  			repl = `\3a`
   192  		case ';':
   193  			repl = `\3b`
   194  		case '<':
   195  			repl = `\3c`
   196  		case '>':
   197  			repl = `\3e`
   198  		case '\\':
   199  			repl = `\\`
   200  		case '{':
   201  			repl = `\7b`
   202  		case '}':
   203  			repl = `\7d`
   204  		default:
   205  			continue
   206  		}
   207  		b.WriteString(s[written:i])
   208  		b.WriteString(repl)
   209  		written = i + utf8.RuneLen(r)
   210  		if repl != `\\` && (written == len(s) || isHex(s[written]) || isCSSSpace(s[written])) {
   211  			b.WriteByte(' ')
   212  		}
   213  	}
   214  	if written == 0 {
   215  		return s
   216  	}
   217  	b.WriteString(s[written:])
   218  	return b.String()
   219  }
   220  
   221  var expressionBytes = []byte("expression")
   222  var mozBindingBytes = []byte("mozbinding")
   223  
   224  // cssValueFilter allows innocuous CSS values in the output including CSS
   225  // quantities (10px or 25%), ID or class literals (#foo, .bar), keyword values
   226  // (inherit, blue), and colors (#888).
   227  // It filters out unsafe values, such as those that affect token boundaries,
   228  // and anything that might execute scripts.
   229  func cssValueFilter(args ...interface{}) string {
   230  	s, t := stringify(args...)
   231  	if t == contentTypeCSS {
   232  		return s
   233  	}
   234  	b, id := decodeCSS([]byte(s)), make([]byte, 0, 64)
   235  
   236  	// CSS3 error handling is specified as honoring string boundaries per
   237  	// http://www.w3.org/TR/css3-syntax/#error-handling :
   238  	//     Malformed declarations. User agents must handle unexpected
   239  	//     tokens encountered while parsing a declaration by reading until
   240  	//     the end of the declaration, while observing the rules for
   241  	//     matching pairs of (), [], {}, "", and '', and correctly handling
   242  	//     escapes. For example, a malformed declaration may be missing a
   243  	//     property, colon (:) or value.
   244  	// So we need to make sure that values do not have mismatched bracket
   245  	// or quote characters to prevent the browser from restarting parsing
   246  	// inside a string that might embed JavaScript source.
   247  	for i, c := range b {
   248  		switch c {
   249  		case 0, '"', '\'', '(', ')', '/', ';', '@', '[', '\\', ']', '`', '{', '}':
   250  			return filterFailsafe
   251  		case '-':
   252  			// Disallow <!-- or -->.
   253  			// -- should not appear in valid identifiers.
   254  			if i != 0 && b[i-1] == '-' {
   255  				return filterFailsafe
   256  			}
   257  		default:
   258  			if c < 0x80 && isCSSNmchar(rune(c)) {
   259  				id = append(id, c)
   260  			}
   261  		}
   262  	}
   263  	id = bytes.ToLower(id)
   264  	if bytes.Index(id, expressionBytes) != -1 || bytes.Index(id, mozBindingBytes) != -1 {
   265  		return filterFailsafe
   266  	}
   267  	return string(b)
   268  }