github.com/huandu/go@v0.0.0-20151114150818-04e615e41150/src/html/template/html.go

github.com/huandu/go@v0.0.0-20151114150818-04e615e41150/src/html/template/html.go (about)

     1  // Copyright 2011 The Go Authors. All rights reserved.
     2  // Use of this source code is governed by a BSD-style
     3  // license that can be found in the LICENSE file.
     4  
     5  package template
     6  
     7  import (
     8  	"bytes"
     9  	"fmt"
    10  	"strings"
    11  	"unicode/utf8"
    12  )
    13  
    14  // htmlNospaceEscaper escapes for inclusion in unquoted attribute values.
    15  func htmlNospaceEscaper(args ...interface{}) string {
    16  	s, t := stringify(args...)
    17  	if t == contentTypeHTML {
    18  		return htmlReplacer(stripTags(s), htmlNospaceNormReplacementTable, false)
    19  	}
    20  	return htmlReplacer(s, htmlNospaceReplacementTable, false)
    21  }
    22  
    23  // attrEscaper escapes for inclusion in quoted attribute values.
    24  func attrEscaper(args ...interface{}) string {
    25  	s, t := stringify(args...)
    26  	if t == contentTypeHTML {
    27  		return htmlReplacer(stripTags(s), htmlNormReplacementTable, true)
    28  	}
    29  	return htmlReplacer(s, htmlReplacementTable, true)
    30  }
    31  
    32  // rcdataEscaper escapes for inclusion in an RCDATA element body.
    33  func rcdataEscaper(args ...interface{}) string {
    34  	s, t := stringify(args...)
    35  	if t == contentTypeHTML {
    36  		return htmlReplacer(s, htmlNormReplacementTable, true)
    37  	}
    38  	return htmlReplacer(s, htmlReplacementTable, true)
    39  }
    40  
    41  // htmlEscaper escapes for inclusion in HTML text.
    42  func htmlEscaper(args ...interface{}) string {
    43  	s, t := stringify(args...)
    44  	if t == contentTypeHTML {
    45  		return s
    46  	}
    47  	return htmlReplacer(s, htmlReplacementTable, true)
    48  }
    49  
    50  // htmlReplacementTable contains the runes that need to be escaped
    51  // inside a quoted attribute value or in a text node.
    52  var htmlReplacementTable = []string{
    53  	// http://www.w3.org/TR/html5/syntax.html#attribute-value-(unquoted)-state
    54  	// U+0000 NULL Parse error. Append a U+FFFD REPLACEMENT
    55  	// CHARACTER character to the current attribute's value.
    56  	// "
    57  	// and similarly
    58  	// http://www.w3.org/TR/html5/syntax.html#before-attribute-value-state
    59  	0:    "\uFFFD",
    60  	'"':  "&#34;",
    61  	'&':  "&amp;",
    62  	'\'': "&#39;",
    63  	'+':  "&#43;",
    64  	'<':  "&lt;",
    65  	'>':  "&gt;",
    66  }
    67  
    68  // htmlNormReplacementTable is like htmlReplacementTable but without '&' to
    69  // avoid over-encoding existing entities.
    70  var htmlNormReplacementTable = []string{
    71  	0:    "\uFFFD",
    72  	'"':  "&#34;",
    73  	'\'': "&#39;",
    74  	'+':  "&#43;",
    75  	'<':  "&lt;",
    76  	'>':  "&gt;",
    77  }
    78  
    79  // htmlNospaceReplacementTable contains the runes that need to be escaped
    80  // inside an unquoted attribute value.
    81  // The set of runes escaped is the union of the HTML specials and
    82  // those determined by running the JS below in browsers:
    83  // <div id=d></div>
    84  // <script>(function () {
    85  // var a = [], d = document.getElementById("d"), i, c, s;
    86  // for (i = 0; i < 0x10000; ++i) {
    87  //   c = String.fromCharCode(i);
    88  //   d.innerHTML = "<span title=" + c + "lt" + c + "></span>"
    89  //   s = d.getElementsByTagName("SPAN")[0];
    90  //   if (!s || s.title !== c + "lt" + c) { a.push(i.toString(16)); }
    91  // }
    92  // document.write(a.join(", "));
    93  // })()</script>
    94  var htmlNospaceReplacementTable = []string{
    95  	0:    "&#xfffd;",
    96  	'\t': "&#9;",
    97  	'\n': "&#10;",
    98  	'\v': "&#11;",
    99  	'\f': "&#12;",
   100  	'\r': "&#13;",
   101  	' ':  "&#32;",
   102  	'"':  "&#34;",
   103  	'&':  "&amp;",
   104  	'\'': "&#39;",
   105  	'+':  "&#43;",
   106  	'<':  "&lt;",
   107  	'=':  "&#61;",
   108  	'>':  "&gt;",
   109  	// A parse error in the attribute value (unquoted) and
   110  	// before attribute value states.
   111  	// Treated as a quoting character by IE.
   112  	'`': "&#96;",
   113  }
   114  
   115  // htmlNospaceNormReplacementTable is like htmlNospaceReplacementTable but
   116  // without '&' to avoid over-encoding existing entities.
   117  var htmlNospaceNormReplacementTable = []string{
   118  	0:    "&#xfffd;",
   119  	'\t': "&#9;",
   120  	'\n': "&#10;",
   121  	'\v': "&#11;",
   122  	'\f': "&#12;",
   123  	'\r': "&#13;",
   124  	' ':  "&#32;",
   125  	'"':  "&#34;",
   126  	'\'': "&#39;",
   127  	'+':  "&#43;",
   128  	'<':  "&lt;",
   129  	'=':  "&#61;",
   130  	'>':  "&gt;",
   131  	// A parse error in the attribute value (unquoted) and
   132  	// before attribute value states.
   133  	// Treated as a quoting character by IE.
   134  	'`': "&#96;",
   135  }
   136  
   137  // htmlReplacer returns s with runes replaced according to replacementTable
   138  // and when badRunes is true, certain bad runes are allowed through unescaped.
   139  func htmlReplacer(s string, replacementTable []string, badRunes bool) string {
   140  	written, b := 0, new(bytes.Buffer)
   141  	r, w := rune(0), 0
   142  	for i := 0; i < len(s); i += w {
   143  		// Cannot use 'for range s' because we need to preserve the width
   144  		// of the runes in the input. If we see a decoding error, the input
   145  		// width will not be utf8.Runelen(r) and we will overrun the buffer.
   146  		r, w = utf8.DecodeRuneInString(s[i:])
   147  		if int(r) < len(replacementTable) {
   148  			if repl := replacementTable[r]; len(repl) != 0 {
   149  				b.WriteString(s[written:i])
   150  				b.WriteString(repl)
   151  				written = i + w
   152  			}
   153  		} else if badRunes {
   154  			// No-op.
   155  			// IE does not allow these ranges in unquoted attrs.
   156  		} else if 0xfdd0 <= r && r <= 0xfdef || 0xfff0 <= r && r <= 0xffff {
   157  			fmt.Fprintf(b, "%s&#x%x;", s[written:i], r)
   158  			written = i + w
   159  		}
   160  	}
   161  	if written == 0 {
   162  		return s
   163  	}
   164  	b.WriteString(s[written:])
   165  	return b.String()
   166  }
   167  
// stripTags takes a snippet of HTML and returns only the text content.
// For example, `<b>&iexcl;Hi!</b> <script>...</script>` -> `&iexcl;Hi! `.
//
// The input is walked with the package's transition functions, starting from
// a zero context. Bytes consumed while the context is in stateText or
// stateRCDATA are copied to the output (minus the opening '<' of whatever
// ended the text run); bytes consumed in other states are dropped. If no
// transition was ever made from a non-text state, html is returned as is.
func stripTags(html string) string {
	var b bytes.Buffer
	// s is the input as bytes, c the current context, i the read offset, and
	// allText records whether only text states have been seen so far.
	s, c, i, allText := []byte(html), context{}, 0, true
	// Using the transition funcs helps us avoid mangling
	// `<div title="1>2">` or `I <3 Ponies!`.
	for i != len(s) {
		if c.delim == delimNone {
			st := c.state
			// Use RCDATA instead of parsing into JS or CSS styles.
			if c.element != elementNone && !isInTag(st) {
				st = stateRCDATA
			}
			// d becomes the next context and nread is added to the offset;
			// presumably nread is the number of bytes the transition consumed.
			d, nread := transitionFunc[st](c, s[i:])
			i1 := i + nread
			if c.state == stateText || c.state == stateRCDATA {
				// Emit text up to the start of the tag or comment.
				j := i1
				if d.state != c.state {
					// The state changed, so scan backwards over the consumed
					// bytes for the last '<' and stop the emitted text there.
					for j1 := j - 1; j1 >= i; j1-- {
						if s[j1] == '<' {
							j = j1
							break
						}
					}
				}
				b.Write(s[i:j])
			} else {
				// Markup was skipped, so the output will differ from the input.
				allText = false
			}
			c, i = d, i1
			continue
		}
		// Inside a delimited attribute value: skip ahead to its terminator.
		// bytes.IndexAny returns -1 when none is found, which makes i1 < i.
		i1 := i + bytes.IndexAny(s[i:], delimEnds[c.delim])
		if i1 < i {
			break
		}
		if c.delim != delimSpaceOrTagEnd {
			// Consume any quote.
			i1++
		}
		// Resume in the tag state, keeping only the element from the old context.
		c, i = context{state: stateTag, element: c.element}, i1
	}
	if allText {
		return html
	} else if c.state == stateText || c.state == stateRCDATA {
		// The loop exited early while in a text state; keep the remaining bytes.
		b.Write(s[i:])
	}
	return b.String()
}
   219  
   220  // htmlNameFilter accepts valid parts of an HTML attribute or tag name or
   221  // a known-safe HTML attribute.
   222  func htmlNameFilter(args ...interface{}) string {
   223  	s, t := stringify(args...)
   224  	if t == contentTypeHTMLAttr {
   225  		return s
   226  	}
   227  	if len(s) == 0 {
   228  		// Avoid violation of structure preservation.
   229  		// <input checked {{.K}}={{.V}}>.
   230  		// Without this, if .K is empty then .V is the value of
   231  		// checked, but otherwise .V is the value of the attribute
   232  		// named .K.
   233  		return filterFailsafe
   234  	}
   235  	s = strings.ToLower(s)
   236  	if t := attrType(s); t != contentTypePlain {
   237  		// TODO: Split attr and element name part filters so we can whitelist
   238  		// attributes.
   239  		return filterFailsafe
   240  	}
   241  	for _, r := range s {
   242  		switch {
   243  		case '0' <= r && r <= '9':
   244  		case 'a' <= r && r <= 'z':
   245  		default:
   246  			return filterFailsafe
   247  		}
   248  	}
   249  	return s
   250  }
   251  
   252  // commentEscaper returns the empty string regardless of input.
   253  // Comment content does not correspond to any parsed structure or
   254  // human-readable content, so the simplest and most secure policy is to drop
   255  // content interpolated into comments.
   256  // This approach is equally valid whether or not static comment content is
   257  // removed from the template.
   258  func commentEscaper(args ...interface{}) string {
   259  	return ""
   260  }