github.com/hlts2/go@v0.0.0-20170904000733-812b34efaed8/src/html/template/transition.go

github.com/hlts2/go@v0.0.0-20170904000733-812b34efaed8/src/html/template/transition.go (about)

     1  // Copyright 2011 The Go Authors. All rights reserved.
     2  // Use of this source code is governed by a BSD-style
     3  // license that can be found in the LICENSE file.
     4  
     5  package template
     6  
     7  import (
     8  	"bytes"
     9  	"strings"
    10  )
    11  
    12  // transitionFunc is the array of context transition functions for text nodes.
    13  // A transition function takes a context and template text input, and returns
    14  // the updated context and the number of bytes consumed from the front of the
    15  // input.
    16  var transitionFunc = [...]func(context, []byte) (context, int){
    17  	stateText:        tText,
    18  	stateTag:         tTag,
    19  	stateAttrName:    tAttrName,
    20  	stateAfterName:   tAfterName,
    21  	stateBeforeValue: tBeforeValue,
    22  	stateHTMLCmt:     tHTMLCmt,
    23  	stateRCDATA:      tSpecialTagEnd,
    24  	stateAttr:        tAttr,
    25  	stateURL:         tURL,
    26  	stateJS:          tJS,
    27  	stateJSDqStr:     tJSDelimited,
    28  	stateJSSqStr:     tJSDelimited,
    29  	stateJSRegexp:    tJSDelimited,
    30  	stateJSBlockCmt:  tBlockCmt,
    31  	stateJSLineCmt:   tLineCmt,
    32  	stateCSS:         tCSS,
    33  	stateCSSDqStr:    tCSSStr,
    34  	stateCSSSqStr:    tCSSStr,
    35  	stateCSSDqURL:    tCSSStr,
    36  	stateCSSSqURL:    tCSSStr,
    37  	stateCSSURL:      tCSSStr,
    38  	stateCSSBlockCmt: tBlockCmt,
    39  	stateCSSLineCmt:  tLineCmt,
    40  	stateError:       tError,
    41  }
    42  
    43  var commentStart = []byte("<!--")
    44  var commentEnd = []byte("-->")
    45  
    46  // tText is the context transition function for the text state.
    47  func tText(c context, s []byte) (context, int) {
    48  	k := 0
    49  	for {
    50  		i := k + bytes.IndexByte(s[k:], '<')
    51  		if i < k || i+1 == len(s) {
    52  			return c, len(s)
    53  		} else if i+4 <= len(s) && bytes.Equal(commentStart, s[i:i+4]) {
    54  			return context{state: stateHTMLCmt}, i + 4
    55  		}
    56  		i++
    57  		end := false
    58  		if s[i] == '/' {
    59  			if i+1 == len(s) {
    60  				return c, len(s)
    61  			}
    62  			end, i = true, i+1
    63  		}
    64  		j, e := eatTagName(s, i)
    65  		if j != i {
    66  			if end {
    67  				e = elementNone
    68  			}
    69  			// We've found an HTML tag.
    70  			return context{state: stateTag, element: e}, j
    71  		}
    72  		k = j
    73  	}
    74  }
    75  
    76  var elementContentType = [...]state{
    77  	elementNone:     stateText,
    78  	elementScript:   stateJS,
    79  	elementStyle:    stateCSS,
    80  	elementTextarea: stateRCDATA,
    81  	elementTitle:    stateRCDATA,
    82  }
    83  
    84  // tTag is the context transition function for the tag state.
    85  func tTag(c context, s []byte) (context, int) {
    86  	// Find the attribute name.
    87  	i := eatWhiteSpace(s, 0)
    88  	if i == len(s) {
    89  		return c, len(s)
    90  	}
    91  	if s[i] == '>' {
    92  		return context{
    93  			state:   elementContentType[c.element],
    94  			element: c.element,
    95  		}, i + 1
    96  	}
    97  	j, err := eatAttrName(s, i)
    98  	if err != nil {
    99  		return context{state: stateError, err: err}, len(s)
   100  	}
   101  	state, attr := stateTag, attrNone
   102  	if i == j {
   103  		return context{
   104  			state: stateError,
   105  			err:   errorf(ErrBadHTML, nil, 0, "expected space, attr name, or end of tag, but got %q", s[i:]),
   106  		}, len(s)
   107  	}
   108  
   109  	attrName := strings.ToLower(string(s[i:j]))
   110  	if c.element == elementScript && attrName == "type" {
   111  		attr = attrScriptType
   112  	} else {
   113  		switch attrType(attrName) {
   114  		case contentTypeURL:
   115  			attr = attrURL
   116  		case contentTypeCSS:
   117  			attr = attrStyle
   118  		case contentTypeJS:
   119  			attr = attrScript
   120  		}
   121  	}
   122  
   123  	if j == len(s) {
   124  		state = stateAttrName
   125  	} else {
   126  		state = stateAfterName
   127  	}
   128  	return context{state: state, element: c.element, attr: attr}, j
   129  }
   130  
   131  // tAttrName is the context transition function for stateAttrName.
   132  func tAttrName(c context, s []byte) (context, int) {
   133  	i, err := eatAttrName(s, 0)
   134  	if err != nil {
   135  		return context{state: stateError, err: err}, len(s)
   136  	} else if i != len(s) {
   137  		c.state = stateAfterName
   138  	}
   139  	return c, i
   140  }
   141  
   142  // tAfterName is the context transition function for stateAfterName.
   143  func tAfterName(c context, s []byte) (context, int) {
   144  	// Look for the start of the value.
   145  	i := eatWhiteSpace(s, 0)
   146  	if i == len(s) {
   147  		return c, len(s)
   148  	} else if s[i] != '=' {
   149  		// Occurs due to tag ending '>', and valueless attribute.
   150  		c.state = stateTag
   151  		return c, i
   152  	}
   153  	c.state = stateBeforeValue
   154  	// Consume the "=".
   155  	return c, i + 1
   156  }
   157  
   158  var attrStartStates = [...]state{
   159  	attrNone:       stateAttr,
   160  	attrScript:     stateJS,
   161  	attrScriptType: stateAttr,
   162  	attrStyle:      stateCSS,
   163  	attrURL:        stateURL,
   164  }
   165  
   166  // tBeforeValue is the context transition function for stateBeforeValue.
   167  func tBeforeValue(c context, s []byte) (context, int) {
   168  	i := eatWhiteSpace(s, 0)
   169  	if i == len(s) {
   170  		return c, len(s)
   171  	}
   172  	// Find the attribute delimiter.
   173  	delim := delimSpaceOrTagEnd
   174  	switch s[i] {
   175  	case '\'':
   176  		delim, i = delimSingleQuote, i+1
   177  	case '"':
   178  		delim, i = delimDoubleQuote, i+1
   179  	}
   180  	c.state, c.delim = attrStartStates[c.attr], delim
   181  	return c, i
   182  }
   183  
   184  // tHTMLCmt is the context transition function for stateHTMLCmt.
   185  func tHTMLCmt(c context, s []byte) (context, int) {
   186  	if i := bytes.Index(s, commentEnd); i != -1 {
   187  		return context{}, i + 3
   188  	}
   189  	return c, len(s)
   190  }
   191  
   192  // specialTagEndMarkers maps element types to the character sequence that
   193  // case-insensitively signals the end of the special tag body.
   194  var specialTagEndMarkers = [...][]byte{
   195  	elementScript:   []byte("script"),
   196  	elementStyle:    []byte("style"),
   197  	elementTextarea: []byte("textarea"),
   198  	elementTitle:    []byte("title"),
   199  }
   200  
   201  var (
   202  	specialTagEndPrefix = []byte("</")
   203  	tagEndSeparators    = []byte("> \t\n\f/")
   204  )
   205  
   206  // tSpecialTagEnd is the context transition function for raw text and RCDATA
   207  // element states.
   208  func tSpecialTagEnd(c context, s []byte) (context, int) {
   209  	if c.element != elementNone {
   210  		if i := indexTagEnd(s, specialTagEndMarkers[c.element]); i != -1 {
   211  			return context{}, i
   212  		}
   213  	}
   214  	return c, len(s)
   215  }
   216  
   217  // indexTagEnd finds the index of a special tag end in a case insensitive way, or returns -1
   218  func indexTagEnd(s []byte, tag []byte) int {
   219  	res := 0
   220  	plen := len(specialTagEndPrefix)
   221  	for len(s) > 0 {
   222  		// Try to find the tag end prefix first
   223  		i := bytes.Index(s, specialTagEndPrefix)
   224  		if i == -1 {
   225  			return i
   226  		}
   227  		s = s[i+plen:]
   228  		// Try to match the actual tag if there is still space for it
   229  		if len(tag) <= len(s) && bytes.EqualFold(tag, s[:len(tag)]) {
   230  			s = s[len(tag):]
   231  			// Check the tag is followed by a proper separator
   232  			if len(s) > 0 && bytes.IndexByte(tagEndSeparators, s[0]) != -1 {
   233  				return res + i
   234  			}
   235  			res += len(tag)
   236  		}
   237  		res += i + plen
   238  	}
   239  	return -1
   240  }
   241  
   242  // tAttr is the context transition function for the attribute state.
   243  func tAttr(c context, s []byte) (context, int) {
   244  	return c, len(s)
   245  }
   246  
   247  // tURL is the context transition function for the URL state.
   248  func tURL(c context, s []byte) (context, int) {
   249  	if bytes.ContainsAny(s, "#?") {
   250  		c.urlPart = urlPartQueryOrFrag
   251  	} else if len(s) != eatWhiteSpace(s, 0) && c.urlPart == urlPartNone {
   252  		// HTML5 uses "Valid URL potentially surrounded by spaces" for
   253  		// attrs: http://www.w3.org/TR/html5/index.html#attributes-1
   254  		c.urlPart = urlPartPreQuery
   255  	}
   256  	return c, len(s)
   257  }
   258  
   259  // tJS is the context transition function for the JS state.
   260  func tJS(c context, s []byte) (context, int) {
   261  	i := bytes.IndexAny(s, `"'/`)
   262  	if i == -1 {
   263  		// Entire input is non string, comment, regexp tokens.
   264  		c.jsCtx = nextJSCtx(s, c.jsCtx)
   265  		return c, len(s)
   266  	}
   267  	c.jsCtx = nextJSCtx(s[:i], c.jsCtx)
   268  	switch s[i] {
   269  	case '"':
   270  		c.state, c.jsCtx = stateJSDqStr, jsCtxRegexp
   271  	case '\'':
   272  		c.state, c.jsCtx = stateJSSqStr, jsCtxRegexp
   273  	case '/':
   274  		switch {
   275  		case i+1 < len(s) && s[i+1] == '/':
   276  			c.state, i = stateJSLineCmt, i+1
   277  		case i+1 < len(s) && s[i+1] == '*':
   278  			c.state, i = stateJSBlockCmt, i+1
   279  		case c.jsCtx == jsCtxRegexp:
   280  			c.state = stateJSRegexp
   281  		case c.jsCtx == jsCtxDivOp:
   282  			c.jsCtx = jsCtxRegexp
   283  		default:
   284  			return context{
   285  				state: stateError,
   286  				err:   errorf(ErrSlashAmbig, nil, 0, "'/' could start a division or regexp: %.32q", s[i:]),
   287  			}, len(s)
   288  		}
   289  	default:
   290  		panic("unreachable")
   291  	}
   292  	return c, i + 1
   293  }
   294  
   295  // tJSDelimited is the context transition function for the JS string and regexp
   296  // states.
   297  func tJSDelimited(c context, s []byte) (context, int) {
   298  	specials := `\"`
   299  	switch c.state {
   300  	case stateJSSqStr:
   301  		specials = `\'`
   302  	case stateJSRegexp:
   303  		specials = `\/[]`
   304  	}
   305  
   306  	k, inCharset := 0, false
   307  	for {
   308  		i := k + bytes.IndexAny(s[k:], specials)
   309  		if i < k {
   310  			break
   311  		}
   312  		switch s[i] {
   313  		case '\\':
   314  			i++
   315  			if i == len(s) {
   316  				return context{
   317  					state: stateError,
   318  					err:   errorf(ErrPartialEscape, nil, 0, "unfinished escape sequence in JS string: %q", s),
   319  				}, len(s)
   320  			}
   321  		case '[':
   322  			inCharset = true
   323  		case ']':
   324  			inCharset = false
   325  		default:
   326  			// end delimiter
   327  			if !inCharset {
   328  				c.state, c.jsCtx = stateJS, jsCtxDivOp
   329  				return c, i + 1
   330  			}
   331  		}
   332  		k = i + 1
   333  	}
   334  
   335  	if inCharset {
   336  		// This can be fixed by making context richer if interpolation
   337  		// into charsets is desired.
   338  		return context{
   339  			state: stateError,
   340  			err:   errorf(ErrPartialCharset, nil, 0, "unfinished JS regexp charset: %q", s),
   341  		}, len(s)
   342  	}
   343  
   344  	return c, len(s)
   345  }
   346  
   347  var blockCommentEnd = []byte("*/")
   348  
   349  // tBlockCmt is the context transition function for /*comment*/ states.
   350  func tBlockCmt(c context, s []byte) (context, int) {
   351  	i := bytes.Index(s, blockCommentEnd)
   352  	if i == -1 {
   353  		return c, len(s)
   354  	}
   355  	switch c.state {
   356  	case stateJSBlockCmt:
   357  		c.state = stateJS
   358  	case stateCSSBlockCmt:
   359  		c.state = stateCSS
   360  	default:
   361  		panic(c.state.String())
   362  	}
   363  	return c, i + 2
   364  }
   365  
   366  // tLineCmt is the context transition function for //comment states.
   367  func tLineCmt(c context, s []byte) (context, int) {
   368  	var lineTerminators string
   369  	var endState state
   370  	switch c.state {
   371  	case stateJSLineCmt:
   372  		lineTerminators, endState = "\n\r\u2028\u2029", stateJS
   373  	case stateCSSLineCmt:
   374  		lineTerminators, endState = "\n\f\r", stateCSS
   375  		// Line comments are not part of any published CSS standard but
   376  		// are supported by the 4 major browsers.
   377  		// This defines line comments as
   378  		//     LINECOMMENT ::= "//" [^\n\f\d]*
   379  		// since http://www.w3.org/TR/css3-syntax/#SUBTOK-nl defines
   380  		// newlines:
   381  		//     nl ::= #xA | #xD #xA | #xD | #xC
   382  	default:
   383  		panic(c.state.String())
   384  	}
   385  
   386  	i := bytes.IndexAny(s, lineTerminators)
   387  	if i == -1 {
   388  		return c, len(s)
   389  	}
   390  	c.state = endState
   391  	// Per section 7.4 of EcmaScript 5 : http://es5.github.com/#x7.4
   392  	// "However, the LineTerminator at the end of the line is not
   393  	// considered to be part of the single-line comment; it is
   394  	// recognized separately by the lexical grammar and becomes part
   395  	// of the stream of input elements for the syntactic grammar."
   396  	return c, i
   397  }
   398  
   399  // tCSS is the context transition function for the CSS state.
   400  func tCSS(c context, s []byte) (context, int) {
   401  	// CSS quoted strings are almost never used except for:
   402  	// (1) URLs as in background: "/foo.png"
   403  	// (2) Multiword font-names as in font-family: "Times New Roman"
   404  	// (3) List separators in content values as in inline-lists:
   405  	//    <style>
   406  	//    ul.inlineList { list-style: none; padding:0 }
   407  	//    ul.inlineList > li { display: inline }
   408  	//    ul.inlineList > li:before { content: ", " }
   409  	//    ul.inlineList > li:first-child:before { content: "" }
   410  	//    </style>
   411  	//    <ul class=inlineList><li>One<li>Two<li>Three</ul>
   412  	// (4) Attribute value selectors as in a[href="http://example.com/"]
   413  	//
   414  	// We conservatively treat all strings as URLs, but make some
   415  	// allowances to avoid confusion.
   416  	//
   417  	// In (1), our conservative assumption is justified.
   418  	// In (2), valid font names do not contain ':', '?', or '#', so our
   419  	// conservative assumption is fine since we will never transition past
   420  	// urlPartPreQuery.
   421  	// In (3), our protocol heuristic should not be tripped, and there
   422  	// should not be non-space content after a '?' or '#', so as long as
   423  	// we only %-encode RFC 3986 reserved characters we are ok.
   424  	// In (4), we should URL escape for URL attributes, and for others we
   425  	// have the attribute name available if our conservative assumption
   426  	// proves problematic for real code.
   427  
   428  	k := 0
   429  	for {
   430  		i := k + bytes.IndexAny(s[k:], `("'/`)
   431  		if i < k {
   432  			return c, len(s)
   433  		}
   434  		switch s[i] {
   435  		case '(':
   436  			// Look for url to the left.
   437  			p := bytes.TrimRight(s[:i], "\t\n\f\r ")
   438  			if endsWithCSSKeyword(p, "url") {
   439  				j := len(s) - len(bytes.TrimLeft(s[i+1:], "\t\n\f\r "))
   440  				switch {
   441  				case j != len(s) && s[j] == '"':
   442  					c.state, j = stateCSSDqURL, j+1
   443  				case j != len(s) && s[j] == '\'':
   444  					c.state, j = stateCSSSqURL, j+1
   445  				default:
   446  					c.state = stateCSSURL
   447  				}
   448  				return c, j
   449  			}
   450  		case '/':
   451  			if i+1 < len(s) {
   452  				switch s[i+1] {
   453  				case '/':
   454  					c.state = stateCSSLineCmt
   455  					return c, i + 2
   456  				case '*':
   457  					c.state = stateCSSBlockCmt
   458  					return c, i + 2
   459  				}
   460  			}
   461  		case '"':
   462  			c.state = stateCSSDqStr
   463  			return c, i + 1
   464  		case '\'':
   465  			c.state = stateCSSSqStr
   466  			return c, i + 1
   467  		}
   468  		k = i + 1
   469  	}
   470  }
   471  
   472  // tCSSStr is the context transition function for the CSS string and URL states.
   473  func tCSSStr(c context, s []byte) (context, int) {
   474  	var endAndEsc string
   475  	switch c.state {
   476  	case stateCSSDqStr, stateCSSDqURL:
   477  		endAndEsc = `\"`
   478  	case stateCSSSqStr, stateCSSSqURL:
   479  		endAndEsc = `\'`
   480  	case stateCSSURL:
   481  		// Unquoted URLs end with a newline or close parenthesis.
   482  		// The below includes the wc (whitespace character) and nl.
   483  		endAndEsc = "\\\t\n\f\r )"
   484  	default:
   485  		panic(c.state.String())
   486  	}
   487  
   488  	k := 0
   489  	for {
   490  		i := k + bytes.IndexAny(s[k:], endAndEsc)
   491  		if i < k {
   492  			c, nread := tURL(c, decodeCSS(s[k:]))
   493  			return c, k + nread
   494  		}
   495  		if s[i] == '\\' {
   496  			i++
   497  			if i == len(s) {
   498  				return context{
   499  					state: stateError,
   500  					err:   errorf(ErrPartialEscape, nil, 0, "unfinished escape sequence in CSS string: %q", s),
   501  				}, len(s)
   502  			}
   503  		} else {
   504  			c.state = stateCSS
   505  			return c, i + 1
   506  		}
   507  		c, _ = tURL(c, decodeCSS(s[:i+1]))
   508  		k = i + 1
   509  	}
   510  }
   511  
   512  // tError is the context transition function for the error state.
   513  func tError(c context, s []byte) (context, int) {
   514  	return c, len(s)
   515  }
   516  
   517  // eatAttrName returns the largest j such that s[i:j] is an attribute name.
   518  // It returns an error if s[i:] does not look like it begins with an
   519  // attribute name, such as encountering a quote mark without a preceding
   520  // equals sign.
   521  func eatAttrName(s []byte, i int) (int, *Error) {
   522  	for j := i; j < len(s); j++ {
   523  		switch s[j] {
   524  		case ' ', '\t', '\n', '\f', '\r', '=', '>':
   525  			return j, nil
   526  		case '\'', '"', '<':
   527  			// These result in a parse warning in HTML5 and are
   528  			// indicative of serious problems if seen in an attr
   529  			// name in a template.
   530  			return -1, errorf(ErrBadHTML, nil, 0, "%q in attribute name: %.32q", s[j:j+1], s)
   531  		default:
   532  			// No-op.
   533  		}
   534  	}
   535  	return len(s), nil
   536  }
   537  
   538  var elementNameMap = map[string]element{
   539  	"script":   elementScript,
   540  	"style":    elementStyle,
   541  	"textarea": elementTextarea,
   542  	"title":    elementTitle,
   543  }
   544  
   545  // asciiAlpha reports whether c is an ASCII letter.
   546  func asciiAlpha(c byte) bool {
   547  	return 'A' <= c && c <= 'Z' || 'a' <= c && c <= 'z'
   548  }
   549  
   550  // asciiAlphaNum reports whether c is an ASCII letter or digit.
   551  func asciiAlphaNum(c byte) bool {
   552  	return asciiAlpha(c) || '0' <= c && c <= '9'
   553  }
   554  
   555  // eatTagName returns the largest j such that s[i:j] is a tag name and the tag type.
   556  func eatTagName(s []byte, i int) (int, element) {
   557  	if i == len(s) || !asciiAlpha(s[i]) {
   558  		return i, elementNone
   559  	}
   560  	j := i + 1
   561  	for j < len(s) {
   562  		x := s[j]
   563  		if asciiAlphaNum(x) {
   564  			j++
   565  			continue
   566  		}
   567  		// Allow "x-y" or "x:y" but not "x-", "-y", or "x--y".
   568  		if (x == ':' || x == '-') && j+1 < len(s) && asciiAlphaNum(s[j+1]) {
   569  			j += 2
   570  			continue
   571  		}
   572  		break
   573  	}
   574  	return j, elementNameMap[strings.ToLower(string(s[i:j]))]
   575  }
   576  
   577  // eatWhiteSpace returns the largest j such that s[i:j] is white space.
   578  func eatWhiteSpace(s []byte, i int) int {
   579  	for j := i; j < len(s); j++ {
   580  		switch s[j] {
   581  		case ' ', '\t', '\n', '\f', '\r':
   582  			// No-op.
   583  		default:
   584  			return j
   585  		}
   586  	}
   587  	return len(s)
   588  }