github.com/geraldss/go/src@v0.0.0-20210511222824-ac7d0ebfc235/html/template/transition.go (about)

     1  // Copyright 2011 The Go Authors. All rights reserved.
     2  // Use of this source code is governed by a BSD-style
     3  // license that can be found in the LICENSE file.
     4  
     5  package template
     6  
     7  import (
     8  	"bytes"
     9  	"strings"
    10  )
    11  
// transitionFunc is the array of context transition functions for text nodes.
// A transition function takes a context and template text input, and returns
// the updated context and the number of bytes consumed from the front of the
// input.
//
// The array is indexed by state constant. Several states share one function
// (for example both JS string states and the JS regexp state use
// tJSDelimited); those functions inspect c.state to tell the cases apart.
var transitionFunc = [...]func(context, []byte) (context, int){
	stateText:        tText,
	stateTag:         tTag,
	stateAttrName:    tAttrName,
	stateAfterName:   tAfterName,
	stateBeforeValue: tBeforeValue,
	stateHTMLCmt:     tHTMLCmt,
	stateRCDATA:      tSpecialTagEnd,
	stateAttr:        tAttr,
	stateURL:         tURL,
	stateSrcset:      tURL,
	stateJS:          tJS,
	stateJSDqStr:     tJSDelimited,
	stateJSSqStr:     tJSDelimited,
	stateJSRegexp:    tJSDelimited,
	stateJSBlockCmt:  tBlockCmt,
	stateJSLineCmt:   tLineCmt,
	stateCSS:         tCSS,
	stateCSSDqStr:    tCSSStr,
	stateCSSSqStr:    tCSSStr,
	stateCSSDqURL:    tCSSStr,
	stateCSSSqURL:    tCSSStr,
	stateCSSURL:      tCSSStr,
	stateCSSBlockCmt: tBlockCmt,
	stateCSSLineCmt:  tLineCmt,
	stateError:       tError,
}
    43  
    44  var commentStart = []byte("<!--")
    45  var commentEnd = []byte("-->")
    46  
    47  // tText is the context transition function for the text state.
    48  func tText(c context, s []byte) (context, int) {
    49  	k := 0
    50  	for {
    51  		i := k + bytes.IndexByte(s[k:], '<')
    52  		if i < k || i+1 == len(s) {
    53  			return c, len(s)
    54  		} else if i+4 <= len(s) && bytes.Equal(commentStart, s[i:i+4]) {
    55  			return context{state: stateHTMLCmt}, i + 4
    56  		}
    57  		i++
    58  		end := false
    59  		if s[i] == '/' {
    60  			if i+1 == len(s) {
    61  				return c, len(s)
    62  			}
    63  			end, i = true, i+1
    64  		}
    65  		j, e := eatTagName(s, i)
    66  		if j != i {
    67  			if end {
    68  				e = elementNone
    69  			}
    70  			// We've found an HTML tag.
    71  			return context{state: stateTag, element: e}, j
    72  		}
    73  		k = j
    74  	}
    75  }
    76  
// elementContentType maps an element type to the state that tTag enters once
// the element's tag is closed with '>'.
var elementContentType = [...]state{
	elementNone:     stateText,
	elementScript:   stateJS,
	elementStyle:    stateCSS,
	elementTextarea: stateRCDATA,
	elementTitle:    stateRCDATA,
}
    84  
    85  // tTag is the context transition function for the tag state.
    86  func tTag(c context, s []byte) (context, int) {
    87  	// Find the attribute name.
    88  	i := eatWhiteSpace(s, 0)
    89  	if i == len(s) {
    90  		return c, len(s)
    91  	}
    92  	if s[i] == '>' {
    93  		return context{
    94  			state:   elementContentType[c.element],
    95  			element: c.element,
    96  		}, i + 1
    97  	}
    98  	j, err := eatAttrName(s, i)
    99  	if err != nil {
   100  		return context{state: stateError, err: err}, len(s)
   101  	}
   102  	state, attr := stateTag, attrNone
   103  	if i == j {
   104  		return context{
   105  			state: stateError,
   106  			err:   errorf(ErrBadHTML, nil, 0, "expected space, attr name, or end of tag, but got %q", s[i:]),
   107  		}, len(s)
   108  	}
   109  
   110  	attrName := strings.ToLower(string(s[i:j]))
   111  	if c.element == elementScript && attrName == "type" {
   112  		attr = attrScriptType
   113  	} else {
   114  		switch attrType(attrName) {
   115  		case contentTypeURL:
   116  			attr = attrURL
   117  		case contentTypeCSS:
   118  			attr = attrStyle
   119  		case contentTypeJS:
   120  			attr = attrScript
   121  		case contentTypeSrcset:
   122  			attr = attrSrcset
   123  		}
   124  	}
   125  
   126  	if j == len(s) {
   127  		state = stateAttrName
   128  	} else {
   129  		state = stateAfterName
   130  	}
   131  	return context{state: state, element: c.element, attr: attr}, j
   132  }
   133  
   134  // tAttrName is the context transition function for stateAttrName.
   135  func tAttrName(c context, s []byte) (context, int) {
   136  	i, err := eatAttrName(s, 0)
   137  	if err != nil {
   138  		return context{state: stateError, err: err}, len(s)
   139  	} else if i != len(s) {
   140  		c.state = stateAfterName
   141  	}
   142  	return c, i
   143  }
   144  
   145  // tAfterName is the context transition function for stateAfterName.
   146  func tAfterName(c context, s []byte) (context, int) {
   147  	// Look for the start of the value.
   148  	i := eatWhiteSpace(s, 0)
   149  	if i == len(s) {
   150  		return c, len(s)
   151  	} else if s[i] != '=' {
   152  		// Occurs due to tag ending '>', and valueless attribute.
   153  		c.state = stateTag
   154  		return c, i
   155  	}
   156  	c.state = stateBeforeValue
   157  	// Consume the "=".
   158  	return c, i + 1
   159  }
   160  
// attrStartStates maps an attribute type to the state that tBeforeValue
// enters at the start of that attribute's value.
var attrStartStates = [...]state{
	attrNone:       stateAttr,
	attrScript:     stateJS,
	attrScriptType: stateAttr,
	attrStyle:      stateCSS,
	attrURL:        stateURL,
	attrSrcset:     stateSrcset,
}
   169  
   170  // tBeforeValue is the context transition function for stateBeforeValue.
   171  func tBeforeValue(c context, s []byte) (context, int) {
   172  	i := eatWhiteSpace(s, 0)
   173  	if i == len(s) {
   174  		return c, len(s)
   175  	}
   176  	// Find the attribute delimiter.
   177  	delim := delimSpaceOrTagEnd
   178  	switch s[i] {
   179  	case '\'':
   180  		delim, i = delimSingleQuote, i+1
   181  	case '"':
   182  		delim, i = delimDoubleQuote, i+1
   183  	}
   184  	c.state, c.delim = attrStartStates[c.attr], delim
   185  	return c, i
   186  }
   187  
   188  // tHTMLCmt is the context transition function for stateHTMLCmt.
   189  func tHTMLCmt(c context, s []byte) (context, int) {
   190  	if i := bytes.Index(s, commentEnd); i != -1 {
   191  		return context{}, i + 3
   192  	}
   193  	return c, len(s)
   194  }
   195  
// specialTagEndMarkers maps element types to the character sequence that
// case-insensitively signals the end of the special tag body.
// The markers are stored in lower case without the leading "</";
// indexTagEnd matches the prefix separately and compares the name with
// bytes.EqualFold.
var specialTagEndMarkers = [...][]byte{
	elementScript:   []byte("script"),
	elementStyle:    []byte("style"),
	elementTextarea: []byte("textarea"),
	elementTitle:    []byte("title"),
}
   204  
   205  var (
   206  	specialTagEndPrefix = []byte("</")
   207  	tagEndSeparators    = []byte("> \t\n\f/")
   208  )
   209  
   210  // tSpecialTagEnd is the context transition function for raw text and RCDATA
   211  // element states.
   212  func tSpecialTagEnd(c context, s []byte) (context, int) {
   213  	if c.element != elementNone {
   214  		if i := indexTagEnd(s, specialTagEndMarkers[c.element]); i != -1 {
   215  			return context{}, i
   216  		}
   217  	}
   218  	return c, len(s)
   219  }
   220  
   221  // indexTagEnd finds the index of a special tag end in a case insensitive way, or returns -1
   222  func indexTagEnd(s []byte, tag []byte) int {
   223  	res := 0
   224  	plen := len(specialTagEndPrefix)
   225  	for len(s) > 0 {
   226  		// Try to find the tag end prefix first
   227  		i := bytes.Index(s, specialTagEndPrefix)
   228  		if i == -1 {
   229  			return i
   230  		}
   231  		s = s[i+plen:]
   232  		// Try to match the actual tag if there is still space for it
   233  		if len(tag) <= len(s) && bytes.EqualFold(tag, s[:len(tag)]) {
   234  			s = s[len(tag):]
   235  			// Check the tag is followed by a proper separator
   236  			if len(s) > 0 && bytes.IndexByte(tagEndSeparators, s[0]) != -1 {
   237  				return res + i
   238  			}
   239  			res += len(tag)
   240  		}
   241  		res += i + plen
   242  	}
   243  	return -1
   244  }
   245  
   246  // tAttr is the context transition function for the attribute state.
   247  func tAttr(c context, s []byte) (context, int) {
   248  	return c, len(s)
   249  }
   250  
   251  // tURL is the context transition function for the URL state.
   252  func tURL(c context, s []byte) (context, int) {
   253  	if bytes.ContainsAny(s, "#?") {
   254  		c.urlPart = urlPartQueryOrFrag
   255  	} else if len(s) != eatWhiteSpace(s, 0) && c.urlPart == urlPartNone {
   256  		// HTML5 uses "Valid URL potentially surrounded by spaces" for
   257  		// attrs: https://www.w3.org/TR/html5/index.html#attributes-1
   258  		c.urlPart = urlPartPreQuery
   259  	}
   260  	return c, len(s)
   261  }
   262  
   263  // tJS is the context transition function for the JS state.
   264  func tJS(c context, s []byte) (context, int) {
   265  	i := bytes.IndexAny(s, `"'/`)
   266  	if i == -1 {
   267  		// Entire input is non string, comment, regexp tokens.
   268  		c.jsCtx = nextJSCtx(s, c.jsCtx)
   269  		return c, len(s)
   270  	}
   271  	c.jsCtx = nextJSCtx(s[:i], c.jsCtx)
   272  	switch s[i] {
   273  	case '"':
   274  		c.state, c.jsCtx = stateJSDqStr, jsCtxRegexp
   275  	case '\'':
   276  		c.state, c.jsCtx = stateJSSqStr, jsCtxRegexp
   277  	case '/':
   278  		switch {
   279  		case i+1 < len(s) && s[i+1] == '/':
   280  			c.state, i = stateJSLineCmt, i+1
   281  		case i+1 < len(s) && s[i+1] == '*':
   282  			c.state, i = stateJSBlockCmt, i+1
   283  		case c.jsCtx == jsCtxRegexp:
   284  			c.state = stateJSRegexp
   285  		case c.jsCtx == jsCtxDivOp:
   286  			c.jsCtx = jsCtxRegexp
   287  		default:
   288  			return context{
   289  				state: stateError,
   290  				err:   errorf(ErrSlashAmbig, nil, 0, "'/' could start a division or regexp: %.32q", s[i:]),
   291  			}, len(s)
   292  		}
   293  	default:
   294  		panic("unreachable")
   295  	}
   296  	return c, i + 1
   297  }
   298  
   299  // tJSDelimited is the context transition function for the JS string and regexp
   300  // states.
   301  func tJSDelimited(c context, s []byte) (context, int) {
   302  	specials := `\"`
   303  	switch c.state {
   304  	case stateJSSqStr:
   305  		specials = `\'`
   306  	case stateJSRegexp:
   307  		specials = `\/[]`
   308  	}
   309  
   310  	k, inCharset := 0, false
   311  	for {
   312  		i := k + bytes.IndexAny(s[k:], specials)
   313  		if i < k {
   314  			break
   315  		}
   316  		switch s[i] {
   317  		case '\\':
   318  			i++
   319  			if i == len(s) {
   320  				return context{
   321  					state: stateError,
   322  					err:   errorf(ErrPartialEscape, nil, 0, "unfinished escape sequence in JS string: %q", s),
   323  				}, len(s)
   324  			}
   325  		case '[':
   326  			inCharset = true
   327  		case ']':
   328  			inCharset = false
   329  		default:
   330  			// end delimiter
   331  			if !inCharset {
   332  				c.state, c.jsCtx = stateJS, jsCtxDivOp
   333  				return c, i + 1
   334  			}
   335  		}
   336  		k = i + 1
   337  	}
   338  
   339  	if inCharset {
   340  		// This can be fixed by making context richer if interpolation
   341  		// into charsets is desired.
   342  		return context{
   343  			state: stateError,
   344  			err:   errorf(ErrPartialCharset, nil, 0, "unfinished JS regexp charset: %q", s),
   345  		}, len(s)
   346  	}
   347  
   348  	return c, len(s)
   349  }
   350  
   351  var blockCommentEnd = []byte("*/")
   352  
   353  // tBlockCmt is the context transition function for /*comment*/ states.
   354  func tBlockCmt(c context, s []byte) (context, int) {
   355  	i := bytes.Index(s, blockCommentEnd)
   356  	if i == -1 {
   357  		return c, len(s)
   358  	}
   359  	switch c.state {
   360  	case stateJSBlockCmt:
   361  		c.state = stateJS
   362  	case stateCSSBlockCmt:
   363  		c.state = stateCSS
   364  	default:
   365  		panic(c.state.String())
   366  	}
   367  	return c, i + 2
   368  }
   369  
   370  // tLineCmt is the context transition function for //comment states.
   371  func tLineCmt(c context, s []byte) (context, int) {
   372  	var lineTerminators string
   373  	var endState state
   374  	switch c.state {
   375  	case stateJSLineCmt:
   376  		lineTerminators, endState = "\n\r\u2028\u2029", stateJS
   377  	case stateCSSLineCmt:
   378  		lineTerminators, endState = "\n\f\r", stateCSS
   379  		// Line comments are not part of any published CSS standard but
   380  		// are supported by the 4 major browsers.
   381  		// This defines line comments as
   382  		//     LINECOMMENT ::= "//" [^\n\f\d]*
   383  		// since https://www.w3.org/TR/css3-syntax/#SUBTOK-nl defines
   384  		// newlines:
   385  		//     nl ::= #xA | #xD #xA | #xD | #xC
   386  	default:
   387  		panic(c.state.String())
   388  	}
   389  
   390  	i := bytes.IndexAny(s, lineTerminators)
   391  	if i == -1 {
   392  		return c, len(s)
   393  	}
   394  	c.state = endState
   395  	// Per section 7.4 of EcmaScript 5 : https://es5.github.com/#x7.4
   396  	// "However, the LineTerminator at the end of the line is not
   397  	// considered to be part of the single-line comment; it is
   398  	// recognized separately by the lexical grammar and becomes part
   399  	// of the stream of input elements for the syntactic grammar."
   400  	return c, i
   401  }
   402  
   403  // tCSS is the context transition function for the CSS state.
   404  func tCSS(c context, s []byte) (context, int) {
   405  	// CSS quoted strings are almost never used except for:
   406  	// (1) URLs as in background: "/foo.png"
   407  	// (2) Multiword font-names as in font-family: "Times New Roman"
   408  	// (3) List separators in content values as in inline-lists:
   409  	//    <style>
   410  	//    ul.inlineList { list-style: none; padding:0 }
   411  	//    ul.inlineList > li { display: inline }
   412  	//    ul.inlineList > li:before { content: ", " }
   413  	//    ul.inlineList > li:first-child:before { content: "" }
   414  	//    </style>
   415  	//    <ul class=inlineList><li>One<li>Two<li>Three</ul>
   416  	// (4) Attribute value selectors as in a[href="http://example.com/"]
   417  	//
   418  	// We conservatively treat all strings as URLs, but make some
   419  	// allowances to avoid confusion.
   420  	//
   421  	// In (1), our conservative assumption is justified.
   422  	// In (2), valid font names do not contain ':', '?', or '#', so our
   423  	// conservative assumption is fine since we will never transition past
   424  	// urlPartPreQuery.
   425  	// In (3), our protocol heuristic should not be tripped, and there
   426  	// should not be non-space content after a '?' or '#', so as long as
   427  	// we only %-encode RFC 3986 reserved characters we are ok.
   428  	// In (4), we should URL escape for URL attributes, and for others we
   429  	// have the attribute name available if our conservative assumption
   430  	// proves problematic for real code.
   431  
   432  	k := 0
   433  	for {
   434  		i := k + bytes.IndexAny(s[k:], `("'/`)
   435  		if i < k {
   436  			return c, len(s)
   437  		}
   438  		switch s[i] {
   439  		case '(':
   440  			// Look for url to the left.
   441  			p := bytes.TrimRight(s[:i], "\t\n\f\r ")
   442  			if endsWithCSSKeyword(p, "url") {
   443  				j := len(s) - len(bytes.TrimLeft(s[i+1:], "\t\n\f\r "))
   444  				switch {
   445  				case j != len(s) && s[j] == '"':
   446  					c.state, j = stateCSSDqURL, j+1
   447  				case j != len(s) && s[j] == '\'':
   448  					c.state, j = stateCSSSqURL, j+1
   449  				default:
   450  					c.state = stateCSSURL
   451  				}
   452  				return c, j
   453  			}
   454  		case '/':
   455  			if i+1 < len(s) {
   456  				switch s[i+1] {
   457  				case '/':
   458  					c.state = stateCSSLineCmt
   459  					return c, i + 2
   460  				case '*':
   461  					c.state = stateCSSBlockCmt
   462  					return c, i + 2
   463  				}
   464  			}
   465  		case '"':
   466  			c.state = stateCSSDqStr
   467  			return c, i + 1
   468  		case '\'':
   469  			c.state = stateCSSSqStr
   470  			return c, i + 1
   471  		}
   472  		k = i + 1
   473  	}
   474  }
   475  
   476  // tCSSStr is the context transition function for the CSS string and URL states.
   477  func tCSSStr(c context, s []byte) (context, int) {
   478  	var endAndEsc string
   479  	switch c.state {
   480  	case stateCSSDqStr, stateCSSDqURL:
   481  		endAndEsc = `\"`
   482  	case stateCSSSqStr, stateCSSSqURL:
   483  		endAndEsc = `\'`
   484  	case stateCSSURL:
   485  		// Unquoted URLs end with a newline or close parenthesis.
   486  		// The below includes the wc (whitespace character) and nl.
   487  		endAndEsc = "\\\t\n\f\r )"
   488  	default:
   489  		panic(c.state.String())
   490  	}
   491  
   492  	k := 0
   493  	for {
   494  		i := k + bytes.IndexAny(s[k:], endAndEsc)
   495  		if i < k {
   496  			c, nread := tURL(c, decodeCSS(s[k:]))
   497  			return c, k + nread
   498  		}
   499  		if s[i] == '\\' {
   500  			i++
   501  			if i == len(s) {
   502  				return context{
   503  					state: stateError,
   504  					err:   errorf(ErrPartialEscape, nil, 0, "unfinished escape sequence in CSS string: %q", s),
   505  				}, len(s)
   506  			}
   507  		} else {
   508  			c.state = stateCSS
   509  			return c, i + 1
   510  		}
   511  		c, _ = tURL(c, decodeCSS(s[:i+1]))
   512  		k = i + 1
   513  	}
   514  }
   515  
   516  // tError is the context transition function for the error state.
   517  func tError(c context, s []byte) (context, int) {
   518  	return c, len(s)
   519  }
   520  
   521  // eatAttrName returns the largest j such that s[i:j] is an attribute name.
   522  // It returns an error if s[i:] does not look like it begins with an
   523  // attribute name, such as encountering a quote mark without a preceding
   524  // equals sign.
   525  func eatAttrName(s []byte, i int) (int, *Error) {
   526  	for j := i; j < len(s); j++ {
   527  		switch s[j] {
   528  		case ' ', '\t', '\n', '\f', '\r', '=', '>':
   529  			return j, nil
   530  		case '\'', '"', '<':
   531  			// These result in a parse warning in HTML5 and are
   532  			// indicative of serious problems if seen in an attr
   533  			// name in a template.
   534  			return -1, errorf(ErrBadHTML, nil, 0, "%q in attribute name: %.32q", s[j:j+1], s)
   535  		default:
   536  			// No-op.
   537  		}
   538  	}
   539  	return len(s), nil
   540  }
   541  
// elementNameMap maps lower-case tag names to their element type.
// eatTagName lower-cases the name before the lookup; names missing from the
// map yield the zero element value.
var elementNameMap = map[string]element{
	"script":   elementScript,
	"style":    elementStyle,
	"textarea": elementTextarea,
	"title":    elementTitle,
}
   548  
   549  // asciiAlpha reports whether c is an ASCII letter.
   550  func asciiAlpha(c byte) bool {
   551  	return 'A' <= c && c <= 'Z' || 'a' <= c && c <= 'z'
   552  }
   553  
   554  // asciiAlphaNum reports whether c is an ASCII letter or digit.
   555  func asciiAlphaNum(c byte) bool {
   556  	return asciiAlpha(c) || '0' <= c && c <= '9'
   557  }
   558  
   559  // eatTagName returns the largest j such that s[i:j] is a tag name and the tag type.
   560  func eatTagName(s []byte, i int) (int, element) {
   561  	if i == len(s) || !asciiAlpha(s[i]) {
   562  		return i, elementNone
   563  	}
   564  	j := i + 1
   565  	for j < len(s) {
   566  		x := s[j]
   567  		if asciiAlphaNum(x) {
   568  			j++
   569  			continue
   570  		}
   571  		// Allow "x-y" or "x:y" but not "x-", "-y", or "x--y".
   572  		if (x == ':' || x == '-') && j+1 < len(s) && asciiAlphaNum(s[j+1]) {
   573  			j += 2
   574  			continue
   575  		}
   576  		break
   577  	}
   578  	return j, elementNameMap[strings.ToLower(string(s[i:j]))]
   579  }
   580  
   581  // eatWhiteSpace returns the largest j such that s[i:j] is white space.
   582  func eatWhiteSpace(s []byte, i int) int {
   583  	for j := i; j < len(s); j++ {
   584  		switch s[j] {
   585  		case ' ', '\t', '\n', '\f', '\r':
   586  			// No-op.
   587  		default:
   588  			return j
   589  		}
   590  	}
   591  	return len(s)
   592  }