github.com/twelsh-aw/go/src@v0.0.0-20230516233729-a56fe86a7c81/html/template/transition.go

github.com/twelsh-aw/go/src@v0.0.0-20230516233729-a56fe86a7c81/html/template/transition.go (about)

     1  // Copyright 2011 The Go Authors. All rights reserved.
     2  // Use of this source code is governed by a BSD-style
     3  // license that can be found in the LICENSE file.
     4  
     5  package template
     6  
     7  import (
     8  	"bytes"
     9  	"strings"
    10  )
    11  
// transitionFunc is the array of context transition functions for text nodes.
// A transition function takes a context and template text input, and returns
// the updated context and the number of bytes consumed from the front of the
// input.
//
// The array is indexed by state constant. Several states share a function:
// stateSrcset reuses tURL, the JS string and regexp states all use
// tJSDelimited, the CSS string and URL states all use tCSSStr, and the JS and
// CSS comment states share tBlockCmt and tLineCmt. Those shared functions
// inspect c.state themselves to decide how to behave.
var transitionFunc = [...]func(context, []byte) (context, int){
	stateText:        tText,
	stateTag:         tTag,
	stateAttrName:    tAttrName,
	stateAfterName:   tAfterName,
	stateBeforeValue: tBeforeValue,
	stateHTMLCmt:     tHTMLCmt,
	stateRCDATA:      tSpecialTagEnd,
	stateAttr:        tAttr,
	stateURL:         tURL,
	stateSrcset:      tURL,
	stateJS:          tJS,
	stateJSDqStr:     tJSDelimited,
	stateJSSqStr:     tJSDelimited,
	stateJSBqStr:     tJSDelimited,
	stateJSRegexp:    tJSDelimited,
	stateJSBlockCmt:  tBlockCmt,
	stateJSLineCmt:   tLineCmt,
	stateCSS:         tCSS,
	stateCSSDqStr:    tCSSStr,
	stateCSSSqStr:    tCSSStr,
	stateCSSDqURL:    tCSSStr,
	stateCSSSqURL:    tCSSStr,
	stateCSSURL:      tCSSStr,
	stateCSSBlockCmt: tBlockCmt,
	stateCSSLineCmt:  tLineCmt,
	stateError:       tError,
}
    44  
    45  var commentStart = []byte("<!--")
    46  var commentEnd = []byte("-->")
    47  
    48  // tText is the context transition function for the text state.
    49  func tText(c context, s []byte) (context, int) {
    50  	k := 0
    51  	for {
    52  		i := k + bytes.IndexByte(s[k:], '<')
    53  		if i < k || i+1 == len(s) {
    54  			return c, len(s)
    55  		} else if i+4 <= len(s) && bytes.Equal(commentStart, s[i:i+4]) {
    56  			return context{state: stateHTMLCmt}, i + 4
    57  		}
    58  		i++
    59  		end := false
    60  		if s[i] == '/' {
    61  			if i+1 == len(s) {
    62  				return c, len(s)
    63  			}
    64  			end, i = true, i+1
    65  		}
    66  		j, e := eatTagName(s, i)
    67  		if j != i {
    68  			if end {
    69  				e = elementNone
    70  			}
    71  			// We've found an HTML tag.
    72  			return context{state: stateTag, element: e}, j
    73  		}
    74  		k = j
    75  	}
    76  }
    77  
// elementContentType maps an element type to the state used for the
// element's content. tTag indexes it with c.element when it reaches the '>'
// that closes a tag.
var elementContentType = [...]state{
	elementNone:     stateText,
	elementScript:   stateJS,
	elementStyle:    stateCSS,
	elementTextarea: stateRCDATA,
	elementTitle:    stateRCDATA,
}
    85  
    86  // tTag is the context transition function for the tag state.
    87  func tTag(c context, s []byte) (context, int) {
    88  	// Find the attribute name.
    89  	i := eatWhiteSpace(s, 0)
    90  	if i == len(s) {
    91  		return c, len(s)
    92  	}
    93  	if s[i] == '>' {
    94  		return context{
    95  			state:   elementContentType[c.element],
    96  			element: c.element,
    97  		}, i + 1
    98  	}
    99  	j, err := eatAttrName(s, i)
   100  	if err != nil {
   101  		return context{state: stateError, err: err}, len(s)
   102  	}
   103  	state, attr := stateTag, attrNone
   104  	if i == j {
   105  		return context{
   106  			state: stateError,
   107  			err:   errorf(ErrBadHTML, nil, 0, "expected space, attr name, or end of tag, but got %q", s[i:]),
   108  		}, len(s)
   109  	}
   110  
   111  	attrName := strings.ToLower(string(s[i:j]))
   112  	if c.element == elementScript && attrName == "type" {
   113  		attr = attrScriptType
   114  	} else {
   115  		switch attrType(attrName) {
   116  		case contentTypeURL:
   117  			attr = attrURL
   118  		case contentTypeCSS:
   119  			attr = attrStyle
   120  		case contentTypeJS:
   121  			attr = attrScript
   122  		case contentTypeSrcset:
   123  			attr = attrSrcset
   124  		}
   125  	}
   126  
   127  	if j == len(s) {
   128  		state = stateAttrName
   129  	} else {
   130  		state = stateAfterName
   131  	}
   132  	return context{state: state, element: c.element, attr: attr}, j
   133  }
   134  
   135  // tAttrName is the context transition function for stateAttrName.
   136  func tAttrName(c context, s []byte) (context, int) {
   137  	i, err := eatAttrName(s, 0)
   138  	if err != nil {
   139  		return context{state: stateError, err: err}, len(s)
   140  	} else if i != len(s) {
   141  		c.state = stateAfterName
   142  	}
   143  	return c, i
   144  }
   145  
   146  // tAfterName is the context transition function for stateAfterName.
   147  func tAfterName(c context, s []byte) (context, int) {
   148  	// Look for the start of the value.
   149  	i := eatWhiteSpace(s, 0)
   150  	if i == len(s) {
   151  		return c, len(s)
   152  	} else if s[i] != '=' {
   153  		// Occurs due to tag ending '>', and valueless attribute.
   154  		c.state = stateTag
   155  		return c, i
   156  	}
   157  	c.state = stateBeforeValue
   158  	// Consume the "=".
   159  	return c, i + 1
   160  }
   161  
// attrStartStates maps an attribute type to the state in which that
// attribute's value starts. tBeforeValue indexes it with c.attr once it has
// found the beginning of the value.
var attrStartStates = [...]state{
	attrNone:       stateAttr,
	attrScript:     stateJS,
	attrScriptType: stateAttr,
	attrStyle:      stateCSS,
	attrURL:        stateURL,
	attrSrcset:     stateSrcset,
}
   170  
   171  // tBeforeValue is the context transition function for stateBeforeValue.
   172  func tBeforeValue(c context, s []byte) (context, int) {
   173  	i := eatWhiteSpace(s, 0)
   174  	if i == len(s) {
   175  		return c, len(s)
   176  	}
   177  	// Find the attribute delimiter.
   178  	delim := delimSpaceOrTagEnd
   179  	switch s[i] {
   180  	case '\'':
   181  		delim, i = delimSingleQuote, i+1
   182  	case '"':
   183  		delim, i = delimDoubleQuote, i+1
   184  	}
   185  	c.state, c.delim = attrStartStates[c.attr], delim
   186  	return c, i
   187  }
   188  
   189  // tHTMLCmt is the context transition function for stateHTMLCmt.
   190  func tHTMLCmt(c context, s []byte) (context, int) {
   191  	if i := bytes.Index(s, commentEnd); i != -1 {
   192  		return context{}, i + 3
   193  	}
   194  	return c, len(s)
   195  }
   196  
// specialTagEndMarkers maps element types to the character sequence that
// case-insensitively signals the end of the special tag body.
//
// Each marker is the bare tag name only: indexTagEnd looks for the "</"
// prefix itself and then requires a separator after the name. No marker is
// listed for elementNone; tSpecialTagEnd does not consult the table in that
// case.
var specialTagEndMarkers = [...][]byte{
	elementScript:   []byte("script"),
	elementStyle:    []byte("style"),
	elementTextarea: []byte("textarea"),
	elementTitle:    []byte("title"),
}
   205  
   206  var (
   207  	specialTagEndPrefix = []byte("</")
   208  	tagEndSeparators    = []byte("> \t\n\f/")
   209  )
   210  
   211  // tSpecialTagEnd is the context transition function for raw text and RCDATA
   212  // element states.
   213  func tSpecialTagEnd(c context, s []byte) (context, int) {
   214  	if c.element != elementNone {
   215  		if i := indexTagEnd(s, specialTagEndMarkers[c.element]); i != -1 {
   216  			return context{}, i
   217  		}
   218  	}
   219  	return c, len(s)
   220  }
   221  
   222  // indexTagEnd finds the index of a special tag end in a case insensitive way, or returns -1
   223  func indexTagEnd(s []byte, tag []byte) int {
   224  	res := 0
   225  	plen := len(specialTagEndPrefix)
   226  	for len(s) > 0 {
   227  		// Try to find the tag end prefix first
   228  		i := bytes.Index(s, specialTagEndPrefix)
   229  		if i == -1 {
   230  			return i
   231  		}
   232  		s = s[i+plen:]
   233  		// Try to match the actual tag if there is still space for it
   234  		if len(tag) <= len(s) && bytes.EqualFold(tag, s[:len(tag)]) {
   235  			s = s[len(tag):]
   236  			// Check the tag is followed by a proper separator
   237  			if len(s) > 0 && bytes.IndexByte(tagEndSeparators, s[0]) != -1 {
   238  				return res + i
   239  			}
   240  			res += len(tag)
   241  		}
   242  		res += i + plen
   243  	}
   244  	return -1
   245  }
   246  
// tAttr is the context transition function for the attribute state.
// It leaves the context unchanged and consumes all of s.
func tAttr(c context, s []byte) (context, int) {
	return c, len(s)
}
   251  
   252  // tURL is the context transition function for the URL state.
   253  func tURL(c context, s []byte) (context, int) {
   254  	if bytes.ContainsAny(s, "#?") {
   255  		c.urlPart = urlPartQueryOrFrag
   256  	} else if len(s) != eatWhiteSpace(s, 0) && c.urlPart == urlPartNone {
   257  		// HTML5 uses "Valid URL potentially surrounded by spaces" for
   258  		// attrs: https://www.w3.org/TR/html5/index.html#attributes-1
   259  		c.urlPart = urlPartPreQuery
   260  	}
   261  	return c, len(s)
   262  }
   263  
   264  // tJS is the context transition function for the JS state.
   265  func tJS(c context, s []byte) (context, int) {
   266  	i := bytes.IndexAny(s, "\"`'/")
   267  	if i == -1 {
   268  		// Entire input is non string, comment, regexp tokens.
   269  		c.jsCtx = nextJSCtx(s, c.jsCtx)
   270  		return c, len(s)
   271  	}
   272  	c.jsCtx = nextJSCtx(s[:i], c.jsCtx)
   273  	switch s[i] {
   274  	case '"':
   275  		c.state, c.jsCtx = stateJSDqStr, jsCtxRegexp
   276  	case '\'':
   277  		c.state, c.jsCtx = stateJSSqStr, jsCtxRegexp
   278  	case '`':
   279  		c.state, c.jsCtx = stateJSBqStr, jsCtxRegexp
   280  	case '/':
   281  		switch {
   282  		case i+1 < len(s) && s[i+1] == '/':
   283  			c.state, i = stateJSLineCmt, i+1
   284  		case i+1 < len(s) && s[i+1] == '*':
   285  			c.state, i = stateJSBlockCmt, i+1
   286  		case c.jsCtx == jsCtxRegexp:
   287  			c.state = stateJSRegexp
   288  		case c.jsCtx == jsCtxDivOp:
   289  			c.jsCtx = jsCtxRegexp
   290  		default:
   291  			return context{
   292  				state: stateError,
   293  				err:   errorf(ErrSlashAmbig, nil, 0, "'/' could start a division or regexp: %.32q", s[i:]),
   294  			}, len(s)
   295  		}
   296  	default:
   297  		panic("unreachable")
   298  	}
   299  	return c, i + 1
   300  }
   301  
   302  // tJSDelimited is the context transition function for the JS string and regexp
   303  // states.
   304  func tJSDelimited(c context, s []byte) (context, int) {
   305  	specials := `\"`
   306  	switch c.state {
   307  	case stateJSSqStr:
   308  		specials = `\'`
   309  	case stateJSBqStr:
   310  		specials = "`\\"
   311  	case stateJSRegexp:
   312  		specials = `\/[]`
   313  	}
   314  
   315  	k, inCharset := 0, false
   316  	for {
   317  		i := k + bytes.IndexAny(s[k:], specials)
   318  		if i < k {
   319  			break
   320  		}
   321  		switch s[i] {
   322  		case '\\':
   323  			i++
   324  			if i == len(s) {
   325  				return context{
   326  					state: stateError,
   327  					err:   errorf(ErrPartialEscape, nil, 0, "unfinished escape sequence in JS string: %q", s),
   328  				}, len(s)
   329  			}
   330  		case '[':
   331  			inCharset = true
   332  		case ']':
   333  			inCharset = false
   334  		default:
   335  			// end delimiter
   336  			if !inCharset {
   337  				c.state, c.jsCtx = stateJS, jsCtxDivOp
   338  				return c, i + 1
   339  			}
   340  		}
   341  		k = i + 1
   342  	}
   343  
   344  	if inCharset {
   345  		// This can be fixed by making context richer if interpolation
   346  		// into charsets is desired.
   347  		return context{
   348  			state: stateError,
   349  			err:   errorf(ErrPartialCharset, nil, 0, "unfinished JS regexp charset: %q", s),
   350  		}, len(s)
   351  	}
   352  
   353  	return c, len(s)
   354  }
   355  
// blockCommentEnd is the byte sequence that terminates a /*comment*/;
// tBlockCmt searches for it.
var blockCommentEnd = []byte("*/")
   357  
   358  // tBlockCmt is the context transition function for /*comment*/ states.
   359  func tBlockCmt(c context, s []byte) (context, int) {
   360  	i := bytes.Index(s, blockCommentEnd)
   361  	if i == -1 {
   362  		return c, len(s)
   363  	}
   364  	switch c.state {
   365  	case stateJSBlockCmt:
   366  		c.state = stateJS
   367  	case stateCSSBlockCmt:
   368  		c.state = stateCSS
   369  	default:
   370  		panic(c.state.String())
   371  	}
   372  	return c, i + 2
   373  }
   374  
   375  // tLineCmt is the context transition function for //comment states.
   376  func tLineCmt(c context, s []byte) (context, int) {
   377  	var lineTerminators string
   378  	var endState state
   379  	switch c.state {
   380  	case stateJSLineCmt:
   381  		lineTerminators, endState = "\n\r\u2028\u2029", stateJS
   382  	case stateCSSLineCmt:
   383  		lineTerminators, endState = "\n\f\r", stateCSS
   384  		// Line comments are not part of any published CSS standard but
   385  		// are supported by the 4 major browsers.
   386  		// This defines line comments as
   387  		//     LINECOMMENT ::= "//" [^\n\f\d]*
   388  		// since https://www.w3.org/TR/css3-syntax/#SUBTOK-nl defines
   389  		// newlines:
   390  		//     nl ::= #xA | #xD #xA | #xD | #xC
   391  	default:
   392  		panic(c.state.String())
   393  	}
   394  
   395  	i := bytes.IndexAny(s, lineTerminators)
   396  	if i == -1 {
   397  		return c, len(s)
   398  	}
   399  	c.state = endState
   400  	// Per section 7.4 of EcmaScript 5 : https://es5.github.io/#x7.4
   401  	// "However, the LineTerminator at the end of the line is not
   402  	// considered to be part of the single-line comment; it is
   403  	// recognized separately by the lexical grammar and becomes part
   404  	// of the stream of input elements for the syntactic grammar."
   405  	return c, i
   406  }
   407  
   408  // tCSS is the context transition function for the CSS state.
   409  func tCSS(c context, s []byte) (context, int) {
   410  	// CSS quoted strings are almost never used except for:
   411  	// (1) URLs as in background: "/foo.png"
   412  	// (2) Multiword font-names as in font-family: "Times New Roman"
   413  	// (3) List separators in content values as in inline-lists:
   414  	//    <style>
   415  	//    ul.inlineList { list-style: none; padding:0 }
   416  	//    ul.inlineList > li { display: inline }
   417  	//    ul.inlineList > li:before { content: ", " }
   418  	//    ul.inlineList > li:first-child:before { content: "" }
   419  	//    </style>
   420  	//    <ul class=inlineList><li>One<li>Two<li>Three</ul>
   421  	// (4) Attribute value selectors as in a[href="http://example.com/"]
   422  	//
   423  	// We conservatively treat all strings as URLs, but make some
   424  	// allowances to avoid confusion.
   425  	//
   426  	// In (1), our conservative assumption is justified.
   427  	// In (2), valid font names do not contain ':', '?', or '#', so our
   428  	// conservative assumption is fine since we will never transition past
   429  	// urlPartPreQuery.
   430  	// In (3), our protocol heuristic should not be tripped, and there
   431  	// should not be non-space content after a '?' or '#', so as long as
   432  	// we only %-encode RFC 3986 reserved characters we are ok.
   433  	// In (4), we should URL escape for URL attributes, and for others we
   434  	// have the attribute name available if our conservative assumption
   435  	// proves problematic for real code.
   436  
   437  	k := 0
   438  	for {
   439  		i := k + bytes.IndexAny(s[k:], `("'/`)
   440  		if i < k {
   441  			return c, len(s)
   442  		}
   443  		switch s[i] {
   444  		case '(':
   445  			// Look for url to the left.
   446  			p := bytes.TrimRight(s[:i], "\t\n\f\r ")
   447  			if endsWithCSSKeyword(p, "url") {
   448  				j := len(s) - len(bytes.TrimLeft(s[i+1:], "\t\n\f\r "))
   449  				switch {
   450  				case j != len(s) && s[j] == '"':
   451  					c.state, j = stateCSSDqURL, j+1
   452  				case j != len(s) && s[j] == '\'':
   453  					c.state, j = stateCSSSqURL, j+1
   454  				default:
   455  					c.state = stateCSSURL
   456  				}
   457  				return c, j
   458  			}
   459  		case '/':
   460  			if i+1 < len(s) {
   461  				switch s[i+1] {
   462  				case '/':
   463  					c.state = stateCSSLineCmt
   464  					return c, i + 2
   465  				case '*':
   466  					c.state = stateCSSBlockCmt
   467  					return c, i + 2
   468  				}
   469  			}
   470  		case '"':
   471  			c.state = stateCSSDqStr
   472  			return c, i + 1
   473  		case '\'':
   474  			c.state = stateCSSSqStr
   475  			return c, i + 1
   476  		}
   477  		k = i + 1
   478  	}
   479  }
   480  
   481  // tCSSStr is the context transition function for the CSS string and URL states.
   482  func tCSSStr(c context, s []byte) (context, int) {
   483  	var endAndEsc string
   484  	switch c.state {
   485  	case stateCSSDqStr, stateCSSDqURL:
   486  		endAndEsc = `\"`
   487  	case stateCSSSqStr, stateCSSSqURL:
   488  		endAndEsc = `\'`
   489  	case stateCSSURL:
   490  		// Unquoted URLs end with a newline or close parenthesis.
   491  		// The below includes the wc (whitespace character) and nl.
   492  		endAndEsc = "\\\t\n\f\r )"
   493  	default:
   494  		panic(c.state.String())
   495  	}
   496  
   497  	k := 0
   498  	for {
   499  		i := k + bytes.IndexAny(s[k:], endAndEsc)
   500  		if i < k {
   501  			c, nread := tURL(c, decodeCSS(s[k:]))
   502  			return c, k + nread
   503  		}
   504  		if s[i] == '\\' {
   505  			i++
   506  			if i == len(s) {
   507  				return context{
   508  					state: stateError,
   509  					err:   errorf(ErrPartialEscape, nil, 0, "unfinished escape sequence in CSS string: %q", s),
   510  				}, len(s)
   511  			}
   512  		} else {
   513  			c.state = stateCSS
   514  			return c, i + 1
   515  		}
   516  		c, _ = tURL(c, decodeCSS(s[:i+1]))
   517  		k = i + 1
   518  	}
   519  }
   520  
// tError is the context transition function for the error state.
// It leaves the context unchanged and consumes all of s.
func tError(c context, s []byte) (context, int) {
	return c, len(s)
}
   525  
   526  // eatAttrName returns the largest j such that s[i:j] is an attribute name.
   527  // It returns an error if s[i:] does not look like it begins with an
   528  // attribute name, such as encountering a quote mark without a preceding
   529  // equals sign.
   530  func eatAttrName(s []byte, i int) (int, *Error) {
   531  	for j := i; j < len(s); j++ {
   532  		switch s[j] {
   533  		case ' ', '\t', '\n', '\f', '\r', '=', '>':
   534  			return j, nil
   535  		case '\'', '"', '<':
   536  			// These result in a parse warning in HTML5 and are
   537  			// indicative of serious problems if seen in an attr
   538  			// name in a template.
   539  			return -1, errorf(ErrBadHTML, nil, 0, "%q in attribute name: %.32q", s[j:j+1], s)
   540  		default:
   541  			// No-op.
   542  		}
   543  	}
   544  	return len(s), nil
   545  }
   546  
// elementNameMap maps lower-case tag names to the element types that are
// tracked specially. eatTagName lower-cases the tag name before looking it
// up here; a name that is not present yields the zero element value.
var elementNameMap = map[string]element{
	"script":   elementScript,
	"style":    elementStyle,
	"textarea": elementTextarea,
	"title":    elementTitle,
}
   553  
   554  // asciiAlpha reports whether c is an ASCII letter.
   555  func asciiAlpha(c byte) bool {
   556  	return 'A' <= c && c <= 'Z' || 'a' <= c && c <= 'z'
   557  }
   558  
   559  // asciiAlphaNum reports whether c is an ASCII letter or digit.
   560  func asciiAlphaNum(c byte) bool {
   561  	return asciiAlpha(c) || '0' <= c && c <= '9'
   562  }
   563  
   564  // eatTagName returns the largest j such that s[i:j] is a tag name and the tag type.
   565  func eatTagName(s []byte, i int) (int, element) {
   566  	if i == len(s) || !asciiAlpha(s[i]) {
   567  		return i, elementNone
   568  	}
   569  	j := i + 1
   570  	for j < len(s) {
   571  		x := s[j]
   572  		if asciiAlphaNum(x) {
   573  			j++
   574  			continue
   575  		}
   576  		// Allow "x-y" or "x:y" but not "x-", "-y", or "x--y".
   577  		if (x == ':' || x == '-') && j+1 < len(s) && asciiAlphaNum(s[j+1]) {
   578  			j += 2
   579  			continue
   580  		}
   581  		break
   582  	}
   583  	return j, elementNameMap[strings.ToLower(string(s[i:j]))]
   584  }
   585  
   586  // eatWhiteSpace returns the largest j such that s[i:j] is white space.
   587  func eatWhiteSpace(s []byte, i int) int {
   588  	for j := i; j < len(s); j++ {
   589  		switch s[j] {
   590  		case ' ', '\t', '\n', '\f', '\r':
   591  			// No-op.
   592  		default:
   593  			return j
   594  		}
   595  	}
   596  	return len(s)
   597  }