github.com/Andyfoo/golang/x/net@v0.0.0-20190901054642-57c1bf301704/html/parse.go

github.com/Andyfoo/golang/x/net@v0.0.0-20190901054642-57c1bf301704/html/parse.go (about)

     1  // Copyright 2010 The Go Authors. All rights reserved.
     2  // Use of this source code is governed by a BSD-style
     3  // license that can be found in the LICENSE file.
     4  
     5  package html
     6  
     7  import (
     8  	"errors"
     9  	"fmt"
    10  	"io"
    11  	"strings"
    12  
    13  	a "github.com/Andyfoo/golang/x/net/html/atom"
    14  )
    15  
    16  // A parser implements the HTML5 parsing algorithm:
    17  // https://html.spec.whatwg.org/multipage/syntax.html#tree-construction
    18  type parser struct {
    19  	// tokenizer provides the tokens for the parser.
    20  	tokenizer *Tokenizer
    21  	// tok is the most recently read token.
    22  	tok Token
    23  	// Self-closing tags like <hr/> are treated as start tags, except that
    24  	// hasSelfClosingToken is set while they are being processed.
    25  	hasSelfClosingToken bool
    26  	// doc is the document root element.
    27  	doc *Node
    28  	// The stack of open elements (section 12.2.4.2) and active formatting
    29  	// elements (section 12.2.4.3).
    30  	oe, afe nodeStack
    31  	// Element pointers (section 12.2.4.4).
    32  	head, form *Node
    33  	// Other parsing state flags (section 12.2.4.5).
    34  	scripting, framesetOK bool
    35  	// The stack of template insertion modes
    36  	templateStack insertionModeStack
    37  	// im is the current insertion mode.
    38  	im insertionMode
    39  	// originalIM is the insertion mode to go back to after completing a text
    40  	// or inTableText insertion mode.
    41  	originalIM insertionMode
    42  	// fosterParenting is whether new elements should be inserted according to
    43  	// the foster parenting rules (section 12.2.6.1).
    44  	fosterParenting bool
    45  	// quirks is whether the parser is operating in "quirks mode."
    46  	quirks bool
    47  	// fragment is whether the parser is parsing an HTML fragment.
    48  	fragment bool
    49  	// context is the context element when parsing an HTML fragment
    50  	// (section 12.4).
    51  	context *Node
    52  }
    53  
    54  func (p *parser) top() *Node {
    55  	if n := p.oe.top(); n != nil {
    56  		return n
    57  	}
    58  	return p.doc
    59  }
    60  
    61  // Stop tags for use in popUntil. These come from section 12.2.4.2.
    62  var (
    63  	defaultScopeStopTags = map[string][]a.Atom{
    64  		"":     {a.Applet, a.Caption, a.Html, a.Table, a.Td, a.Th, a.Marquee, a.Object, a.Template},
    65  		"math": {a.AnnotationXml, a.Mi, a.Mn, a.Mo, a.Ms, a.Mtext},
    66  		"svg":  {a.Desc, a.ForeignObject, a.Title},
    67  	}
    68  )
    69  
    70  type scope int
    71  
    72  const (
    73  	defaultScope scope = iota
    74  	listItemScope
    75  	buttonScope
    76  	tableScope
    77  	tableRowScope
    78  	tableBodyScope
    79  	selectScope
    80  )
    81  
    82  // popUntil pops the stack of open elements at the highest element whose tag
    83  // is in matchTags, provided there is no higher element in the scope's stop
    84  // tags (as defined in section 12.2.4.2). It returns whether or not there was
    85  // such an element. If there was not, popUntil leaves the stack unchanged.
    86  //
    87  // For example, the set of stop tags for table scope is: "html", "table". If
    88  // the stack was:
    89  // ["html", "body", "font", "table", "b", "i", "u"]
    90  // then popUntil(tableScope, "font") would return false, but
    91  // popUntil(tableScope, "i") would return true and the stack would become:
    92  // ["html", "body", "font", "table", "b"]
    93  //
    94  // If an element's tag is in both the stop tags and matchTags, then the stack
    95  // will be popped and the function returns true (provided, of course, there was
    96  // no higher element in the stack that was also in the stop tags). For example,
    97  // popUntil(tableScope, "table") returns true and leaves:
    98  // ["html", "body", "font"]
    99  func (p *parser) popUntil(s scope, matchTags ...a.Atom) bool {
   100  	if i := p.indexOfElementInScope(s, matchTags...); i != -1 {
   101  		p.oe = p.oe[:i]
   102  		return true
   103  	}
   104  	return false
   105  }
   106  
   107  // indexOfElementInScope returns the index in p.oe of the highest element whose
   108  // tag is in matchTags that is in scope. If no matching element is in scope, it
   109  // returns -1.
   110  func (p *parser) indexOfElementInScope(s scope, matchTags ...a.Atom) int {
   111  	for i := len(p.oe) - 1; i >= 0; i-- {
   112  		tagAtom := p.oe[i].DataAtom
   113  		if p.oe[i].Namespace == "" {
   114  			for _, t := range matchTags {
   115  				if t == tagAtom {
   116  					return i
   117  				}
   118  			}
   119  			switch s {
   120  			case defaultScope:
   121  				// No-op.
   122  			case listItemScope:
   123  				if tagAtom == a.Ol || tagAtom == a.Ul {
   124  					return -1
   125  				}
   126  			case buttonScope:
   127  				if tagAtom == a.Button {
   128  					return -1
   129  				}
   130  			case tableScope:
   131  				if tagAtom == a.Html || tagAtom == a.Table || tagAtom == a.Template {
   132  					return -1
   133  				}
   134  			case selectScope:
   135  				if tagAtom != a.Optgroup && tagAtom != a.Option {
   136  					return -1
   137  				}
   138  			default:
   139  				panic("unreachable")
   140  			}
   141  		}
   142  		switch s {
   143  		case defaultScope, listItemScope, buttonScope:
   144  			for _, t := range defaultScopeStopTags[p.oe[i].Namespace] {
   145  				if t == tagAtom {
   146  					return -1
   147  				}
   148  			}
   149  		}
   150  	}
   151  	return -1
   152  }
   153  
   154  // elementInScope is like popUntil, except that it doesn't modify the stack of
   155  // open elements.
   156  func (p *parser) elementInScope(s scope, matchTags ...a.Atom) bool {
   157  	return p.indexOfElementInScope(s, matchTags...) != -1
   158  }
   159  
   160  // clearStackToContext pops elements off the stack of open elements until a
   161  // scope-defined element is found.
   162  func (p *parser) clearStackToContext(s scope) {
   163  	for i := len(p.oe) - 1; i >= 0; i-- {
   164  		tagAtom := p.oe[i].DataAtom
   165  		switch s {
   166  		case tableScope:
   167  			if tagAtom == a.Html || tagAtom == a.Table || tagAtom == a.Template {
   168  				p.oe = p.oe[:i+1]
   169  				return
   170  			}
   171  		case tableRowScope:
   172  			if tagAtom == a.Html || tagAtom == a.Tr || tagAtom == a.Template {
   173  				p.oe = p.oe[:i+1]
   174  				return
   175  			}
   176  		case tableBodyScope:
   177  			if tagAtom == a.Html || tagAtom == a.Tbody || tagAtom == a.Tfoot || tagAtom == a.Thead || tagAtom == a.Template {
   178  				p.oe = p.oe[:i+1]
   179  				return
   180  			}
   181  		default:
   182  			panic("unreachable")
   183  		}
   184  	}
   185  }
   186  
   187  // generateImpliedEndTags pops nodes off the stack of open elements as long as
   188  // the top node has a tag name of dd, dt, li, optgroup, option, p, rb, rp, rt or rtc.
   189  // If exceptions are specified, nodes with that name will not be popped off.
   190  func (p *parser) generateImpliedEndTags(exceptions ...string) {
   191  	var i int
   192  loop:
   193  	for i = len(p.oe) - 1; i >= 0; i-- {
   194  		n := p.oe[i]
   195  		if n.Type == ElementNode {
   196  			switch n.DataAtom {
   197  			case a.Dd, a.Dt, a.Li, a.Optgroup, a.Option, a.P, a.Rb, a.Rp, a.Rt, a.Rtc:
   198  				for _, except := range exceptions {
   199  					if n.Data == except {
   200  						break loop
   201  					}
   202  				}
   203  				continue
   204  			}
   205  		}
   206  		break
   207  	}
   208  
   209  	p.oe = p.oe[:i+1]
   210  }
   211  
   212  // addChild adds a child node n to the top element, and pushes n onto the stack
   213  // of open elements if it is an element node.
   214  func (p *parser) addChild(n *Node) {
   215  	if p.shouldFosterParent() {
   216  		p.fosterParent(n)
   217  	} else {
   218  		p.top().AppendChild(n)
   219  	}
   220  
   221  	if n.Type == ElementNode {
   222  		p.oe = append(p.oe, n)
   223  	}
   224  }
   225  
   226  // shouldFosterParent returns whether the next node to be added should be
   227  // foster parented.
   228  func (p *parser) shouldFosterParent() bool {
   229  	if p.fosterParenting {
   230  		switch p.top().DataAtom {
   231  		case a.Table, a.Tbody, a.Tfoot, a.Thead, a.Tr:
   232  			return true
   233  		}
   234  	}
   235  	return false
   236  }
   237  
   238  // fosterParent adds a child node according to the foster parenting rules.
   239  // Section 12.2.6.1, "foster parenting".
   240  func (p *parser) fosterParent(n *Node) {
   241  	var table, parent, prev, template *Node
   242  	var i int
   243  	for i = len(p.oe) - 1; i >= 0; i-- {
   244  		if p.oe[i].DataAtom == a.Table {
   245  			table = p.oe[i]
   246  			break
   247  		}
   248  	}
   249  
   250  	var j int
   251  	for j = len(p.oe) - 1; j >= 0; j-- {
   252  		if p.oe[j].DataAtom == a.Template {
   253  			template = p.oe[j]
   254  			break
   255  		}
   256  	}
   257  
   258  	if template != nil && (table == nil || j > i) {
   259  		template.AppendChild(n)
   260  		return
   261  	}
   262  
   263  	if table == nil {
   264  		// The foster parent is the html element.
   265  		parent = p.oe[0]
   266  	} else {
   267  		parent = table.Parent
   268  	}
   269  	if parent == nil {
   270  		parent = p.oe[i-1]
   271  	}
   272  
   273  	if table != nil {
   274  		prev = table.PrevSibling
   275  	} else {
   276  		prev = parent.LastChild
   277  	}
   278  	if prev != nil && prev.Type == TextNode && n.Type == TextNode {
   279  		prev.Data += n.Data
   280  		return
   281  	}
   282  
   283  	parent.InsertBefore(n, table)
   284  }
   285  
   286  // addText adds text to the preceding node if it is a text node, or else it
   287  // calls addChild with a new text node.
   288  func (p *parser) addText(text string) {
   289  	if text == "" {
   290  		return
   291  	}
   292  
   293  	if p.shouldFosterParent() {
   294  		p.fosterParent(&Node{
   295  			Type: TextNode,
   296  			Data: text,
   297  		})
   298  		return
   299  	}
   300  
   301  	t := p.top()
   302  	if n := t.LastChild; n != nil && n.Type == TextNode {
   303  		n.Data += text
   304  		return
   305  	}
   306  	p.addChild(&Node{
   307  		Type: TextNode,
   308  		Data: text,
   309  	})
   310  }
   311  
   312  // addElement adds a child element based on the current token.
   313  func (p *parser) addElement() {
   314  	p.addChild(&Node{
   315  		Type:     ElementNode,
   316  		DataAtom: p.tok.DataAtom,
   317  		Data:     p.tok.Data,
   318  		Attr:     p.tok.Attr,
   319  	})
   320  }
   321  
   322  // Section 12.2.4.3.
   323  func (p *parser) addFormattingElement() {
   324  	tagAtom, attr := p.tok.DataAtom, p.tok.Attr
   325  	p.addElement()
   326  
   327  	// Implement the Noah's Ark clause, but with three per family instead of two.
   328  	identicalElements := 0
   329  findIdenticalElements:
   330  	for i := len(p.afe) - 1; i >= 0; i-- {
   331  		n := p.afe[i]
   332  		if n.Type == scopeMarkerNode {
   333  			break
   334  		}
   335  		if n.Type != ElementNode {
   336  			continue
   337  		}
   338  		if n.Namespace != "" {
   339  			continue
   340  		}
   341  		if n.DataAtom != tagAtom {
   342  			continue
   343  		}
   344  		if len(n.Attr) != len(attr) {
   345  			continue
   346  		}
   347  	compareAttributes:
   348  		for _, t0 := range n.Attr {
   349  			for _, t1 := range attr {
   350  				if t0.Key == t1.Key && t0.Namespace == t1.Namespace && t0.Val == t1.Val {
   351  					// Found a match for this attribute, continue with the next attribute.
   352  					continue compareAttributes
   353  				}
   354  			}
   355  			// If we get here, there is no attribute that matches a.
   356  			// Therefore the element is not identical to the new one.
   357  			continue findIdenticalElements
   358  		}
   359  
   360  		identicalElements++
   361  		if identicalElements >= 3 {
   362  			p.afe.remove(n)
   363  		}
   364  	}
   365  
   366  	p.afe = append(p.afe, p.top())
   367  }
   368  
   369  // Section 12.2.4.3.
   370  func (p *parser) clearActiveFormattingElements() {
   371  	for {
   372  		n := p.afe.pop()
   373  		if len(p.afe) == 0 || n.Type == scopeMarkerNode {
   374  			return
   375  		}
   376  	}
   377  }
   378  
   379  // Section 12.2.4.3.
   380  func (p *parser) reconstructActiveFormattingElements() {
   381  	n := p.afe.top()
   382  	if n == nil {
   383  		return
   384  	}
   385  	if n.Type == scopeMarkerNode || p.oe.index(n) != -1 {
   386  		return
   387  	}
   388  	i := len(p.afe) - 1
   389  	for n.Type != scopeMarkerNode && p.oe.index(n) == -1 {
   390  		if i == 0 {
   391  			i = -1
   392  			break
   393  		}
   394  		i--
   395  		n = p.afe[i]
   396  	}
   397  	for {
   398  		i++
   399  		clone := p.afe[i].clone()
   400  		p.addChild(clone)
   401  		p.afe[i] = clone
   402  		if i == len(p.afe)-1 {
   403  			break
   404  		}
   405  	}
   406  }
   407  
   408  // Section 12.2.5.
   409  func (p *parser) acknowledgeSelfClosingTag() {
   410  	p.hasSelfClosingToken = false
   411  }
   412  
   413  // An insertion mode (section 12.2.4.1) is the state transition function from
   414  // a particular state in the HTML5 parser's state machine. It updates the
   415  // parser's fields depending on parser.tok (where ErrorToken means EOF).
   416  // It returns whether the token was consumed.
   417  type insertionMode func(*parser) bool
   418  
   419  // setOriginalIM sets the insertion mode to return to after completing a text or
   420  // inTableText insertion mode.
   421  // Section 12.2.4.1, "using the rules for".
   422  func (p *parser) setOriginalIM() {
   423  	if p.originalIM != nil {
   424  		panic("html: bad parser state: originalIM was set twice")
   425  	}
   426  	p.originalIM = p.im
   427  }
   428  
   429  // Section 12.2.4.1, "reset the insertion mode".
   430  func (p *parser) resetInsertionMode() {
   431  	for i := len(p.oe) - 1; i >= 0; i-- {
   432  		n := p.oe[i]
   433  		last := i == 0
   434  		if last && p.context != nil {
   435  			n = p.context
   436  		}
   437  
   438  		switch n.DataAtom {
   439  		case a.Select:
   440  			if !last {
   441  				for ancestor, first := n, p.oe[0]; ancestor != first; {
   442  					ancestor = p.oe[p.oe.index(ancestor)-1]
   443  					switch ancestor.DataAtom {
   444  					case a.Template:
   445  						p.im = inSelectIM
   446  						return
   447  					case a.Table:
   448  						p.im = inSelectInTableIM
   449  						return
   450  					}
   451  				}
   452  			}
   453  			p.im = inSelectIM
   454  		case a.Td, a.Th:
   455  			// TODO: remove this divergence from the HTML5 spec.
   456  			//
   457  			// See https://bugs.chromium.org/p/chromium/issues/detail?id=829668
   458  			p.im = inCellIM
   459  		case a.Tr:
   460  			p.im = inRowIM
   461  		case a.Tbody, a.Thead, a.Tfoot:
   462  			p.im = inTableBodyIM
   463  		case a.Caption:
   464  			p.im = inCaptionIM
   465  		case a.Colgroup:
   466  			p.im = inColumnGroupIM
   467  		case a.Table:
   468  			p.im = inTableIM
   469  		case a.Template:
   470  			// TODO: remove this divergence from the HTML5 spec.
   471  			if n.Namespace != "" {
   472  				continue
   473  			}
   474  			p.im = p.templateStack.top()
   475  		case a.Head:
   476  			// TODO: remove this divergence from the HTML5 spec.
   477  			//
   478  			// See https://bugs.chromium.org/p/chromium/issues/detail?id=829668
   479  			p.im = inHeadIM
   480  		case a.Body:
   481  			p.im = inBodyIM
   482  		case a.Frameset:
   483  			p.im = inFramesetIM
   484  		case a.Html:
   485  			if p.head == nil {
   486  				p.im = beforeHeadIM
   487  			} else {
   488  				p.im = afterHeadIM
   489  			}
   490  		default:
   491  			if last {
   492  				p.im = inBodyIM
   493  				return
   494  			}
   495  			continue
   496  		}
   497  		return
   498  	}
   499  }
   500  
   501  const whitespace = " \t\r\n\f"
   502  
   503  // Section 12.2.6.4.1.
   504  func initialIM(p *parser) bool {
   505  	switch p.tok.Type {
   506  	case TextToken:
   507  		p.tok.Data = strings.TrimLeft(p.tok.Data, whitespace)
   508  		if len(p.tok.Data) == 0 {
   509  			// It was all whitespace, so ignore it.
   510  			return true
   511  		}
   512  	case CommentToken:
   513  		p.doc.AppendChild(&Node{
   514  			Type: CommentNode,
   515  			Data: p.tok.Data,
   516  		})
   517  		return true
   518  	case DoctypeToken:
   519  		n, quirks := parseDoctype(p.tok.Data)
   520  		p.doc.AppendChild(n)
   521  		p.quirks = quirks
   522  		p.im = beforeHTMLIM
   523  		return true
   524  	}
   525  	p.quirks = true
   526  	p.im = beforeHTMLIM
   527  	return false
   528  }
   529  
   530  // Section 12.2.6.4.2.
   531  func beforeHTMLIM(p *parser) bool {
   532  	switch p.tok.Type {
   533  	case DoctypeToken:
   534  		// Ignore the token.
   535  		return true
   536  	case TextToken:
   537  		p.tok.Data = strings.TrimLeft(p.tok.Data, whitespace)
   538  		if len(p.tok.Data) == 0 {
   539  			// It was all whitespace, so ignore it.
   540  			return true
   541  		}
   542  	case StartTagToken:
   543  		if p.tok.DataAtom == a.Html {
   544  			p.addElement()
   545  			p.im = beforeHeadIM
   546  			return true
   547  		}
   548  	case EndTagToken:
   549  		switch p.tok.DataAtom {
   550  		case a.Head, a.Body, a.Html, a.Br:
   551  			p.parseImpliedToken(StartTagToken, a.Html, a.Html.String())
   552  			return false
   553  		default:
   554  			// Ignore the token.
   555  			return true
   556  		}
   557  	case CommentToken:
   558  		p.doc.AppendChild(&Node{
   559  			Type: CommentNode,
   560  			Data: p.tok.Data,
   561  		})
   562  		return true
   563  	}
   564  	p.parseImpliedToken(StartTagToken, a.Html, a.Html.String())
   565  	return false
   566  }
   567  
   568  // Section 12.2.6.4.3.
   569  func beforeHeadIM(p *parser) bool {
   570  	switch p.tok.Type {
   571  	case TextToken:
   572  		p.tok.Data = strings.TrimLeft(p.tok.Data, whitespace)
   573  		if len(p.tok.Data) == 0 {
   574  			// It was all whitespace, so ignore it.
   575  			return true
   576  		}
   577  	case StartTagToken:
   578  		switch p.tok.DataAtom {
   579  		case a.Head:
   580  			p.addElement()
   581  			p.head = p.top()
   582  			p.im = inHeadIM
   583  			return true
   584  		case a.Html:
   585  			return inBodyIM(p)
   586  		}
   587  	case EndTagToken:
   588  		switch p.tok.DataAtom {
   589  		case a.Head, a.Body, a.Html, a.Br:
   590  			p.parseImpliedToken(StartTagToken, a.Head, a.Head.String())
   591  			return false
   592  		default:
   593  			// Ignore the token.
   594  			return true
   595  		}
   596  	case CommentToken:
   597  		p.addChild(&Node{
   598  			Type: CommentNode,
   599  			Data: p.tok.Data,
   600  		})
   601  		return true
   602  	case DoctypeToken:
   603  		// Ignore the token.
   604  		return true
   605  	}
   606  
   607  	p.parseImpliedToken(StartTagToken, a.Head, a.Head.String())
   608  	return false
   609  }
   610  
   611  // Section 12.2.6.4.4.
   612  func inHeadIM(p *parser) bool {
   613  	switch p.tok.Type {
   614  	case TextToken:
   615  		s := strings.TrimLeft(p.tok.Data, whitespace)
   616  		if len(s) < len(p.tok.Data) {
   617  			// Add the initial whitespace to the current node.
   618  			p.addText(p.tok.Data[:len(p.tok.Data)-len(s)])
   619  			if s == "" {
   620  				return true
   621  			}
   622  			p.tok.Data = s
   623  		}
   624  	case StartTagToken:
   625  		switch p.tok.DataAtom {
   626  		case a.Html:
   627  			return inBodyIM(p)
   628  		case a.Base, a.Basefont, a.Bgsound, a.Command, a.Link, a.Meta:
   629  			p.addElement()
   630  			p.oe.pop()
   631  			p.acknowledgeSelfClosingTag()
   632  			return true
   633  		case a.Noscript:
   634  			p.addElement()
   635  			if p.scripting {
   636  				p.setOriginalIM()
   637  				p.im = textIM
   638  			} else {
   639  				p.im = inHeadNoscriptIM
   640  			}
   641  			return true
   642  		case a.Script, a.Title, a.Noframes, a.Style:
   643  			p.addElement()
   644  			p.setOriginalIM()
   645  			p.im = textIM
   646  			return true
   647  		case a.Head:
   648  			// Ignore the token.
   649  			return true
   650  		case a.Template:
   651  			p.addElement()
   652  			p.afe = append(p.afe, &scopeMarker)
   653  			p.framesetOK = false
   654  			p.im = inTemplateIM
   655  			p.templateStack = append(p.templateStack, inTemplateIM)
   656  			return true
   657  		}
   658  	case EndTagToken:
   659  		switch p.tok.DataAtom {
   660  		case a.Head:
   661  			p.oe.pop()
   662  			p.im = afterHeadIM
   663  			return true
   664  		case a.Body, a.Html, a.Br:
   665  			p.parseImpliedToken(EndTagToken, a.Head, a.Head.String())
   666  			return false
   667  		case a.Template:
   668  			if !p.oe.contains(a.Template) {
   669  				return true
   670  			}
   671  			// TODO: remove this divergence from the HTML5 spec.
   672  			//
   673  			// See https://bugs.chromium.org/p/chromium/issues/detail?id=829668
   674  			p.generateImpliedEndTags()
   675  			for i := len(p.oe) - 1; i >= 0; i-- {
   676  				if n := p.oe[i]; n.Namespace == "" && n.DataAtom == a.Template {
   677  					p.oe = p.oe[:i]
   678  					break
   679  				}
   680  			}
   681  			p.clearActiveFormattingElements()
   682  			p.templateStack.pop()
   683  			p.resetInsertionMode()
   684  			return true
   685  		default:
   686  			// Ignore the token.
   687  			return true
   688  		}
   689  	case CommentToken:
   690  		p.addChild(&Node{
   691  			Type: CommentNode,
   692  			Data: p.tok.Data,
   693  		})
   694  		return true
   695  	case DoctypeToken:
   696  		// Ignore the token.
   697  		return true
   698  	}
   699  
   700  	p.parseImpliedToken(EndTagToken, a.Head, a.Head.String())
   701  	return false
   702  }
   703  
   704  // 12.2.6.4.5.
   705  func inHeadNoscriptIM(p *parser) bool {
   706  	switch p.tok.Type {
   707  	case DoctypeToken:
   708  		// Ignore the token.
   709  		return true
   710  	case StartTagToken:
   711  		switch p.tok.DataAtom {
   712  		case a.Html:
   713  			return inBodyIM(p)
   714  		case a.Basefont, a.Bgsound, a.Link, a.Meta, a.Noframes, a.Style:
   715  			return inHeadIM(p)
   716  		case a.Head, a.Noscript:
   717  			// Ignore the token.
   718  			return true
   719  		}
   720  	case EndTagToken:
   721  		switch p.tok.DataAtom {
   722  		case a.Noscript, a.Br:
   723  		default:
   724  			// Ignore the token.
   725  			return true
   726  		}
   727  	case TextToken:
   728  		s := strings.TrimLeft(p.tok.Data, whitespace)
   729  		if len(s) == 0 {
   730  			// It was all whitespace.
   731  			return inHeadIM(p)
   732  		}
   733  	case CommentToken:
   734  		return inHeadIM(p)
   735  	}
   736  	p.oe.pop()
   737  	if p.top().DataAtom != a.Head {
   738  		panic("html: the new current node will be a head element.")
   739  	}
   740  	p.im = inHeadIM
   741  	if p.tok.DataAtom == a.Noscript {
   742  		return true
   743  	}
   744  	return false
   745  }
   746  
   747  // Section 12.2.6.4.6.
   748  func afterHeadIM(p *parser) bool {
   749  	switch p.tok.Type {
   750  	case TextToken:
   751  		s := strings.TrimLeft(p.tok.Data, whitespace)
   752  		if len(s) < len(p.tok.Data) {
   753  			// Add the initial whitespace to the current node.
   754  			p.addText(p.tok.Data[:len(p.tok.Data)-len(s)])
   755  			if s == "" {
   756  				return true
   757  			}
   758  			p.tok.Data = s
   759  		}
   760  	case StartTagToken:
   761  		switch p.tok.DataAtom {
   762  		case a.Html:
   763  			return inBodyIM(p)
   764  		case a.Body:
   765  			p.addElement()
   766  			p.framesetOK = false
   767  			p.im = inBodyIM
   768  			return true
   769  		case a.Frameset:
   770  			p.addElement()
   771  			p.im = inFramesetIM
   772  			return true
   773  		case a.Base, a.Basefont, a.Bgsound, a.Link, a.Meta, a.Noframes, a.Script, a.Style, a.Template, a.Title:
   774  			p.oe = append(p.oe, p.head)
   775  			defer p.oe.remove(p.head)
   776  			return inHeadIM(p)
   777  		case a.Head:
   778  			// Ignore the token.
   779  			return true
   780  		}
   781  	case EndTagToken:
   782  		switch p.tok.DataAtom {
   783  		case a.Body, a.Html, a.Br:
   784  			// Drop down to creating an implied <body> tag.
   785  		case a.Template:
   786  			return inHeadIM(p)
   787  		default:
   788  			// Ignore the token.
   789  			return true
   790  		}
   791  	case CommentToken:
   792  		p.addChild(&Node{
   793  			Type: CommentNode,
   794  			Data: p.tok.Data,
   795  		})
   796  		return true
   797  	case DoctypeToken:
   798  		// Ignore the token.
   799  		return true
   800  	}
   801  
   802  	p.parseImpliedToken(StartTagToken, a.Body, a.Body.String())
   803  	p.framesetOK = true
   804  	return false
   805  }
   806  
   807  // copyAttributes copies attributes of src not found on dst to dst.
   808  func copyAttributes(dst *Node, src Token) {
   809  	if len(src.Attr) == 0 {
   810  		return
   811  	}
   812  	attr := map[string]string{}
   813  	for _, t := range dst.Attr {
   814  		attr[t.Key] = t.Val
   815  	}
   816  	for _, t := range src.Attr {
   817  		if _, ok := attr[t.Key]; !ok {
   818  			dst.Attr = append(dst.Attr, t)
   819  			attr[t.Key] = t.Val
   820  		}
   821  	}
   822  }
   823  
   824  // Section 12.2.6.4.7.
   825  func inBodyIM(p *parser) bool {
   826  	switch p.tok.Type {
   827  	case TextToken:
   828  		d := p.tok.Data
   829  		switch n := p.oe.top(); n.DataAtom {
   830  		case a.Pre, a.Listing:
   831  			if n.FirstChild == nil {
   832  				// Ignore a newline at the start of a <pre> block.
   833  				if d != "" && d[0] == '\r' {
   834  					d = d[1:]
   835  				}
   836  				if d != "" && d[0] == '\n' {
   837  					d = d[1:]
   838  				}
   839  			}
   840  		}
   841  		d = strings.Replace(d, "\x00", "", -1)
   842  		if d == "" {
   843  			return true
   844  		}
   845  		p.reconstructActiveFormattingElements()
   846  		p.addText(d)
   847  		if p.framesetOK && strings.TrimLeft(d, whitespace) != "" {
   848  			// There were non-whitespace characters inserted.
   849  			p.framesetOK = false
   850  		}
   851  	case StartTagToken:
   852  		switch p.tok.DataAtom {
   853  		case a.Html:
   854  			if p.oe.contains(a.Template) {
   855  				return true
   856  			}
   857  			copyAttributes(p.oe[0], p.tok)
   858  		case a.Base, a.Basefont, a.Bgsound, a.Command, a.Link, a.Meta, a.Noframes, a.Script, a.Style, a.Template, a.Title:
   859  			return inHeadIM(p)
   860  		case a.Body:
   861  			if p.oe.contains(a.Template) {
   862  				return true
   863  			}
   864  			if len(p.oe) >= 2 {
   865  				body := p.oe[1]
   866  				if body.Type == ElementNode && body.DataAtom == a.Body {
   867  					p.framesetOK = false
   868  					copyAttributes(body, p.tok)
   869  				}
   870  			}
   871  		case a.Frameset:
   872  			if !p.framesetOK || len(p.oe) < 2 || p.oe[1].DataAtom != a.Body {
   873  				// Ignore the token.
   874  				return true
   875  			}
   876  			body := p.oe[1]
   877  			if body.Parent != nil {
   878  				body.Parent.RemoveChild(body)
   879  			}
   880  			p.oe = p.oe[:1]
   881  			p.addElement()
   882  			p.im = inFramesetIM
   883  			return true
   884  		case a.Address, a.Article, a.Aside, a.Blockquote, a.Center, a.Details, a.Dir, a.Div, a.Dl, a.Fieldset, a.Figcaption, a.Figure, a.Footer, a.Header, a.Hgroup, a.Menu, a.Nav, a.Ol, a.P, a.Section, a.Summary, a.Ul:
   885  			p.popUntil(buttonScope, a.P)
   886  			p.addElement()
   887  		case a.H1, a.H2, a.H3, a.H4, a.H5, a.H6:
   888  			p.popUntil(buttonScope, a.P)
   889  			switch n := p.top(); n.DataAtom {
   890  			case a.H1, a.H2, a.H3, a.H4, a.H5, a.H6:
   891  				p.oe.pop()
   892  			}
   893  			p.addElement()
   894  		case a.Pre, a.Listing:
   895  			p.popUntil(buttonScope, a.P)
   896  			p.addElement()
   897  			// The newline, if any, will be dealt with by the TextToken case.
   898  			p.framesetOK = false
   899  		case a.Form:
   900  			if p.form != nil && !p.oe.contains(a.Template) {
   901  				// Ignore the token
   902  				return true
   903  			}
   904  			p.popUntil(buttonScope, a.P)
   905  			p.addElement()
   906  			if !p.oe.contains(a.Template) {
   907  				p.form = p.top()
   908  			}
   909  		case a.Li:
   910  			p.framesetOK = false
   911  			for i := len(p.oe) - 1; i >= 0; i-- {
   912  				node := p.oe[i]
   913  				switch node.DataAtom {
   914  				case a.Li:
   915  					p.oe = p.oe[:i]
   916  				case a.Address, a.Div, a.P:
   917  					continue
   918  				default:
   919  					if !isSpecialElement(node) {
   920  						continue
   921  					}
   922  				}
   923  				break
   924  			}
   925  			p.popUntil(buttonScope, a.P)
   926  			p.addElement()
   927  		case a.Dd, a.Dt:
   928  			p.framesetOK = false
   929  			for i := len(p.oe) - 1; i >= 0; i-- {
   930  				node := p.oe[i]
   931  				switch node.DataAtom {
   932  				case a.Dd, a.Dt:
   933  					p.oe = p.oe[:i]
   934  				case a.Address, a.Div, a.P:
   935  					continue
   936  				default:
   937  					if !isSpecialElement(node) {
   938  						continue
   939  					}
   940  				}
   941  				break
   942  			}
   943  			p.popUntil(buttonScope, a.P)
   944  			p.addElement()
   945  		case a.Plaintext:
   946  			p.popUntil(buttonScope, a.P)
   947  			p.addElement()
   948  		case a.Button:
   949  			p.popUntil(defaultScope, a.Button)
   950  			p.reconstructActiveFormattingElements()
   951  			p.addElement()
   952  			p.framesetOK = false
   953  		case a.A:
   954  			for i := len(p.afe) - 1; i >= 0 && p.afe[i].Type != scopeMarkerNode; i-- {
   955  				if n := p.afe[i]; n.Type == ElementNode && n.DataAtom == a.A {
   956  					p.inBodyEndTagFormatting(a.A, "a")
   957  					p.oe.remove(n)
   958  					p.afe.remove(n)
   959  					break
   960  				}
   961  			}
   962  			p.reconstructActiveFormattingElements()
   963  			p.addFormattingElement()
   964  		case a.B, a.Big, a.Code, a.Em, a.Font, a.I, a.S, a.Small, a.Strike, a.Strong, a.Tt, a.U:
   965  			p.reconstructActiveFormattingElements()
   966  			p.addFormattingElement()
   967  		case a.Nobr:
   968  			p.reconstructActiveFormattingElements()
   969  			if p.elementInScope(defaultScope, a.Nobr) {
   970  				p.inBodyEndTagFormatting(a.Nobr, "nobr")
   971  				p.reconstructActiveFormattingElements()
   972  			}
   973  			p.addFormattingElement()
   974  		case a.Applet, a.Marquee, a.Object:
   975  			p.reconstructActiveFormattingElements()
   976  			p.addElement()
   977  			p.afe = append(p.afe, &scopeMarker)
   978  			p.framesetOK = false
   979  		case a.Table:
   980  			if !p.quirks {
   981  				p.popUntil(buttonScope, a.P)
   982  			}
   983  			p.addElement()
   984  			p.framesetOK = false
   985  			p.im = inTableIM
   986  			return true
   987  		case a.Area, a.Br, a.Embed, a.Img, a.Input, a.Keygen, a.Wbr:
   988  			p.reconstructActiveFormattingElements()
   989  			p.addElement()
   990  			p.oe.pop()
   991  			p.acknowledgeSelfClosingTag()
   992  			if p.tok.DataAtom == a.Input {
   993  				for _, t := range p.tok.Attr {
   994  					if t.Key == "type" {
   995  						if strings.ToLower(t.Val) == "hidden" {
   996  							// Skip setting framesetOK = false
   997  							return true
   998  						}
   999  					}
  1000  				}
  1001  			}
  1002  			p.framesetOK = false
  1003  		case a.Param, a.Source, a.Track:
  1004  			p.addElement()
  1005  			p.oe.pop()
  1006  			p.acknowledgeSelfClosingTag()
  1007  		case a.Hr:
  1008  			p.popUntil(buttonScope, a.P)
  1009  			p.addElement()
  1010  			p.oe.pop()
  1011  			p.acknowledgeSelfClosingTag()
  1012  			p.framesetOK = false
  1013  		case a.Image:
  1014  			p.tok.DataAtom = a.Img
  1015  			p.tok.Data = a.Img.String()
  1016  			return false
  1017  		case a.Isindex:
  1018  			if p.form != nil {
  1019  				// Ignore the token.
  1020  				return true
  1021  			}
  1022  			action := ""
  1023  			prompt := "This is a searchable index. Enter search keywords: "
  1024  			attr := []Attribute{{Key: "name", Val: "isindex"}}
  1025  			for _, t := range p.tok.Attr {
  1026  				switch t.Key {
  1027  				case "action":
  1028  					action = t.Val
  1029  				case "name":
  1030  					// Ignore the attribute.
  1031  				case "prompt":
  1032  					prompt = t.Val
  1033  				default:
  1034  					attr = append(attr, t)
  1035  				}
  1036  			}
  1037  			p.acknowledgeSelfClosingTag()
  1038  			p.popUntil(buttonScope, a.P)
  1039  			p.parseImpliedToken(StartTagToken, a.Form, a.Form.String())
  1040  			if p.form == nil {
  1041  				// NOTE: The 'isindex' element has been removed,
  1042  				// and the 'template' element has not been designed to be
  1043  				// collaborative with the index element.
  1044  				//
  1045  				// Ignore the token.
  1046  				return true
  1047  			}
  1048  			if action != "" {
  1049  				p.form.Attr = []Attribute{{Key: "action", Val: action}}
  1050  			}
  1051  			p.parseImpliedToken(StartTagToken, a.Hr, a.Hr.String())
  1052  			p.parseImpliedToken(StartTagToken, a.Label, a.Label.String())
  1053  			p.addText(prompt)
  1054  			p.addChild(&Node{
  1055  				Type:     ElementNode,
  1056  				DataAtom: a.Input,
  1057  				Data:     a.Input.String(),
  1058  				Attr:     attr,
  1059  			})
  1060  			p.oe.pop()
  1061  			p.parseImpliedToken(EndTagToken, a.Label, a.Label.String())
  1062  			p.parseImpliedToken(StartTagToken, a.Hr, a.Hr.String())
  1063  			p.parseImpliedToken(EndTagToken, a.Form, a.Form.String())
  1064  		case a.Textarea:
  1065  			p.addElement()
  1066  			p.setOriginalIM()
  1067  			p.framesetOK = false
  1068  			p.im = textIM
  1069  		case a.Xmp:
  1070  			p.popUntil(buttonScope, a.P)
  1071  			p.reconstructActiveFormattingElements()
  1072  			p.framesetOK = false
  1073  			p.addElement()
  1074  			p.setOriginalIM()
  1075  			p.im = textIM
  1076  		case a.Iframe:
  1077  			p.framesetOK = false
  1078  			p.addElement()
  1079  			p.setOriginalIM()
  1080  			p.im = textIM
  1081  		case a.Noembed, a.Noscript:
  1082  			p.addElement()
  1083  			p.setOriginalIM()
  1084  			p.im = textIM
  1085  		case a.Select:
  1086  			p.reconstructActiveFormattingElements()
  1087  			p.addElement()
  1088  			p.framesetOK = false
  1089  			p.im = inSelectIM
  1090  			return true
  1091  		case a.Optgroup, a.Option:
  1092  			if p.top().DataAtom == a.Option {
  1093  				p.oe.pop()
  1094  			}
  1095  			p.reconstructActiveFormattingElements()
  1096  			p.addElement()
  1097  		case a.Rb, a.Rtc:
  1098  			if p.elementInScope(defaultScope, a.Ruby) {
  1099  				p.generateImpliedEndTags()
  1100  			}
  1101  			p.addElement()
  1102  		case a.Rp, a.Rt:
  1103  			if p.elementInScope(defaultScope, a.Ruby) {
  1104  				p.generateImpliedEndTags("rtc")
  1105  			}
  1106  			p.addElement()
  1107  		case a.Math, a.Svg:
  1108  			p.reconstructActiveFormattingElements()
  1109  			if p.tok.DataAtom == a.Math {
  1110  				adjustAttributeNames(p.tok.Attr, mathMLAttributeAdjustments)
  1111  			} else {
  1112  				adjustAttributeNames(p.tok.Attr, svgAttributeAdjustments)
  1113  			}
  1114  			adjustForeignAttributes(p.tok.Attr)
  1115  			p.addElement()
  1116  			p.top().Namespace = p.tok.Data
  1117  			if p.hasSelfClosingToken {
  1118  				p.oe.pop()
  1119  				p.acknowledgeSelfClosingTag()
  1120  			}
  1121  			return true
  1122  		case a.Caption, a.Col, a.Colgroup, a.Frame, a.Head, a.Tbody, a.Td, a.Tfoot, a.Th, a.Thead, a.Tr:
  1123  			// Ignore the token.
  1124  		default:
  1125  			p.reconstructActiveFormattingElements()
  1126  			p.addElement()
  1127  		}
  1128  	case EndTagToken:
  1129  		switch p.tok.DataAtom {
  1130  		case a.Body:
  1131  			if p.elementInScope(defaultScope, a.Body) {
  1132  				p.im = afterBodyIM
  1133  			}
  1134  		case a.Html:
  1135  			if p.elementInScope(defaultScope, a.Body) {
  1136  				p.parseImpliedToken(EndTagToken, a.Body, a.Body.String())
  1137  				return false
  1138  			}
  1139  			return true
  1140  		case a.Address, a.Article, a.Aside, a.Blockquote, a.Button, a.Center, a.Details, a.Dir, a.Div, a.Dl, a.Fieldset, a.Figcaption, a.Figure, a.Footer, a.Header, a.Hgroup, a.Listing, a.Menu, a.Nav, a.Ol, a.Pre, a.Section, a.Summary, a.Ul:
  1141  			p.popUntil(defaultScope, p.tok.DataAtom)
  1142  		case a.Form:
  1143  			if p.oe.contains(a.Template) {
  1144  				i := p.indexOfElementInScope(defaultScope, a.Form)
  1145  				if i == -1 {
  1146  					// Ignore the token.
  1147  					return true
  1148  				}
  1149  				p.generateImpliedEndTags()
  1150  				if p.oe[i].DataAtom != a.Form {
  1151  					// Ignore the token.
  1152  					return true
  1153  				}
  1154  				p.popUntil(defaultScope, a.Form)
  1155  			} else {
  1156  				node := p.form
  1157  				p.form = nil
  1158  				i := p.indexOfElementInScope(defaultScope, a.Form)
  1159  				if node == nil || i == -1 || p.oe[i] != node {
  1160  					// Ignore the token.
  1161  					return true
  1162  				}
  1163  				p.generateImpliedEndTags()
  1164  				p.oe.remove(node)
  1165  			}
  1166  		case a.P:
  1167  			if !p.elementInScope(buttonScope, a.P) {
  1168  				p.parseImpliedToken(StartTagToken, a.P, a.P.String())
  1169  			}
  1170  			p.popUntil(buttonScope, a.P)
  1171  		case a.Li:
  1172  			p.popUntil(listItemScope, a.Li)
  1173  		case a.Dd, a.Dt:
  1174  			p.popUntil(defaultScope, p.tok.DataAtom)
  1175  		case a.H1, a.H2, a.H3, a.H4, a.H5, a.H6:
  1176  			p.popUntil(defaultScope, a.H1, a.H2, a.H3, a.H4, a.H5, a.H6)
  1177  		case a.A, a.B, a.Big, a.Code, a.Em, a.Font, a.I, a.Nobr, a.S, a.Small, a.Strike, a.Strong, a.Tt, a.U:
  1178  			p.inBodyEndTagFormatting(p.tok.DataAtom, p.tok.Data)
  1179  		case a.Applet, a.Marquee, a.Object:
  1180  			if p.popUntil(defaultScope, p.tok.DataAtom) {
  1181  				p.clearActiveFormattingElements()
  1182  			}
  1183  		case a.Br:
  1184  			p.tok.Type = StartTagToken
  1185  			return false
  1186  		case a.Template:
  1187  			return inHeadIM(p)
  1188  		default:
  1189  			p.inBodyEndTagOther(p.tok.DataAtom, p.tok.Data)
  1190  		}
  1191  	case CommentToken:
  1192  		p.addChild(&Node{
  1193  			Type: CommentNode,
  1194  			Data: p.tok.Data,
  1195  		})
  1196  	case ErrorToken:
  1197  		// TODO: remove this divergence from the HTML5 spec.
  1198  		if len(p.templateStack) > 0 {
  1199  			p.im = inTemplateIM
  1200  			return false
  1201  		} else {
  1202  			for _, e := range p.oe {
  1203  				switch e.DataAtom {
  1204  				case a.Dd, a.Dt, a.Li, a.Optgroup, a.Option, a.P, a.Rb, a.Rp, a.Rt, a.Rtc, a.Tbody, a.Td, a.Tfoot, a.Th,
  1205  					a.Thead, a.Tr, a.Body, a.Html:
  1206  				default:
  1207  					return true
  1208  				}
  1209  			}
  1210  		}
  1211  	}
  1212  
  1213  	return true
  1214  }
  1215  
// inBodyEndTagFormatting processes the formatting tag identified by tagAtom
// and tagName against the stack of open elements (p.oe) and the list of
// active formatting elements (p.afe). It may pop elements, clone formatting
// elements, and move nodes to a new parent so that misnested formatting
// markup still yields a well-formed tree.
func (p *parser) inBodyEndTagFormatting(tagAtom a.Atom, tagName string) {
	// This is the "adoption agency" algorithm, described at
	// https://html.spec.whatwg.org/multipage/syntax.html#adoptionAgency

	// TODO: this is a fairly literal line-by-line translation of that algorithm.
	// Once the code successfully parses the comprehensive test suite, we should
	// refactor this code to be more idiomatic.

	// Steps 1-4. The outer loop.
	// The loop is bounded to eight iterations.
	for i := 0; i < 8; i++ {
		// Step 5. Find the formatting element.
		// Search p.afe from the most recent entry backwards, stopping at the
		// first scope marker.
		var formattingElement *Node
		for j := len(p.afe) - 1; j >= 0; j-- {
			if p.afe[j].Type == scopeMarkerNode {
				break
			}
			if p.afe[j].DataAtom == tagAtom {
				formattingElement = p.afe[j]
				break
			}
		}
		if formattingElement == nil {
			// No matching active formatting element: fall back to the
			// "any other end tag" handling.
			p.inBodyEndTagOther(tagAtom, tagName)
			return
		}
		feIndex := p.oe.index(formattingElement)
		if feIndex == -1 {
			// The formatting element is no longer on the stack of open
			// elements; drop it from the active formatting list and stop.
			p.afe.remove(formattingElement)
			return
		}
		if !p.elementInScope(defaultScope, tagAtom) {
			// Ignore the tag.
			return
		}

		// Steps 9-10. Find the furthest block.
		// This is the first special element at or above the formatting
		// element's position in p.oe.
		var furthestBlock *Node
		for _, e := range p.oe[feIndex:] {
			if isSpecialElement(e) {
				furthestBlock = e
				break
			}
		}
		if furthestBlock == nil {
			// No furthest block: pop everything up to and including the
			// formatting element, then forget it as an active formatting
			// element.
			e := p.oe.pop()
			for e != formattingElement {
				e = p.oe.pop()
			}
			p.afe.remove(e)
			return
		}

		// Steps 11-12. Find the common ancestor and bookmark node.
		// NOTE(review): this indexes p.oe[feIndex-1] and so assumes the
		// formatting element is never the bottom entry of p.oe - confirm.
		commonAncestor := p.oe[feIndex-1]
		bookmark := p.afe.index(formattingElement)

		// Step 13. The inner loop. Find the lastNode to reparent.
		lastNode := furthestBlock
		node := furthestBlock
		x := p.oe.index(node)
		// Steps 13.1-13.2
		// NOTE(review): the loop counter is only advanced by the for clause,
		// so at most three nodes below the furthest block are visited even if
		// the formatting element has not been reached - confirm this matches
		// the intended revision of the spec.
		for j := 0; j < 3; j++ {
			// Step 13.3.
			x--
			node = p.oe[x]
			// Step 13.4 - 13.5.
			// A node that is not an active formatting element is dropped from
			// the stack of open elements.
			if p.afe.index(node) == -1 {
				p.oe.remove(node)
				continue
			}
			// Step 13.6.
			if node == formattingElement {
				break
			}
			// Step 13.7.
			// Replace node with a clone in both p.afe and p.oe.
			clone := node.clone()
			p.afe[p.afe.index(node)] = clone
			p.oe[p.oe.index(node)] = clone
			node = clone
			// Step 13.8.
			// On the first reparenting, the bookmark moves to just after the
			// clone in p.afe.
			if lastNode == furthestBlock {
				bookmark = p.afe.index(node) + 1
			}
			// Step 13.9.
			// Detach lastNode from its current parent (if any) and append it
			// to the clone.
			if lastNode.Parent != nil {
				lastNode.Parent.RemoveChild(lastNode)
			}
			node.AppendChild(lastNode)
			// Step 13.10.
			lastNode = node
		}

		// Step 14. Reparent lastNode to the common ancestor,
		// or for misnested table nodes, to the foster parent.
		if lastNode.Parent != nil {
			lastNode.Parent.RemoveChild(lastNode)
		}
		switch commonAncestor.DataAtom {
		case a.Table, a.Tbody, a.Tfoot, a.Thead, a.Tr:
			p.fosterParent(lastNode)
		default:
			commonAncestor.AppendChild(lastNode)
		}

		// Steps 15-17. Reparent nodes from the furthest block's children
		// to a clone of the formatting element.
		clone := formattingElement.clone()
		reparentChildren(clone, furthestBlock)
		furthestBlock.AppendChild(clone)

		// Step 18. Fix up the list of active formatting elements.
		if oldLoc := p.afe.index(formattingElement); oldLoc != -1 && oldLoc < bookmark {
			// Move the bookmark with the rest of the list.
			bookmark--
		}
		p.afe.remove(formattingElement)
		p.afe.insert(bookmark, clone)

		// Step 19. Fix up the stack of open elements.
		// The clone takes the slot immediately after the furthest block.
		p.oe.remove(formattingElement)
		p.oe.insert(p.oe.index(furthestBlock)+1, clone)
	}
}
  1339  
  1340  // inBodyEndTagOther performs the "any other end tag" algorithm for inBodyIM.
  1341  // "Any other end tag" handling from 12.2.6.5 The rules for parsing tokens in foreign content
  1342  // https://html.spec.whatwg.org/multipage/syntax.html#parsing-main-inforeign
  1343  func (p *parser) inBodyEndTagOther(tagAtom a.Atom, tagName string) {
  1344  	for i := len(p.oe) - 1; i >= 0; i-- {
  1345  		// Two element nodes have the same tag if they have the same Data (a
  1346  		// string-typed field). As an optimization, for common HTML tags, each
  1347  		// Data string is assigned a unique, non-zero DataAtom (a uint32-typed
  1348  		// field), since integer comparison is faster than string comparison.
  1349  		// Uncommon (custom) tags get a zero DataAtom.
  1350  		//
  1351  		// The if condition here is equivalent to (p.oe[i].Data == tagName).
  1352  		if (p.oe[i].DataAtom == tagAtom) &&
  1353  			((tagAtom != 0) || (p.oe[i].Data == tagName)) {
  1354  			p.oe = p.oe[:i]
  1355  			break
  1356  		}
  1357  		if isSpecialElement(p.oe[i]) {
  1358  			break
  1359  		}
  1360  	}
  1361  }
  1362  
  1363  // Section 12.2.6.4.8.
  1364  func textIM(p *parser) bool {
  1365  	switch p.tok.Type {
  1366  	case ErrorToken:
  1367  		p.oe.pop()
  1368  	case TextToken:
  1369  		d := p.tok.Data
  1370  		if n := p.oe.top(); n.DataAtom == a.Textarea && n.FirstChild == nil {
  1371  			// Ignore a newline at the start of a <textarea> block.
  1372  			if d != "" && d[0] == '\r' {
  1373  				d = d[1:]
  1374  			}
  1375  			if d != "" && d[0] == '\n' {
  1376  				d = d[1:]
  1377  			}
  1378  		}
  1379  		if d == "" {
  1380  			return true
  1381  		}
  1382  		p.addText(d)
  1383  		return true
  1384  	case EndTagToken:
  1385  		p.oe.pop()
  1386  	}
  1387  	p.im = p.originalIM
  1388  	p.originalIM = nil
  1389  	return p.tok.Type == EndTagToken
  1390  }
  1391  
  1392  // Section 12.2.6.4.9.
  1393  func inTableIM(p *parser) bool {
  1394  	switch p.tok.Type {
  1395  	case TextToken:
  1396  		p.tok.Data = strings.Replace(p.tok.Data, "\x00", "", -1)
  1397  		switch p.oe.top().DataAtom {
  1398  		case a.Table, a.Tbody, a.Tfoot, a.Thead, a.Tr:
  1399  			if strings.Trim(p.tok.Data, whitespace) == "" {
  1400  				p.addText(p.tok.Data)
  1401  				return true
  1402  			}
  1403  		}
  1404  	case StartTagToken:
  1405  		switch p.tok.DataAtom {
  1406  		case a.Caption:
  1407  			p.clearStackToContext(tableScope)
  1408  			p.afe = append(p.afe, &scopeMarker)
  1409  			p.addElement()
  1410  			p.im = inCaptionIM
  1411  			return true
  1412  		case a.Colgroup:
  1413  			p.clearStackToContext(tableScope)
  1414  			p.addElement()
  1415  			p.im = inColumnGroupIM
  1416  			return true
  1417  		case a.Col:
  1418  			p.parseImpliedToken(StartTagToken, a.Colgroup, a.Colgroup.String())
  1419  			return false
  1420  		case a.Tbody, a.Tfoot, a.Thead:
  1421  			p.clearStackToContext(tableScope)
  1422  			p.addElement()
  1423  			p.im = inTableBodyIM
  1424  			return true
  1425  		case a.Td, a.Th, a.Tr:
  1426  			p.parseImpliedToken(StartTagToken, a.Tbody, a.Tbody.String())
  1427  			return false
  1428  		case a.Table:
  1429  			if p.popUntil(tableScope, a.Table) {
  1430  				p.resetInsertionMode()
  1431  				return false
  1432  			}
  1433  			// Ignore the token.
  1434  			return true
  1435  		case a.Style, a.Script, a.Template:
  1436  			return inHeadIM(p)
  1437  		case a.Input:
  1438  			for _, t := range p.tok.Attr {
  1439  				if t.Key == "type" && strings.ToLower(t.Val) == "hidden" {
  1440  					p.addElement()
  1441  					p.oe.pop()
  1442  					return true
  1443  				}
  1444  			}
  1445  			// Otherwise drop down to the default action.
  1446  		case a.Form:
  1447  			if p.oe.contains(a.Template) || p.form != nil {
  1448  				// Ignore the token.
  1449  				return true
  1450  			}
  1451  			p.addElement()
  1452  			p.form = p.oe.pop()
  1453  		case a.Select:
  1454  			p.reconstructActiveFormattingElements()
  1455  			switch p.top().DataAtom {
  1456  			case a.Table, a.Tbody, a.Tfoot, a.Thead, a.Tr:
  1457  				p.fosterParenting = true
  1458  			}
  1459  			p.addElement()
  1460  			p.fosterParenting = false
  1461  			p.framesetOK = false
  1462  			p.im = inSelectInTableIM
  1463  			return true
  1464  		}
  1465  	case EndTagToken:
  1466  		switch p.tok.DataAtom {
  1467  		case a.Table:
  1468  			if p.popUntil(tableScope, a.Table) {
  1469  				p.resetInsertionMode()
  1470  				return true
  1471  			}
  1472  			// Ignore the token.
  1473  			return true
  1474  		case a.Body, a.Caption, a.Col, a.Colgroup, a.Html, a.Tbody, a.Td, a.Tfoot, a.Th, a.Thead, a.Tr:
  1475  			// Ignore the token.
  1476  			return true
  1477  		case a.Template:
  1478  			return inHeadIM(p)
  1479  		}
  1480  	case CommentToken:
  1481  		p.addChild(&Node{
  1482  			Type: CommentNode,
  1483  			Data: p.tok.Data,
  1484  		})
  1485  		return true
  1486  	case DoctypeToken:
  1487  		// Ignore the token.
  1488  		return true
  1489  	case ErrorToken:
  1490  		return inBodyIM(p)
  1491  	}
  1492  
  1493  	p.fosterParenting = true
  1494  	defer func() { p.fosterParenting = false }()
  1495  
  1496  	return inBodyIM(p)
  1497  }
  1498  
  1499  // Section 12.2.6.4.11.
  1500  func inCaptionIM(p *parser) bool {
  1501  	switch p.tok.Type {
  1502  	case StartTagToken:
  1503  		switch p.tok.DataAtom {
  1504  		case a.Caption, a.Col, a.Colgroup, a.Tbody, a.Td, a.Tfoot, a.Thead, a.Tr:
  1505  			if p.popUntil(tableScope, a.Caption) {
  1506  				p.clearActiveFormattingElements()
  1507  				p.im = inTableIM
  1508  				return false
  1509  			} else {
  1510  				// Ignore the token.
  1511  				return true
  1512  			}
  1513  		case a.Select:
  1514  			p.reconstructActiveFormattingElements()
  1515  			p.addElement()
  1516  			p.framesetOK = false
  1517  			p.im = inSelectInTableIM
  1518  			return true
  1519  		}
  1520  	case EndTagToken:
  1521  		switch p.tok.DataAtom {
  1522  		case a.Caption:
  1523  			if p.popUntil(tableScope, a.Caption) {
  1524  				p.clearActiveFormattingElements()
  1525  				p.im = inTableIM
  1526  			}
  1527  			return true
  1528  		case a.Table:
  1529  			if p.popUntil(tableScope, a.Caption) {
  1530  				p.clearActiveFormattingElements()
  1531  				p.im = inTableIM
  1532  				return false
  1533  			} else {
  1534  				// Ignore the token.
  1535  				return true
  1536  			}
  1537  		case a.Body, a.Col, a.Colgroup, a.Html, a.Tbody, a.Td, a.Tfoot, a.Th, a.Thead, a.Tr:
  1538  			// Ignore the token.
  1539  			return true
  1540  		}
  1541  	}
  1542  	return inBodyIM(p)
  1543  }
  1544  
  1545  // Section 12.2.6.4.12.
  1546  func inColumnGroupIM(p *parser) bool {
  1547  	switch p.tok.Type {
  1548  	case TextToken:
  1549  		s := strings.TrimLeft(p.tok.Data, whitespace)
  1550  		if len(s) < len(p.tok.Data) {
  1551  			// Add the initial whitespace to the current node.
  1552  			p.addText(p.tok.Data[:len(p.tok.Data)-len(s)])
  1553  			if s == "" {
  1554  				return true
  1555  			}
  1556  			p.tok.Data = s
  1557  		}
  1558  	case CommentToken:
  1559  		p.addChild(&Node{
  1560  			Type: CommentNode,
  1561  			Data: p.tok.Data,
  1562  		})
  1563  		return true
  1564  	case DoctypeToken:
  1565  		// Ignore the token.
  1566  		return true
  1567  	case StartTagToken:
  1568  		switch p.tok.DataAtom {
  1569  		case a.Html:
  1570  			return inBodyIM(p)
  1571  		case a.Col:
  1572  			p.addElement()
  1573  			p.oe.pop()
  1574  			p.acknowledgeSelfClosingTag()
  1575  			return true
  1576  		case a.Template:
  1577  			return inHeadIM(p)
  1578  		}
  1579  	case EndTagToken:
  1580  		switch p.tok.DataAtom {
  1581  		case a.Colgroup:
  1582  			if p.oe.top().DataAtom == a.Colgroup {
  1583  				p.oe.pop()
  1584  				p.im = inTableIM
  1585  			}
  1586  			return true
  1587  		case a.Col:
  1588  			// Ignore the token.
  1589  			return true
  1590  		case a.Template:
  1591  			return inHeadIM(p)
  1592  		}
  1593  	case ErrorToken:
  1594  		return inBodyIM(p)
  1595  	}
  1596  	if p.oe.top().DataAtom != a.Colgroup {
  1597  		return true
  1598  	}
  1599  	p.oe.pop()
  1600  	p.im = inTableIM
  1601  	return false
  1602  }
  1603  
  1604  // Section 12.2.6.4.13.
  1605  func inTableBodyIM(p *parser) bool {
  1606  	switch p.tok.Type {
  1607  	case StartTagToken:
  1608  		switch p.tok.DataAtom {
  1609  		case a.Tr:
  1610  			p.clearStackToContext(tableBodyScope)
  1611  			p.addElement()
  1612  			p.im = inRowIM
  1613  			return true
  1614  		case a.Td, a.Th:
  1615  			p.parseImpliedToken(StartTagToken, a.Tr, a.Tr.String())
  1616  			return false
  1617  		case a.Caption, a.Col, a.Colgroup, a.Tbody, a.Tfoot, a.Thead:
  1618  			if p.popUntil(tableScope, a.Tbody, a.Thead, a.Tfoot) {
  1619  				p.im = inTableIM
  1620  				return false
  1621  			}
  1622  			// Ignore the token.
  1623  			return true
  1624  		}
  1625  	case EndTagToken:
  1626  		switch p.tok.DataAtom {
  1627  		case a.Tbody, a.Tfoot, a.Thead:
  1628  			if p.elementInScope(tableScope, p.tok.DataAtom) {
  1629  				p.clearStackToContext(tableBodyScope)
  1630  				p.oe.pop()
  1631  				p.im = inTableIM
  1632  			}
  1633  			return true
  1634  		case a.Table:
  1635  			if p.popUntil(tableScope, a.Tbody, a.Thead, a.Tfoot) {
  1636  				p.im = inTableIM
  1637  				return false
  1638  			}
  1639  			// Ignore the token.
  1640  			return true
  1641  		case a.Body, a.Caption, a.Col, a.Colgroup, a.Html, a.Td, a.Th, a.Tr:
  1642  			// Ignore the token.
  1643  			return true
  1644  		}
  1645  	case CommentToken:
  1646  		p.addChild(&Node{
  1647  			Type: CommentNode,
  1648  			Data: p.tok.Data,
  1649  		})
  1650  		return true
  1651  	}
  1652  
  1653  	return inTableIM(p)
  1654  }
  1655  
  1656  // Section 12.2.6.4.14.
  1657  func inRowIM(p *parser) bool {
  1658  	switch p.tok.Type {
  1659  	case StartTagToken:
  1660  		switch p.tok.DataAtom {
  1661  		case a.Td, a.Th:
  1662  			p.clearStackToContext(tableRowScope)
  1663  			p.addElement()
  1664  			p.afe = append(p.afe, &scopeMarker)
  1665  			p.im = inCellIM
  1666  			return true
  1667  		case a.Caption, a.Col, a.Colgroup, a.Tbody, a.Tfoot, a.Thead, a.Tr:
  1668  			if p.popUntil(tableScope, a.Tr) {
  1669  				p.im = inTableBodyIM
  1670  				return false
  1671  			}
  1672  			// Ignore the token.
  1673  			return true
  1674  		}
  1675  	case EndTagToken:
  1676  		switch p.tok.DataAtom {
  1677  		case a.Tr:
  1678  			if p.popUntil(tableScope, a.Tr) {
  1679  				p.im = inTableBodyIM
  1680  				return true
  1681  			}
  1682  			// Ignore the token.
  1683  			return true
  1684  		case a.Table:
  1685  			if p.popUntil(tableScope, a.Tr) {
  1686  				p.im = inTableBodyIM
  1687  				return false
  1688  			}
  1689  			// Ignore the token.
  1690  			return true
  1691  		case a.Tbody, a.Tfoot, a.Thead:
  1692  			if p.elementInScope(tableScope, p.tok.DataAtom) {
  1693  				p.parseImpliedToken(EndTagToken, a.Tr, a.Tr.String())
  1694  				return false
  1695  			}
  1696  			// Ignore the token.
  1697  			return true
  1698  		case a.Body, a.Caption, a.Col, a.Colgroup, a.Html, a.Td, a.Th:
  1699  			// Ignore the token.
  1700  			return true
  1701  		}
  1702  	}
  1703  
  1704  	return inTableIM(p)
  1705  }
  1706  
  1707  // Section 12.2.6.4.15.
  1708  func inCellIM(p *parser) bool {
  1709  	switch p.tok.Type {
  1710  	case StartTagToken:
  1711  		switch p.tok.DataAtom {
  1712  		case a.Caption, a.Col, a.Colgroup, a.Tbody, a.Td, a.Tfoot, a.Th, a.Thead, a.Tr:
  1713  			if p.popUntil(tableScope, a.Td, a.Th) {
  1714  				// Close the cell and reprocess.
  1715  				p.clearActiveFormattingElements()
  1716  				p.im = inRowIM
  1717  				return false
  1718  			}
  1719  			// Ignore the token.
  1720  			return true
  1721  		case a.Select:
  1722  			p.reconstructActiveFormattingElements()
  1723  			p.addElement()
  1724  			p.framesetOK = false
  1725  			p.im = inSelectInTableIM
  1726  			return true
  1727  		}
  1728  	case EndTagToken:
  1729  		switch p.tok.DataAtom {
  1730  		case a.Td, a.Th:
  1731  			if !p.popUntil(tableScope, p.tok.DataAtom) {
  1732  				// Ignore the token.
  1733  				return true
  1734  			}
  1735  			p.clearActiveFormattingElements()
  1736  			p.im = inRowIM
  1737  			return true
  1738  		case a.Body, a.Caption, a.Col, a.Colgroup, a.Html:
  1739  			// Ignore the token.
  1740  			return true
  1741  		case a.Table, a.Tbody, a.Tfoot, a.Thead, a.Tr:
  1742  			if !p.elementInScope(tableScope, p.tok.DataAtom) {
  1743  				// Ignore the token.
  1744  				return true
  1745  			}
  1746  			// Close the cell and reprocess.
  1747  			if p.popUntil(tableScope, a.Td, a.Th) {
  1748  				p.clearActiveFormattingElements()
  1749  			}
  1750  			p.im = inRowIM
  1751  			return false
  1752  		}
  1753  	}
  1754  	return inBodyIM(p)
  1755  }
  1756  
  1757  // Section 12.2.6.4.16.
  1758  func inSelectIM(p *parser) bool {
  1759  	switch p.tok.Type {
  1760  	case TextToken:
  1761  		p.addText(strings.Replace(p.tok.Data, "\x00", "", -1))
  1762  	case StartTagToken:
  1763  		switch p.tok.DataAtom {
  1764  		case a.Html:
  1765  			return inBodyIM(p)
  1766  		case a.Option:
  1767  			if p.top().DataAtom == a.Option {
  1768  				p.oe.pop()
  1769  			}
  1770  			p.addElement()
  1771  		case a.Optgroup:
  1772  			if p.top().DataAtom == a.Option {
  1773  				p.oe.pop()
  1774  			}
  1775  			if p.top().DataAtom == a.Optgroup {
  1776  				p.oe.pop()
  1777  			}
  1778  			p.addElement()
  1779  		case a.Select:
  1780  			if p.popUntil(selectScope, a.Select) {
  1781  				p.resetInsertionMode()
  1782  			} else {
  1783  				// Ignore the token.
  1784  				return true
  1785  			}
  1786  		case a.Input, a.Keygen, a.Textarea:
  1787  			if p.elementInScope(selectScope, a.Select) {
  1788  				p.parseImpliedToken(EndTagToken, a.Select, a.Select.String())
  1789  				return false
  1790  			}
  1791  			// In order to properly ignore <textarea>, we need to change the tokenizer mode.
  1792  			p.tokenizer.NextIsNotRawText()
  1793  			// Ignore the token.
  1794  			return true
  1795  		case a.Script, a.Template:
  1796  			return inHeadIM(p)
  1797  		}
  1798  	case EndTagToken:
  1799  		switch p.tok.DataAtom {
  1800  		case a.Option:
  1801  			if p.top().DataAtom == a.Option {
  1802  				p.oe.pop()
  1803  			}
  1804  		case a.Optgroup:
  1805  			i := len(p.oe) - 1
  1806  			if p.oe[i].DataAtom == a.Option {
  1807  				i--
  1808  			}
  1809  			if p.oe[i].DataAtom == a.Optgroup {
  1810  				p.oe = p.oe[:i]
  1811  			}
  1812  		case a.Select:
  1813  			if p.popUntil(selectScope, a.Select) {
  1814  				p.resetInsertionMode()
  1815  			} else {
  1816  				// Ignore the token.
  1817  				return true
  1818  			}
  1819  		case a.Template:
  1820  			return inHeadIM(p)
  1821  		}
  1822  	case CommentToken:
  1823  		p.addChild(&Node{
  1824  			Type: CommentNode,
  1825  			Data: p.tok.Data,
  1826  		})
  1827  	case DoctypeToken:
  1828  		// Ignore the token.
  1829  		return true
  1830  	case ErrorToken:
  1831  		return inBodyIM(p)
  1832  	}
  1833  
  1834  	return true
  1835  }
  1836  
  1837  // Section 12.2.6.4.17.
  1838  func inSelectInTableIM(p *parser) bool {
  1839  	switch p.tok.Type {
  1840  	case StartTagToken, EndTagToken:
  1841  		switch p.tok.DataAtom {
  1842  		case a.Caption, a.Table, a.Tbody, a.Tfoot, a.Thead, a.Tr, a.Td, a.Th:
  1843  			if p.tok.Type == EndTagToken && !p.elementInScope(tableScope, p.tok.DataAtom) {
  1844  				// Ignore the token.
  1845  				return true
  1846  			}
  1847  			// This is like p.popUntil(selectScope, a.Select), but it also
  1848  			// matches <math select>, not just <select>. Matching the MathML
  1849  			// tag is arguably incorrect (conceptually), but it mimics what
  1850  			// Chromium does.
  1851  			for i := len(p.oe) - 1; i >= 0; i-- {
  1852  				if n := p.oe[i]; n.DataAtom == a.Select {
  1853  					p.oe = p.oe[:i]
  1854  					break
  1855  				}
  1856  			}
  1857  			p.resetInsertionMode()
  1858  			return false
  1859  		}
  1860  	}
  1861  	return inSelectIM(p)
  1862  }
  1863  
  1864  // Section 12.2.6.4.18.
  1865  func inTemplateIM(p *parser) bool {
  1866  	switch p.tok.Type {
  1867  	case TextToken, CommentToken, DoctypeToken:
  1868  		return inBodyIM(p)
  1869  	case StartTagToken:
  1870  		switch p.tok.DataAtom {
  1871  		case a.Base, a.Basefont, a.Bgsound, a.Link, a.Meta, a.Noframes, a.Script, a.Style, a.Template, a.Title:
  1872  			return inHeadIM(p)
  1873  		case a.Caption, a.Colgroup, a.Tbody, a.Tfoot, a.Thead:
  1874  			p.templateStack.pop()
  1875  			p.templateStack = append(p.templateStack, inTableIM)
  1876  			p.im = inTableIM
  1877  			return false
  1878  		case a.Col:
  1879  			p.templateStack.pop()
  1880  			p.templateStack = append(p.templateStack, inColumnGroupIM)
  1881  			p.im = inColumnGroupIM
  1882  			return false
  1883  		case a.Tr:
  1884  			p.templateStack.pop()
  1885  			p.templateStack = append(p.templateStack, inTableBodyIM)
  1886  			p.im = inTableBodyIM
  1887  			return false
  1888  		case a.Td, a.Th:
  1889  			p.templateStack.pop()
  1890  			p.templateStack = append(p.templateStack, inRowIM)
  1891  			p.im = inRowIM
  1892  			return false
  1893  		default:
  1894  			p.templateStack.pop()
  1895  			p.templateStack = append(p.templateStack, inBodyIM)
  1896  			p.im = inBodyIM
  1897  			return false
  1898  		}
  1899  	case EndTagToken:
  1900  		switch p.tok.DataAtom {
  1901  		case a.Template:
  1902  			return inHeadIM(p)
  1903  		default:
  1904  			// Ignore the token.
  1905  			return true
  1906  		}
  1907  	case ErrorToken:
  1908  		if !p.oe.contains(a.Template) {
  1909  			// Ignore the token.
  1910  			return true
  1911  		}
  1912  		// TODO: remove this divergence from the HTML5 spec.
  1913  		//
  1914  		// See https://bugs.chromium.org/p/chromium/issues/detail?id=829668
  1915  		p.generateImpliedEndTags()
  1916  		for i := len(p.oe) - 1; i >= 0; i-- {
  1917  			if n := p.oe[i]; n.Namespace == "" && n.DataAtom == a.Template {
  1918  				p.oe = p.oe[:i]
  1919  				break
  1920  			}
  1921  		}
  1922  		p.clearActiveFormattingElements()
  1923  		p.templateStack.pop()
  1924  		p.resetInsertionMode()
  1925  		return false
  1926  	}
  1927  	return false
  1928  }
  1929  
  1930  // Section 12.2.6.4.19.
  1931  func afterBodyIM(p *parser) bool {
  1932  	switch p.tok.Type {
  1933  	case ErrorToken:
  1934  		// Stop parsing.
  1935  		return true
  1936  	case TextToken:
  1937  		s := strings.TrimLeft(p.tok.Data, whitespace)
  1938  		if len(s) == 0 {
  1939  			// It was all whitespace.
  1940  			return inBodyIM(p)
  1941  		}
  1942  	case StartTagToken:
  1943  		if p.tok.DataAtom == a.Html {
  1944  			return inBodyIM(p)
  1945  		}
  1946  	case EndTagToken:
  1947  		if p.tok.DataAtom == a.Html {
  1948  			if !p.fragment {
  1949  				p.im = afterAfterBodyIM
  1950  			}
  1951  			return true
  1952  		}
  1953  	case CommentToken:
  1954  		// The comment is attached to the <html> element.
  1955  		if len(p.oe) < 1 || p.oe[0].DataAtom != a.Html {
  1956  			panic("html: bad parser state: <html> element not found, in the after-body insertion mode")
  1957  		}
  1958  		p.oe[0].AppendChild(&Node{
  1959  			Type: CommentNode,
  1960  			Data: p.tok.Data,
  1961  		})
  1962  		return true
  1963  	}
  1964  	p.im = inBodyIM
  1965  	return false
  1966  }
  1967  
  1968  // Section 12.2.6.4.20.
  1969  func inFramesetIM(p *parser) bool {
  1970  	switch p.tok.Type {
  1971  	case CommentToken:
  1972  		p.addChild(&Node{
  1973  			Type: CommentNode,
  1974  			Data: p.tok.Data,
  1975  		})
  1976  	case TextToken:
  1977  		// Ignore all text but whitespace.
  1978  		s := strings.Map(func(c rune) rune {
  1979  			switch c {
  1980  			case ' ', '\t', '\n', '\f', '\r':
  1981  				return c
  1982  			}
  1983  			return -1
  1984  		}, p.tok.Data)
  1985  		if s != "" {
  1986  			p.addText(s)
  1987  		}
  1988  	case StartTagToken:
  1989  		switch p.tok.DataAtom {
  1990  		case a.Html:
  1991  			return inBodyIM(p)
  1992  		case a.Frameset:
  1993  			p.addElement()
  1994  		case a.Frame:
  1995  			p.addElement()
  1996  			p.oe.pop()
  1997  			p.acknowledgeSelfClosingTag()
  1998  		case a.Noframes:
  1999  			return inHeadIM(p)
  2000  		}
  2001  	case EndTagToken:
  2002  		switch p.tok.DataAtom {
  2003  		case a.Frameset:
  2004  			if p.oe.top().DataAtom != a.Html {
  2005  				p.oe.pop()
  2006  				if p.oe.top().DataAtom != a.Frameset {
  2007  					p.im = afterFramesetIM
  2008  					return true
  2009  				}
  2010  			}
  2011  		}
  2012  	default:
  2013  		// Ignore the token.
  2014  	}
  2015  	return true
  2016  }
  2017  
  2018  // Section 12.2.6.4.21.
  2019  func afterFramesetIM(p *parser) bool {
  2020  	switch p.tok.Type {
  2021  	case CommentToken:
  2022  		p.addChild(&Node{
  2023  			Type: CommentNode,
  2024  			Data: p.tok.Data,
  2025  		})
  2026  	case TextToken:
  2027  		// Ignore all text but whitespace.
  2028  		s := strings.Map(func(c rune) rune {
  2029  			switch c {
  2030  			case ' ', '\t', '\n', '\f', '\r':
  2031  				return c
  2032  			}
  2033  			return -1
  2034  		}, p.tok.Data)
  2035  		if s != "" {
  2036  			p.addText(s)
  2037  		}
  2038  	case StartTagToken:
  2039  		switch p.tok.DataAtom {
  2040  		case a.Html:
  2041  			return inBodyIM(p)
  2042  		case a.Noframes:
  2043  			return inHeadIM(p)
  2044  		}
  2045  	case EndTagToken:
  2046  		switch p.tok.DataAtom {
  2047  		case a.Html:
  2048  			p.im = afterAfterFramesetIM
  2049  			return true
  2050  		}
  2051  	default:
  2052  		// Ignore the token.
  2053  	}
  2054  	return true
  2055  }
  2056  
  2057  // Section 12.2.6.4.22.
  2058  func afterAfterBodyIM(p *parser) bool {
  2059  	switch p.tok.Type {
  2060  	case ErrorToken:
  2061  		// Stop parsing.
  2062  		return true
  2063  	case TextToken:
  2064  		s := strings.TrimLeft(p.tok.Data, whitespace)
  2065  		if len(s) == 0 {
  2066  			// It was all whitespace.
  2067  			return inBodyIM(p)
  2068  		}
  2069  	case StartTagToken:
  2070  		if p.tok.DataAtom == a.Html {
  2071  			return inBodyIM(p)
  2072  		}
  2073  	case CommentToken:
  2074  		p.doc.AppendChild(&Node{
  2075  			Type: CommentNode,
  2076  			Data: p.tok.Data,
  2077  		})
  2078  		return true
  2079  	case DoctypeToken:
  2080  		return inBodyIM(p)
  2081  	}
  2082  	p.im = inBodyIM
  2083  	return false
  2084  }
  2085  
  2086  // Section 12.2.6.4.23.
  2087  func afterAfterFramesetIM(p *parser) bool {
  2088  	switch p.tok.Type {
  2089  	case CommentToken:
  2090  		p.doc.AppendChild(&Node{
  2091  			Type: CommentNode,
  2092  			Data: p.tok.Data,
  2093  		})
  2094  	case TextToken:
  2095  		// Ignore all text but whitespace.
  2096  		s := strings.Map(func(c rune) rune {
  2097  			switch c {
  2098  			case ' ', '\t', '\n', '\f', '\r':
  2099  				return c
  2100  			}
  2101  			return -1
  2102  		}, p.tok.Data)
  2103  		if s != "" {
  2104  			p.tok.Data = s
  2105  			return inBodyIM(p)
  2106  		}
  2107  	case StartTagToken:
  2108  		switch p.tok.DataAtom {
  2109  		case a.Html:
  2110  			return inBodyIM(p)
  2111  		case a.Noframes:
  2112  			return inHeadIM(p)
  2113  		}
  2114  	case DoctypeToken:
  2115  		return inBodyIM(p)
  2116  	default:
  2117  		// Ignore the token.
  2118  	}
  2119  	return true
  2120  }
  2121  
  2122  const whitespaceOrNUL = whitespace + "\x00"
  2123  
  2124  // Section 12.2.6.5
  2125  func parseForeignContent(p *parser) bool {
  2126  	switch p.tok.Type {
  2127  	case TextToken:
  2128  		if p.framesetOK {
  2129  			p.framesetOK = strings.TrimLeft(p.tok.Data, whitespaceOrNUL) == ""
  2130  		}
  2131  		p.tok.Data = strings.Replace(p.tok.Data, "\x00", "\ufffd", -1)
  2132  		p.addText(p.tok.Data)
  2133  	case CommentToken:
  2134  		p.addChild(&Node{
  2135  			Type: CommentNode,
  2136  			Data: p.tok.Data,
  2137  		})
  2138  	case StartTagToken:
  2139  		b := breakout[p.tok.Data]
  2140  		if p.tok.DataAtom == a.Font {
  2141  		loop:
  2142  			for _, attr := range p.tok.Attr {
  2143  				switch attr.Key {
  2144  				case "color", "face", "size":
  2145  					b = true
  2146  					break loop
  2147  				}
  2148  			}
  2149  		}
  2150  		if b {
  2151  			for i := len(p.oe) - 1; i >= 0; i-- {
  2152  				n := p.oe[i]
  2153  				if n.Namespace == "" || htmlIntegrationPoint(n) || mathMLTextIntegrationPoint(n) {
  2154  					p.oe = p.oe[:i+1]
  2155  					break
  2156  				}
  2157  			}
  2158  			return false
  2159  		}
  2160  		switch p.top().Namespace {
  2161  		case "math":
  2162  			adjustAttributeNames(p.tok.Attr, mathMLAttributeAdjustments)
  2163  		case "svg":
  2164  			// Adjust SVG tag names. The tokenizer lower-cases tag names, but
  2165  			// SVG wants e.g. "foreignObject" with a capital second "O".
  2166  			if x := svgTagNameAdjustments[p.tok.Data]; x != "" {
  2167  				p.tok.DataAtom = a.Lookup([]byte(x))
  2168  				p.tok.Data = x
  2169  			}
  2170  			adjustAttributeNames(p.tok.Attr, svgAttributeAdjustments)
  2171  		default:
  2172  			panic("html: bad parser state: unexpected namespace")
  2173  		}
  2174  		adjustForeignAttributes(p.tok.Attr)
  2175  		namespace := p.top().Namespace
  2176  		p.addElement()
  2177  		p.top().Namespace = namespace
  2178  		if namespace != "" {
  2179  			// Don't let the tokenizer go into raw text mode in foreign content
  2180  			// (e.g. in an SVG <title> tag).
  2181  			p.tokenizer.NextIsNotRawText()
  2182  		}
  2183  		if p.hasSelfClosingToken {
  2184  			p.oe.pop()
  2185  			p.acknowledgeSelfClosingTag()
  2186  		}
  2187  	case EndTagToken:
  2188  		for i := len(p.oe) - 1; i >= 0; i-- {
  2189  			if p.oe[i].Namespace == "" {
  2190  				return p.im(p)
  2191  			}
  2192  			if strings.EqualFold(p.oe[i].Data, p.tok.Data) {
  2193  				p.oe = p.oe[:i]
  2194  				break
  2195  			}
  2196  		}
  2197  		return true
  2198  	default:
  2199  		// Ignore the token.
  2200  	}
  2201  	return true
  2202  }
  2203  
  2204  // Section 12.2.6.
  2205  func (p *parser) inForeignContent() bool {
  2206  	if len(p.oe) == 0 {
  2207  		return false
  2208  	}
  2209  	n := p.oe[len(p.oe)-1]
  2210  	if n.Namespace == "" {
  2211  		return false
  2212  	}
  2213  	if mathMLTextIntegrationPoint(n) {
  2214  		if p.tok.Type == StartTagToken && p.tok.DataAtom != a.Mglyph && p.tok.DataAtom != a.Malignmark {
  2215  			return false
  2216  		}
  2217  		if p.tok.Type == TextToken {
  2218  			return false
  2219  		}
  2220  	}
  2221  	if n.Namespace == "math" && n.DataAtom == a.AnnotationXml && p.tok.Type == StartTagToken && p.tok.DataAtom == a.Svg {
  2222  		return false
  2223  	}
  2224  	if htmlIntegrationPoint(n) && (p.tok.Type == StartTagToken || p.tok.Type == TextToken) {
  2225  		return false
  2226  	}
  2227  	if p.tok.Type == ErrorToken {
  2228  		return false
  2229  	}
  2230  	return true
  2231  }
  2232  
  2233  // parseImpliedToken parses a token as though it had appeared in the parser's
  2234  // input.
  2235  func (p *parser) parseImpliedToken(t TokenType, dataAtom a.Atom, data string) {
  2236  	realToken, selfClosing := p.tok, p.hasSelfClosingToken
  2237  	p.tok = Token{
  2238  		Type:     t,
  2239  		DataAtom: dataAtom,
  2240  		Data:     data,
  2241  	}
  2242  	p.hasSelfClosingToken = false
  2243  	p.parseCurrentToken()
  2244  	p.tok, p.hasSelfClosingToken = realToken, selfClosing
  2245  }
  2246  
  2247  // parseCurrentToken runs the current token through the parsing routines
  2248  // until it is consumed.
  2249  func (p *parser) parseCurrentToken() {
  2250  	if p.tok.Type == SelfClosingTagToken {
  2251  		p.hasSelfClosingToken = true
  2252  		p.tok.Type = StartTagToken
  2253  	}
  2254  
  2255  	consumed := false
  2256  	for !consumed {
  2257  		if p.inForeignContent() {
  2258  			consumed = parseForeignContent(p)
  2259  		} else {
  2260  			consumed = p.im(p)
  2261  		}
  2262  	}
  2263  
  2264  	if p.hasSelfClosingToken {
  2265  		// This is a parse error, but ignore it.
  2266  		p.hasSelfClosingToken = false
  2267  	}
  2268  }
  2269  
  2270  func (p *parser) parse() error {
  2271  	// Iterate until EOF. Any other error will cause an early return.
  2272  	var err error
  2273  	for err != io.EOF {
  2274  		// CDATA sections are allowed only in foreign content.
  2275  		n := p.oe.top()
  2276  		p.tokenizer.AllowCDATA(n != nil && n.Namespace != "")
  2277  		// Read and parse the next token.
  2278  		p.tokenizer.Next()
  2279  		p.tok = p.tokenizer.Token()
  2280  		if p.tok.Type == ErrorToken {
  2281  			err = p.tokenizer.Err()
  2282  			if err != nil && err != io.EOF {
  2283  				return err
  2284  			}
  2285  		}
  2286  		p.parseCurrentToken()
  2287  	}
  2288  	return nil
  2289  }
  2290  
  2291  // Parse returns the parse tree for the HTML from the given Reader.
  2292  //
  2293  // It implements the HTML5 parsing algorithm
  2294  // (https://html.spec.whatwg.org/multipage/syntax.html#tree-construction),
  2295  // which is very complicated. The resultant tree can contain implicitly created
  2296  // nodes that have no explicit <tag> listed in r's data, and nodes' parents can
  2297  // differ from the nesting implied by a naive processing of start and end
  2298  // <tag>s. Conversely, explicit <tag>s in r's data can be silently dropped,
  2299  // with no corresponding node in the resulting tree.
  2300  //
  2301  // The input is assumed to be UTF-8 encoded.
  2302  func Parse(r io.Reader) (*Node, error) {
  2303  	return ParseWithOptions(r)
  2304  }
  2305  
  2306  // ParseFragment parses a fragment of HTML and returns the nodes that were
  2307  // found. If the fragment is the InnerHTML for an existing element, pass that
  2308  // element in context.
  2309  //
  2310  // It has the same intricacies as Parse.
  2311  func ParseFragment(r io.Reader, context *Node) ([]*Node, error) {
  2312  	return ParseFragmentWithOptions(r, context)
  2313  }
  2314  
  2315  // ParseOption configures a parser.
  2316  type ParseOption func(p *parser)
  2317  
  2318  // ParseOptionEnableScripting configures the scripting flag.
  2319  // https://html.spec.whatwg.org/multipage/webappapis.html#enabling-and-disabling-scripting
  2320  //
  2321  // By default, scripting is enabled.
  2322  func ParseOptionEnableScripting(enable bool) ParseOption {
  2323  	return func(p *parser) {
  2324  		p.scripting = enable
  2325  	}
  2326  }
  2327  
  2328  // ParseWithOptions is like Parse, with options.
  2329  func ParseWithOptions(r io.Reader, opts ...ParseOption) (*Node, error) {
  2330  	p := &parser{
  2331  		tokenizer: NewTokenizer(r),
  2332  		doc: &Node{
  2333  			Type: DocumentNode,
  2334  		},
  2335  		scripting:  true,
  2336  		framesetOK: true,
  2337  		im:         initialIM,
  2338  	}
  2339  
  2340  	for _, f := range opts {
  2341  		f(p)
  2342  	}
  2343  
  2344  	err := p.parse()
  2345  	if err != nil {
  2346  		return nil, err
  2347  	}
  2348  	return p.doc, nil
  2349  }
  2350  
  2351  // ParseFragmentWithOptions is like ParseFragment, with options.
  2352  func ParseFragmentWithOptions(r io.Reader, context *Node, opts ...ParseOption) ([]*Node, error) {
  2353  	contextTag := ""
  2354  	if context != nil {
  2355  		if context.Type != ElementNode {
  2356  			return nil, errors.New("html: ParseFragment of non-element Node")
  2357  		}
  2358  		// The next check isn't just context.DataAtom.String() == context.Data because
  2359  		// it is valid to pass an element whose tag isn't a known atom. For example,
  2360  		// DataAtom == 0 and Data = "tagfromthefuture" is perfectly consistent.
  2361  		if context.DataAtom != a.Lookup([]byte(context.Data)) {
  2362  			return nil, fmt.Errorf("html: inconsistent Node: DataAtom=%q, Data=%q", context.DataAtom, context.Data)
  2363  		}
  2364  		contextTag = context.DataAtom.String()
  2365  	}
  2366  	p := &parser{
  2367  		tokenizer: NewTokenizerFragment(r, contextTag),
  2368  		doc: &Node{
  2369  			Type: DocumentNode,
  2370  		},
  2371  		scripting: true,
  2372  		fragment:  true,
  2373  		context:   context,
  2374  	}
  2375  
  2376  	for _, f := range opts {
  2377  		f(p)
  2378  	}
  2379  
  2380  	root := &Node{
  2381  		Type:     ElementNode,
  2382  		DataAtom: a.Html,
  2383  		Data:     a.Html.String(),
  2384  	}
  2385  	p.doc.AppendChild(root)
  2386  	p.oe = nodeStack{root}
  2387  	if context != nil && context.DataAtom == a.Template {
  2388  		p.templateStack = append(p.templateStack, inTemplateIM)
  2389  	}
  2390  	p.resetInsertionMode()
  2391  
  2392  	for n := context; n != nil; n = n.Parent {
  2393  		if n.Type == ElementNode && n.DataAtom == a.Form {
  2394  			p.form = n
  2395  			break
  2396  		}
  2397  	}
  2398  
  2399  	err := p.parse()
  2400  	if err != nil {
  2401  		return nil, err
  2402  	}
  2403  
  2404  	parent := p.doc
  2405  	if context != nil {
  2406  		parent = root
  2407  	}
  2408  
  2409  	var result []*Node
  2410  	for c := parent.FirstChild; c != nil; {
  2411  		next := c.NextSibling
  2412  		parent.RemoveChild(c)
  2413  		result = append(result, c)
  2414  		c = next
  2415  	}
  2416  	return result, nil
  2417  }