github.com/vugu/vugu@v0.3.6-0.20240430171613-3f6f402e014b/internal/htmlx/parse.go (about)

     1  // Copyright 2010 The Go Authors. All rights reserved.
     2  // Use of this source code is governed by a BSD-style
     3  // license that can be found in the LICENSE file.
     4  
     5  package htmlx
     6  
     7  import (
     8  	"errors"
     9  	"fmt"
    10  	"io"
    11  	"strings"
    12  
    13  	a "github.com/vugu/vugu/internal/htmlx/atom"
    14  )
    15  
// A parser implements the HTML5 parsing algorithm:
// https://html.spec.whatwg.org/multipage/syntax.html#tree-construction
//
// The struct holds the whole tree-construction state: the token source, the
// document being built, the element stacks, and the current insertion mode.
type parser struct {
	// tokenizer provides the tokens for the parser.
	tokenizer *Tokenizer
	// tok is the most recently read token.
	tok Token
	// Self-closing tags like <hr/> are treated as start tags, except that
	// hasSelfClosingToken is set while they are being processed.
	// acknowledgeSelfClosingTag clears it again.
	hasSelfClosingToken bool
	// doc is the document root element. It is what top() returns when the
	// stack of open elements yields no node.
	doc *Node
	// The stack of open elements (section 12.2.4.2) and active formatting
	// elements (section 12.2.4.3).
	oe, afe nodeStack
	// Element pointers (section 12.2.4.4).
	head, form *Node
	// Other parsing state flags (section 12.2.4.5).
	scripting, framesetOK bool
	// The stack of template insertion modes
	templateStack insertionModeStack
	// im is the current insertion mode.
	im insertionMode
	// originalIM is the insertion mode to go back to after completing a text
	// or inTableText insertion mode. It is recorded by setOriginalIM.
	originalIM insertionMode
	// fosterParenting is whether new elements should be inserted according to
	// the foster parenting rules (section 12.2.6.1).
	fosterParenting bool
	// quirks is whether the parser is operating in "quirks mode."
	quirks bool
	// fragment is whether the parser is parsing an HTML fragment.
	fragment bool
	// context is the context element when parsing an HTML fragment
	// (section 12.4).
	context *Node
}
    53  
    54  func (p *parser) top() *Node {
    55  	if n := p.oe.top(); n != nil {
    56  		return n
    57  	}
    58  	return p.doc
    59  }
    60  
    61  // Stop tags for use in popUntil. These come from section 12.2.4.2.
    62  var (
    63  	defaultScopeStopTags = map[string][]a.Atom{
    64  		"":     {a.Applet, a.Caption, a.Html, a.Table, a.Td, a.Th, a.Marquee, a.Object, a.Template},
    65  		"math": {a.AnnotationXml, a.Mi, a.Mn, a.Mo, a.Ms, a.Mtext},
    66  		"svg":  {a.Desc, a.ForeignObject, a.Title},
    67  	}
    68  )
    69  
// scope selects which set of boundary tags limits a search of the stack of
// open elements (see indexOfElementInScope and clearStackToContext).
type scope int

const (
	// defaultScope uses only the defaultScopeStopTags.
	defaultScope scope = iota
	// listItemScope additionally stops at ol and ul.
	listItemScope
	// buttonScope additionally stops at button.
	buttonScope
	// tableScope stops at html, table and template.
	tableScope
	// tableRowScope is only handled by clearStackToContext, where it stops
	// at html, tr and template.
	tableRowScope
	// tableBodyScope is only handled by clearStackToContext, where it stops
	// at html, tbody, tfoot, thead and template.
	tableBodyScope
	// selectScope stops at anything other than optgroup and option.
	selectScope
)
    81  
    82  // popUntil pops the stack of open elements at the highest element whose tag
    83  // is in matchTags, provided there is no higher element in the scope's stop
    84  // tags (as defined in section 12.2.4.2). It returns whether or not there was
    85  // such an element. If there was not, popUntil leaves the stack unchanged.
    86  //
    87  // For example, the set of stop tags for table scope is: "html", "table". If
    88  // the stack was:
    89  // ["html", "body", "font", "table", "b", "i", "u"]
    90  // then popUntil(tableScope, "font") would return false, but
    91  // popUntil(tableScope, "i") would return true and the stack would become:
    92  // ["html", "body", "font", "table", "b"]
    93  //
    94  // If an element's tag is in both the stop tags and matchTags, then the stack
    95  // will be popped and the function returns true (provided, of course, there was
    96  // no higher element in the stack that was also in the stop tags). For example,
    97  // popUntil(tableScope, "table") returns true and leaves:
    98  // ["html", "body", "font"]
    99  func (p *parser) popUntil(s scope, matchTags ...a.Atom) bool {
   100  	if i := p.indexOfElementInScope(s, matchTags...); i != -1 {
   101  		p.oe = p.oe[:i]
   102  		return true
   103  	}
   104  	return false
   105  }
   106  
   107  // indexOfElementInScope returns the index in p.oe of the highest element whose
   108  // tag is in matchTags that is in scope. If no matching element is in scope, it
   109  // returns -1.
   110  func (p *parser) indexOfElementInScope(s scope, matchTags ...a.Atom) int {
   111  	for i := len(p.oe) - 1; i >= 0; i-- {
   112  		tagAtom := p.oe[i].DataAtom
   113  		if p.oe[i].Namespace == "" {
   114  			for _, t := range matchTags {
   115  				if t == tagAtom {
   116  					return i
   117  				}
   118  			}
   119  			switch s {
   120  			case defaultScope:
   121  				// No-op.
   122  			case listItemScope:
   123  				if tagAtom == a.Ol || tagAtom == a.Ul {
   124  					return -1
   125  				}
   126  			case buttonScope:
   127  				if tagAtom == a.Button {
   128  					return -1
   129  				}
   130  			case tableScope:
   131  				if tagAtom == a.Html || tagAtom == a.Table || tagAtom == a.Template {
   132  					return -1
   133  				}
   134  			case selectScope:
   135  				if tagAtom != a.Optgroup && tagAtom != a.Option {
   136  					return -1
   137  				}
   138  			default:
   139  				panic("unreachable")
   140  			}
   141  		}
   142  		switch s {
   143  		case defaultScope, listItemScope, buttonScope:
   144  			for _, t := range defaultScopeStopTags[p.oe[i].Namespace] {
   145  				if t == tagAtom {
   146  					return -1
   147  				}
   148  			}
   149  		}
   150  	}
   151  	return -1
   152  }
   153  
   154  // elementInScope is like popUntil, except that it doesn't modify the stack of
   155  // open elements.
   156  func (p *parser) elementInScope(s scope, matchTags ...a.Atom) bool {
   157  	return p.indexOfElementInScope(s, matchTags...) != -1
   158  }
   159  
   160  // clearStackToContext pops elements off the stack of open elements until a
   161  // scope-defined element is found.
   162  func (p *parser) clearStackToContext(s scope) {
   163  	for i := len(p.oe) - 1; i >= 0; i-- {
   164  		tagAtom := p.oe[i].DataAtom
   165  		switch s {
   166  		case tableScope:
   167  			if tagAtom == a.Html || tagAtom == a.Table || tagAtom == a.Template {
   168  				p.oe = p.oe[:i+1]
   169  				return
   170  			}
   171  		case tableRowScope:
   172  			if tagAtom == a.Html || tagAtom == a.Tr || tagAtom == a.Template {
   173  				p.oe = p.oe[:i+1]
   174  				return
   175  			}
   176  		case tableBodyScope:
   177  			if tagAtom == a.Html || tagAtom == a.Tbody || tagAtom == a.Tfoot || tagAtom == a.Thead || tagAtom == a.Template {
   178  				p.oe = p.oe[:i+1]
   179  				return
   180  			}
   181  		default:
   182  			panic("unreachable")
   183  		}
   184  	}
   185  }
   186  
   187  // generateImpliedEndTags pops nodes off the stack of open elements as long as
   188  // the top node has a tag name of dd, dt, li, optgroup, option, p, rb, rp, rt or rtc.
   189  // If exceptions are specified, nodes with that name will not be popped off.
   190  func (p *parser) generateImpliedEndTags(exceptions ...string) {
   191  	var i int
   192  loop:
   193  	for i = len(p.oe) - 1; i >= 0; i-- {
   194  		n := p.oe[i]
   195  		if n.Type == ElementNode {
   196  			switch n.DataAtom {
   197  			case a.Dd, a.Dt, a.Li, a.Optgroup, a.Option, a.P, a.Rb, a.Rp, a.Rt, a.Rtc:
   198  				for _, except := range exceptions {
   199  					if n.Data == except {
   200  						break loop
   201  					}
   202  				}
   203  				continue
   204  			}
   205  		}
   206  		break
   207  	}
   208  
   209  	p.oe = p.oe[:i+1]
   210  }
   211  
   212  // addChild adds a child node n to the top element, and pushes n onto the stack
   213  // of open elements if it is an element node.
   214  func (p *parser) addChild(n *Node) {
   215  	if p.shouldFosterParent() {
   216  		p.fosterParent(n)
   217  	} else {
   218  		p.top().AppendChild(n)
   219  	}
   220  
   221  	if n.Type == ElementNode {
   222  		p.oe = append(p.oe, n)
   223  	}
   224  }
   225  
   226  // shouldFosterParent returns whether the next node to be added should be
   227  // foster parented.
   228  func (p *parser) shouldFosterParent() bool {
   229  	if p.fosterParenting {
   230  		switch p.top().DataAtom {
   231  		case a.Table, a.Tbody, a.Tfoot, a.Thead, a.Tr:
   232  			return true
   233  		}
   234  	}
   235  	return false
   236  }
   237  
// fosterParent adds a child node according to the foster parenting rules.
// Section 12.2.6.1, "foster parenting".
//
// The node goes into the most recently opened template when that template is
// higher on the stack than the most recently opened table (or when there is
// no table). Otherwise it is inserted before the table in the table's parent,
// or added to the first element on the stack when there is no table. A text
// node is merged into an immediately preceding text node instead of being
// inserted.
func (p *parser) fosterParent(n *Node) {
	var table, parent, prev, template *Node
	var i int
	// Find the most recently opened table; i keeps its stack index and ends
	// at -1 when no table is open.
	for i = len(p.oe) - 1; i >= 0; i-- {
		if p.oe[i].DataAtom == a.Table {
			table = p.oe[i]
			break
		}
	}

	var j int
	// Likewise for the most recently opened template, with index j.
	for j = len(p.oe) - 1; j >= 0; j-- {
		if p.oe[j].DataAtom == a.Template {
			template = p.oe[j]
			break
		}
	}

	// A template that is higher on the stack than the table takes the node.
	if template != nil && (table == nil || j > i) {
		template.AppendChild(n)
		return
	}

	if table == nil {
		// The foster parent is the html element.
		parent = p.oe[0]
	} else {
		parent = table.Parent
	}
	if parent == nil {
		// The table has no parent node, so fall back to the element just
		// below the table on the stack.
		// NOTE(review): this indexes p.oe[i-1] and so assumes the table is
		// not at index 0 — confirm that cannot happen.
		parent = p.oe[i-1]
	}

	// prev is the node that will directly precede n after insertion.
	if table != nil {
		prev = table.PrevSibling
	} else {
		prev = parent.LastChild
	}
	// Merge adjacent text instead of creating a second text node.
	if prev != nil && prev.Type == TextNode && n.Type == TextNode {
		prev.Data += n.Data
		return
	}

	// table is nil here when no table is open.
	// NOTE(review): presumably InsertBefore with a nil reference node
	// appends — confirm against Node.InsertBefore.
	parent.InsertBefore(n, table)
}
   285  
   286  // addText adds text to the preceding node if it is a text node, or else it
   287  // calls addChild with a new text node.
   288  func (p *parser) addText(text string) {
   289  	if text == "" {
   290  		return
   291  	}
   292  
   293  	if p.shouldFosterParent() {
   294  		p.fosterParent(&Node{
   295  			Type:   TextNode,
   296  			Data:   text,
   297  			Line:   p.tok.Line,   // ?
   298  			Column: p.tok.Column, // ?
   299  		})
   300  		return
   301  	}
   302  
   303  	t := p.top()
   304  	if n := t.LastChild; n != nil && n.Type == TextNode {
   305  		n.Data += text
   306  		return
   307  	}
   308  	p.addChild(&Node{
   309  		Type:   TextNode,
   310  		Data:   text,
   311  		Line:   p.tok.Line,   // ?
   312  		Column: p.tok.Column, // ?
   313  	})
   314  }
   315  
   316  // addElement adds a child element based on the current token.
   317  func (p *parser) addElement() {
   318  	p.addChild(&Node{
   319  		Type:     ElementNode,
   320  		DataAtom: p.tok.DataAtom,
   321  		Data:     p.tok.Data,
   322  		Attr:     p.tok.Attr,
   323  		Line:     p.tok.Line,
   324  		Column:   p.tok.Column,
   325  	})
   326  }
   327  
// addFormattingElement adds an element for the current token and pushes it
// onto the list of active formatting elements, first pruning older identical
// entries (the "Noah's Ark clause").
// Section 12.2.4.3.
func (p *parser) addFormattingElement() {
	tagAtom, attr := p.tok.DataAtom, p.tok.Attr
	p.addElement()

	// Implement the Noah's Ark clause, but with three per family instead of two.
	identicalElements := 0
findIdenticalElements:
	// Walk back from the newest entry, stopping at the last scope marker.
	for i := len(p.afe) - 1; i >= 0; i-- {
		n := p.afe[i]
		if n.Type == scopeMarkerNode {
			break
		}
		if n.Type != ElementNode {
			continue
		}
		if n.Namespace != "" {
			continue
		}
		if n.DataAtom != tagAtom {
			continue
		}
		if len(n.Attr) != len(attr) {
			continue
		}
	compareAttributes:
		// Every attribute of n must have an exact counterpart (key,
		// namespace and value) among the new element's attributes.
		for _, t0 := range n.Attr {
			for _, t1 := range attr {
				if t0.Key == t1.Key && t0.Namespace == t1.Namespace && t0.Val == t1.Val {
					// Found a match for this attribute, continue with the next attribute.
					continue compareAttributes
				}
			}
			// If we get here, there is no attribute that matches t0.
			// Therefore the element is not identical to the new one.
			continue findIdenticalElements
		}

		// n is identical to the new element. From the third identical entry
		// onwards, drop it from the list.
		identicalElements++
		if identicalElements >= 3 {
			p.afe.remove(n)
		}
	}

	// p.top() is the element that addElement just pushed.
	p.afe = append(p.afe, p.top())
}
   374  
   375  // Section 12.2.4.3.
   376  func (p *parser) clearActiveFormattingElements() {
   377  	for {
   378  		n := p.afe.pop()
   379  		if len(p.afe) == 0 || n.Type == scopeMarkerNode {
   380  			return
   381  		}
   382  	}
   383  }
   384  
// reconstructActiveFormattingElements re-opens the active formatting
// elements that are no longer on the stack of open elements. Each trailing
// entry of the list that is not open (back to the last scope marker or open
// entry) is cloned, the clone is added to the tree, and the clone replaces
// the entry in the list.
// Section 12.2.4.3.
func (p *parser) reconstructActiveFormattingElements() {
	n := p.afe.top()
	if n == nil {
		// Nothing to reconstruct.
		return
	}
	// The newest entry is a marker or is still open: nothing to do.
	if n.Type == scopeMarkerNode || p.oe.index(n) != -1 {
		return
	}
	i := len(p.afe) - 1
	// Rewind to the nearest entry that is a marker or is still open. i ends
	// at that entry's index, or at -1 when the start of the list is reached.
	for n.Type != scopeMarkerNode && p.oe.index(n) == -1 {
		if i == 0 {
			i = -1
			break
		}
		i--
		n = p.afe[i]
	}
	// Advance from the entry after i to the end of the list, re-creating
	// each element in order.
	for {
		i++
		clone := p.afe[i].clone()
		p.addChild(clone)
		p.afe[i] = clone
		if i == len(p.afe)-1 {
			break
		}
	}
}
   413  
// acknowledgeSelfClosingTag clears hasSelfClosingToken, recording that the
// self-closing flag of the current token has been dealt with.
// Section 12.2.5.
func (p *parser) acknowledgeSelfClosingTag() {
	p.hasSelfClosingToken = false
}
   418  
// An insertion mode (section 12.2.4.1) is the state transition function from
// a particular state in the HTML5 parser's state machine. It updates the
// parser's fields depending on parser.tok (where ErrorToken means EOF).
// It returns whether the token was consumed; the insertion modes in this file
// return false after switching mode so that the same token can be handled
// again.
type insertionMode func(*parser) bool
   424  
   425  // setOriginalIM sets the insertion mode to return to after completing a text or
   426  // inTableText insertion mode.
   427  // Section 12.2.4.1, "using the rules for".
   428  func (p *parser) setOriginalIM() {
   429  	if p.originalIM != nil {
   430  		panic("html: bad parser state: originalIM was set twice")
   431  	}
   432  	p.originalIM = p.im
   433  }
   434  
// resetInsertionMode picks the insertion mode that matches the current stack
// of open elements. It walks the stack from the top down and stops at the
// first element whose tag determines a mode.
// Section 12.2.4.1, "reset the insertion mode".
func (p *parser) resetInsertionMode() {
	for i := len(p.oe) - 1; i >= 0; i-- {
		n := p.oe[i]
		// last marks the bottom of the stack. When a fragment context
		// element is set, it stands in for that bottom element.
		last := i == 0
		if last && p.context != nil {
			n = p.context
		}

		switch n.DataAtom {
		case a.Select:
			if !last {
				// Look at the elements below the select on the stack: a
				// template means plain "in select", a table means "in select
				// in table".
				for ancestor, first := n, p.oe[0]; ancestor != first; {
					ancestor = p.oe[p.oe.index(ancestor)-1]
					switch ancestor.DataAtom {
					case a.Template:
						p.im = inSelectIM
						return
					case a.Table:
						p.im = inSelectInTableIM
						return
					}
				}
			}
			p.im = inSelectIM
		case a.Td, a.Th:
			// TODO: remove this divergence from the HTML5 spec.
			//
			// See https://bugs.chromium.org/p/chromium/issues/detail?id=829668
			p.im = inCellIM
		case a.Tr:
			p.im = inRowIM
		case a.Tbody, a.Thead, a.Tfoot:
			p.im = inTableBodyIM
		case a.Caption:
			p.im = inCaptionIM
		case a.Colgroup:
			p.im = inColumnGroupIM
		case a.Table:
			p.im = inTableIM
		case a.Template:
			// TODO: remove this divergence from the HTML5 spec.
			// A template outside the HTML namespace is skipped.
			if n.Namespace != "" {
				continue
			}
			p.im = p.templateStack.top()
		case a.Head:
			// TODO: remove this divergence from the HTML5 spec.
			//
			// See https://bugs.chromium.org/p/chromium/issues/detail?id=829668
			p.im = inHeadIM
		case a.Body:
			p.im = inBodyIM
		case a.Frameset:
			p.im = inFramesetIM
		case a.Html:
			// Before or after the head, depending on whether a head element
			// has been recorded yet.
			if p.head == nil {
				p.im = beforeHeadIM
			} else {
				p.im = afterHeadIM
			}
		default:
			// No mode for this tag: default to "in body" at the bottom of
			// the stack, otherwise keep looking further down.
			if last {
				p.im = inBodyIM
				return
			}
			continue
		}
		return
	}
}
   506  
// whitespace is the cutset (space, tab, CR, LF, form feed) that the insertion
// modes pass to strings.TrimLeft to strip leading whitespace from text tokens.
const whitespace = " \t\r\n\f"
   508  
   509  // Section 12.2.6.4.1.
   510  func initialIM(p *parser) bool {
   511  	switch p.tok.Type {
   512  	case TextToken:
   513  		p.tok.Data = strings.TrimLeft(p.tok.Data, whitespace)
   514  		if len(p.tok.Data) == 0 {
   515  			// It was all whitespace, so ignore it.
   516  			return true
   517  		}
   518  	case CommentToken:
   519  		p.doc.AppendChild(&Node{
   520  			Type:   CommentNode,
   521  			Data:   p.tok.Data,
   522  			Line:   p.tok.Line,
   523  			Column: p.tok.Column,
   524  		})
   525  		return true
   526  	case DoctypeToken:
   527  		n, quirks := parseDoctype(p.tok.Data)
   528  		n.Line = p.tok.Line
   529  		n.Column = p.tok.Column
   530  		p.doc.AppendChild(n)
   531  		p.quirks = quirks
   532  		p.im = beforeHTMLIM
   533  		return true
   534  	}
   535  	p.quirks = true
   536  	p.im = beforeHTMLIM
   537  	return false
   538  }
   539  
   540  // Section 12.2.6.4.2.
   541  func beforeHTMLIM(p *parser) bool {
   542  	switch p.tok.Type {
   543  	case DoctypeToken:
   544  		// Ignore the token.
   545  		return true
   546  	case TextToken:
   547  		p.tok.Data = strings.TrimLeft(p.tok.Data, whitespace)
   548  		if len(p.tok.Data) == 0 {
   549  			// It was all whitespace, so ignore it.
   550  			return true
   551  		}
   552  	case StartTagToken:
   553  		if p.tok.DataAtom == a.Html {
   554  			p.addElement()
   555  			p.im = beforeHeadIM
   556  			return true
   557  		}
   558  	case EndTagToken:
   559  		switch p.tok.DataAtom {
   560  		case a.Head, a.Body, a.Html, a.Br:
   561  			p.parseImpliedToken(StartTagToken, a.Html, a.Html.String())
   562  			return false
   563  		default:
   564  			// Ignore the token.
   565  			return true
   566  		}
   567  	case CommentToken:
   568  		p.doc.AppendChild(&Node{
   569  			Type:   CommentNode,
   570  			Data:   p.tok.Data,
   571  			Line:   p.tok.Line,
   572  			Column: p.tok.Column,
   573  		})
   574  		return true
   575  	}
   576  	p.parseImpliedToken(StartTagToken, a.Html, a.Html.String())
   577  	return false
   578  }
   579  
   580  // Section 12.2.6.4.3.
   581  func beforeHeadIM(p *parser) bool {
   582  	switch p.tok.Type {
   583  	case TextToken:
   584  		p.tok.Data = strings.TrimLeft(p.tok.Data, whitespace)
   585  		if len(p.tok.Data) == 0 {
   586  			// It was all whitespace, so ignore it.
   587  			return true
   588  		}
   589  	case StartTagToken:
   590  		switch p.tok.DataAtom {
   591  		case a.Head:
   592  			p.addElement()
   593  			p.head = p.top()
   594  			p.im = inHeadIM
   595  			return true
   596  		case a.Html:
   597  			return inBodyIM(p)
   598  		}
   599  	case EndTagToken:
   600  		switch p.tok.DataAtom {
   601  		case a.Head, a.Body, a.Html, a.Br:
   602  			p.parseImpliedToken(StartTagToken, a.Head, a.Head.String())
   603  			return false
   604  		default:
   605  			// Ignore the token.
   606  			return true
   607  		}
   608  	case CommentToken:
   609  		p.addChild(&Node{
   610  			Type:   CommentNode,
   611  			Data:   p.tok.Data,
   612  			Line:   p.tok.Line,
   613  			Column: p.tok.Column,
   614  		})
   615  		return true
   616  	case DoctypeToken:
   617  		// Ignore the token.
   618  		return true
   619  	}
   620  
   621  	p.parseImpliedToken(StartTagToken, a.Head, a.Head.String())
   622  	return false
   623  }
   624  
// inHeadIM is the "in head" insertion mode.
// Section 12.2.6.4.4.
//
// Tokens that do not belong in the head cause an implied </head> end tag to
// be parsed and are left unconsumed (the function returns false).
func inHeadIM(p *parser) bool {
	switch p.tok.Type {
	case TextToken:
		s := strings.TrimLeft(p.tok.Data, whitespace)
		if len(s) < len(p.tok.Data) {
			// Add the initial whitespace to the current node.
			p.addText(p.tok.Data[:len(p.tok.Data)-len(s)])
			if s == "" {
				return true
			}
			// Keep only the non-whitespace remainder for reprocessing.
			p.tok.Data = s
		}
	case StartTagToken:
		switch p.tok.DataAtom {
		case a.Html:
			return inBodyIM(p)
		case a.Base, a.Basefont, a.Bgsound, a.Command, a.Link, a.Meta:
			// The element is added and immediately popped again, so it never
			// stays on the stack of open elements.
			p.addElement()
			p.oe.pop()
			p.acknowledgeSelfClosingTag()
			return true
		case a.Script, a.Title, a.Noscript, a.Noframes, a.Style:
			// The content is handled by textIM; remember the mode to come
			// back to afterwards.
			p.addElement()
			p.setOriginalIM()
			p.im = textIM
			return true
		case a.Head:
			// Ignore the token.
			return true
		case a.Template:
			// Push a scope marker on the active formatting elements and
			// inTemplateIM on the template insertion mode stack.
			p.addElement()
			p.afe = append(p.afe, &scopeMarker)
			p.framesetOK = false
			p.im = inTemplateIM
			p.templateStack = append(p.templateStack, inTemplateIM)
			return true
		}
	case EndTagToken:
		switch p.tok.DataAtom {
		case a.Head:
			p.oe.pop()
			p.im = afterHeadIM
			return true
		case a.Body, a.Html, a.Br:
			p.parseImpliedToken(EndTagToken, a.Head, a.Head.String())
			return false
		case a.Template:
			// An end tag without an open template is ignored.
			if !p.oe.contains(a.Template) {
				return true
			}
			// TODO: remove this divergence from the HTML5 spec.
			//
			// See https://bugs.chromium.org/p/chromium/issues/detail?id=829668
			p.generateImpliedEndTags()
			// Pop up to and including the most recently opened
			// HTML-namespace template.
			for i := len(p.oe) - 1; i >= 0; i-- {
				if n := p.oe[i]; n.Namespace == "" && n.DataAtom == a.Template {
					p.oe = p.oe[:i]
					break
				}
			}
			p.clearActiveFormattingElements()
			p.templateStack.pop()
			p.resetInsertionMode()
			return true
		default:
			// Ignore the token.
			return true
		}
	case CommentToken:
		p.addChild(&Node{
			Type:   CommentNode,
			Data:   p.tok.Data,
			Line:   p.tok.Line,
			Column: p.tok.Column,
		})
		return true
	case DoctypeToken:
		// Ignore the token.
		return true
	}

	// Anything else: close the head implicitly and leave the token
	// unconsumed.
	p.parseImpliedToken(EndTagToken, a.Head, a.Head.String())
	return false
}
   710  
// afterHeadIM is the "after head" insertion mode.
// Section 12.2.6.4.6.
//
// Tokens that are not handled here cause an implied <body> start tag to be
// parsed and are left unconsumed (the function returns false).
func afterHeadIM(p *parser) bool {
	switch p.tok.Type {
	case TextToken:
		s := strings.TrimLeft(p.tok.Data, whitespace)
		if len(s) < len(p.tok.Data) {
			// Add the initial whitespace to the current node.
			p.addText(p.tok.Data[:len(p.tok.Data)-len(s)])
			if s == "" {
				return true
			}
			// Keep only the non-whitespace remainder for reprocessing.
			p.tok.Data = s
		}
	case StartTagToken:
		switch p.tok.DataAtom {
		case a.Html:
			return inBodyIM(p)
		case a.Body:
			p.addElement()
			p.framesetOK = false
			p.im = inBodyIM
			return true
		case a.Frameset:
			p.addElement()
			p.im = inFramesetIM
			return true
		case a.Base, a.Basefont, a.Bgsound, a.Link, a.Meta, a.Noframes, a.Script, a.Style, a.Template, a.Title:
			// Head content after </head>: push the head element back onto
			// the stack of open elements while inHeadIM handles the token.
			// The deferred call removes it again once inHeadIM has returned.
			p.oe = append(p.oe, p.head)
			defer p.oe.remove(p.head)
			return inHeadIM(p)
		case a.Head:
			// Ignore the token.
			return true
		}
	case EndTagToken:
		switch p.tok.DataAtom {
		case a.Body, a.Html, a.Br:
			// Drop down to creating an implied <body> tag.
		case a.Template:
			return inHeadIM(p)
		default:
			// Ignore the token.
			return true
		}
	case CommentToken:
		p.addChild(&Node{
			Type:   CommentNode,
			Data:   p.tok.Data,
			Line:   p.tok.Line,
			Column: p.tok.Column,
		})
		return true
	case DoctypeToken:
		// Ignore the token.
		return true
	}

	// framesetOK is set back to true after the implied <body> start tag has
	// been parsed.
	p.parseImpliedToken(StartTagToken, a.Body, a.Body.String())
	p.framesetOK = true
	return false
}
   772  
   773  // copyAttributes copies attributes of src not found on dst to dst.
   774  func copyAttributes(dst *Node, src Token) {
   775  	if len(src.Attr) == 0 {
   776  		return
   777  	}
   778  	attr := map[string]string{}
   779  	for _, t := range dst.Attr {
   780  		attr[t.Key] = t.Val
   781  	}
   782  	for _, t := range src.Attr {
   783  		if _, ok := attr[t.Key]; !ok {
   784  			dst.Attr = append(dst.Attr, t)
   785  			attr[t.Key] = t.Val
   786  		}
   787  	}
   788  }
   789  
   790  // Section 12.2.6.4.7.
   791  func inBodyIM(p *parser) bool {
   792  	switch p.tok.Type {
   793  	case TextToken:
   794  		d := p.tok.Data
   795  		switch n := p.oe.top(); n.DataAtom {
   796  		case a.Pre, a.Listing:
   797  			if n.FirstChild == nil {
   798  				// Ignore a newline at the start of a <pre> block.
   799  				if d != "" && d[0] == '\r' {
   800  					d = d[1:]
   801  				}
   802  				if d != "" && d[0] == '\n' {
   803  					d = d[1:]
   804  				}
   805  			}
   806  		}
   807  		d = strings.Replace(d, "\x00", "", -1)
   808  		if d == "" {
   809  			return true
   810  		}
   811  		p.reconstructActiveFormattingElements()
   812  		p.addText(d)
   813  		if p.framesetOK && strings.TrimLeft(d, whitespace) != "" {
   814  			// There were non-whitespace characters inserted.
   815  			p.framesetOK = false
   816  		}
   817  	case StartTagToken:
   818  		switch p.tok.DataAtom {
   819  		case a.Html:
   820  			if p.oe.contains(a.Template) {
   821  				return true
   822  			}
   823  			copyAttributes(p.oe[0], p.tok)
   824  		case a.Base, a.Basefont, a.Bgsound, a.Command, a.Link, a.Meta, a.Noframes, a.Script, a.Style, a.Template, a.Title:
   825  			return inHeadIM(p)
   826  		case a.Body:
   827  			if p.oe.contains(a.Template) {
   828  				return true
   829  			}
   830  			if len(p.oe) >= 2 {
   831  				body := p.oe[1]
   832  				if body.Type == ElementNode && body.DataAtom == a.Body {
   833  					p.framesetOK = false
   834  					copyAttributes(body, p.tok)
   835  				}
   836  			}
   837  		case a.Frameset:
   838  			if !p.framesetOK || len(p.oe) < 2 || p.oe[1].DataAtom != a.Body {
   839  				// Ignore the token.
   840  				return true
   841  			}
   842  			body := p.oe[1]
   843  			if body.Parent != nil {
   844  				body.Parent.RemoveChild(body)
   845  			}
   846  			p.oe = p.oe[:1]
   847  			p.addElement()
   848  			p.im = inFramesetIM
   849  			return true
   850  		case a.Address, a.Article, a.Aside, a.Blockquote, a.Center, a.Details, a.Dir, a.Div, a.Dl, a.Fieldset, a.Figcaption, a.Figure, a.Footer, a.Header, a.Hgroup, a.Menu, a.Nav, a.Ol, a.P, a.Section, a.Summary, a.Ul:
   851  			p.popUntil(buttonScope, a.P)
   852  			p.addElement()
   853  		case a.H1, a.H2, a.H3, a.H4, a.H5, a.H6:
   854  			p.popUntil(buttonScope, a.P)
   855  			switch n := p.top(); n.DataAtom {
   856  			case a.H1, a.H2, a.H3, a.H4, a.H5, a.H6:
   857  				p.oe.pop()
   858  			}
   859  			p.addElement()
   860  		case a.Pre, a.Listing:
   861  			p.popUntil(buttonScope, a.P)
   862  			p.addElement()
   863  			// The newline, if any, will be dealt with by the TextToken case.
   864  			p.framesetOK = false
   865  		case a.Form:
   866  			if p.form != nil && !p.oe.contains(a.Template) {
   867  				// Ignore the token
   868  				return true
   869  			}
   870  			p.popUntil(buttonScope, a.P)
   871  			p.addElement()
   872  			if !p.oe.contains(a.Template) {
   873  				p.form = p.top()
   874  			}
   875  		case a.Li:
   876  			p.framesetOK = false
   877  			for i := len(p.oe) - 1; i >= 0; i-- {
   878  				node := p.oe[i]
   879  				switch node.DataAtom {
   880  				case a.Li:
   881  					p.oe = p.oe[:i]
   882  				case a.Address, a.Div, a.P:
   883  					continue
   884  				default:
   885  					if !isSpecialElement(node) {
   886  						continue
   887  					}
   888  				}
   889  				break
   890  			}
   891  			p.popUntil(buttonScope, a.P)
   892  			p.addElement()
   893  		case a.Dd, a.Dt:
   894  			p.framesetOK = false
   895  			for i := len(p.oe) - 1; i >= 0; i-- {
   896  				node := p.oe[i]
   897  				switch node.DataAtom {
   898  				case a.Dd, a.Dt:
   899  					p.oe = p.oe[:i]
   900  				case a.Address, a.Div, a.P:
   901  					continue
   902  				default:
   903  					if !isSpecialElement(node) {
   904  						continue
   905  					}
   906  				}
   907  				break
   908  			}
   909  			p.popUntil(buttonScope, a.P)
   910  			p.addElement()
   911  		case a.Plaintext:
   912  			p.popUntil(buttonScope, a.P)
   913  			p.addElement()
   914  		case a.Button:
   915  			p.popUntil(defaultScope, a.Button)
   916  			p.reconstructActiveFormattingElements()
   917  			p.addElement()
   918  			p.framesetOK = false
   919  		case a.A:
   920  			for i := len(p.afe) - 1; i >= 0 && p.afe[i].Type != scopeMarkerNode; i-- {
   921  				if n := p.afe[i]; n.Type == ElementNode && n.DataAtom == a.A {
   922  					p.inBodyEndTagFormatting(a.A, "a")
   923  					p.oe.remove(n)
   924  					p.afe.remove(n)
   925  					break
   926  				}
   927  			}
   928  			p.reconstructActiveFormattingElements()
   929  			p.addFormattingElement()
   930  		case a.B, a.Big, a.Code, a.Em, a.Font, a.I, a.S, a.Small, a.Strike, a.Strong, a.Tt, a.U:
   931  			p.reconstructActiveFormattingElements()
   932  			p.addFormattingElement()
   933  		case a.Nobr:
   934  			p.reconstructActiveFormattingElements()
   935  			if p.elementInScope(defaultScope, a.Nobr) {
   936  				p.inBodyEndTagFormatting(a.Nobr, "nobr")
   937  				p.reconstructActiveFormattingElements()
   938  			}
   939  			p.addFormattingElement()
   940  		case a.Applet, a.Marquee, a.Object:
   941  			p.reconstructActiveFormattingElements()
   942  			p.addElement()
   943  			p.afe = append(p.afe, &scopeMarker)
   944  			p.framesetOK = false
   945  		case a.Table:
   946  			if !p.quirks {
   947  				p.popUntil(buttonScope, a.P)
   948  			}
   949  			p.addElement()
   950  			p.framesetOK = false
   951  			p.im = inTableIM
   952  			return true
   953  		case a.Area, a.Br, a.Embed, a.Img, a.Input, a.Keygen, a.Wbr:
   954  			p.reconstructActiveFormattingElements()
   955  			p.addElement()
   956  			p.oe.pop()
   957  			p.acknowledgeSelfClosingTag()
   958  			if p.tok.DataAtom == a.Input {
   959  				for _, t := range p.tok.Attr {
   960  					if t.Key == "type" {
   961  						if strings.ToLower(t.Val) == "hidden" {
   962  							// Skip setting framesetOK = false
   963  							return true
   964  						}
   965  					}
   966  				}
   967  			}
   968  			p.framesetOK = false
   969  		case a.Param, a.Source, a.Track:
   970  			p.addElement()
   971  			p.oe.pop()
   972  			p.acknowledgeSelfClosingTag()
   973  		case a.Hr:
   974  			p.popUntil(buttonScope, a.P)
   975  			p.addElement()
   976  			p.oe.pop()
   977  			p.acknowledgeSelfClosingTag()
   978  			p.framesetOK = false
   979  		case a.Image:
   980  			p.tok.DataAtom = a.Img
   981  			p.tok.Data = a.Img.String()
   982  			return false
   983  		case a.Isindex:
   984  			if p.form != nil {
   985  				// Ignore the token.
   986  				return true
   987  			}
   988  			action := ""
   989  			prompt := "This is a searchable index. Enter search keywords: "
   990  			attr := []Attribute{{Key: "name", Val: "isindex"}}
   991  			for _, t := range p.tok.Attr {
   992  				switch t.Key {
   993  				case "action":
   994  					action = t.Val
   995  				case "name":
   996  					// Ignore the attribute.
   997  				case "prompt":
   998  					prompt = t.Val
   999  				default:
  1000  					attr = append(attr, t)
  1001  				}
  1002  			}
  1003  			p.acknowledgeSelfClosingTag()
  1004  			p.popUntil(buttonScope, a.P)
  1005  			p.parseImpliedToken(StartTagToken, a.Form, a.Form.String())
  1006  			if p.form == nil {
  1007  				// NOTE: The 'isindex' element has been removed,
  1008  				// and the 'template' element has not been designed to be
  1009  				// collaborative with the index element.
  1010  				//
  1011  				// Ignore the token.
  1012  				return true
  1013  			}
  1014  			if action != "" {
  1015  				p.form.Attr = []Attribute{{Key: "action", Val: action}}
  1016  			}
  1017  			p.parseImpliedToken(StartTagToken, a.Hr, a.Hr.String())
  1018  			p.parseImpliedToken(StartTagToken, a.Label, a.Label.String())
  1019  			p.addText(prompt)
  1020  			p.addChild(&Node{
  1021  				Type:     ElementNode,
  1022  				DataAtom: a.Input,
  1023  				Data:     a.Input.String(),
  1024  				Attr:     attr,
  1025  				Line:     p.tok.Line,
  1026  				Column:   p.tok.Column,
  1027  			})
  1028  			p.oe.pop()
  1029  			p.parseImpliedToken(EndTagToken, a.Label, a.Label.String())
  1030  			p.parseImpliedToken(StartTagToken, a.Hr, a.Hr.String())
  1031  			p.parseImpliedToken(EndTagToken, a.Form, a.Form.String())
  1032  		case a.Textarea:
  1033  			p.addElement()
  1034  			p.setOriginalIM()
  1035  			p.framesetOK = false
  1036  			p.im = textIM
  1037  		case a.Xmp:
  1038  			p.popUntil(buttonScope, a.P)
  1039  			p.reconstructActiveFormattingElements()
  1040  			p.framesetOK = false
  1041  			p.addElement()
  1042  			p.setOriginalIM()
  1043  			p.im = textIM
  1044  		case a.Iframe:
  1045  			p.framesetOK = false
  1046  			p.addElement()
  1047  			p.setOriginalIM()
  1048  			p.im = textIM
  1049  		case a.Noembed, a.Noscript:
  1050  			p.addElement()
  1051  			p.setOriginalIM()
  1052  			p.im = textIM
  1053  		case a.Select:
  1054  			p.reconstructActiveFormattingElements()
  1055  			p.addElement()
  1056  			p.framesetOK = false
  1057  			p.im = inSelectIM
  1058  			return true
  1059  		case a.Optgroup, a.Option:
  1060  			if p.top().DataAtom == a.Option {
  1061  				p.oe.pop()
  1062  			}
  1063  			p.reconstructActiveFormattingElements()
  1064  			p.addElement()
  1065  		case a.Rb, a.Rtc:
  1066  			if p.elementInScope(defaultScope, a.Ruby) {
  1067  				p.generateImpliedEndTags()
  1068  			}
  1069  			p.addElement()
  1070  		case a.Rp, a.Rt:
  1071  			if p.elementInScope(defaultScope, a.Ruby) {
  1072  				p.generateImpliedEndTags("rtc")
  1073  			}
  1074  			p.addElement()
  1075  		case a.Math, a.Svg:
  1076  			p.reconstructActiveFormattingElements()
  1077  			if p.tok.DataAtom == a.Math {
  1078  				adjustAttributeNames(p.tok.Attr, mathMLAttributeAdjustments)
  1079  			} else {
  1080  				adjustAttributeNames(p.tok.Attr, svgAttributeAdjustments)
  1081  			}
  1082  			adjustForeignAttributes(p.tok.Attr)
  1083  			p.addElement()
  1084  			p.top().Namespace = p.tok.Data
  1085  			if p.hasSelfClosingToken {
  1086  				p.oe.pop()
  1087  				p.acknowledgeSelfClosingTag()
  1088  			}
  1089  			return true
  1090  		case a.Caption, a.Col, a.Colgroup, a.Frame, a.Head, a.Tbody, a.Td, a.Tfoot, a.Th, a.Thead, a.Tr:
  1091  			// Ignore the token.
  1092  		default:
  1093  			p.reconstructActiveFormattingElements()
  1094  			p.addElement()
  1095  		}
  1096  	case EndTagToken:
  1097  		switch p.tok.DataAtom {
  1098  		case a.Body:
  1099  			if p.elementInScope(defaultScope, a.Body) {
  1100  				p.im = afterBodyIM
  1101  			}
  1102  		case a.Html:
  1103  			if p.elementInScope(defaultScope, a.Body) {
  1104  				p.parseImpliedToken(EndTagToken, a.Body, a.Body.String())
  1105  				return false
  1106  			}
  1107  			return true
  1108  		case a.Address, a.Article, a.Aside, a.Blockquote, a.Button, a.Center, a.Details, a.Dir, a.Div, a.Dl, a.Fieldset, a.Figcaption, a.Figure, a.Footer, a.Header, a.Hgroup, a.Listing, a.Menu, a.Nav, a.Ol, a.Pre, a.Section, a.Summary, a.Ul:
  1109  			p.popUntil(defaultScope, p.tok.DataAtom)
  1110  		case a.Form:
  1111  			if p.oe.contains(a.Template) {
  1112  				i := p.indexOfElementInScope(defaultScope, a.Form)
  1113  				if i == -1 {
  1114  					// Ignore the token.
  1115  					return true
  1116  				}
  1117  				p.generateImpliedEndTags()
  1118  				if p.oe[i].DataAtom != a.Form {
  1119  					// Ignore the token.
  1120  					return true
  1121  				}
  1122  				p.popUntil(defaultScope, a.Form)
  1123  			} else {
  1124  				node := p.form
  1125  				p.form = nil
  1126  				i := p.indexOfElementInScope(defaultScope, a.Form)
  1127  				if node == nil || i == -1 || p.oe[i] != node {
  1128  					// Ignore the token.
  1129  					return true
  1130  				}
  1131  				p.generateImpliedEndTags()
  1132  				p.oe.remove(node)
  1133  			}
  1134  		case a.P:
  1135  			if !p.elementInScope(buttonScope, a.P) {
  1136  				p.parseImpliedToken(StartTagToken, a.P, a.P.String())
  1137  			}
  1138  			p.popUntil(buttonScope, a.P)
  1139  		case a.Li:
  1140  			p.popUntil(listItemScope, a.Li)
  1141  		case a.Dd, a.Dt:
  1142  			p.popUntil(defaultScope, p.tok.DataAtom)
  1143  		case a.H1, a.H2, a.H3, a.H4, a.H5, a.H6:
  1144  			p.popUntil(defaultScope, a.H1, a.H2, a.H3, a.H4, a.H5, a.H6)
  1145  		case a.A, a.B, a.Big, a.Code, a.Em, a.Font, a.I, a.Nobr, a.S, a.Small, a.Strike, a.Strong, a.Tt, a.U:
  1146  			p.inBodyEndTagFormatting(p.tok.DataAtom, p.tok.Data)
  1147  		case a.Applet, a.Marquee, a.Object:
  1148  			if p.popUntil(defaultScope, p.tok.DataAtom) {
  1149  				p.clearActiveFormattingElements()
  1150  			}
  1151  		case a.Br:
  1152  			p.tok.Type = StartTagToken
  1153  			return false
  1154  		case a.Template:
  1155  			return inHeadIM(p)
  1156  		default:
  1157  			p.inBodyEndTagOther(p.tok.DataAtom, p.tok.Data)
  1158  		}
  1159  	case CommentToken:
  1160  		p.addChild(&Node{
  1161  			Type:   CommentNode,
  1162  			Data:   p.tok.Data,
  1163  			Line:   p.tok.Line,
  1164  			Column: p.tok.Column,
  1165  		})
  1166  	case ErrorToken:
  1167  		// TODO: remove this divergence from the HTML5 spec.
  1168  		if len(p.templateStack) > 0 {
  1169  			p.im = inTemplateIM
  1170  			return false
  1171  		} else {
  1172  			for _, e := range p.oe {
  1173  				switch e.DataAtom {
  1174  				case a.Dd, a.Dt, a.Li, a.Optgroup, a.Option, a.P, a.Rb, a.Rp, a.Rt, a.Rtc, a.Tbody, a.Td, a.Tfoot, a.Th,
  1175  					a.Thead, a.Tr, a.Body, a.Html:
  1176  				default:
  1177  					return true
  1178  				}
  1179  			}
  1180  		}
  1181  	}
  1182  
  1183  	return true
  1184  }
  1185  
// inBodyEndTagFormatting closes the formatting element identified by tagAtom
// and tagName, re-nesting any misnested content around it. tagName is only
// used when the work is handed off to inBodyEndTagOther.
func (p *parser) inBodyEndTagFormatting(tagAtom a.Atom, tagName string) {
	// This is the "adoption agency" algorithm, described at
	// https://html.spec.whatwg.org/multipage/syntax.html#adoptionAgency

	// TODO: this is a fairly literal line-by-line translation of that algorithm.
	// Once the code successfully parses the comprehensive test suite, we should
	// refactor this code to be more idiomatic.

	// Steps 1-4. The outer loop.
	// The loop body runs at most eight times; every early exit below is a
	// return, so falling off the end of the loop simply stops the algorithm.
	for i := 0; i < 8; i++ {
		// Step 5. Find the formatting element.
		// Search the active formatting elements from the newest entry
		// backwards, giving up at the first scope marker.
		var formattingElement *Node
		for j := len(p.afe) - 1; j >= 0; j-- {
			if p.afe[j].Type == scopeMarkerNode {
				break
			}
			if p.afe[j].DataAtom == tagAtom {
				formattingElement = p.afe[j]
				break
			}
		}
		if formattingElement == nil {
			// No candidate: treat the tag like any other end tag.
			p.inBodyEndTagOther(tagAtom, tagName)
			return
		}
		feIndex := p.oe.index(formattingElement)
		if feIndex == -1 {
			// The element is listed as active but is no longer open: drop the
			// stale entry and stop.
			p.afe.remove(formattingElement)
			return
		}
		if !p.elementInScope(defaultScope, tagAtom) {
			// Ignore the tag.
			return
		}

		// Steps 9-10. Find the furthest block.
		// That is the first special element at or above the formatting element
		// on the stack of open elements.
		var furthestBlock *Node
		for _, e := range p.oe[feIndex:] {
			if isSpecialElement(e) {
				furthestBlock = e
				break
			}
		}
		if furthestBlock == nil {
			// Simple case: pop everything up to and including the formatting
			// element, then remove it from the active formatting elements.
			e := p.oe.pop()
			for e != formattingElement {
				e = p.oe.pop()
			}
			p.afe.remove(e)
			return
		}

		// Steps 11-12. Find the common ancestor and bookmark node.
		// NOTE(review): indexing feIndex-1 assumes the formatting element is
		// never the bottom entry of the stack of open elements — confirm.
		commonAncestor := p.oe[feIndex-1]
		bookmark := p.afe.index(formattingElement)

		// Step 13. The inner loop. Find the lastNode to reparent.
		lastNode := furthestBlock
		node := furthestBlock
		x := p.oe.index(node)
		// Steps 13.1-13.2
		// NOTE(review): this loop stops after three iterations even if the
		// formatting element has not been reached yet — confirm this matches
		// the inner-loop counter handling in the referenced algorithm.
		for j := 0; j < 3; j++ {
			// Step 13.3.
			// Walk one entry down the stack of open elements.
			x--
			node = p.oe[x]
			// Step 13.4 - 13.5.
			// Elements that are not active formatting elements are dropped
			// from the stack of open elements.
			if p.afe.index(node) == -1 {
				p.oe.remove(node)
				continue
			}
			// Step 13.6.
			if node == formattingElement {
				break
			}
			// Step 13.7.
			// Substitute a clone for node in both lists.
			clone := node.clone()
			p.afe[p.afe.index(node)] = clone
			p.oe[p.oe.index(node)] = clone
			node = clone
			// Step 13.8.
			// Only on the first reparenting: move the bookmark to just after
			// the clone.
			if lastNode == furthestBlock {
				bookmark = p.afe.index(node) + 1
			}
			// Step 13.9.
			// Detach lastNode from its current parent before appending it to
			// the clone.
			if lastNode.Parent != nil {
				lastNode.Parent.RemoveChild(lastNode)
			}
			node.AppendChild(lastNode)
			// Step 13.10.
			lastNode = node
		}

		// Step 14. Reparent lastNode to the common ancestor,
		// or for misnested table nodes, to the foster parent.
		if lastNode.Parent != nil {
			lastNode.Parent.RemoveChild(lastNode)
		}
		switch commonAncestor.DataAtom {
		case a.Table, a.Tbody, a.Tfoot, a.Thead, a.Tr:
			p.fosterParent(lastNode)
		default:
			commonAncestor.AppendChild(lastNode)
		}

		// Steps 15-17. Reparent nodes from the furthest block's children
		// to a clone of the formatting element.
		clone := formattingElement.clone()
		reparentChildren(clone, furthestBlock)
		furthestBlock.AppendChild(clone)

		// Step 18. Fix up the list of active formatting elements.
		// Removing the formatting element shifts later entries down by one, so
		// a bookmark positioned after it has to shift as well.
		if oldLoc := p.afe.index(formattingElement); oldLoc != -1 && oldLoc < bookmark {
			// Move the bookmark with the rest of the list.
			bookmark--
		}
		p.afe.remove(formattingElement)
		p.afe.insert(bookmark, clone)

		// Step 19. Fix up the stack of open elements.
		// The clone goes immediately after the furthest block.
		p.oe.remove(formattingElement)
		p.oe.insert(p.oe.index(furthestBlock)+1, clone)
	}
}
  1309  
  1310  // inBodyEndTagOther performs the "any other end tag" algorithm for inBodyIM.
  1311  // "Any other end tag" handling from 12.2.6.5 The rules for parsing tokens in foreign content
  1312  // https://html.spec.whatwg.org/multipage/syntax.html#parsing-main-inforeign
  1313  func (p *parser) inBodyEndTagOther(tagAtom a.Atom, tagName string) {
  1314  	for i := len(p.oe) - 1; i >= 0; i-- {
  1315  		// Two element nodes have the same tag if they have the same Data (a
  1316  		// string-typed field). As an optimization, for common HTML tags, each
  1317  		// Data string is assigned a unique, non-zero DataAtom (a uint32-typed
  1318  		// field), since integer comparison is faster than string comparison.
  1319  		// Uncommon (custom) tags get a zero DataAtom.
  1320  		//
  1321  		// The if condition here is equivalent to (p.oe[i].Data == tagName).
  1322  		if (p.oe[i].DataAtom == tagAtom) &&
  1323  			((tagAtom != 0) || (p.oe[i].Data == tagName)) {
  1324  			p.oe = p.oe[:i]
  1325  			break
  1326  		}
  1327  		if isSpecialElement(p.oe[i]) {
  1328  			break
  1329  		}
  1330  	}
  1331  }
  1332  
  1333  // Section 12.2.6.4.8.
  1334  func textIM(p *parser) bool {
  1335  	switch p.tok.Type {
  1336  	case ErrorToken:
  1337  		p.oe.pop()
  1338  	case TextToken:
  1339  		d := p.tok.Data
  1340  		if n := p.oe.top(); n.DataAtom == a.Textarea && n.FirstChild == nil {
  1341  			// Ignore a newline at the start of a <textarea> block.
  1342  			if d != "" && d[0] == '\r' {
  1343  				d = d[1:]
  1344  			}
  1345  			if d != "" && d[0] == '\n' {
  1346  				d = d[1:]
  1347  			}
  1348  		}
  1349  		if d == "" {
  1350  			return true
  1351  		}
  1352  		p.addText(d)
  1353  		return true
  1354  	case EndTagToken:
  1355  		p.oe.pop()
  1356  	}
  1357  	p.im = p.originalIM
  1358  	p.originalIM = nil
  1359  	return p.tok.Type == EndTagToken
  1360  }
  1361  
  1362  // Section 12.2.6.4.9.
  1363  func inTableIM(p *parser) bool {
  1364  	switch p.tok.Type {
  1365  	case TextToken:
  1366  		p.tok.Data = strings.Replace(p.tok.Data, "\x00", "", -1)
  1367  		switch p.oe.top().DataAtom {
  1368  		case a.Table, a.Tbody, a.Tfoot, a.Thead, a.Tr:
  1369  			if strings.Trim(p.tok.Data, whitespace) == "" {
  1370  				p.addText(p.tok.Data)
  1371  				return true
  1372  			}
  1373  		}
  1374  	case StartTagToken:
  1375  		switch p.tok.DataAtom {
  1376  		case a.Caption:
  1377  			p.clearStackToContext(tableScope)
  1378  			p.afe = append(p.afe, &scopeMarker)
  1379  			p.addElement()
  1380  			p.im = inCaptionIM
  1381  			return true
  1382  		case a.Colgroup:
  1383  			p.clearStackToContext(tableScope)
  1384  			p.addElement()
  1385  			p.im = inColumnGroupIM
  1386  			return true
  1387  		case a.Col:
  1388  			p.parseImpliedToken(StartTagToken, a.Colgroup, a.Colgroup.String())
  1389  			return false
  1390  		case a.Tbody, a.Tfoot, a.Thead:
  1391  			p.clearStackToContext(tableScope)
  1392  			p.addElement()
  1393  			p.im = inTableBodyIM
  1394  			return true
  1395  		case a.Td, a.Th, a.Tr:
  1396  			p.parseImpliedToken(StartTagToken, a.Tbody, a.Tbody.String())
  1397  			return false
  1398  		case a.Table:
  1399  			if p.popUntil(tableScope, a.Table) {
  1400  				p.resetInsertionMode()
  1401  				return false
  1402  			}
  1403  			// Ignore the token.
  1404  			return true
  1405  		case a.Style, a.Script, a.Template:
  1406  			return inHeadIM(p)
  1407  		case a.Input:
  1408  			for _, t := range p.tok.Attr {
  1409  				if t.Key == "type" && strings.ToLower(t.Val) == "hidden" {
  1410  					p.addElement()
  1411  					p.oe.pop()
  1412  					return true
  1413  				}
  1414  			}
  1415  			// Otherwise drop down to the default action.
  1416  		case a.Form:
  1417  			if p.oe.contains(a.Template) || p.form != nil {
  1418  				// Ignore the token.
  1419  				return true
  1420  			}
  1421  			p.addElement()
  1422  			p.form = p.oe.pop()
  1423  		case a.Select:
  1424  			p.reconstructActiveFormattingElements()
  1425  			switch p.top().DataAtom {
  1426  			case a.Table, a.Tbody, a.Tfoot, a.Thead, a.Tr:
  1427  				p.fosterParenting = true
  1428  			}
  1429  			p.addElement()
  1430  			p.fosterParenting = false
  1431  			p.framesetOK = false
  1432  			p.im = inSelectInTableIM
  1433  			return true
  1434  		}
  1435  	case EndTagToken:
  1436  		switch p.tok.DataAtom {
  1437  		case a.Table:
  1438  			if p.popUntil(tableScope, a.Table) {
  1439  				p.resetInsertionMode()
  1440  				return true
  1441  			}
  1442  			// Ignore the token.
  1443  			return true
  1444  		case a.Body, a.Caption, a.Col, a.Colgroup, a.Html, a.Tbody, a.Td, a.Tfoot, a.Th, a.Thead, a.Tr:
  1445  			// Ignore the token.
  1446  			return true
  1447  		case a.Template:
  1448  			return inHeadIM(p)
  1449  		}
  1450  	case CommentToken:
  1451  		p.addChild(&Node{
  1452  			Type:   CommentNode,
  1453  			Data:   p.tok.Data,
  1454  			Line:   p.tok.Line,
  1455  			Column: p.tok.Column,
  1456  		})
  1457  		return true
  1458  	case DoctypeToken:
  1459  		// Ignore the token.
  1460  		return true
  1461  	case ErrorToken:
  1462  		return inBodyIM(p)
  1463  	}
  1464  
  1465  	p.fosterParenting = true
  1466  	defer func() { p.fosterParenting = false }()
  1467  
  1468  	return inBodyIM(p)
  1469  }
  1470  
  1471  // Section 12.2.6.4.11.
  1472  func inCaptionIM(p *parser) bool {
  1473  	switch p.tok.Type {
  1474  	case StartTagToken:
  1475  		switch p.tok.DataAtom {
  1476  		case a.Caption, a.Col, a.Colgroup, a.Tbody, a.Td, a.Tfoot, a.Thead, a.Tr:
  1477  			if p.popUntil(tableScope, a.Caption) {
  1478  				p.clearActiveFormattingElements()
  1479  				p.im = inTableIM
  1480  				return false
  1481  			} else {
  1482  				// Ignore the token.
  1483  				return true
  1484  			}
  1485  		case a.Select:
  1486  			p.reconstructActiveFormattingElements()
  1487  			p.addElement()
  1488  			p.framesetOK = false
  1489  			p.im = inSelectInTableIM
  1490  			return true
  1491  		}
  1492  	case EndTagToken:
  1493  		switch p.tok.DataAtom {
  1494  		case a.Caption:
  1495  			if p.popUntil(tableScope, a.Caption) {
  1496  				p.clearActiveFormattingElements()
  1497  				p.im = inTableIM
  1498  			}
  1499  			return true
  1500  		case a.Table:
  1501  			if p.popUntil(tableScope, a.Caption) {
  1502  				p.clearActiveFormattingElements()
  1503  				p.im = inTableIM
  1504  				return false
  1505  			} else {
  1506  				// Ignore the token.
  1507  				return true
  1508  			}
  1509  		case a.Body, a.Col, a.Colgroup, a.Html, a.Tbody, a.Td, a.Tfoot, a.Th, a.Thead, a.Tr:
  1510  			// Ignore the token.
  1511  			return true
  1512  		}
  1513  	}
  1514  	return inBodyIM(p)
  1515  }
  1516  
  1517  // Section 12.2.6.4.12.
  1518  func inColumnGroupIM(p *parser) bool {
  1519  	switch p.tok.Type {
  1520  	case TextToken:
  1521  		s := strings.TrimLeft(p.tok.Data, whitespace)
  1522  		if len(s) < len(p.tok.Data) {
  1523  			// Add the initial whitespace to the current node.
  1524  			p.addText(p.tok.Data[:len(p.tok.Data)-len(s)])
  1525  			if s == "" {
  1526  				return true
  1527  			}
  1528  			p.tok.Data = s
  1529  		}
  1530  	case CommentToken:
  1531  		p.addChild(&Node{
  1532  			Type:   CommentNode,
  1533  			Data:   p.tok.Data,
  1534  			Line:   p.tok.Line,
  1535  			Column: p.tok.Column,
  1536  		})
  1537  		return true
  1538  	case DoctypeToken:
  1539  		// Ignore the token.
  1540  		return true
  1541  	case StartTagToken:
  1542  		switch p.tok.DataAtom {
  1543  		case a.Html:
  1544  			return inBodyIM(p)
  1545  		case a.Col:
  1546  			p.addElement()
  1547  			p.oe.pop()
  1548  			p.acknowledgeSelfClosingTag()
  1549  			return true
  1550  		case a.Template:
  1551  			return inHeadIM(p)
  1552  		}
  1553  	case EndTagToken:
  1554  		switch p.tok.DataAtom {
  1555  		case a.Colgroup:
  1556  			if p.oe.top().DataAtom == a.Colgroup {
  1557  				p.oe.pop()
  1558  				p.im = inTableIM
  1559  			}
  1560  			return true
  1561  		case a.Col:
  1562  			// Ignore the token.
  1563  			return true
  1564  		case a.Template:
  1565  			return inHeadIM(p)
  1566  		}
  1567  	case ErrorToken:
  1568  		return inBodyIM(p)
  1569  	}
  1570  	if p.oe.top().DataAtom != a.Colgroup {
  1571  		return true
  1572  	}
  1573  	p.oe.pop()
  1574  	p.im = inTableIM
  1575  	return false
  1576  }
  1577  
  1578  // Section 12.2.6.4.13.
  1579  func inTableBodyIM(p *parser) bool {
  1580  	switch p.tok.Type {
  1581  	case StartTagToken:
  1582  		switch p.tok.DataAtom {
  1583  		case a.Tr:
  1584  			p.clearStackToContext(tableBodyScope)
  1585  			p.addElement()
  1586  			p.im = inRowIM
  1587  			return true
  1588  		case a.Td, a.Th:
  1589  			p.parseImpliedToken(StartTagToken, a.Tr, a.Tr.String())
  1590  			return false
  1591  		case a.Caption, a.Col, a.Colgroup, a.Tbody, a.Tfoot, a.Thead:
  1592  			if p.popUntil(tableScope, a.Tbody, a.Thead, a.Tfoot) {
  1593  				p.im = inTableIM
  1594  				return false
  1595  			}
  1596  			// Ignore the token.
  1597  			return true
  1598  		}
  1599  	case EndTagToken:
  1600  		switch p.tok.DataAtom {
  1601  		case a.Tbody, a.Tfoot, a.Thead:
  1602  			if p.elementInScope(tableScope, p.tok.DataAtom) {
  1603  				p.clearStackToContext(tableBodyScope)
  1604  				p.oe.pop()
  1605  				p.im = inTableIM
  1606  			}
  1607  			return true
  1608  		case a.Table:
  1609  			if p.popUntil(tableScope, a.Tbody, a.Thead, a.Tfoot) {
  1610  				p.im = inTableIM
  1611  				return false
  1612  			}
  1613  			// Ignore the token.
  1614  			return true
  1615  		case a.Body, a.Caption, a.Col, a.Colgroup, a.Html, a.Td, a.Th, a.Tr:
  1616  			// Ignore the token.
  1617  			return true
  1618  		}
  1619  	case CommentToken:
  1620  		p.addChild(&Node{
  1621  			Type:   CommentNode,
  1622  			Data:   p.tok.Data,
  1623  			Line:   p.tok.Line,
  1624  			Column: p.tok.Column,
  1625  		})
  1626  		return true
  1627  	}
  1628  
  1629  	return inTableIM(p)
  1630  }
  1631  
  1632  // Section 12.2.6.4.14.
  1633  func inRowIM(p *parser) bool {
  1634  	switch p.tok.Type {
  1635  	case StartTagToken:
  1636  		switch p.tok.DataAtom {
  1637  		case a.Td, a.Th:
  1638  			p.clearStackToContext(tableRowScope)
  1639  			p.addElement()
  1640  			p.afe = append(p.afe, &scopeMarker)
  1641  			p.im = inCellIM
  1642  			return true
  1643  		case a.Caption, a.Col, a.Colgroup, a.Tbody, a.Tfoot, a.Thead, a.Tr:
  1644  			if p.popUntil(tableScope, a.Tr) {
  1645  				p.im = inTableBodyIM
  1646  				return false
  1647  			}
  1648  			// Ignore the token.
  1649  			return true
  1650  		}
  1651  	case EndTagToken:
  1652  		switch p.tok.DataAtom {
  1653  		case a.Tr:
  1654  			if p.popUntil(tableScope, a.Tr) {
  1655  				p.im = inTableBodyIM
  1656  				return true
  1657  			}
  1658  			// Ignore the token.
  1659  			return true
  1660  		case a.Table:
  1661  			if p.popUntil(tableScope, a.Tr) {
  1662  				p.im = inTableBodyIM
  1663  				return false
  1664  			}
  1665  			// Ignore the token.
  1666  			return true
  1667  		case a.Tbody, a.Tfoot, a.Thead:
  1668  			if p.elementInScope(tableScope, p.tok.DataAtom) {
  1669  				p.parseImpliedToken(EndTagToken, a.Tr, a.Tr.String())
  1670  				return false
  1671  			}
  1672  			// Ignore the token.
  1673  			return true
  1674  		case a.Body, a.Caption, a.Col, a.Colgroup, a.Html, a.Td, a.Th:
  1675  			// Ignore the token.
  1676  			return true
  1677  		}
  1678  	}
  1679  
  1680  	return inTableIM(p)
  1681  }
  1682  
  1683  // Section 12.2.6.4.15.
  1684  func inCellIM(p *parser) bool {
  1685  	switch p.tok.Type {
  1686  	case StartTagToken:
  1687  		switch p.tok.DataAtom {
  1688  		case a.Caption, a.Col, a.Colgroup, a.Tbody, a.Td, a.Tfoot, a.Th, a.Thead, a.Tr:
  1689  			if p.popUntil(tableScope, a.Td, a.Th) {
  1690  				// Close the cell and reprocess.
  1691  				p.clearActiveFormattingElements()
  1692  				p.im = inRowIM
  1693  				return false
  1694  			}
  1695  			// Ignore the token.
  1696  			return true
  1697  		case a.Select:
  1698  			p.reconstructActiveFormattingElements()
  1699  			p.addElement()
  1700  			p.framesetOK = false
  1701  			p.im = inSelectInTableIM
  1702  			return true
  1703  		}
  1704  	case EndTagToken:
  1705  		switch p.tok.DataAtom {
  1706  		case a.Td, a.Th:
  1707  			if !p.popUntil(tableScope, p.tok.DataAtom) {
  1708  				// Ignore the token.
  1709  				return true
  1710  			}
  1711  			p.clearActiveFormattingElements()
  1712  			p.im = inRowIM
  1713  			return true
  1714  		case a.Body, a.Caption, a.Col, a.Colgroup, a.Html:
  1715  			// Ignore the token.
  1716  			return true
  1717  		case a.Table, a.Tbody, a.Tfoot, a.Thead, a.Tr:
  1718  			if !p.elementInScope(tableScope, p.tok.DataAtom) {
  1719  				// Ignore the token.
  1720  				return true
  1721  			}
  1722  			// Close the cell and reprocess.
  1723  			p.popUntil(tableScope, a.Td, a.Th)
  1724  			p.clearActiveFormattingElements()
  1725  			p.im = inRowIM
  1726  			return false
  1727  		}
  1728  	}
  1729  	return inBodyIM(p)
  1730  }
  1731  
  1732  // Section 12.2.6.4.16.
  1733  func inSelectIM(p *parser) bool {
  1734  	switch p.tok.Type {
  1735  	case TextToken:
  1736  		p.addText(strings.Replace(p.tok.Data, "\x00", "", -1))
  1737  	case StartTagToken:
  1738  		switch p.tok.DataAtom {
  1739  		case a.Html:
  1740  			return inBodyIM(p)
  1741  		case a.Option:
  1742  			if p.top().DataAtom == a.Option {
  1743  				p.oe.pop()
  1744  			}
  1745  			p.addElement()
  1746  		case a.Optgroup:
  1747  			if p.top().DataAtom == a.Option {
  1748  				p.oe.pop()
  1749  			}
  1750  			if p.top().DataAtom == a.Optgroup {
  1751  				p.oe.pop()
  1752  			}
  1753  			p.addElement()
  1754  		case a.Select:
  1755  			if p.popUntil(selectScope, a.Select) {
  1756  				p.resetInsertionMode()
  1757  			} else {
  1758  				// Ignore the token.
  1759  				return true
  1760  			}
  1761  		case a.Input, a.Keygen, a.Textarea:
  1762  			if p.elementInScope(selectScope, a.Select) {
  1763  				p.parseImpliedToken(EndTagToken, a.Select, a.Select.String())
  1764  				return false
  1765  			}
  1766  			// In order to properly ignore <textarea>, we need to change the tokenizer mode.
  1767  			p.tokenizer.NextIsNotRawText()
  1768  			// Ignore the token.
  1769  			return true
  1770  		case a.Script, a.Template:
  1771  			return inHeadIM(p)
  1772  		}
  1773  	case EndTagToken:
  1774  		switch p.tok.DataAtom {
  1775  		case a.Option:
  1776  			if p.top().DataAtom == a.Option {
  1777  				p.oe.pop()
  1778  			}
  1779  		case a.Optgroup:
  1780  			i := len(p.oe) - 1
  1781  			if p.oe[i].DataAtom == a.Option {
  1782  				i--
  1783  			}
  1784  			if p.oe[i].DataAtom == a.Optgroup {
  1785  				p.oe = p.oe[:i]
  1786  			}
  1787  		case a.Select:
  1788  			if p.popUntil(selectScope, a.Select) {
  1789  				p.resetInsertionMode()
  1790  			} else {
  1791  				// Ignore the token.
  1792  				return true
  1793  			}
  1794  		case a.Template:
  1795  			return inHeadIM(p)
  1796  		}
  1797  	case CommentToken:
  1798  		p.addChild(&Node{
  1799  			Type:   CommentNode,
  1800  			Data:   p.tok.Data,
  1801  			Line:   p.tok.Line,
  1802  			Column: p.tok.Column,
  1803  		})
  1804  	case DoctypeToken:
  1805  		// Ignore the token.
  1806  		return true
  1807  	case ErrorToken:
  1808  		return inBodyIM(p)
  1809  	}
  1810  
  1811  	return true
  1812  }
  1813  
  1814  // Section 12.2.6.4.17.
  1815  func inSelectInTableIM(p *parser) bool {
  1816  	switch p.tok.Type {
  1817  	case StartTagToken, EndTagToken:
  1818  		switch p.tok.DataAtom {
  1819  		case a.Caption, a.Table, a.Tbody, a.Tfoot, a.Thead, a.Tr, a.Td, a.Th:
  1820  			if p.tok.Type == EndTagToken && !p.elementInScope(tableScope, p.tok.DataAtom) {
  1821  				// Ignore the token.
  1822  				return true
  1823  			}
  1824  			// This is like p.popUntil(selectScope, a.Select), but it also
  1825  			// matches <math select>, not just <select>. Matching the MathML
  1826  			// tag is arguably incorrect (conceptually), but it mimics what
  1827  			// Chromium does.
  1828  			for i := len(p.oe) - 1; i >= 0; i-- {
  1829  				if n := p.oe[i]; n.DataAtom == a.Select {
  1830  					p.oe = p.oe[:i]
  1831  					break
  1832  				}
  1833  			}
  1834  			p.resetInsertionMode()
  1835  			return false
  1836  		}
  1837  	}
  1838  	return inSelectIM(p)
  1839  }
  1840  
  1841  // Section 12.2.6.4.18.
  1842  func inTemplateIM(p *parser) bool {
  1843  	switch p.tok.Type {
  1844  	case TextToken, CommentToken, DoctypeToken:
  1845  		return inBodyIM(p)
  1846  	case StartTagToken:
  1847  		switch p.tok.DataAtom {
  1848  		case a.Base, a.Basefont, a.Bgsound, a.Link, a.Meta, a.Noframes, a.Script, a.Style, a.Template, a.Title:
  1849  			return inHeadIM(p)
  1850  		case a.Caption, a.Colgroup, a.Tbody, a.Tfoot, a.Thead:
  1851  			p.templateStack.pop()
  1852  			p.templateStack = append(p.templateStack, inTableIM)
  1853  			p.im = inTableIM
  1854  			return false
  1855  		case a.Col:
  1856  			p.templateStack.pop()
  1857  			p.templateStack = append(p.templateStack, inColumnGroupIM)
  1858  			p.im = inColumnGroupIM
  1859  			return false
  1860  		case a.Tr:
  1861  			p.templateStack.pop()
  1862  			p.templateStack = append(p.templateStack, inTableBodyIM)
  1863  			p.im = inTableBodyIM
  1864  			return false
  1865  		case a.Td, a.Th:
  1866  			p.templateStack.pop()
  1867  			p.templateStack = append(p.templateStack, inRowIM)
  1868  			p.im = inRowIM
  1869  			return false
  1870  		default:
  1871  			p.templateStack.pop()
  1872  			p.templateStack = append(p.templateStack, inBodyIM)
  1873  			p.im = inBodyIM
  1874  			return false
  1875  		}
  1876  	case EndTagToken:
  1877  		switch p.tok.DataAtom {
  1878  		case a.Template:
  1879  			return inHeadIM(p)
  1880  		default:
  1881  			// Ignore the token.
  1882  			return true
  1883  		}
  1884  	case ErrorToken:
  1885  		if !p.oe.contains(a.Template) {
  1886  			// Ignore the token.
  1887  			return true
  1888  		}
  1889  		// TODO: remove this divergence from the HTML5 spec.
  1890  		//
  1891  		// See https://bugs.chromium.org/p/chromium/issues/detail?id=829668
  1892  		p.generateImpliedEndTags()
  1893  		for i := len(p.oe) - 1; i >= 0; i-- {
  1894  			if n := p.oe[i]; n.Namespace == "" && n.DataAtom == a.Template {
  1895  				p.oe = p.oe[:i]
  1896  				break
  1897  			}
  1898  		}
  1899  		p.clearActiveFormattingElements()
  1900  		p.templateStack.pop()
  1901  		p.resetInsertionMode()
  1902  		return false
  1903  	}
  1904  	return false
  1905  }
  1906  
  1907  // Section 12.2.6.4.19.
  1908  func afterBodyIM(p *parser) bool {
  1909  	switch p.tok.Type {
  1910  	case ErrorToken:
  1911  		// Stop parsing.
  1912  		return true
  1913  	case TextToken:
  1914  		s := strings.TrimLeft(p.tok.Data, whitespace)
  1915  		if len(s) == 0 {
  1916  			// It was all whitespace.
  1917  			return inBodyIM(p)
  1918  		}
  1919  	case StartTagToken:
  1920  		if p.tok.DataAtom == a.Html {
  1921  			return inBodyIM(p)
  1922  		}
  1923  	case EndTagToken:
  1924  		if p.tok.DataAtom == a.Html {
  1925  			if !p.fragment {
  1926  				p.im = afterAfterBodyIM
  1927  			}
  1928  			return true
  1929  		}
  1930  	case CommentToken:
  1931  		// The comment is attached to the <html> element.
  1932  		if len(p.oe) < 1 || p.oe[0].DataAtom != a.Html {
  1933  			panic("html: bad parser state: <html> element not found, in the after-body insertion mode")
  1934  		}
  1935  		p.oe[0].AppendChild(&Node{
  1936  			Type:   CommentNode,
  1937  			Data:   p.tok.Data,
  1938  			Line:   p.tok.Line,
  1939  			Column: p.tok.Column,
  1940  		})
  1941  		return true
  1942  	}
  1943  	p.im = inBodyIM
  1944  	return false
  1945  }
  1946  
  1947  // Section 12.2.6.4.20.
  1948  func inFramesetIM(p *parser) bool {
  1949  	switch p.tok.Type {
  1950  	case CommentToken:
  1951  		p.addChild(&Node{
  1952  			Type:   CommentNode,
  1953  			Data:   p.tok.Data,
  1954  			Line:   p.tok.Line,
  1955  			Column: p.tok.Column,
  1956  		})
  1957  	case TextToken:
  1958  		// Ignore all text but whitespace.
  1959  		s := strings.Map(func(c rune) rune {
  1960  			switch c {
  1961  			case ' ', '\t', '\n', '\f', '\r':
  1962  				return c
  1963  			}
  1964  			return -1
  1965  		}, p.tok.Data)
  1966  		if s != "" {
  1967  			p.addText(s)
  1968  		}
  1969  	case StartTagToken:
  1970  		switch p.tok.DataAtom {
  1971  		case a.Html:
  1972  			return inBodyIM(p)
  1973  		case a.Frameset:
  1974  			p.addElement()
  1975  		case a.Frame:
  1976  			p.addElement()
  1977  			p.oe.pop()
  1978  			p.acknowledgeSelfClosingTag()
  1979  		case a.Noframes:
  1980  			return inHeadIM(p)
  1981  		}
  1982  	case EndTagToken:
  1983  		switch p.tok.DataAtom {
  1984  		case a.Frameset:
  1985  			if p.oe.top().DataAtom != a.Html {
  1986  				p.oe.pop()
  1987  				if p.oe.top().DataAtom != a.Frameset {
  1988  					p.im = afterFramesetIM
  1989  					return true
  1990  				}
  1991  			}
  1992  		}
  1993  	default:
  1994  		// Ignore the token.
  1995  	}
  1996  	return true
  1997  }
  1998  
  1999  // Section 12.2.6.4.21.
  2000  func afterFramesetIM(p *parser) bool {
  2001  	switch p.tok.Type {
  2002  	case CommentToken:
  2003  		p.addChild(&Node{
  2004  			Type:   CommentNode,
  2005  			Data:   p.tok.Data,
  2006  			Line:   p.tok.Line,
  2007  			Column: p.tok.Column,
  2008  		})
  2009  	case TextToken:
  2010  		// Ignore all text but whitespace.
  2011  		s := strings.Map(func(c rune) rune {
  2012  			switch c {
  2013  			case ' ', '\t', '\n', '\f', '\r':
  2014  				return c
  2015  			}
  2016  			return -1
  2017  		}, p.tok.Data)
  2018  		if s != "" {
  2019  			p.addText(s)
  2020  		}
  2021  	case StartTagToken:
  2022  		switch p.tok.DataAtom {
  2023  		case a.Html:
  2024  			return inBodyIM(p)
  2025  		case a.Noframes:
  2026  			return inHeadIM(p)
  2027  		}
  2028  	case EndTagToken:
  2029  		switch p.tok.DataAtom {
  2030  		case a.Html:
  2031  			p.im = afterAfterFramesetIM
  2032  			return true
  2033  		}
  2034  	default:
  2035  		// Ignore the token.
  2036  	}
  2037  	return true
  2038  }
  2039  
  2040  // Section 12.2.6.4.22.
  2041  func afterAfterBodyIM(p *parser) bool {
  2042  	switch p.tok.Type {
  2043  	case ErrorToken:
  2044  		// Stop parsing.
  2045  		return true
  2046  	case TextToken:
  2047  		s := strings.TrimLeft(p.tok.Data, whitespace)
  2048  		if len(s) == 0 {
  2049  			// It was all whitespace.
  2050  			return inBodyIM(p)
  2051  		}
  2052  	case StartTagToken:
  2053  		if p.tok.DataAtom == a.Html {
  2054  			return inBodyIM(p)
  2055  		}
  2056  	case CommentToken:
  2057  		p.doc.AppendChild(&Node{
  2058  			Type:   CommentNode,
  2059  			Data:   p.tok.Data,
  2060  			Line:   p.tok.Line,
  2061  			Column: p.tok.Column,
  2062  		})
  2063  		return true
  2064  	case DoctypeToken:
  2065  		return inBodyIM(p)
  2066  	}
  2067  	p.im = inBodyIM
  2068  	return false
  2069  }
  2070  
  2071  // Section 12.2.6.4.23.
  2072  func afterAfterFramesetIM(p *parser) bool {
  2073  	switch p.tok.Type {
  2074  	case CommentToken:
  2075  		p.doc.AppendChild(&Node{
  2076  			Type:   CommentNode,
  2077  			Data:   p.tok.Data,
  2078  			Line:   p.tok.Line,
  2079  			Column: p.tok.Column,
  2080  		})
  2081  	case TextToken:
  2082  		// Ignore all text but whitespace.
  2083  		s := strings.Map(func(c rune) rune {
  2084  			switch c {
  2085  			case ' ', '\t', '\n', '\f', '\r':
  2086  				return c
  2087  			}
  2088  			return -1
  2089  		}, p.tok.Data)
  2090  		if s != "" {
  2091  			p.tok.Data = s
  2092  			return inBodyIM(p)
  2093  		}
  2094  	case StartTagToken:
  2095  		switch p.tok.DataAtom {
  2096  		case a.Html:
  2097  			return inBodyIM(p)
  2098  		case a.Noframes:
  2099  			return inHeadIM(p)
  2100  		}
  2101  	case DoctypeToken:
  2102  		return inBodyIM(p)
  2103  	default:
  2104  		// Ignore the token.
  2105  	}
  2106  	return true
  2107  }
  2108  
// whitespaceOrNUL is the whitespace character set extended with the NUL
// character; it is used as a cutset when trimming text in foreign content.
const whitespaceOrNUL = whitespace + "\x00"
  2110  
  2111  // Section 12.2.6.5
  2112  func parseForeignContent(p *parser) bool {
  2113  	switch p.tok.Type {
  2114  	case TextToken:
  2115  		if p.framesetOK {
  2116  			p.framesetOK = strings.TrimLeft(p.tok.Data, whitespaceOrNUL) == ""
  2117  		}
  2118  		p.tok.Data = strings.Replace(p.tok.Data, "\x00", "\ufffd", -1)
  2119  		p.addText(p.tok.Data)
  2120  	case CommentToken:
  2121  		p.addChild(&Node{
  2122  			Type:   CommentNode,
  2123  			Data:   p.tok.Data,
  2124  			Line:   p.tok.Line,
  2125  			Column: p.tok.Column,
  2126  		})
  2127  	case StartTagToken:
  2128  		b := breakout[p.tok.Data]
  2129  		if p.tok.DataAtom == a.Font {
  2130  		loop:
  2131  			for _, attr := range p.tok.Attr {
  2132  				switch attr.Key {
  2133  				case "color", "face", "size":
  2134  					b = true
  2135  					break loop
  2136  				}
  2137  			}
  2138  		}
  2139  		if b {
  2140  			for i := len(p.oe) - 1; i >= 0; i-- {
  2141  				n := p.oe[i]
  2142  				if n.Namespace == "" || htmlIntegrationPoint(n) || mathMLTextIntegrationPoint(n) {
  2143  					p.oe = p.oe[:i+1]
  2144  					break
  2145  				}
  2146  			}
  2147  			return false
  2148  		}
  2149  		switch p.top().Namespace {
  2150  		case "math":
  2151  			adjustAttributeNames(p.tok.Attr, mathMLAttributeAdjustments)
  2152  		case "svg":
  2153  			// Adjust SVG tag names. The tokenizer lower-cases tag names, but
  2154  			// SVG wants e.g. "foreignObject" with a capital second "O".
  2155  			if x := svgTagNameAdjustments[p.tok.Data]; x != "" {
  2156  				p.tok.DataAtom = a.Lookup([]byte(x))
  2157  				p.tok.Data = x
  2158  			}
  2159  			adjustAttributeNames(p.tok.Attr, svgAttributeAdjustments)
  2160  		default:
  2161  			panic("html: bad parser state: unexpected namespace")
  2162  		}
  2163  		adjustForeignAttributes(p.tok.Attr)
  2164  		namespace := p.top().Namespace
  2165  		p.addElement()
  2166  		p.top().Namespace = namespace
  2167  		if namespace != "" {
  2168  			// Don't let the tokenizer go into raw text mode in foreign content
  2169  			// (e.g. in an SVG <title> tag).
  2170  			p.tokenizer.NextIsNotRawText()
  2171  		}
  2172  		if p.hasSelfClosingToken {
  2173  			p.oe.pop()
  2174  			p.acknowledgeSelfClosingTag()
  2175  		}
  2176  	case EndTagToken:
  2177  		for i := len(p.oe) - 1; i >= 0; i-- {
  2178  			if p.oe[i].Namespace == "" {
  2179  				return p.im(p)
  2180  			}
  2181  			if strings.EqualFold(p.oe[i].Data, p.tok.Data) {
  2182  				p.oe = p.oe[:i]
  2183  				break
  2184  			}
  2185  		}
  2186  		return true
  2187  	default:
  2188  		// Ignore the token.
  2189  	}
  2190  	return true
  2191  }
  2192  
  2193  // Section 12.2.6.
  2194  func (p *parser) inForeignContent() bool {
  2195  	if len(p.oe) == 0 {
  2196  		return false
  2197  	}
  2198  	n := p.oe[len(p.oe)-1]
  2199  	if n.Namespace == "" {
  2200  		return false
  2201  	}
  2202  	if mathMLTextIntegrationPoint(n) {
  2203  		if p.tok.Type == StartTagToken && p.tok.DataAtom != a.Mglyph && p.tok.DataAtom != a.Malignmark {
  2204  			return false
  2205  		}
  2206  		if p.tok.Type == TextToken {
  2207  			return false
  2208  		}
  2209  	}
  2210  	if n.Namespace == "math" && n.DataAtom == a.AnnotationXml && p.tok.Type == StartTagToken && p.tok.DataAtom == a.Svg {
  2211  		return false
  2212  	}
  2213  	if htmlIntegrationPoint(n) && (p.tok.Type == StartTagToken || p.tok.Type == TextToken) {
  2214  		return false
  2215  	}
  2216  	if p.tok.Type == ErrorToken {
  2217  		return false
  2218  	}
  2219  	return true
  2220  }
  2221  
  2222  // parseImpliedToken parses a token as though it had appeared in the parser's
  2223  // input.
  2224  func (p *parser) parseImpliedToken(t TokenType, dataAtom a.Atom, data string) {
  2225  	realToken, selfClosing := p.tok, p.hasSelfClosingToken
  2226  	p.tok = Token{
  2227  		Type:     t,
  2228  		DataAtom: dataAtom,
  2229  		Data:     data,
  2230  	}
  2231  	p.hasSelfClosingToken = false
  2232  	p.parseCurrentToken()
  2233  	p.tok, p.hasSelfClosingToken = realToken, selfClosing
  2234  }
  2235  
  2236  // parseCurrentToken runs the current token through the parsing routines
  2237  // until it is consumed.
  2238  func (p *parser) parseCurrentToken() {
  2239  	if p.tok.Type == SelfClosingTagToken {
  2240  		p.hasSelfClosingToken = true
  2241  		p.tok.Type = StartTagToken
  2242  	}
  2243  
  2244  	consumed := false
  2245  	for !consumed {
  2246  		if p.inForeignContent() {
  2247  			consumed = parseForeignContent(p)
  2248  		} else {
  2249  			consumed = p.im(p)
  2250  		}
  2251  	}
  2252  
  2253  	if p.hasSelfClosingToken {
  2254  		// This is a parse error, but ignore it.
  2255  		p.hasSelfClosingToken = false
  2256  	}
  2257  }
  2258  
  2259  func (p *parser) parse() error {
  2260  	// Iterate until EOF. Any other error will cause an early return.
  2261  	var err error
  2262  	for err != io.EOF {
  2263  		// CDATA sections are allowed only in foreign content.
  2264  		n := p.oe.top()
  2265  		p.tokenizer.AllowCDATA(n != nil && n.Namespace != "")
  2266  		// Read and parse the next token.
  2267  		p.tokenizer.Next()
  2268  		p.tok = p.tokenizer.Token()
  2269  		if p.tok.Type == ErrorToken {
  2270  			err = p.tokenizer.Err()
  2271  			if err != nil && err != io.EOF {
  2272  				return err
  2273  			}
  2274  		}
  2275  		p.parseCurrentToken()
  2276  	}
  2277  	return nil
  2278  }
  2279  
  2280  // Parse returns the parse tree for the HTML from the given Reader.
  2281  //
  2282  // It implements the HTML5 parsing algorithm
  2283  // (https://html.spec.whatwg.org/multipage/syntax.html#tree-construction),
  2284  // which is very complicated. The resultant tree can contain implicitly created
  2285  // nodes that have no explicit <tag> listed in r's data, and nodes' parents can
  2286  // differ from the nesting implied by a naive processing of start and end
  2287  // <tag>s. Conversely, explicit <tag>s in r's data can be silently dropped,
  2288  // with no corresponding node in the resulting tree.
  2289  //
  2290  // The input is assumed to be UTF-8 encoded.
  2291  func Parse(r io.Reader) (*Node, error) {
  2292  	p := &parser{
  2293  		tokenizer: NewTokenizer(r),
  2294  		doc: &Node{
  2295  			Type: DocumentNode,
  2296  		},
  2297  		scripting:  true,
  2298  		framesetOK: true,
  2299  		im:         initialIM,
  2300  	}
  2301  	err := p.parse()
  2302  	if err != nil {
  2303  		return nil, err
  2304  	}
  2305  	return p.doc, nil
  2306  }
  2307  
  2308  // ParseFragment parses a fragment of HTML and returns the nodes that were
  2309  // found. If the fragment is the InnerHTML for an existing element, pass that
  2310  // element in context.
  2311  //
  2312  // It has the same intricacies as Parse.
  2313  func ParseFragment(r io.Reader, context *Node) ([]*Node, error) {
  2314  	contextTag := ""
  2315  	if context != nil {
  2316  		if context.Type != ElementNode {
  2317  			return nil, errors.New("html: ParseFragment of non-element Node")
  2318  		}
  2319  		// The next check isn't just context.DataAtom.String() == context.Data because
  2320  		// it is valid to pass an element whose tag isn't a known atom. For example,
  2321  		// DataAtom == 0 and Data = "tagfromthefuture" is perfectly consistent.
  2322  		if context.DataAtom != a.Lookup([]byte(context.Data)) {
  2323  			return nil, fmt.Errorf("html: inconsistent Node: DataAtom=%q, Data=%q", context.DataAtom, context.Data)
  2324  		}
  2325  		contextTag = context.DataAtom.String()
  2326  	}
  2327  	p := &parser{
  2328  		tokenizer: NewTokenizerFragment(r, contextTag),
  2329  		doc: &Node{
  2330  			Type: DocumentNode,
  2331  		},
  2332  		scripting: true,
  2333  		fragment:  true,
  2334  		context:   context,
  2335  	}
  2336  
  2337  	root := &Node{
  2338  		Type:     ElementNode,
  2339  		DataAtom: a.Html,
  2340  		Data:     a.Html.String(),
  2341  	}
  2342  	p.doc.AppendChild(root)
  2343  	p.oe = nodeStack{root}
  2344  	if context != nil && context.DataAtom == a.Template {
  2345  		p.templateStack = append(p.templateStack, inTemplateIM)
  2346  	}
  2347  	p.resetInsertionMode()
  2348  
  2349  	for n := context; n != nil; n = n.Parent {
  2350  		if n.Type == ElementNode && n.DataAtom == a.Form {
  2351  			p.form = n
  2352  			break
  2353  		}
  2354  	}
  2355  
  2356  	err := p.parse()
  2357  	if err != nil {
  2358  		return nil, err
  2359  	}
  2360  
  2361  	parent := p.doc
  2362  	if context != nil {
  2363  		parent = root
  2364  	}
  2365  
  2366  	var result []*Node
  2367  	for c := parent.FirstChild; c != nil; {
  2368  		next := c.NextSibling
  2369  		parent.RemoveChild(c)
  2370  		result = append(result, c)
  2371  		c = next
  2372  	}
  2373  	return result, nil
  2374  }