github.com/lianghucheng/zrddz@v0.0.0-20200923083010-c71f680932e2/src/golang.org/x/net/html/parse.go (about)

     1  // Copyright 2010 The Go Authors. All rights reserved.
     2  // Use of this source code is governed by a BSD-style
     3  // license that can be found in the LICENSE file.
     4  
     5  package html
     6  
     7  import (
     8  	"errors"
     9  	"fmt"
    10  	"io"
    11  	"strings"
    12  
    13  	a "golang.org/x/net/html/atom"
    14  )
    15  
    16  // A parser implements the HTML5 parsing algorithm:
    17  // https://html.spec.whatwg.org/multipage/syntax.html#tree-construction
    18  type parser struct {
    19  	// tokenizer provides the tokens for the parser.
    20  	tokenizer *Tokenizer
    21  	// tok is the most recently read token.
    22  	tok Token
    23  	// Self-closing tags like <hr/> are treated as start tags, except that
    24  	// hasSelfClosingToken is set while they are being processed.
    25  	hasSelfClosingToken bool
    26  	// doc is the document root element.
    27  	doc *Node
    28  	// The stack of open elements (section 12.2.4.2) and active formatting
    29  	// elements (section 12.2.4.3).
    30  	oe, afe nodeStack
    31  	// Element pointers (section 12.2.4.4).
    32  	head, form *Node
    33  	// Other parsing state flags (section 12.2.4.5).
    34  	scripting, framesetOK bool
    35  	// The stack of template insertion modes
    36  	templateStack insertionModeStack
    37  	// im is the current insertion mode.
    38  	im insertionMode
    39  	// originalIM is the insertion mode to go back to after completing a text
    40  	// or inTableText insertion mode.
    41  	originalIM insertionMode
    42  	// fosterParenting is whether new elements should be inserted according to
    43  	// the foster parenting rules (section 12.2.6.1).
    44  	fosterParenting bool
    45  	// quirks is whether the parser is operating in "quirks mode."
    46  	quirks bool
    47  	// fragment is whether the parser is parsing an HTML fragment.
    48  	fragment bool
    49  	// context is the context element when parsing an HTML fragment
    50  	// (section 12.4).
    51  	context *Node
    52  }
    53  
    54  func (p *parser) top() *Node {
    55  	if n := p.oe.top(); n != nil {
    56  		return n
    57  	}
    58  	return p.doc
    59  }
    60  
    61  // Stop tags for use in popUntil. These come from section 12.2.4.2.
    62  var (
    63  	defaultScopeStopTags = map[string][]a.Atom{
    64  		"":     {a.Applet, a.Caption, a.Html, a.Table, a.Td, a.Th, a.Marquee, a.Object, a.Template},
    65  		"math": {a.AnnotationXml, a.Mi, a.Mn, a.Mo, a.Ms, a.Mtext},
    66  		"svg":  {a.Desc, a.ForeignObject, a.Title},
    67  	}
    68  )
    69  
    70  type scope int
    71  
    72  const (
    73  	defaultScope scope = iota
    74  	listItemScope
    75  	buttonScope
    76  	tableScope
    77  	tableRowScope
    78  	tableBodyScope
    79  	selectScope
    80  )
    81  
    82  // popUntil pops the stack of open elements at the highest element whose tag
    83  // is in matchTags, provided there is no higher element in the scope's stop
    84  // tags (as defined in section 12.2.4.2). It returns whether or not there was
    85  // such an element. If there was not, popUntil leaves the stack unchanged.
    86  //
    87  // For example, the set of stop tags for table scope is: "html", "table". If
    88  // the stack was:
    89  // ["html", "body", "font", "table", "b", "i", "u"]
    90  // then popUntil(tableScope, "font") would return false, but
    91  // popUntil(tableScope, "i") would return true and the stack would become:
    92  // ["html", "body", "font", "table", "b"]
    93  //
    94  // If an element's tag is in both the stop tags and matchTags, then the stack
    95  // will be popped and the function returns true (provided, of course, there was
    96  // no higher element in the stack that was also in the stop tags). For example,
    97  // popUntil(tableScope, "table") returns true and leaves:
    98  // ["html", "body", "font"]
    99  func (p *parser) popUntil(s scope, matchTags ...a.Atom) bool {
   100  	if i := p.indexOfElementInScope(s, matchTags...); i != -1 {
   101  		p.oe = p.oe[:i]
   102  		return true
   103  	}
   104  	return false
   105  }
   106  
   107  // indexOfElementInScope returns the index in p.oe of the highest element whose
   108  // tag is in matchTags that is in scope. If no matching element is in scope, it
   109  // returns -1.
   110  func (p *parser) indexOfElementInScope(s scope, matchTags ...a.Atom) int {
   111  	for i := len(p.oe) - 1; i >= 0; i-- {
   112  		tagAtom := p.oe[i].DataAtom
   113  		if p.oe[i].Namespace == "" {
   114  			for _, t := range matchTags {
   115  				if t == tagAtom {
   116  					return i
   117  				}
   118  			}
   119  			switch s {
   120  			case defaultScope:
   121  				// No-op.
   122  			case listItemScope:
   123  				if tagAtom == a.Ol || tagAtom == a.Ul {
   124  					return -1
   125  				}
   126  			case buttonScope:
   127  				if tagAtom == a.Button {
   128  					return -1
   129  				}
   130  			case tableScope:
   131  				if tagAtom == a.Html || tagAtom == a.Table || tagAtom == a.Template {
   132  					return -1
   133  				}
   134  			case selectScope:
   135  				if tagAtom != a.Optgroup && tagAtom != a.Option {
   136  					return -1
   137  				}
   138  			default:
   139  				panic("unreachable")
   140  			}
   141  		}
   142  		switch s {
   143  		case defaultScope, listItemScope, buttonScope:
   144  			for _, t := range defaultScopeStopTags[p.oe[i].Namespace] {
   145  				if t == tagAtom {
   146  					return -1
   147  				}
   148  			}
   149  		}
   150  	}
   151  	return -1
   152  }
   153  
   154  // elementInScope is like popUntil, except that it doesn't modify the stack of
   155  // open elements.
   156  func (p *parser) elementInScope(s scope, matchTags ...a.Atom) bool {
   157  	return p.indexOfElementInScope(s, matchTags...) != -1
   158  }
   159  
   160  // clearStackToContext pops elements off the stack of open elements until a
   161  // scope-defined element is found.
   162  func (p *parser) clearStackToContext(s scope) {
   163  	for i := len(p.oe) - 1; i >= 0; i-- {
   164  		tagAtom := p.oe[i].DataAtom
   165  		switch s {
   166  		case tableScope:
   167  			if tagAtom == a.Html || tagAtom == a.Table || tagAtom == a.Template {
   168  				p.oe = p.oe[:i+1]
   169  				return
   170  			}
   171  		case tableRowScope:
   172  			if tagAtom == a.Html || tagAtom == a.Tr || tagAtom == a.Template {
   173  				p.oe = p.oe[:i+1]
   174  				return
   175  			}
   176  		case tableBodyScope:
   177  			if tagAtom == a.Html || tagAtom == a.Tbody || tagAtom == a.Tfoot || tagAtom == a.Thead || tagAtom == a.Template {
   178  				p.oe = p.oe[:i+1]
   179  				return
   180  			}
   181  		default:
   182  			panic("unreachable")
   183  		}
   184  	}
   185  }
   186  
   187  // generateImpliedEndTags pops nodes off the stack of open elements as long as
   188  // the top node has a tag name of dd, dt, li, optgroup, option, p, rb, rp, rt or rtc.
   189  // If exceptions are specified, nodes with that name will not be popped off.
   190  func (p *parser) generateImpliedEndTags(exceptions ...string) {
   191  	var i int
   192  loop:
   193  	for i = len(p.oe) - 1; i >= 0; i-- {
   194  		n := p.oe[i]
   195  		if n.Type == ElementNode {
   196  			switch n.DataAtom {
   197  			case a.Dd, a.Dt, a.Li, a.Optgroup, a.Option, a.P, a.Rb, a.Rp, a.Rt, a.Rtc:
   198  				for _, except := range exceptions {
   199  					if n.Data == except {
   200  						break loop
   201  					}
   202  				}
   203  				continue
   204  			}
   205  		}
   206  		break
   207  	}
   208  
   209  	p.oe = p.oe[:i+1]
   210  }
   211  
   212  // addChild adds a child node n to the top element, and pushes n onto the stack
   213  // of open elements if it is an element node.
   214  func (p *parser) addChild(n *Node) {
   215  	if p.shouldFosterParent() {
   216  		p.fosterParent(n)
   217  	} else {
   218  		p.top().AppendChild(n)
   219  	}
   220  
   221  	if n.Type == ElementNode {
   222  		p.oe = append(p.oe, n)
   223  	}
   224  }
   225  
   226  // shouldFosterParent returns whether the next node to be added should be
   227  // foster parented.
   228  func (p *parser) shouldFosterParent() bool {
   229  	if p.fosterParenting {
   230  		switch p.top().DataAtom {
   231  		case a.Table, a.Tbody, a.Tfoot, a.Thead, a.Tr:
   232  			return true
   233  		}
   234  	}
   235  	return false
   236  }
   237  
   238  // fosterParent adds a child node according to the foster parenting rules.
   239  // Section 12.2.6.1, "foster parenting".
   240  func (p *parser) fosterParent(n *Node) {
   241  	var table, parent, prev, template *Node
   242  	var i int
   243  	for i = len(p.oe) - 1; i >= 0; i-- {
   244  		if p.oe[i].DataAtom == a.Table {
   245  			table = p.oe[i]
   246  			break
   247  		}
   248  	}
   249  
   250  	var j int
   251  	for j = len(p.oe) - 1; j >= 0; j-- {
   252  		if p.oe[j].DataAtom == a.Template {
   253  			template = p.oe[j]
   254  			break
   255  		}
   256  	}
   257  
   258  	if template != nil && (table == nil || j > i) {
   259  		template.AppendChild(n)
   260  		return
   261  	}
   262  
   263  	if table == nil {
   264  		// The foster parent is the html element.
   265  		parent = p.oe[0]
   266  	} else {
   267  		parent = table.Parent
   268  	}
   269  	if parent == nil {
   270  		parent = p.oe[i-1]
   271  	}
   272  
   273  	if table != nil {
   274  		prev = table.PrevSibling
   275  	} else {
   276  		prev = parent.LastChild
   277  	}
   278  	if prev != nil && prev.Type == TextNode && n.Type == TextNode {
   279  		prev.Data += n.Data
   280  		return
   281  	}
   282  
   283  	parent.InsertBefore(n, table)
   284  }
   285  
   286  // addText adds text to the preceding node if it is a text node, or else it
   287  // calls addChild with a new text node.
   288  func (p *parser) addText(text string) {
   289  	if text == "" {
   290  		return
   291  	}
   292  
   293  	if p.shouldFosterParent() {
   294  		p.fosterParent(&Node{
   295  			Type: TextNode,
   296  			Data: text,
   297  		})
   298  		return
   299  	}
   300  
   301  	t := p.top()
   302  	if n := t.LastChild; n != nil && n.Type == TextNode {
   303  		n.Data += text
   304  		return
   305  	}
   306  	p.addChild(&Node{
   307  		Type: TextNode,
   308  		Data: text,
   309  	})
   310  }
   311  
   312  // addElement adds a child element based on the current token.
   313  func (p *parser) addElement() {
   314  	p.addChild(&Node{
   315  		Type:     ElementNode,
   316  		DataAtom: p.tok.DataAtom,
   317  		Data:     p.tok.Data,
   318  		Attr:     p.tok.Attr,
   319  	})
   320  }
   321  
   322  // Section 12.2.4.3.
   323  func (p *parser) addFormattingElement() {
   324  	tagAtom, attr := p.tok.DataAtom, p.tok.Attr
   325  	p.addElement()
   326  
   327  	// Implement the Noah's Ark clause, but with three per family instead of two.
   328  	identicalElements := 0
   329  findIdenticalElements:
   330  	for i := len(p.afe) - 1; i >= 0; i-- {
   331  		n := p.afe[i]
   332  		if n.Type == scopeMarkerNode {
   333  			break
   334  		}
   335  		if n.Type != ElementNode {
   336  			continue
   337  		}
   338  		if n.Namespace != "" {
   339  			continue
   340  		}
   341  		if n.DataAtom != tagAtom {
   342  			continue
   343  		}
   344  		if len(n.Attr) != len(attr) {
   345  			continue
   346  		}
   347  	compareAttributes:
   348  		for _, t0 := range n.Attr {
   349  			for _, t1 := range attr {
   350  				if t0.Key == t1.Key && t0.Namespace == t1.Namespace && t0.Val == t1.Val {
   351  					// Found a match for this attribute, continue with the next attribute.
   352  					continue compareAttributes
   353  				}
   354  			}
   355  			// If we get here, there is no attribute that matches a.
   356  			// Therefore the element is not identical to the new one.
   357  			continue findIdenticalElements
   358  		}
   359  
   360  		identicalElements++
   361  		if identicalElements >= 3 {
   362  			p.afe.remove(n)
   363  		}
   364  	}
   365  
   366  	p.afe = append(p.afe, p.top())
   367  }
   368  
   369  // Section 12.2.4.3.
   370  func (p *parser) clearActiveFormattingElements() {
   371  	for {
   372  		n := p.afe.pop()
   373  		if len(p.afe) == 0 || n.Type == scopeMarkerNode {
   374  			return
   375  		}
   376  	}
   377  }
   378  
   379  // Section 12.2.4.3.
   380  func (p *parser) reconstructActiveFormattingElements() {
   381  	n := p.afe.top()
   382  	if n == nil {
   383  		return
   384  	}
   385  	if n.Type == scopeMarkerNode || p.oe.index(n) != -1 {
   386  		return
   387  	}
   388  	i := len(p.afe) - 1
   389  	for n.Type != scopeMarkerNode && p.oe.index(n) == -1 {
   390  		if i == 0 {
   391  			i = -1
   392  			break
   393  		}
   394  		i--
   395  		n = p.afe[i]
   396  	}
   397  	for {
   398  		i++
   399  		clone := p.afe[i].clone()
   400  		p.addChild(clone)
   401  		p.afe[i] = clone
   402  		if i == len(p.afe)-1 {
   403  			break
   404  		}
   405  	}
   406  }
   407  
   408  // Section 12.2.5.
   409  func (p *parser) acknowledgeSelfClosingTag() {
   410  	p.hasSelfClosingToken = false
   411  }
   412  
   413  // An insertion mode (section 12.2.4.1) is the state transition function from
   414  // a particular state in the HTML5 parser's state machine. It updates the
   415  // parser's fields depending on parser.tok (where ErrorToken means EOF).
   416  // It returns whether the token was consumed.
   417  type insertionMode func(*parser) bool
   418  
   419  // setOriginalIM sets the insertion mode to return to after completing a text or
   420  // inTableText insertion mode.
   421  // Section 12.2.4.1, "using the rules for".
   422  func (p *parser) setOriginalIM() {
   423  	if p.originalIM != nil {
   424  		panic("html: bad parser state: originalIM was set twice")
   425  	}
   426  	p.originalIM = p.im
   427  }
   428  
   429  // Section 12.2.4.1, "reset the insertion mode".
   430  func (p *parser) resetInsertionMode() {
   431  	for i := len(p.oe) - 1; i >= 0; i-- {
   432  		n := p.oe[i]
   433  		last := i == 0
   434  		if last && p.context != nil {
   435  			n = p.context
   436  		}
   437  
   438  		switch n.DataAtom {
   439  		case a.Select:
   440  			if !last {
   441  				for ancestor, first := n, p.oe[0]; ancestor != first; {
   442  					ancestor = p.oe[p.oe.index(ancestor)-1]
   443  					switch ancestor.DataAtom {
   444  					case a.Template:
   445  						p.im = inSelectIM
   446  						return
   447  					case a.Table:
   448  						p.im = inSelectInTableIM
   449  						return
   450  					}
   451  				}
   452  			}
   453  			p.im = inSelectIM
   454  		case a.Td, a.Th:
   455  			// TODO: remove this divergence from the HTML5 spec.
   456  			//
   457  			// See https://bugs.chromium.org/p/chromium/issues/detail?id=829668
   458  			p.im = inCellIM
   459  		case a.Tr:
   460  			p.im = inRowIM
   461  		case a.Tbody, a.Thead, a.Tfoot:
   462  			p.im = inTableBodyIM
   463  		case a.Caption:
   464  			p.im = inCaptionIM
   465  		case a.Colgroup:
   466  			p.im = inColumnGroupIM
   467  		case a.Table:
   468  			p.im = inTableIM
   469  		case a.Template:
   470  			// TODO: remove this divergence from the HTML5 spec.
   471  			if n.Namespace != "" {
   472  				continue
   473  			}
   474  			p.im = p.templateStack.top()
   475  		case a.Head:
   476  			// TODO: remove this divergence from the HTML5 spec.
   477  			//
   478  			// See https://bugs.chromium.org/p/chromium/issues/detail?id=829668
   479  			p.im = inHeadIM
   480  		case a.Body:
   481  			p.im = inBodyIM
   482  		case a.Frameset:
   483  			p.im = inFramesetIM
   484  		case a.Html:
   485  			if p.head == nil {
   486  				p.im = beforeHeadIM
   487  			} else {
   488  				p.im = afterHeadIM
   489  			}
   490  		default:
   491  			if last {
   492  				p.im = inBodyIM
   493  				return
   494  			}
   495  			continue
   496  		}
   497  		return
   498  	}
   499  }
   500  
   501  const whitespace = " \t\r\n\f"
   502  
   503  // Section 12.2.6.4.1.
   504  func initialIM(p *parser) bool {
   505  	switch p.tok.Type {
   506  	case TextToken:
   507  		p.tok.Data = strings.TrimLeft(p.tok.Data, whitespace)
   508  		if len(p.tok.Data) == 0 {
   509  			// It was all whitespace, so ignore it.
   510  			return true
   511  		}
   512  	case CommentToken:
   513  		p.doc.AppendChild(&Node{
   514  			Type: CommentNode,
   515  			Data: p.tok.Data,
   516  		})
   517  		return true
   518  	case DoctypeToken:
   519  		n, quirks := parseDoctype(p.tok.Data)
   520  		p.doc.AppendChild(n)
   521  		p.quirks = quirks
   522  		p.im = beforeHTMLIM
   523  		return true
   524  	}
   525  	p.quirks = true
   526  	p.im = beforeHTMLIM
   527  	return false
   528  }
   529  
   530  // Section 12.2.6.4.2.
   531  func beforeHTMLIM(p *parser) bool {
   532  	switch p.tok.Type {
   533  	case DoctypeToken:
   534  		// Ignore the token.
   535  		return true
   536  	case TextToken:
   537  		p.tok.Data = strings.TrimLeft(p.tok.Data, whitespace)
   538  		if len(p.tok.Data) == 0 {
   539  			// It was all whitespace, so ignore it.
   540  			return true
   541  		}
   542  	case StartTagToken:
   543  		if p.tok.DataAtom == a.Html {
   544  			p.addElement()
   545  			p.im = beforeHeadIM
   546  			return true
   547  		}
   548  	case EndTagToken:
   549  		switch p.tok.DataAtom {
   550  		case a.Head, a.Body, a.Html, a.Br:
   551  			p.parseImpliedToken(StartTagToken, a.Html, a.Html.String())
   552  			return false
   553  		default:
   554  			// Ignore the token.
   555  			return true
   556  		}
   557  	case CommentToken:
   558  		p.doc.AppendChild(&Node{
   559  			Type: CommentNode,
   560  			Data: p.tok.Data,
   561  		})
   562  		return true
   563  	}
   564  	p.parseImpliedToken(StartTagToken, a.Html, a.Html.String())
   565  	return false
   566  }
   567  
   568  // Section 12.2.6.4.3.
   569  func beforeHeadIM(p *parser) bool {
   570  	switch p.tok.Type {
   571  	case TextToken:
   572  		p.tok.Data = strings.TrimLeft(p.tok.Data, whitespace)
   573  		if len(p.tok.Data) == 0 {
   574  			// It was all whitespace, so ignore it.
   575  			return true
   576  		}
   577  	case StartTagToken:
   578  		switch p.tok.DataAtom {
   579  		case a.Head:
   580  			p.addElement()
   581  			p.head = p.top()
   582  			p.im = inHeadIM
   583  			return true
   584  		case a.Html:
   585  			return inBodyIM(p)
   586  		}
   587  	case EndTagToken:
   588  		switch p.tok.DataAtom {
   589  		case a.Head, a.Body, a.Html, a.Br:
   590  			p.parseImpliedToken(StartTagToken, a.Head, a.Head.String())
   591  			return false
   592  		default:
   593  			// Ignore the token.
   594  			return true
   595  		}
   596  	case CommentToken:
   597  		p.addChild(&Node{
   598  			Type: CommentNode,
   599  			Data: p.tok.Data,
   600  		})
   601  		return true
   602  	case DoctypeToken:
   603  		// Ignore the token.
   604  		return true
   605  	}
   606  
   607  	p.parseImpliedToken(StartTagToken, a.Head, a.Head.String())
   608  	return false
   609  }
   610  
   611  // Section 12.2.6.4.4.
   612  func inHeadIM(p *parser) bool {
   613  	switch p.tok.Type {
   614  	case TextToken:
   615  		s := strings.TrimLeft(p.tok.Data, whitespace)
   616  		if len(s) < len(p.tok.Data) {
   617  			// Add the initial whitespace to the current node.
   618  			p.addText(p.tok.Data[:len(p.tok.Data)-len(s)])
   619  			if s == "" {
   620  				return true
   621  			}
   622  			p.tok.Data = s
   623  		}
   624  	case StartTagToken:
   625  		switch p.tok.DataAtom {
   626  		case a.Html:
   627  			return inBodyIM(p)
   628  		case a.Base, a.Basefont, a.Bgsound, a.Command, a.Link, a.Meta:
   629  			p.addElement()
   630  			p.oe.pop()
   631  			p.acknowledgeSelfClosingTag()
   632  			return true
   633  		case a.Script, a.Title, a.Noscript, a.Noframes, a.Style:
   634  			p.addElement()
   635  			p.setOriginalIM()
   636  			p.im = textIM
   637  			return true
   638  		case a.Head:
   639  			// Ignore the token.
   640  			return true
   641  		case a.Template:
   642  			p.addElement()
   643  			p.afe = append(p.afe, &scopeMarker)
   644  			p.framesetOK = false
   645  			p.im = inTemplateIM
   646  			p.templateStack = append(p.templateStack, inTemplateIM)
   647  			return true
   648  		}
   649  	case EndTagToken:
   650  		switch p.tok.DataAtom {
   651  		case a.Head:
   652  			p.oe.pop()
   653  			p.im = afterHeadIM
   654  			return true
   655  		case a.Body, a.Html, a.Br:
   656  			p.parseImpliedToken(EndTagToken, a.Head, a.Head.String())
   657  			return false
   658  		case a.Template:
   659  			if !p.oe.contains(a.Template) {
   660  				return true
   661  			}
   662  			// TODO: remove this divergence from the HTML5 spec.
   663  			//
   664  			// See https://bugs.chromium.org/p/chromium/issues/detail?id=829668
   665  			p.generateImpliedEndTags()
   666  			for i := len(p.oe) - 1; i >= 0; i-- {
   667  				if n := p.oe[i]; n.Namespace == "" && n.DataAtom == a.Template {
   668  					p.oe = p.oe[:i]
   669  					break
   670  				}
   671  			}
   672  			p.clearActiveFormattingElements()
   673  			p.templateStack.pop()
   674  			p.resetInsertionMode()
   675  			return true
   676  		default:
   677  			// Ignore the token.
   678  			return true
   679  		}
   680  	case CommentToken:
   681  		p.addChild(&Node{
   682  			Type: CommentNode,
   683  			Data: p.tok.Data,
   684  		})
   685  		return true
   686  	case DoctypeToken:
   687  		// Ignore the token.
   688  		return true
   689  	}
   690  
   691  	p.parseImpliedToken(EndTagToken, a.Head, a.Head.String())
   692  	return false
   693  }
   694  
   695  // Section 12.2.6.4.6.
   696  func afterHeadIM(p *parser) bool {
   697  	switch p.tok.Type {
   698  	case TextToken:
   699  		s := strings.TrimLeft(p.tok.Data, whitespace)
   700  		if len(s) < len(p.tok.Data) {
   701  			// Add the initial whitespace to the current node.
   702  			p.addText(p.tok.Data[:len(p.tok.Data)-len(s)])
   703  			if s == "" {
   704  				return true
   705  			}
   706  			p.tok.Data = s
   707  		}
   708  	case StartTagToken:
   709  		switch p.tok.DataAtom {
   710  		case a.Html:
   711  			return inBodyIM(p)
   712  		case a.Body:
   713  			p.addElement()
   714  			p.framesetOK = false
   715  			p.im = inBodyIM
   716  			return true
   717  		case a.Frameset:
   718  			p.addElement()
   719  			p.im = inFramesetIM
   720  			return true
   721  		case a.Base, a.Basefont, a.Bgsound, a.Link, a.Meta, a.Noframes, a.Script, a.Style, a.Template, a.Title:
   722  			p.oe = append(p.oe, p.head)
   723  			defer p.oe.remove(p.head)
   724  			return inHeadIM(p)
   725  		case a.Head:
   726  			// Ignore the token.
   727  			return true
   728  		}
   729  	case EndTagToken:
   730  		switch p.tok.DataAtom {
   731  		case a.Body, a.Html, a.Br:
   732  			// Drop down to creating an implied <body> tag.
   733  		case a.Template:
   734  			return inHeadIM(p)
   735  		default:
   736  			// Ignore the token.
   737  			return true
   738  		}
   739  	case CommentToken:
   740  		p.addChild(&Node{
   741  			Type: CommentNode,
   742  			Data: p.tok.Data,
   743  		})
   744  		return true
   745  	case DoctypeToken:
   746  		// Ignore the token.
   747  		return true
   748  	}
   749  
   750  	p.parseImpliedToken(StartTagToken, a.Body, a.Body.String())
   751  	p.framesetOK = true
   752  	return false
   753  }
   754  
   755  // copyAttributes copies attributes of src not found on dst to dst.
   756  func copyAttributes(dst *Node, src Token) {
   757  	if len(src.Attr) == 0 {
   758  		return
   759  	}
   760  	attr := map[string]string{}
   761  	for _, t := range dst.Attr {
   762  		attr[t.Key] = t.Val
   763  	}
   764  	for _, t := range src.Attr {
   765  		if _, ok := attr[t.Key]; !ok {
   766  			dst.Attr = append(dst.Attr, t)
   767  			attr[t.Key] = t.Val
   768  		}
   769  	}
   770  }
   771  
   772  // Section 12.2.6.4.7.
   773  func inBodyIM(p *parser) bool {
   774  	switch p.tok.Type {
   775  	case TextToken:
   776  		d := p.tok.Data
   777  		switch n := p.oe.top(); n.DataAtom {
   778  		case a.Pre, a.Listing:
   779  			if n.FirstChild == nil {
   780  				// Ignore a newline at the start of a <pre> block.
   781  				if d != "" && d[0] == '\r' {
   782  					d = d[1:]
   783  				}
   784  				if d != "" && d[0] == '\n' {
   785  					d = d[1:]
   786  				}
   787  			}
   788  		}
   789  		d = strings.Replace(d, "\x00", "", -1)
   790  		if d == "" {
   791  			return true
   792  		}
   793  		p.reconstructActiveFormattingElements()
   794  		p.addText(d)
   795  		if p.framesetOK && strings.TrimLeft(d, whitespace) != "" {
   796  			// There were non-whitespace characters inserted.
   797  			p.framesetOK = false
   798  		}
   799  	case StartTagToken:
   800  		switch p.tok.DataAtom {
   801  		case a.Html:
   802  			if p.oe.contains(a.Template) {
   803  				return true
   804  			}
   805  			copyAttributes(p.oe[0], p.tok)
   806  		case a.Base, a.Basefont, a.Bgsound, a.Command, a.Link, a.Meta, a.Noframes, a.Script, a.Style, a.Template, a.Title:
   807  			return inHeadIM(p)
   808  		case a.Body:
   809  			if p.oe.contains(a.Template) {
   810  				return true
   811  			}
   812  			if len(p.oe) >= 2 {
   813  				body := p.oe[1]
   814  				if body.Type == ElementNode && body.DataAtom == a.Body {
   815  					p.framesetOK = false
   816  					copyAttributes(body, p.tok)
   817  				}
   818  			}
   819  		case a.Frameset:
   820  			if !p.framesetOK || len(p.oe) < 2 || p.oe[1].DataAtom != a.Body {
   821  				// Ignore the token.
   822  				return true
   823  			}
   824  			body := p.oe[1]
   825  			if body.Parent != nil {
   826  				body.Parent.RemoveChild(body)
   827  			}
   828  			p.oe = p.oe[:1]
   829  			p.addElement()
   830  			p.im = inFramesetIM
   831  			return true
   832  		case a.Address, a.Article, a.Aside, a.Blockquote, a.Center, a.Details, a.Dir, a.Div, a.Dl, a.Fieldset, a.Figcaption, a.Figure, a.Footer, a.Header, a.Hgroup, a.Menu, a.Nav, a.Ol, a.P, a.Section, a.Summary, a.Ul:
   833  			p.popUntil(buttonScope, a.P)
   834  			p.addElement()
   835  		case a.H1, a.H2, a.H3, a.H4, a.H5, a.H6:
   836  			p.popUntil(buttonScope, a.P)
   837  			switch n := p.top(); n.DataAtom {
   838  			case a.H1, a.H2, a.H3, a.H4, a.H5, a.H6:
   839  				p.oe.pop()
   840  			}
   841  			p.addElement()
   842  		case a.Pre, a.Listing:
   843  			p.popUntil(buttonScope, a.P)
   844  			p.addElement()
   845  			// The newline, if any, will be dealt with by the TextToken case.
   846  			p.framesetOK = false
   847  		case a.Form:
   848  			if p.form != nil && !p.oe.contains(a.Template) {
   849  				// Ignore the token
   850  				return true
   851  			}
   852  			p.popUntil(buttonScope, a.P)
   853  			p.addElement()
   854  			if !p.oe.contains(a.Template) {
   855  				p.form = p.top()
   856  			}
   857  		case a.Li:
   858  			p.framesetOK = false
   859  			for i := len(p.oe) - 1; i >= 0; i-- {
   860  				node := p.oe[i]
   861  				switch node.DataAtom {
   862  				case a.Li:
   863  					p.oe = p.oe[:i]
   864  				case a.Address, a.Div, a.P:
   865  					continue
   866  				default:
   867  					if !isSpecialElement(node) {
   868  						continue
   869  					}
   870  				}
   871  				break
   872  			}
   873  			p.popUntil(buttonScope, a.P)
   874  			p.addElement()
   875  		case a.Dd, a.Dt:
   876  			p.framesetOK = false
   877  			for i := len(p.oe) - 1; i >= 0; i-- {
   878  				node := p.oe[i]
   879  				switch node.DataAtom {
   880  				case a.Dd, a.Dt:
   881  					p.oe = p.oe[:i]
   882  				case a.Address, a.Div, a.P:
   883  					continue
   884  				default:
   885  					if !isSpecialElement(node) {
   886  						continue
   887  					}
   888  				}
   889  				break
   890  			}
   891  			p.popUntil(buttonScope, a.P)
   892  			p.addElement()
   893  		case a.Plaintext:
   894  			p.popUntil(buttonScope, a.P)
   895  			p.addElement()
   896  		case a.Button:
   897  			p.popUntil(defaultScope, a.Button)
   898  			p.reconstructActiveFormattingElements()
   899  			p.addElement()
   900  			p.framesetOK = false
   901  		case a.A:
   902  			for i := len(p.afe) - 1; i >= 0 && p.afe[i].Type != scopeMarkerNode; i-- {
   903  				if n := p.afe[i]; n.Type == ElementNode && n.DataAtom == a.A {
   904  					p.inBodyEndTagFormatting(a.A, "a")
   905  					p.oe.remove(n)
   906  					p.afe.remove(n)
   907  					break
   908  				}
   909  			}
   910  			p.reconstructActiveFormattingElements()
   911  			p.addFormattingElement()
   912  		case a.B, a.Big, a.Code, a.Em, a.Font, a.I, a.S, a.Small, a.Strike, a.Strong, a.Tt, a.U:
   913  			p.reconstructActiveFormattingElements()
   914  			p.addFormattingElement()
   915  		case a.Nobr:
   916  			p.reconstructActiveFormattingElements()
   917  			if p.elementInScope(defaultScope, a.Nobr) {
   918  				p.inBodyEndTagFormatting(a.Nobr, "nobr")
   919  				p.reconstructActiveFormattingElements()
   920  			}
   921  			p.addFormattingElement()
   922  		case a.Applet, a.Marquee, a.Object:
   923  			p.reconstructActiveFormattingElements()
   924  			p.addElement()
   925  			p.afe = append(p.afe, &scopeMarker)
   926  			p.framesetOK = false
   927  		case a.Table:
   928  			if !p.quirks {
   929  				p.popUntil(buttonScope, a.P)
   930  			}
   931  			p.addElement()
   932  			p.framesetOK = false
   933  			p.im = inTableIM
   934  			return true
   935  		case a.Area, a.Br, a.Embed, a.Img, a.Input, a.Keygen, a.Wbr:
   936  			p.reconstructActiveFormattingElements()
   937  			p.addElement()
   938  			p.oe.pop()
   939  			p.acknowledgeSelfClosingTag()
   940  			if p.tok.DataAtom == a.Input {
   941  				for _, t := range p.tok.Attr {
   942  					if t.Key == "type" {
   943  						if strings.ToLower(t.Val) == "hidden" {
   944  							// Skip setting framesetOK = false
   945  							return true
   946  						}
   947  					}
   948  				}
   949  			}
   950  			p.framesetOK = false
   951  		case a.Param, a.Source, a.Track:
   952  			p.addElement()
   953  			p.oe.pop()
   954  			p.acknowledgeSelfClosingTag()
   955  		case a.Hr:
   956  			p.popUntil(buttonScope, a.P)
   957  			p.addElement()
   958  			p.oe.pop()
   959  			p.acknowledgeSelfClosingTag()
   960  			p.framesetOK = false
   961  		case a.Image:
   962  			p.tok.DataAtom = a.Img
   963  			p.tok.Data = a.Img.String()
   964  			return false
   965  		case a.Isindex:
   966  			if p.form != nil {
   967  				// Ignore the token.
   968  				return true
   969  			}
   970  			action := ""
   971  			prompt := "This is a searchable index. Enter search keywords: "
   972  			attr := []Attribute{{Key: "name", Val: "isindex"}}
   973  			for _, t := range p.tok.Attr {
   974  				switch t.Key {
   975  				case "action":
   976  					action = t.Val
   977  				case "name":
   978  					// Ignore the attribute.
   979  				case "prompt":
   980  					prompt = t.Val
   981  				default:
   982  					attr = append(attr, t)
   983  				}
   984  			}
   985  			p.acknowledgeSelfClosingTag()
   986  			p.popUntil(buttonScope, a.P)
   987  			p.parseImpliedToken(StartTagToken, a.Form, a.Form.String())
   988  			if p.form == nil {
   989  				// NOTE: The 'isindex' element has been removed,
   990  				// and the 'template' element has not been designed to be
   991  				// collaborative with the index element.
   992  				//
   993  				// Ignore the token.
   994  				return true
   995  			}
   996  			if action != "" {
   997  				p.form.Attr = []Attribute{{Key: "action", Val: action}}
   998  			}
   999  			p.parseImpliedToken(StartTagToken, a.Hr, a.Hr.String())
  1000  			p.parseImpliedToken(StartTagToken, a.Label, a.Label.String())
  1001  			p.addText(prompt)
  1002  			p.addChild(&Node{
  1003  				Type:     ElementNode,
  1004  				DataAtom: a.Input,
  1005  				Data:     a.Input.String(),
  1006  				Attr:     attr,
  1007  			})
  1008  			p.oe.pop()
  1009  			p.parseImpliedToken(EndTagToken, a.Label, a.Label.String())
  1010  			p.parseImpliedToken(StartTagToken, a.Hr, a.Hr.String())
  1011  			p.parseImpliedToken(EndTagToken, a.Form, a.Form.String())
  1012  		case a.Textarea:
  1013  			p.addElement()
  1014  			p.setOriginalIM()
  1015  			p.framesetOK = false
  1016  			p.im = textIM
  1017  		case a.Xmp:
  1018  			p.popUntil(buttonScope, a.P)
  1019  			p.reconstructActiveFormattingElements()
  1020  			p.framesetOK = false
  1021  			p.addElement()
  1022  			p.setOriginalIM()
  1023  			p.im = textIM
  1024  		case a.Iframe:
  1025  			p.framesetOK = false
  1026  			p.addElement()
  1027  			p.setOriginalIM()
  1028  			p.im = textIM
  1029  		case a.Noembed, a.Noscript:
  1030  			p.addElement()
  1031  			p.setOriginalIM()
  1032  			p.im = textIM
  1033  		case a.Select:
  1034  			p.reconstructActiveFormattingElements()
  1035  			p.addElement()
  1036  			p.framesetOK = false
  1037  			p.im = inSelectIM
  1038  			return true
  1039  		case a.Optgroup, a.Option:
  1040  			if p.top().DataAtom == a.Option {
  1041  				p.oe.pop()
  1042  			}
  1043  			p.reconstructActiveFormattingElements()
  1044  			p.addElement()
  1045  		case a.Rb, a.Rtc:
  1046  			if p.elementInScope(defaultScope, a.Ruby) {
  1047  				p.generateImpliedEndTags()
  1048  			}
  1049  			p.addElement()
  1050  		case a.Rp, a.Rt:
  1051  			if p.elementInScope(defaultScope, a.Ruby) {
  1052  				p.generateImpliedEndTags("rtc")
  1053  			}
  1054  			p.addElement()
  1055  		case a.Math, a.Svg:
  1056  			p.reconstructActiveFormattingElements()
  1057  			if p.tok.DataAtom == a.Math {
  1058  				adjustAttributeNames(p.tok.Attr, mathMLAttributeAdjustments)
  1059  			} else {
  1060  				adjustAttributeNames(p.tok.Attr, svgAttributeAdjustments)
  1061  			}
  1062  			adjustForeignAttributes(p.tok.Attr)
  1063  			p.addElement()
  1064  			p.top().Namespace = p.tok.Data
  1065  			if p.hasSelfClosingToken {
  1066  				p.oe.pop()
  1067  				p.acknowledgeSelfClosingTag()
  1068  			}
  1069  			return true
  1070  		case a.Caption, a.Col, a.Colgroup, a.Frame, a.Head, a.Tbody, a.Td, a.Tfoot, a.Th, a.Thead, a.Tr:
  1071  			// Ignore the token.
  1072  		default:
  1073  			p.reconstructActiveFormattingElements()
  1074  			p.addElement()
  1075  		}
  1076  	case EndTagToken:
  1077  		switch p.tok.DataAtom {
  1078  		case a.Body:
  1079  			if p.elementInScope(defaultScope, a.Body) {
  1080  				p.im = afterBodyIM
  1081  			}
  1082  		case a.Html:
  1083  			if p.elementInScope(defaultScope, a.Body) {
  1084  				p.parseImpliedToken(EndTagToken, a.Body, a.Body.String())
  1085  				return false
  1086  			}
  1087  			return true
  1088  		case a.Address, a.Article, a.Aside, a.Blockquote, a.Button, a.Center, a.Details, a.Dir, a.Div, a.Dl, a.Fieldset, a.Figcaption, a.Figure, a.Footer, a.Header, a.Hgroup, a.Listing, a.Menu, a.Nav, a.Ol, a.Pre, a.Section, a.Summary, a.Ul:
  1089  			p.popUntil(defaultScope, p.tok.DataAtom)
  1090  		case a.Form:
  1091  			if p.oe.contains(a.Template) {
  1092  				i := p.indexOfElementInScope(defaultScope, a.Form)
  1093  				if i == -1 {
  1094  					// Ignore the token.
  1095  					return true
  1096  				}
  1097  				p.generateImpliedEndTags()
  1098  				if p.oe[i].DataAtom != a.Form {
  1099  					// Ignore the token.
  1100  					return true
  1101  				}
  1102  				p.popUntil(defaultScope, a.Form)
  1103  			} else {
  1104  				node := p.form
  1105  				p.form = nil
  1106  				i := p.indexOfElementInScope(defaultScope, a.Form)
  1107  				if node == nil || i == -1 || p.oe[i] != node {
  1108  					// Ignore the token.
  1109  					return true
  1110  				}
  1111  				p.generateImpliedEndTags()
  1112  				p.oe.remove(node)
  1113  			}
  1114  		case a.P:
  1115  			if !p.elementInScope(buttonScope, a.P) {
  1116  				p.parseImpliedToken(StartTagToken, a.P, a.P.String())
  1117  			}
  1118  			p.popUntil(buttonScope, a.P)
  1119  		case a.Li:
  1120  			p.popUntil(listItemScope, a.Li)
  1121  		case a.Dd, a.Dt:
  1122  			p.popUntil(defaultScope, p.tok.DataAtom)
  1123  		case a.H1, a.H2, a.H3, a.H4, a.H5, a.H6:
  1124  			p.popUntil(defaultScope, a.H1, a.H2, a.H3, a.H4, a.H5, a.H6)
  1125  		case a.A, a.B, a.Big, a.Code, a.Em, a.Font, a.I, a.Nobr, a.S, a.Small, a.Strike, a.Strong, a.Tt, a.U:
  1126  			p.inBodyEndTagFormatting(p.tok.DataAtom, p.tok.Data)
  1127  		case a.Applet, a.Marquee, a.Object:
  1128  			if p.popUntil(defaultScope, p.tok.DataAtom) {
  1129  				p.clearActiveFormattingElements()
  1130  			}
  1131  		case a.Br:
  1132  			p.tok.Type = StartTagToken
  1133  			return false
  1134  		case a.Template:
  1135  			return inHeadIM(p)
  1136  		default:
  1137  			p.inBodyEndTagOther(p.tok.DataAtom, p.tok.Data)
  1138  		}
  1139  	case CommentToken:
  1140  		p.addChild(&Node{
  1141  			Type: CommentNode,
  1142  			Data: p.tok.Data,
  1143  		})
  1144  	case ErrorToken:
  1145  		// TODO: remove this divergence from the HTML5 spec.
  1146  		if len(p.templateStack) > 0 {
  1147  			p.im = inTemplateIM
  1148  			return false
  1149  		} else {
  1150  			for _, e := range p.oe {
  1151  				switch e.DataAtom {
  1152  				case a.Dd, a.Dt, a.Li, a.Optgroup, a.Option, a.P, a.Rb, a.Rp, a.Rt, a.Rtc, a.Tbody, a.Td, a.Tfoot, a.Th,
  1153  					a.Thead, a.Tr, a.Body, a.Html:
  1154  				default:
  1155  					return true
  1156  				}
  1157  			}
  1158  		}
  1159  	}
  1160  
  1161  	return true
  1162  }
  1163  
// inBodyEndTagFormatting handles an end tag for a formatting element while in
// the "in body" insertion mode (inBodyIM dispatches here for tags such as a,
// b, big, code, em, font, i, nobr, s, small, strike, strong, tt and u).
//
// tagAtom and tagName identify the end tag being processed. The function
// rewrites the stack of open elements (p.oe), the list of active formatting
// elements (p.afe) and the node tree so that misnested formatting markup
// ends up properly nested.
func (p *parser) inBodyEndTagFormatting(tagAtom a.Atom, tagName string) {
	// This is the "adoption agency" algorithm, described at
	// https://html.spec.whatwg.org/multipage/syntax.html#adoptionAgency

	// TODO: this is a fairly literal line-by-line translation of that algorithm.
	// Once the code successfully parses the comprehensive test suite, we should
	// refactor this code to be more idiomatic.

	// Steps 1-4. The outer loop.
	// The loop runs at most eight times; every early exit below is a return.
	for i := 0; i < 8; i++ {
		// Step 5. Find the formatting element.
		// Search p.afe from the most recent entry backwards, stopping at the
		// first scope marker.
		var formattingElement *Node
		for j := len(p.afe) - 1; j >= 0; j-- {
			if p.afe[j].Type == scopeMarkerNode {
				break
			}
			if p.afe[j].DataAtom == tagAtom {
				formattingElement = p.afe[j]
				break
			}
		}
		if formattingElement == nil {
			// No matching active formatting element: treat the token like
			// any other end tag.
			p.inBodyEndTagOther(tagAtom, tagName)
			return
		}
		feIndex := p.oe.index(formattingElement)
		if feIndex == -1 {
			// The element is no longer open; just drop it from p.afe.
			p.afe.remove(formattingElement)
			return
		}
		if !p.elementInScope(defaultScope, tagAtom) {
			// Ignore the tag.
			return
		}

		// Steps 9-10. Find the furthest block.
		// This is the first special element at or above feIndex on the stack
		// of open elements.
		var furthestBlock *Node
		for _, e := range p.oe[feIndex:] {
			if isSpecialElement(e) {
				furthestBlock = e
				break
			}
		}
		if furthestBlock == nil {
			// Simple case: pop everything up to and including the formatting
			// element and remove it from p.afe.
			e := p.oe.pop()
			for e != formattingElement {
				e = p.oe.pop()
			}
			p.afe.remove(e)
			return
		}

		// Steps 11-12. Find the common ancestor and bookmark node.
		// NOTE(review): this indexes p.oe[feIndex-1], so it assumes the
		// formatting element is never the bottom-most open element — confirm.
		commonAncestor := p.oe[feIndex-1]
		bookmark := p.afe.index(formattingElement)

		// Step 13. The inner loop. Find the lastNode to reparent.
		lastNode := furthestBlock
		node := furthestBlock
		x := p.oe.index(node)
		// Steps 13.1-13.2
		// NOTE(review): the loop is bounded to three iterations (the counter
		// also advances on the `continue` path), whether or not
		// formattingElement has been reached — confirm against the spec text.
		for j := 0; j < 3; j++ {
			// Step 13.3.
			x--
			node = p.oe[x]
			// Step 13.4 - 13.5.
			if p.afe.index(node) == -1 {
				p.oe.remove(node)
				continue
			}
			// Step 13.6.
			if node == formattingElement {
				break
			}
			// Step 13.7.
			// Replace node by a clone in both p.afe and p.oe.
			clone := node.clone()
			p.afe[p.afe.index(node)] = clone
			p.oe[p.oe.index(node)] = clone
			node = clone
			// Step 13.8.
			if lastNode == furthestBlock {
				bookmark = p.afe.index(node) + 1
			}
			// Step 13.9.
			// Detach lastNode from its current parent before re-attaching it.
			if lastNode.Parent != nil {
				lastNode.Parent.RemoveChild(lastNode)
			}
			node.AppendChild(lastNode)
			// Step 13.10.
			lastNode = node
		}

		// Step 14. Reparent lastNode to the common ancestor,
		// or for misnested table nodes, to the foster parent.
		if lastNode.Parent != nil {
			lastNode.Parent.RemoveChild(lastNode)
		}
		switch commonAncestor.DataAtom {
		case a.Table, a.Tbody, a.Tfoot, a.Thead, a.Tr:
			p.fosterParent(lastNode)
		default:
			commonAncestor.AppendChild(lastNode)
		}

		// Steps 15-17. Reparent nodes from the furthest block's children
		// to a clone of the formatting element.
		clone := formattingElement.clone()
		reparentChildren(clone, furthestBlock)
		furthestBlock.AppendChild(clone)

		// Step 18. Fix up the list of active formatting elements.
		if oldLoc := p.afe.index(formattingElement); oldLoc != -1 && oldLoc < bookmark {
			// Move the bookmark with the rest of the list.
			bookmark--
		}
		p.afe.remove(formattingElement)
		p.afe.insert(bookmark, clone)

		// Step 19. Fix up the stack of open elements.
		// The clone goes immediately after the furthest block.
		p.oe.remove(formattingElement)
		p.oe.insert(p.oe.index(furthestBlock)+1, clone)
	}
}
  1287  
  1288  // inBodyEndTagOther performs the "any other end tag" algorithm for inBodyIM.
  1289  // "Any other end tag" handling from 12.2.6.5 The rules for parsing tokens in foreign content
  1290  // https://html.spec.whatwg.org/multipage/syntax.html#parsing-main-inforeign
  1291  func (p *parser) inBodyEndTagOther(tagAtom a.Atom, tagName string) {
  1292  	for i := len(p.oe) - 1; i >= 0; i-- {
  1293  		// Two element nodes have the same tag if they have the same Data (a
  1294  		// string-typed field). As an optimization, for common HTML tags, each
  1295  		// Data string is assigned a unique, non-zero DataAtom (a uint32-typed
  1296  		// field), since integer comparison is faster than string comparison.
  1297  		// Uncommon (custom) tags get a zero DataAtom.
  1298  		//
  1299  		// The if condition here is equivalent to (p.oe[i].Data == tagName).
  1300  		if (p.oe[i].DataAtom == tagAtom) &&
  1301  			((tagAtom != 0) || (p.oe[i].Data == tagName)) {
  1302  			p.oe = p.oe[:i]
  1303  			break
  1304  		}
  1305  		if isSpecialElement(p.oe[i]) {
  1306  			break
  1307  		}
  1308  	}
  1309  }
  1310  
  1311  // Section 12.2.6.4.8.
  1312  func textIM(p *parser) bool {
  1313  	switch p.tok.Type {
  1314  	case ErrorToken:
  1315  		p.oe.pop()
  1316  	case TextToken:
  1317  		d := p.tok.Data
  1318  		if n := p.oe.top(); n.DataAtom == a.Textarea && n.FirstChild == nil {
  1319  			// Ignore a newline at the start of a <textarea> block.
  1320  			if d != "" && d[0] == '\r' {
  1321  				d = d[1:]
  1322  			}
  1323  			if d != "" && d[0] == '\n' {
  1324  				d = d[1:]
  1325  			}
  1326  		}
  1327  		if d == "" {
  1328  			return true
  1329  		}
  1330  		p.addText(d)
  1331  		return true
  1332  	case EndTagToken:
  1333  		p.oe.pop()
  1334  	}
  1335  	p.im = p.originalIM
  1336  	p.originalIM = nil
  1337  	return p.tok.Type == EndTagToken
  1338  }
  1339  
  1340  // Section 12.2.6.4.9.
  1341  func inTableIM(p *parser) bool {
  1342  	switch p.tok.Type {
  1343  	case TextToken:
  1344  		p.tok.Data = strings.Replace(p.tok.Data, "\x00", "", -1)
  1345  		switch p.oe.top().DataAtom {
  1346  		case a.Table, a.Tbody, a.Tfoot, a.Thead, a.Tr:
  1347  			if strings.Trim(p.tok.Data, whitespace) == "" {
  1348  				p.addText(p.tok.Data)
  1349  				return true
  1350  			}
  1351  		}
  1352  	case StartTagToken:
  1353  		switch p.tok.DataAtom {
  1354  		case a.Caption:
  1355  			p.clearStackToContext(tableScope)
  1356  			p.afe = append(p.afe, &scopeMarker)
  1357  			p.addElement()
  1358  			p.im = inCaptionIM
  1359  			return true
  1360  		case a.Colgroup:
  1361  			p.clearStackToContext(tableScope)
  1362  			p.addElement()
  1363  			p.im = inColumnGroupIM
  1364  			return true
  1365  		case a.Col:
  1366  			p.parseImpliedToken(StartTagToken, a.Colgroup, a.Colgroup.String())
  1367  			return false
  1368  		case a.Tbody, a.Tfoot, a.Thead:
  1369  			p.clearStackToContext(tableScope)
  1370  			p.addElement()
  1371  			p.im = inTableBodyIM
  1372  			return true
  1373  		case a.Td, a.Th, a.Tr:
  1374  			p.parseImpliedToken(StartTagToken, a.Tbody, a.Tbody.String())
  1375  			return false
  1376  		case a.Table:
  1377  			if p.popUntil(tableScope, a.Table) {
  1378  				p.resetInsertionMode()
  1379  				return false
  1380  			}
  1381  			// Ignore the token.
  1382  			return true
  1383  		case a.Style, a.Script, a.Template:
  1384  			return inHeadIM(p)
  1385  		case a.Input:
  1386  			for _, t := range p.tok.Attr {
  1387  				if t.Key == "type" && strings.ToLower(t.Val) == "hidden" {
  1388  					p.addElement()
  1389  					p.oe.pop()
  1390  					return true
  1391  				}
  1392  			}
  1393  			// Otherwise drop down to the default action.
  1394  		case a.Form:
  1395  			if p.oe.contains(a.Template) || p.form != nil {
  1396  				// Ignore the token.
  1397  				return true
  1398  			}
  1399  			p.addElement()
  1400  			p.form = p.oe.pop()
  1401  		case a.Select:
  1402  			p.reconstructActiveFormattingElements()
  1403  			switch p.top().DataAtom {
  1404  			case a.Table, a.Tbody, a.Tfoot, a.Thead, a.Tr:
  1405  				p.fosterParenting = true
  1406  			}
  1407  			p.addElement()
  1408  			p.fosterParenting = false
  1409  			p.framesetOK = false
  1410  			p.im = inSelectInTableIM
  1411  			return true
  1412  		}
  1413  	case EndTagToken:
  1414  		switch p.tok.DataAtom {
  1415  		case a.Table:
  1416  			if p.popUntil(tableScope, a.Table) {
  1417  				p.resetInsertionMode()
  1418  				return true
  1419  			}
  1420  			// Ignore the token.
  1421  			return true
  1422  		case a.Body, a.Caption, a.Col, a.Colgroup, a.Html, a.Tbody, a.Td, a.Tfoot, a.Th, a.Thead, a.Tr:
  1423  			// Ignore the token.
  1424  			return true
  1425  		case a.Template:
  1426  			return inHeadIM(p)
  1427  		}
  1428  	case CommentToken:
  1429  		p.addChild(&Node{
  1430  			Type: CommentNode,
  1431  			Data: p.tok.Data,
  1432  		})
  1433  		return true
  1434  	case DoctypeToken:
  1435  		// Ignore the token.
  1436  		return true
  1437  	case ErrorToken:
  1438  		return inBodyIM(p)
  1439  	}
  1440  
  1441  	p.fosterParenting = true
  1442  	defer func() { p.fosterParenting = false }()
  1443  
  1444  	return inBodyIM(p)
  1445  }
  1446  
  1447  // Section 12.2.6.4.11.
  1448  func inCaptionIM(p *parser) bool {
  1449  	switch p.tok.Type {
  1450  	case StartTagToken:
  1451  		switch p.tok.DataAtom {
  1452  		case a.Caption, a.Col, a.Colgroup, a.Tbody, a.Td, a.Tfoot, a.Thead, a.Tr:
  1453  			if p.popUntil(tableScope, a.Caption) {
  1454  				p.clearActiveFormattingElements()
  1455  				p.im = inTableIM
  1456  				return false
  1457  			} else {
  1458  				// Ignore the token.
  1459  				return true
  1460  			}
  1461  		case a.Select:
  1462  			p.reconstructActiveFormattingElements()
  1463  			p.addElement()
  1464  			p.framesetOK = false
  1465  			p.im = inSelectInTableIM
  1466  			return true
  1467  		}
  1468  	case EndTagToken:
  1469  		switch p.tok.DataAtom {
  1470  		case a.Caption:
  1471  			if p.popUntil(tableScope, a.Caption) {
  1472  				p.clearActiveFormattingElements()
  1473  				p.im = inTableIM
  1474  			}
  1475  			return true
  1476  		case a.Table:
  1477  			if p.popUntil(tableScope, a.Caption) {
  1478  				p.clearActiveFormattingElements()
  1479  				p.im = inTableIM
  1480  				return false
  1481  			} else {
  1482  				// Ignore the token.
  1483  				return true
  1484  			}
  1485  		case a.Body, a.Col, a.Colgroup, a.Html, a.Tbody, a.Td, a.Tfoot, a.Th, a.Thead, a.Tr:
  1486  			// Ignore the token.
  1487  			return true
  1488  		}
  1489  	}
  1490  	return inBodyIM(p)
  1491  }
  1492  
  1493  // Section 12.2.6.4.12.
  1494  func inColumnGroupIM(p *parser) bool {
  1495  	switch p.tok.Type {
  1496  	case TextToken:
  1497  		s := strings.TrimLeft(p.tok.Data, whitespace)
  1498  		if len(s) < len(p.tok.Data) {
  1499  			// Add the initial whitespace to the current node.
  1500  			p.addText(p.tok.Data[:len(p.tok.Data)-len(s)])
  1501  			if s == "" {
  1502  				return true
  1503  			}
  1504  			p.tok.Data = s
  1505  		}
  1506  	case CommentToken:
  1507  		p.addChild(&Node{
  1508  			Type: CommentNode,
  1509  			Data: p.tok.Data,
  1510  		})
  1511  		return true
  1512  	case DoctypeToken:
  1513  		// Ignore the token.
  1514  		return true
  1515  	case StartTagToken:
  1516  		switch p.tok.DataAtom {
  1517  		case a.Html:
  1518  			return inBodyIM(p)
  1519  		case a.Col:
  1520  			p.addElement()
  1521  			p.oe.pop()
  1522  			p.acknowledgeSelfClosingTag()
  1523  			return true
  1524  		case a.Template:
  1525  			return inHeadIM(p)
  1526  		}
  1527  	case EndTagToken:
  1528  		switch p.tok.DataAtom {
  1529  		case a.Colgroup:
  1530  			if p.oe.top().DataAtom == a.Colgroup {
  1531  				p.oe.pop()
  1532  				p.im = inTableIM
  1533  			}
  1534  			return true
  1535  		case a.Col:
  1536  			// Ignore the token.
  1537  			return true
  1538  		case a.Template:
  1539  			return inHeadIM(p)
  1540  		}
  1541  	case ErrorToken:
  1542  		return inBodyIM(p)
  1543  	}
  1544  	if p.oe.top().DataAtom != a.Colgroup {
  1545  		return true
  1546  	}
  1547  	p.oe.pop()
  1548  	p.im = inTableIM
  1549  	return false
  1550  }
  1551  
  1552  // Section 12.2.6.4.13.
  1553  func inTableBodyIM(p *parser) bool {
  1554  	switch p.tok.Type {
  1555  	case StartTagToken:
  1556  		switch p.tok.DataAtom {
  1557  		case a.Tr:
  1558  			p.clearStackToContext(tableBodyScope)
  1559  			p.addElement()
  1560  			p.im = inRowIM
  1561  			return true
  1562  		case a.Td, a.Th:
  1563  			p.parseImpliedToken(StartTagToken, a.Tr, a.Tr.String())
  1564  			return false
  1565  		case a.Caption, a.Col, a.Colgroup, a.Tbody, a.Tfoot, a.Thead:
  1566  			if p.popUntil(tableScope, a.Tbody, a.Thead, a.Tfoot) {
  1567  				p.im = inTableIM
  1568  				return false
  1569  			}
  1570  			// Ignore the token.
  1571  			return true
  1572  		}
  1573  	case EndTagToken:
  1574  		switch p.tok.DataAtom {
  1575  		case a.Tbody, a.Tfoot, a.Thead:
  1576  			if p.elementInScope(tableScope, p.tok.DataAtom) {
  1577  				p.clearStackToContext(tableBodyScope)
  1578  				p.oe.pop()
  1579  				p.im = inTableIM
  1580  			}
  1581  			return true
  1582  		case a.Table:
  1583  			if p.popUntil(tableScope, a.Tbody, a.Thead, a.Tfoot) {
  1584  				p.im = inTableIM
  1585  				return false
  1586  			}
  1587  			// Ignore the token.
  1588  			return true
  1589  		case a.Body, a.Caption, a.Col, a.Colgroup, a.Html, a.Td, a.Th, a.Tr:
  1590  			// Ignore the token.
  1591  			return true
  1592  		}
  1593  	case CommentToken:
  1594  		p.addChild(&Node{
  1595  			Type: CommentNode,
  1596  			Data: p.tok.Data,
  1597  		})
  1598  		return true
  1599  	}
  1600  
  1601  	return inTableIM(p)
  1602  }
  1603  
  1604  // Section 12.2.6.4.14.
  1605  func inRowIM(p *parser) bool {
  1606  	switch p.tok.Type {
  1607  	case StartTagToken:
  1608  		switch p.tok.DataAtom {
  1609  		case a.Td, a.Th:
  1610  			p.clearStackToContext(tableRowScope)
  1611  			p.addElement()
  1612  			p.afe = append(p.afe, &scopeMarker)
  1613  			p.im = inCellIM
  1614  			return true
  1615  		case a.Caption, a.Col, a.Colgroup, a.Tbody, a.Tfoot, a.Thead, a.Tr:
  1616  			if p.popUntil(tableScope, a.Tr) {
  1617  				p.im = inTableBodyIM
  1618  				return false
  1619  			}
  1620  			// Ignore the token.
  1621  			return true
  1622  		}
  1623  	case EndTagToken:
  1624  		switch p.tok.DataAtom {
  1625  		case a.Tr:
  1626  			if p.popUntil(tableScope, a.Tr) {
  1627  				p.im = inTableBodyIM
  1628  				return true
  1629  			}
  1630  			// Ignore the token.
  1631  			return true
  1632  		case a.Table:
  1633  			if p.popUntil(tableScope, a.Tr) {
  1634  				p.im = inTableBodyIM
  1635  				return false
  1636  			}
  1637  			// Ignore the token.
  1638  			return true
  1639  		case a.Tbody, a.Tfoot, a.Thead:
  1640  			if p.elementInScope(tableScope, p.tok.DataAtom) {
  1641  				p.parseImpliedToken(EndTagToken, a.Tr, a.Tr.String())
  1642  				return false
  1643  			}
  1644  			// Ignore the token.
  1645  			return true
  1646  		case a.Body, a.Caption, a.Col, a.Colgroup, a.Html, a.Td, a.Th:
  1647  			// Ignore the token.
  1648  			return true
  1649  		}
  1650  	}
  1651  
  1652  	return inTableIM(p)
  1653  }
  1654  
  1655  // Section 12.2.6.4.15.
  1656  func inCellIM(p *parser) bool {
  1657  	switch p.tok.Type {
  1658  	case StartTagToken:
  1659  		switch p.tok.DataAtom {
  1660  		case a.Caption, a.Col, a.Colgroup, a.Tbody, a.Td, a.Tfoot, a.Th, a.Thead, a.Tr:
  1661  			if p.popUntil(tableScope, a.Td, a.Th) {
  1662  				// Close the cell and reprocess.
  1663  				p.clearActiveFormattingElements()
  1664  				p.im = inRowIM
  1665  				return false
  1666  			}
  1667  			// Ignore the token.
  1668  			return true
  1669  		case a.Select:
  1670  			p.reconstructActiveFormattingElements()
  1671  			p.addElement()
  1672  			p.framesetOK = false
  1673  			p.im = inSelectInTableIM
  1674  			return true
  1675  		}
  1676  	case EndTagToken:
  1677  		switch p.tok.DataAtom {
  1678  		case a.Td, a.Th:
  1679  			if !p.popUntil(tableScope, p.tok.DataAtom) {
  1680  				// Ignore the token.
  1681  				return true
  1682  			}
  1683  			p.clearActiveFormattingElements()
  1684  			p.im = inRowIM
  1685  			return true
  1686  		case a.Body, a.Caption, a.Col, a.Colgroup, a.Html:
  1687  			// Ignore the token.
  1688  			return true
  1689  		case a.Table, a.Tbody, a.Tfoot, a.Thead, a.Tr:
  1690  			if !p.elementInScope(tableScope, p.tok.DataAtom) {
  1691  				// Ignore the token.
  1692  				return true
  1693  			}
  1694  			// Close the cell and reprocess.
  1695  			p.popUntil(tableScope, a.Td, a.Th)
  1696  			p.clearActiveFormattingElements()
  1697  			p.im = inRowIM
  1698  			return false
  1699  		}
  1700  	}
  1701  	return inBodyIM(p)
  1702  }
  1703  
  1704  // Section 12.2.6.4.16.
  1705  func inSelectIM(p *parser) bool {
  1706  	switch p.tok.Type {
  1707  	case TextToken:
  1708  		p.addText(strings.Replace(p.tok.Data, "\x00", "", -1))
  1709  	case StartTagToken:
  1710  		switch p.tok.DataAtom {
  1711  		case a.Html:
  1712  			return inBodyIM(p)
  1713  		case a.Option:
  1714  			if p.top().DataAtom == a.Option {
  1715  				p.oe.pop()
  1716  			}
  1717  			p.addElement()
  1718  		case a.Optgroup:
  1719  			if p.top().DataAtom == a.Option {
  1720  				p.oe.pop()
  1721  			}
  1722  			if p.top().DataAtom == a.Optgroup {
  1723  				p.oe.pop()
  1724  			}
  1725  			p.addElement()
  1726  		case a.Select:
  1727  			if p.popUntil(selectScope, a.Select) {
  1728  				p.resetInsertionMode()
  1729  			} else {
  1730  				// Ignore the token.
  1731  				return true
  1732  			}
  1733  		case a.Input, a.Keygen, a.Textarea:
  1734  			if p.elementInScope(selectScope, a.Select) {
  1735  				p.parseImpliedToken(EndTagToken, a.Select, a.Select.String())
  1736  				return false
  1737  			}
  1738  			// In order to properly ignore <textarea>, we need to change the tokenizer mode.
  1739  			p.tokenizer.NextIsNotRawText()
  1740  			// Ignore the token.
  1741  			return true
  1742  		case a.Script, a.Template:
  1743  			return inHeadIM(p)
  1744  		}
  1745  	case EndTagToken:
  1746  		switch p.tok.DataAtom {
  1747  		case a.Option:
  1748  			if p.top().DataAtom == a.Option {
  1749  				p.oe.pop()
  1750  			}
  1751  		case a.Optgroup:
  1752  			i := len(p.oe) - 1
  1753  			if p.oe[i].DataAtom == a.Option {
  1754  				i--
  1755  			}
  1756  			if p.oe[i].DataAtom == a.Optgroup {
  1757  				p.oe = p.oe[:i]
  1758  			}
  1759  		case a.Select:
  1760  			if p.popUntil(selectScope, a.Select) {
  1761  				p.resetInsertionMode()
  1762  			} else {
  1763  				// Ignore the token.
  1764  				return true
  1765  			}
  1766  		case a.Template:
  1767  			return inHeadIM(p)
  1768  		}
  1769  	case CommentToken:
  1770  		p.addChild(&Node{
  1771  			Type: CommentNode,
  1772  			Data: p.tok.Data,
  1773  		})
  1774  	case DoctypeToken:
  1775  		// Ignore the token.
  1776  		return true
  1777  	case ErrorToken:
  1778  		return inBodyIM(p)
  1779  	}
  1780  
  1781  	return true
  1782  }
  1783  
  1784  // Section 12.2.6.4.17.
  1785  func inSelectInTableIM(p *parser) bool {
  1786  	switch p.tok.Type {
  1787  	case StartTagToken, EndTagToken:
  1788  		switch p.tok.DataAtom {
  1789  		case a.Caption, a.Table, a.Tbody, a.Tfoot, a.Thead, a.Tr, a.Td, a.Th:
  1790  			if p.tok.Type == EndTagToken && !p.elementInScope(tableScope, p.tok.DataAtom) {
  1791  				// Ignore the token.
  1792  				return true
  1793  			}
  1794  			// This is like p.popUntil(selectScope, a.Select), but it also
  1795  			// matches <math select>, not just <select>. Matching the MathML
  1796  			// tag is arguably incorrect (conceptually), but it mimics what
  1797  			// Chromium does.
  1798  			for i := len(p.oe) - 1; i >= 0; i-- {
  1799  				if n := p.oe[i]; n.DataAtom == a.Select {
  1800  					p.oe = p.oe[:i]
  1801  					break
  1802  				}
  1803  			}
  1804  			p.resetInsertionMode()
  1805  			return false
  1806  		}
  1807  	}
  1808  	return inSelectIM(p)
  1809  }
  1810  
  1811  // Section 12.2.6.4.18.
  1812  func inTemplateIM(p *parser) bool {
  1813  	switch p.tok.Type {
  1814  	case TextToken, CommentToken, DoctypeToken:
  1815  		return inBodyIM(p)
  1816  	case StartTagToken:
  1817  		switch p.tok.DataAtom {
  1818  		case a.Base, a.Basefont, a.Bgsound, a.Link, a.Meta, a.Noframes, a.Script, a.Style, a.Template, a.Title:
  1819  			return inHeadIM(p)
  1820  		case a.Caption, a.Colgroup, a.Tbody, a.Tfoot, a.Thead:
  1821  			p.templateStack.pop()
  1822  			p.templateStack = append(p.templateStack, inTableIM)
  1823  			p.im = inTableIM
  1824  			return false
  1825  		case a.Col:
  1826  			p.templateStack.pop()
  1827  			p.templateStack = append(p.templateStack, inColumnGroupIM)
  1828  			p.im = inColumnGroupIM
  1829  			return false
  1830  		case a.Tr:
  1831  			p.templateStack.pop()
  1832  			p.templateStack = append(p.templateStack, inTableBodyIM)
  1833  			p.im = inTableBodyIM
  1834  			return false
  1835  		case a.Td, a.Th:
  1836  			p.templateStack.pop()
  1837  			p.templateStack = append(p.templateStack, inRowIM)
  1838  			p.im = inRowIM
  1839  			return false
  1840  		default:
  1841  			p.templateStack.pop()
  1842  			p.templateStack = append(p.templateStack, inBodyIM)
  1843  			p.im = inBodyIM
  1844  			return false
  1845  		}
  1846  	case EndTagToken:
  1847  		switch p.tok.DataAtom {
  1848  		case a.Template:
  1849  			return inHeadIM(p)
  1850  		default:
  1851  			// Ignore the token.
  1852  			return true
  1853  		}
  1854  	case ErrorToken:
  1855  		if !p.oe.contains(a.Template) {
  1856  			// Ignore the token.
  1857  			return true
  1858  		}
  1859  		// TODO: remove this divergence from the HTML5 spec.
  1860  		//
  1861  		// See https://bugs.chromium.org/p/chromium/issues/detail?id=829668
  1862  		p.generateImpliedEndTags()
  1863  		for i := len(p.oe) - 1; i >= 0; i-- {
  1864  			if n := p.oe[i]; n.Namespace == "" && n.DataAtom == a.Template {
  1865  				p.oe = p.oe[:i]
  1866  				break
  1867  			}
  1868  		}
  1869  		p.clearActiveFormattingElements()
  1870  		p.templateStack.pop()
  1871  		p.resetInsertionMode()
  1872  		return false
  1873  	}
  1874  	return false
  1875  }
  1876  
  1877  // Section 12.2.6.4.19.
  1878  func afterBodyIM(p *parser) bool {
  1879  	switch p.tok.Type {
  1880  	case ErrorToken:
  1881  		// Stop parsing.
  1882  		return true
  1883  	case TextToken:
  1884  		s := strings.TrimLeft(p.tok.Data, whitespace)
  1885  		if len(s) == 0 {
  1886  			// It was all whitespace.
  1887  			return inBodyIM(p)
  1888  		}
  1889  	case StartTagToken:
  1890  		if p.tok.DataAtom == a.Html {
  1891  			return inBodyIM(p)
  1892  		}
  1893  	case EndTagToken:
  1894  		if p.tok.DataAtom == a.Html {
  1895  			if !p.fragment {
  1896  				p.im = afterAfterBodyIM
  1897  			}
  1898  			return true
  1899  		}
  1900  	case CommentToken:
  1901  		// The comment is attached to the <html> element.
  1902  		if len(p.oe) < 1 || p.oe[0].DataAtom != a.Html {
  1903  			panic("html: bad parser state: <html> element not found, in the after-body insertion mode")
  1904  		}
  1905  		p.oe[0].AppendChild(&Node{
  1906  			Type: CommentNode,
  1907  			Data: p.tok.Data,
  1908  		})
  1909  		return true
  1910  	}
  1911  	p.im = inBodyIM
  1912  	return false
  1913  }
  1914  
  1915  // Section 12.2.6.4.20.
  1916  func inFramesetIM(p *parser) bool {
  1917  	switch p.tok.Type {
  1918  	case CommentToken:
  1919  		p.addChild(&Node{
  1920  			Type: CommentNode,
  1921  			Data: p.tok.Data,
  1922  		})
  1923  	case TextToken:
  1924  		// Ignore all text but whitespace.
  1925  		s := strings.Map(func(c rune) rune {
  1926  			switch c {
  1927  			case ' ', '\t', '\n', '\f', '\r':
  1928  				return c
  1929  			}
  1930  			return -1
  1931  		}, p.tok.Data)
  1932  		if s != "" {
  1933  			p.addText(s)
  1934  		}
  1935  	case StartTagToken:
  1936  		switch p.tok.DataAtom {
  1937  		case a.Html:
  1938  			return inBodyIM(p)
  1939  		case a.Frameset:
  1940  			p.addElement()
  1941  		case a.Frame:
  1942  			p.addElement()
  1943  			p.oe.pop()
  1944  			p.acknowledgeSelfClosingTag()
  1945  		case a.Noframes:
  1946  			return inHeadIM(p)
  1947  		}
  1948  	case EndTagToken:
  1949  		switch p.tok.DataAtom {
  1950  		case a.Frameset:
  1951  			if p.oe.top().DataAtom != a.Html {
  1952  				p.oe.pop()
  1953  				if p.oe.top().DataAtom != a.Frameset {
  1954  					p.im = afterFramesetIM
  1955  					return true
  1956  				}
  1957  			}
  1958  		}
  1959  	default:
  1960  		// Ignore the token.
  1961  	}
  1962  	return true
  1963  }
  1964  
  1965  // Section 12.2.6.4.21.
  1966  func afterFramesetIM(p *parser) bool {
  1967  	switch p.tok.Type {
  1968  	case CommentToken:
  1969  		p.addChild(&Node{
  1970  			Type: CommentNode,
  1971  			Data: p.tok.Data,
  1972  		})
  1973  	case TextToken:
  1974  		// Ignore all text but whitespace.
  1975  		s := strings.Map(func(c rune) rune {
  1976  			switch c {
  1977  			case ' ', '\t', '\n', '\f', '\r':
  1978  				return c
  1979  			}
  1980  			return -1
  1981  		}, p.tok.Data)
  1982  		if s != "" {
  1983  			p.addText(s)
  1984  		}
  1985  	case StartTagToken:
  1986  		switch p.tok.DataAtom {
  1987  		case a.Html:
  1988  			return inBodyIM(p)
  1989  		case a.Noframes:
  1990  			return inHeadIM(p)
  1991  		}
  1992  	case EndTagToken:
  1993  		switch p.tok.DataAtom {
  1994  		case a.Html:
  1995  			p.im = afterAfterFramesetIM
  1996  			return true
  1997  		}
  1998  	default:
  1999  		// Ignore the token.
  2000  	}
  2001  	return true
  2002  }
  2003  
  2004  // Section 12.2.6.4.22.
  2005  func afterAfterBodyIM(p *parser) bool {
  2006  	switch p.tok.Type {
  2007  	case ErrorToken:
  2008  		// Stop parsing.
  2009  		return true
  2010  	case TextToken:
  2011  		s := strings.TrimLeft(p.tok.Data, whitespace)
  2012  		if len(s) == 0 {
  2013  			// It was all whitespace.
  2014  			return inBodyIM(p)
  2015  		}
  2016  	case StartTagToken:
  2017  		if p.tok.DataAtom == a.Html {
  2018  			return inBodyIM(p)
  2019  		}
  2020  	case CommentToken:
  2021  		p.doc.AppendChild(&Node{
  2022  			Type: CommentNode,
  2023  			Data: p.tok.Data,
  2024  		})
  2025  		return true
  2026  	case DoctypeToken:
  2027  		return inBodyIM(p)
  2028  	}
  2029  	p.im = inBodyIM
  2030  	return false
  2031  }
  2032  
  2033  // Section 12.2.6.4.23.
  2034  func afterAfterFramesetIM(p *parser) bool {
  2035  	switch p.tok.Type {
  2036  	case CommentToken:
  2037  		p.doc.AppendChild(&Node{
  2038  			Type: CommentNode,
  2039  			Data: p.tok.Data,
  2040  		})
  2041  	case TextToken:
  2042  		// Ignore all text but whitespace.
  2043  		s := strings.Map(func(c rune) rune {
  2044  			switch c {
  2045  			case ' ', '\t', '\n', '\f', '\r':
  2046  				return c
  2047  			}
  2048  			return -1
  2049  		}, p.tok.Data)
  2050  		if s != "" {
  2051  			p.tok.Data = s
  2052  			return inBodyIM(p)
  2053  		}
  2054  	case StartTagToken:
  2055  		switch p.tok.DataAtom {
  2056  		case a.Html:
  2057  			return inBodyIM(p)
  2058  		case a.Noframes:
  2059  			return inHeadIM(p)
  2060  		}
  2061  	case DoctypeToken:
  2062  		return inBodyIM(p)
  2063  	default:
  2064  		// Ignore the token.
  2065  	}
  2066  	return true
  2067  }
  2068  
// whitespaceOrNUL is the whitespace set with the NUL byte appended. It is the
// cutset parseForeignContent trims from text tokens when updating framesetOK.
const whitespaceOrNUL = whitespace + "\x00"
  2070  
  2071  // Section 12.2.6.5
  2072  func parseForeignContent(p *parser) bool {
  2073  	switch p.tok.Type {
  2074  	case TextToken:
  2075  		if p.framesetOK {
  2076  			p.framesetOK = strings.TrimLeft(p.tok.Data, whitespaceOrNUL) == ""
  2077  		}
  2078  		p.tok.Data = strings.Replace(p.tok.Data, "\x00", "\ufffd", -1)
  2079  		p.addText(p.tok.Data)
  2080  	case CommentToken:
  2081  		p.addChild(&Node{
  2082  			Type: CommentNode,
  2083  			Data: p.tok.Data,
  2084  		})
  2085  	case StartTagToken:
  2086  		b := breakout[p.tok.Data]
  2087  		if p.tok.DataAtom == a.Font {
  2088  		loop:
  2089  			for _, attr := range p.tok.Attr {
  2090  				switch attr.Key {
  2091  				case "color", "face", "size":
  2092  					b = true
  2093  					break loop
  2094  				}
  2095  			}
  2096  		}
  2097  		if b {
  2098  			for i := len(p.oe) - 1; i >= 0; i-- {
  2099  				n := p.oe[i]
  2100  				if n.Namespace == "" || htmlIntegrationPoint(n) || mathMLTextIntegrationPoint(n) {
  2101  					p.oe = p.oe[:i+1]
  2102  					break
  2103  				}
  2104  			}
  2105  			return false
  2106  		}
  2107  		switch p.top().Namespace {
  2108  		case "math":
  2109  			adjustAttributeNames(p.tok.Attr, mathMLAttributeAdjustments)
  2110  		case "svg":
  2111  			// Adjust SVG tag names. The tokenizer lower-cases tag names, but
  2112  			// SVG wants e.g. "foreignObject" with a capital second "O".
  2113  			if x := svgTagNameAdjustments[p.tok.Data]; x != "" {
  2114  				p.tok.DataAtom = a.Lookup([]byte(x))
  2115  				p.tok.Data = x
  2116  			}
  2117  			adjustAttributeNames(p.tok.Attr, svgAttributeAdjustments)
  2118  		default:
  2119  			panic("html: bad parser state: unexpected namespace")
  2120  		}
  2121  		adjustForeignAttributes(p.tok.Attr)
  2122  		namespace := p.top().Namespace
  2123  		p.addElement()
  2124  		p.top().Namespace = namespace
  2125  		if namespace != "" {
  2126  			// Don't let the tokenizer go into raw text mode in foreign content
  2127  			// (e.g. in an SVG <title> tag).
  2128  			p.tokenizer.NextIsNotRawText()
  2129  		}
  2130  		if p.hasSelfClosingToken {
  2131  			p.oe.pop()
  2132  			p.acknowledgeSelfClosingTag()
  2133  		}
  2134  	case EndTagToken:
  2135  		for i := len(p.oe) - 1; i >= 0; i-- {
  2136  			if p.oe[i].Namespace == "" {
  2137  				return p.im(p)
  2138  			}
  2139  			if strings.EqualFold(p.oe[i].Data, p.tok.Data) {
  2140  				p.oe = p.oe[:i]
  2141  				break
  2142  			}
  2143  		}
  2144  		return true
  2145  	default:
  2146  		// Ignore the token.
  2147  	}
  2148  	return true
  2149  }
  2150  
  2151  // Section 12.2.6.
  2152  func (p *parser) inForeignContent() bool {
  2153  	if len(p.oe) == 0 {
  2154  		return false
  2155  	}
  2156  	n := p.oe[len(p.oe)-1]
  2157  	if n.Namespace == "" {
  2158  		return false
  2159  	}
  2160  	if mathMLTextIntegrationPoint(n) {
  2161  		if p.tok.Type == StartTagToken && p.tok.DataAtom != a.Mglyph && p.tok.DataAtom != a.Malignmark {
  2162  			return false
  2163  		}
  2164  		if p.tok.Type == TextToken {
  2165  			return false
  2166  		}
  2167  	}
  2168  	if n.Namespace == "math" && n.DataAtom == a.AnnotationXml && p.tok.Type == StartTagToken && p.tok.DataAtom == a.Svg {
  2169  		return false
  2170  	}
  2171  	if htmlIntegrationPoint(n) && (p.tok.Type == StartTagToken || p.tok.Type == TextToken) {
  2172  		return false
  2173  	}
  2174  	if p.tok.Type == ErrorToken {
  2175  		return false
  2176  	}
  2177  	return true
  2178  }
  2179  
  2180  // parseImpliedToken parses a token as though it had appeared in the parser's
  2181  // input.
  2182  func (p *parser) parseImpliedToken(t TokenType, dataAtom a.Atom, data string) {
  2183  	realToken, selfClosing := p.tok, p.hasSelfClosingToken
  2184  	p.tok = Token{
  2185  		Type:     t,
  2186  		DataAtom: dataAtom,
  2187  		Data:     data,
  2188  	}
  2189  	p.hasSelfClosingToken = false
  2190  	p.parseCurrentToken()
  2191  	p.tok, p.hasSelfClosingToken = realToken, selfClosing
  2192  }
  2193  
  2194  // parseCurrentToken runs the current token through the parsing routines
  2195  // until it is consumed.
  2196  func (p *parser) parseCurrentToken() {
  2197  	if p.tok.Type == SelfClosingTagToken {
  2198  		p.hasSelfClosingToken = true
  2199  		p.tok.Type = StartTagToken
  2200  	}
  2201  
  2202  	consumed := false
  2203  	for !consumed {
  2204  		if p.inForeignContent() {
  2205  			consumed = parseForeignContent(p)
  2206  		} else {
  2207  			consumed = p.im(p)
  2208  		}
  2209  	}
  2210  
  2211  	if p.hasSelfClosingToken {
  2212  		// This is a parse error, but ignore it.
  2213  		p.hasSelfClosingToken = false
  2214  	}
  2215  }
  2216  
  2217  func (p *parser) parse() error {
  2218  	// Iterate until EOF. Any other error will cause an early return.
  2219  	var err error
  2220  	for err != io.EOF {
  2221  		// CDATA sections are allowed only in foreign content.
  2222  		n := p.oe.top()
  2223  		p.tokenizer.AllowCDATA(n != nil && n.Namespace != "")
  2224  		// Read and parse the next token.
  2225  		p.tokenizer.Next()
  2226  		p.tok = p.tokenizer.Token()
  2227  		if p.tok.Type == ErrorToken {
  2228  			err = p.tokenizer.Err()
  2229  			if err != nil && err != io.EOF {
  2230  				return err
  2231  			}
  2232  		}
  2233  		p.parseCurrentToken()
  2234  	}
  2235  	return nil
  2236  }
  2237  
  2238  // Parse returns the parse tree for the HTML from the given Reader.
  2239  //
  2240  // It implements the HTML5 parsing algorithm
  2241  // (https://html.spec.whatwg.org/multipage/syntax.html#tree-construction),
  2242  // which is very complicated. The resultant tree can contain implicitly created
  2243  // nodes that have no explicit <tag> listed in r's data, and nodes' parents can
  2244  // differ from the nesting implied by a naive processing of start and end
  2245  // <tag>s. Conversely, explicit <tag>s in r's data can be silently dropped,
  2246  // with no corresponding node in the resulting tree.
  2247  //
  2248  // The input is assumed to be UTF-8 encoded.
  2249  func Parse(r io.Reader) (*Node, error) {
  2250  	p := &parser{
  2251  		tokenizer: NewTokenizer(r),
  2252  		doc: &Node{
  2253  			Type: DocumentNode,
  2254  		},
  2255  		scripting:  true,
  2256  		framesetOK: true,
  2257  		im:         initialIM,
  2258  	}
  2259  	err := p.parse()
  2260  	if err != nil {
  2261  		return nil, err
  2262  	}
  2263  	return p.doc, nil
  2264  }
  2265  
  2266  // ParseFragment parses a fragment of HTML and returns the nodes that were
  2267  // found. If the fragment is the InnerHTML for an existing element, pass that
  2268  // element in context.
  2269  //
  2270  // It has the same intricacies as Parse.
  2271  func ParseFragment(r io.Reader, context *Node) ([]*Node, error) {
  2272  	contextTag := ""
  2273  	if context != nil {
  2274  		if context.Type != ElementNode {
  2275  			return nil, errors.New("html: ParseFragment of non-element Node")
  2276  		}
  2277  		// The next check isn't just context.DataAtom.String() == context.Data because
  2278  		// it is valid to pass an element whose tag isn't a known atom. For example,
  2279  		// DataAtom == 0 and Data = "tagfromthefuture" is perfectly consistent.
  2280  		if context.DataAtom != a.Lookup([]byte(context.Data)) {
  2281  			return nil, fmt.Errorf("html: inconsistent Node: DataAtom=%q, Data=%q", context.DataAtom, context.Data)
  2282  		}
  2283  		contextTag = context.DataAtom.String()
  2284  	}
  2285  	p := &parser{
  2286  		tokenizer: NewTokenizerFragment(r, contextTag),
  2287  		doc: &Node{
  2288  			Type: DocumentNode,
  2289  		},
  2290  		scripting: true,
  2291  		fragment:  true,
  2292  		context:   context,
  2293  	}
  2294  
  2295  	root := &Node{
  2296  		Type:     ElementNode,
  2297  		DataAtom: a.Html,
  2298  		Data:     a.Html.String(),
  2299  	}
  2300  	p.doc.AppendChild(root)
  2301  	p.oe = nodeStack{root}
  2302  	if context != nil && context.DataAtom == a.Template {
  2303  		p.templateStack = append(p.templateStack, inTemplateIM)
  2304  	}
  2305  	p.resetInsertionMode()
  2306  
  2307  	for n := context; n != nil; n = n.Parent {
  2308  		if n.Type == ElementNode && n.DataAtom == a.Form {
  2309  			p.form = n
  2310  			break
  2311  		}
  2312  	}
  2313  
  2314  	err := p.parse()
  2315  	if err != nil {
  2316  		return nil, err
  2317  	}
  2318  
  2319  	parent := p.doc
  2320  	if context != nil {
  2321  		parent = root
  2322  	}
  2323  
  2324  	var result []*Node
  2325  	for c := parent.FirstChild; c != nil; {
  2326  		next := c.NextSibling
  2327  		parent.RemoveChild(c)
  2328  		result = append(result, c)
  2329  		c = next
  2330  	}
  2331  	return result, nil
  2332  }