github.com/stackdocker/rkt@v0.10.1-0.20151109095037-1aa827478248/Godeps/_workspace/src/golang.org/x/net/html/parse.go

github.com/stackdocker/rkt@v0.10.1-0.20151109095037-1aa827478248/Godeps/_workspace/src/golang.org/x/net/html/parse.go (about)

     1  // Copyright 2010 The Go Authors. All rights reserved.
     2  // Use of this source code is governed by a BSD-style
     3  // license that can be found in the LICENSE file.
     4  
     5  package html
     6  
     7  import (
     8  	"errors"
     9  	"fmt"
    10  	"io"
    11  	"strings"
    12  
    13  	a "github.com/coreos/rkt/Godeps/_workspace/src/golang.org/x/net/html/atom"
    14  )
    15  
    16  // A parser implements the HTML5 parsing algorithm:
    17  // https://html.spec.whatwg.org/multipage/syntax.html#tree-construction
    18  type parser struct {
    19  	// tokenizer provides the tokens for the parser.
    20  	tokenizer *Tokenizer
    21  	// tok is the most recently read token.
    22  	tok Token
    23  	// Self-closing tags like <hr/> are treated as start tags, except that
    24  	// hasSelfClosingToken is set while they are being processed.
    25  	hasSelfClosingToken bool
    26  	// doc is the document root element.
    27  	doc *Node
    28  	// The stack of open elements (section 12.2.3.2) and active formatting
    29  	// elements (section 12.2.3.3).
    30  	oe, afe nodeStack
    31  	// Element pointers (section 12.2.3.4).
    32  	head, form *Node
    33  	// Other parsing state flags (section 12.2.3.5).
    34  	scripting, framesetOK bool
    35  	// im is the current insertion mode.
    36  	im insertionMode
    37  	// originalIM is the insertion mode to go back to after completing a text
    38  	// or inTableText insertion mode.
    39  	originalIM insertionMode
    40  	// fosterParenting is whether new elements should be inserted according to
    41  	// the foster parenting rules (section 12.2.5.3).
    42  	fosterParenting bool
    43  	// quirks is whether the parser is operating in "quirks mode."
    44  	quirks bool
    45  	// fragment is whether the parser is parsing an HTML fragment.
    46  	fragment bool
    47  	// context is the context element when parsing an HTML fragment
    48  	// (section 12.4).
    49  	context *Node
    50  }
    51  
    52  func (p *parser) top() *Node {
    53  	if n := p.oe.top(); n != nil {
    54  		return n
    55  	}
    56  	return p.doc
    57  }
    58  
    59  // Stop tags for use in popUntil. These come from section 12.2.3.2.
    60  var (
    61  	defaultScopeStopTags = map[string][]a.Atom{
    62  		"":     {a.Applet, a.Caption, a.Html, a.Table, a.Td, a.Th, a.Marquee, a.Object, a.Template},
    63  		"math": {a.AnnotationXml, a.Mi, a.Mn, a.Mo, a.Ms, a.Mtext},
    64  		"svg":  {a.Desc, a.ForeignObject, a.Title},
    65  	}
    66  )
    67  
    68  type scope int
    69  
    70  const (
    71  	defaultScope scope = iota
    72  	listItemScope
    73  	buttonScope
    74  	tableScope
    75  	tableRowScope
    76  	tableBodyScope
    77  	selectScope
    78  )
    79  
    80  // popUntil pops the stack of open elements at the highest element whose tag
    81  // is in matchTags, provided there is no higher element in the scope's stop
    82  // tags (as defined in section 12.2.3.2). It returns whether or not there was
    83  // such an element. If there was not, popUntil leaves the stack unchanged.
    84  //
    85  // For example, the set of stop tags for table scope is: "html", "table". If
    86  // the stack was:
    87  // ["html", "body", "font", "table", "b", "i", "u"]
    88  // then popUntil(tableScope, "font") would return false, but
    89  // popUntil(tableScope, "i") would return true and the stack would become:
    90  // ["html", "body", "font", "table", "b"]
    91  //
    92  // If an element's tag is in both the stop tags and matchTags, then the stack
    93  // will be popped and the function returns true (provided, of course, there was
    94  // no higher element in the stack that was also in the stop tags). For example,
    95  // popUntil(tableScope, "table") returns true and leaves:
    96  // ["html", "body", "font"]
    97  func (p *parser) popUntil(s scope, matchTags ...a.Atom) bool {
    98  	if i := p.indexOfElementInScope(s, matchTags...); i != -1 {
    99  		p.oe = p.oe[:i]
   100  		return true
   101  	}
   102  	return false
   103  }
   104  
   105  // indexOfElementInScope returns the index in p.oe of the highest element whose
   106  // tag is in matchTags that is in scope. If no matching element is in scope, it
   107  // returns -1.
   108  func (p *parser) indexOfElementInScope(s scope, matchTags ...a.Atom) int {
   109  	for i := len(p.oe) - 1; i >= 0; i-- {
   110  		tagAtom := p.oe[i].DataAtom
   111  		if p.oe[i].Namespace == "" {
   112  			for _, t := range matchTags {
   113  				if t == tagAtom {
   114  					return i
   115  				}
   116  			}
   117  			switch s {
   118  			case defaultScope:
   119  				// No-op.
   120  			case listItemScope:
   121  				if tagAtom == a.Ol || tagAtom == a.Ul {
   122  					return -1
   123  				}
   124  			case buttonScope:
   125  				if tagAtom == a.Button {
   126  					return -1
   127  				}
   128  			case tableScope:
   129  				if tagAtom == a.Html || tagAtom == a.Table {
   130  					return -1
   131  				}
   132  			case selectScope:
   133  				if tagAtom != a.Optgroup && tagAtom != a.Option {
   134  					return -1
   135  				}
   136  			default:
   137  				panic("unreachable")
   138  			}
   139  		}
   140  		switch s {
   141  		case defaultScope, listItemScope, buttonScope:
   142  			for _, t := range defaultScopeStopTags[p.oe[i].Namespace] {
   143  				if t == tagAtom {
   144  					return -1
   145  				}
   146  			}
   147  		}
   148  	}
   149  	return -1
   150  }
   151  
   152  // elementInScope is like popUntil, except that it doesn't modify the stack of
   153  // open elements.
   154  func (p *parser) elementInScope(s scope, matchTags ...a.Atom) bool {
   155  	return p.indexOfElementInScope(s, matchTags...) != -1
   156  }
   157  
   158  // clearStackToContext pops elements off the stack of open elements until a
   159  // scope-defined element is found.
   160  func (p *parser) clearStackToContext(s scope) {
   161  	for i := len(p.oe) - 1; i >= 0; i-- {
   162  		tagAtom := p.oe[i].DataAtom
   163  		switch s {
   164  		case tableScope:
   165  			if tagAtom == a.Html || tagAtom == a.Table {
   166  				p.oe = p.oe[:i+1]
   167  				return
   168  			}
   169  		case tableRowScope:
   170  			if tagAtom == a.Html || tagAtom == a.Tr {
   171  				p.oe = p.oe[:i+1]
   172  				return
   173  			}
   174  		case tableBodyScope:
   175  			if tagAtom == a.Html || tagAtom == a.Tbody || tagAtom == a.Tfoot || tagAtom == a.Thead {
   176  				p.oe = p.oe[:i+1]
   177  				return
   178  			}
   179  		default:
   180  			panic("unreachable")
   181  		}
   182  	}
   183  }
   184  
   185  // generateImpliedEndTags pops nodes off the stack of open elements as long as
   186  // the top node has a tag name of dd, dt, li, option, optgroup, p, rp, or rt.
   187  // If exceptions are specified, nodes with that name will not be popped off.
   188  func (p *parser) generateImpliedEndTags(exceptions ...string) {
   189  	var i int
   190  loop:
   191  	for i = len(p.oe) - 1; i >= 0; i-- {
   192  		n := p.oe[i]
   193  		if n.Type == ElementNode {
   194  			switch n.DataAtom {
   195  			case a.Dd, a.Dt, a.Li, a.Option, a.Optgroup, a.P, a.Rp, a.Rt:
   196  				for _, except := range exceptions {
   197  					if n.Data == except {
   198  						break loop
   199  					}
   200  				}
   201  				continue
   202  			}
   203  		}
   204  		break
   205  	}
   206  
   207  	p.oe = p.oe[:i+1]
   208  }
   209  
   210  // addChild adds a child node n to the top element, and pushes n onto the stack
   211  // of open elements if it is an element node.
   212  func (p *parser) addChild(n *Node) {
   213  	if p.shouldFosterParent() {
   214  		p.fosterParent(n)
   215  	} else {
   216  		p.top().AppendChild(n)
   217  	}
   218  
   219  	if n.Type == ElementNode {
   220  		p.oe = append(p.oe, n)
   221  	}
   222  }
   223  
   224  // shouldFosterParent returns whether the next node to be added should be
   225  // foster parented.
   226  func (p *parser) shouldFosterParent() bool {
   227  	if p.fosterParenting {
   228  		switch p.top().DataAtom {
   229  		case a.Table, a.Tbody, a.Tfoot, a.Thead, a.Tr:
   230  			return true
   231  		}
   232  	}
   233  	return false
   234  }
   235  
   236  // fosterParent adds a child node according to the foster parenting rules.
   237  // Section 12.2.5.3, "foster parenting".
   238  func (p *parser) fosterParent(n *Node) {
   239  	var table, parent, prev *Node
   240  	var i int
   241  	for i = len(p.oe) - 1; i >= 0; i-- {
   242  		if p.oe[i].DataAtom == a.Table {
   243  			table = p.oe[i]
   244  			break
   245  		}
   246  	}
   247  
   248  	if table == nil {
   249  		// The foster parent is the html element.
   250  		parent = p.oe[0]
   251  	} else {
   252  		parent = table.Parent
   253  	}
   254  	if parent == nil {
   255  		parent = p.oe[i-1]
   256  	}
   257  
   258  	if table != nil {
   259  		prev = table.PrevSibling
   260  	} else {
   261  		prev = parent.LastChild
   262  	}
   263  	if prev != nil && prev.Type == TextNode && n.Type == TextNode {
   264  		prev.Data += n.Data
   265  		return
   266  	}
   267  
   268  	parent.InsertBefore(n, table)
   269  }
   270  
   271  // addText adds text to the preceding node if it is a text node, or else it
   272  // calls addChild with a new text node.
   273  func (p *parser) addText(text string) {
   274  	if text == "" {
   275  		return
   276  	}
   277  
   278  	if p.shouldFosterParent() {
   279  		p.fosterParent(&Node{
   280  			Type: TextNode,
   281  			Data: text,
   282  		})
   283  		return
   284  	}
   285  
   286  	t := p.top()
   287  	if n := t.LastChild; n != nil && n.Type == TextNode {
   288  		n.Data += text
   289  		return
   290  	}
   291  	p.addChild(&Node{
   292  		Type: TextNode,
   293  		Data: text,
   294  	})
   295  }
   296  
   297  // addElement adds a child element based on the current token.
   298  func (p *parser) addElement() {
   299  	p.addChild(&Node{
   300  		Type:     ElementNode,
   301  		DataAtom: p.tok.DataAtom,
   302  		Data:     p.tok.Data,
   303  		Attr:     p.tok.Attr,
   304  	})
   305  }
   306  
   307  // Section 12.2.3.3.
   308  func (p *parser) addFormattingElement() {
   309  	tagAtom, attr := p.tok.DataAtom, p.tok.Attr
   310  	p.addElement()
   311  
   312  	// Implement the Noah's Ark clause, but with three per family instead of two.
   313  	identicalElements := 0
   314  findIdenticalElements:
   315  	for i := len(p.afe) - 1; i >= 0; i-- {
   316  		n := p.afe[i]
   317  		if n.Type == scopeMarkerNode {
   318  			break
   319  		}
   320  		if n.Type != ElementNode {
   321  			continue
   322  		}
   323  		if n.Namespace != "" {
   324  			continue
   325  		}
   326  		if n.DataAtom != tagAtom {
   327  			continue
   328  		}
   329  		if len(n.Attr) != len(attr) {
   330  			continue
   331  		}
   332  	compareAttributes:
   333  		for _, t0 := range n.Attr {
   334  			for _, t1 := range attr {
   335  				if t0.Key == t1.Key && t0.Namespace == t1.Namespace && t0.Val == t1.Val {
   336  					// Found a match for this attribute, continue with the next attribute.
   337  					continue compareAttributes
   338  				}
   339  			}
   340  			// If we get here, there is no attribute that matches a.
   341  			// Therefore the element is not identical to the new one.
   342  			continue findIdenticalElements
   343  		}
   344  
   345  		identicalElements++
   346  		if identicalElements >= 3 {
   347  			p.afe.remove(n)
   348  		}
   349  	}
   350  
   351  	p.afe = append(p.afe, p.top())
   352  }
   353  
   354  // Section 12.2.3.3.
   355  func (p *parser) clearActiveFormattingElements() {
   356  	for {
   357  		n := p.afe.pop()
   358  		if len(p.afe) == 0 || n.Type == scopeMarkerNode {
   359  			return
   360  		}
   361  	}
   362  }
   363  
   364  // Section 12.2.3.3.
   365  func (p *parser) reconstructActiveFormattingElements() {
   366  	n := p.afe.top()
   367  	if n == nil {
   368  		return
   369  	}
   370  	if n.Type == scopeMarkerNode || p.oe.index(n) != -1 {
   371  		return
   372  	}
   373  	i := len(p.afe) - 1
   374  	for n.Type != scopeMarkerNode && p.oe.index(n) == -1 {
   375  		if i == 0 {
   376  			i = -1
   377  			break
   378  		}
   379  		i--
   380  		n = p.afe[i]
   381  	}
   382  	for {
   383  		i++
   384  		clone := p.afe[i].clone()
   385  		p.addChild(clone)
   386  		p.afe[i] = clone
   387  		if i == len(p.afe)-1 {
   388  			break
   389  		}
   390  	}
   391  }
   392  
   393  // Section 12.2.4.
   394  func (p *parser) acknowledgeSelfClosingTag() {
   395  	p.hasSelfClosingToken = false
   396  }
   397  
   398  // An insertion mode (section 12.2.3.1) is the state transition function from
   399  // a particular state in the HTML5 parser's state machine. It updates the
   400  // parser's fields depending on parser.tok (where ErrorToken means EOF).
   401  // It returns whether the token was consumed.
   402  type insertionMode func(*parser) bool
   403  
   404  // setOriginalIM sets the insertion mode to return to after completing a text or
   405  // inTableText insertion mode.
   406  // Section 12.2.3.1, "using the rules for".
   407  func (p *parser) setOriginalIM() {
   408  	if p.originalIM != nil {
   409  		panic("html: bad parser state: originalIM was set twice")
   410  	}
   411  	p.originalIM = p.im
   412  }
   413  
   414  // Section 12.2.3.1, "reset the insertion mode".
   415  func (p *parser) resetInsertionMode() {
   416  	for i := len(p.oe) - 1; i >= 0; i-- {
   417  		n := p.oe[i]
   418  		if i == 0 && p.context != nil {
   419  			n = p.context
   420  		}
   421  
   422  		switch n.DataAtom {
   423  		case a.Select:
   424  			p.im = inSelectIM
   425  		case a.Td, a.Th:
   426  			p.im = inCellIM
   427  		case a.Tr:
   428  			p.im = inRowIM
   429  		case a.Tbody, a.Thead, a.Tfoot:
   430  			p.im = inTableBodyIM
   431  		case a.Caption:
   432  			p.im = inCaptionIM
   433  		case a.Colgroup:
   434  			p.im = inColumnGroupIM
   435  		case a.Table:
   436  			p.im = inTableIM
   437  		case a.Head:
   438  			p.im = inBodyIM
   439  		case a.Body:
   440  			p.im = inBodyIM
   441  		case a.Frameset:
   442  			p.im = inFramesetIM
   443  		case a.Html:
   444  			p.im = beforeHeadIM
   445  		default:
   446  			continue
   447  		}
   448  		return
   449  	}
   450  	p.im = inBodyIM
   451  }
   452  
   453  const whitespace = " \t\r\n\f"
   454  
   455  // Section 12.2.5.4.1.
   456  func initialIM(p *parser) bool {
   457  	switch p.tok.Type {
   458  	case TextToken:
   459  		p.tok.Data = strings.TrimLeft(p.tok.Data, whitespace)
   460  		if len(p.tok.Data) == 0 {
   461  			// It was all whitespace, so ignore it.
   462  			return true
   463  		}
   464  	case CommentToken:
   465  		p.doc.AppendChild(&Node{
   466  			Type: CommentNode,
   467  			Data: p.tok.Data,
   468  		})
   469  		return true
   470  	case DoctypeToken:
   471  		n, quirks := parseDoctype(p.tok.Data)
   472  		p.doc.AppendChild(n)
   473  		p.quirks = quirks
   474  		p.im = beforeHTMLIM
   475  		return true
   476  	}
   477  	p.quirks = true
   478  	p.im = beforeHTMLIM
   479  	return false
   480  }
   481  
   482  // Section 12.2.5.4.2.
   483  func beforeHTMLIM(p *parser) bool {
   484  	switch p.tok.Type {
   485  	case DoctypeToken:
   486  		// Ignore the token.
   487  		return true
   488  	case TextToken:
   489  		p.tok.Data = strings.TrimLeft(p.tok.Data, whitespace)
   490  		if len(p.tok.Data) == 0 {
   491  			// It was all whitespace, so ignore it.
   492  			return true
   493  		}
   494  	case StartTagToken:
   495  		if p.tok.DataAtom == a.Html {
   496  			p.addElement()
   497  			p.im = beforeHeadIM
   498  			return true
   499  		}
   500  	case EndTagToken:
   501  		switch p.tok.DataAtom {
   502  		case a.Head, a.Body, a.Html, a.Br:
   503  			p.parseImpliedToken(StartTagToken, a.Html, a.Html.String())
   504  			return false
   505  		default:
   506  			// Ignore the token.
   507  			return true
   508  		}
   509  	case CommentToken:
   510  		p.doc.AppendChild(&Node{
   511  			Type: CommentNode,
   512  			Data: p.tok.Data,
   513  		})
   514  		return true
   515  	}
   516  	p.parseImpliedToken(StartTagToken, a.Html, a.Html.String())
   517  	return false
   518  }
   519  
   520  // Section 12.2.5.4.3.
   521  func beforeHeadIM(p *parser) bool {
   522  	switch p.tok.Type {
   523  	case TextToken:
   524  		p.tok.Data = strings.TrimLeft(p.tok.Data, whitespace)
   525  		if len(p.tok.Data) == 0 {
   526  			// It was all whitespace, so ignore it.
   527  			return true
   528  		}
   529  	case StartTagToken:
   530  		switch p.tok.DataAtom {
   531  		case a.Head:
   532  			p.addElement()
   533  			p.head = p.top()
   534  			p.im = inHeadIM
   535  			return true
   536  		case a.Html:
   537  			return inBodyIM(p)
   538  		}
   539  	case EndTagToken:
   540  		switch p.tok.DataAtom {
   541  		case a.Head, a.Body, a.Html, a.Br:
   542  			p.parseImpliedToken(StartTagToken, a.Head, a.Head.String())
   543  			return false
   544  		default:
   545  			// Ignore the token.
   546  			return true
   547  		}
   548  	case CommentToken:
   549  		p.addChild(&Node{
   550  			Type: CommentNode,
   551  			Data: p.tok.Data,
   552  		})
   553  		return true
   554  	case DoctypeToken:
   555  		// Ignore the token.
   556  		return true
   557  	}
   558  
   559  	p.parseImpliedToken(StartTagToken, a.Head, a.Head.String())
   560  	return false
   561  }
   562  
   563  // Section 12.2.5.4.4.
   564  func inHeadIM(p *parser) bool {
   565  	switch p.tok.Type {
   566  	case TextToken:
   567  		s := strings.TrimLeft(p.tok.Data, whitespace)
   568  		if len(s) < len(p.tok.Data) {
   569  			// Add the initial whitespace to the current node.
   570  			p.addText(p.tok.Data[:len(p.tok.Data)-len(s)])
   571  			if s == "" {
   572  				return true
   573  			}
   574  			p.tok.Data = s
   575  		}
   576  	case StartTagToken:
   577  		switch p.tok.DataAtom {
   578  		case a.Html:
   579  			return inBodyIM(p)
   580  		case a.Base, a.Basefont, a.Bgsound, a.Command, a.Link, a.Meta:
   581  			p.addElement()
   582  			p.oe.pop()
   583  			p.acknowledgeSelfClosingTag()
   584  			return true
   585  		case a.Script, a.Title, a.Noscript, a.Noframes, a.Style:
   586  			p.addElement()
   587  			p.setOriginalIM()
   588  			p.im = textIM
   589  			return true
   590  		case a.Head:
   591  			// Ignore the token.
   592  			return true
   593  		}
   594  	case EndTagToken:
   595  		switch p.tok.DataAtom {
   596  		case a.Head:
   597  			n := p.oe.pop()
   598  			if n.DataAtom != a.Head {
   599  				panic("html: bad parser state: <head> element not found, in the in-head insertion mode")
   600  			}
   601  			p.im = afterHeadIM
   602  			return true
   603  		case a.Body, a.Html, a.Br:
   604  			p.parseImpliedToken(EndTagToken, a.Head, a.Head.String())
   605  			return false
   606  		default:
   607  			// Ignore the token.
   608  			return true
   609  		}
   610  	case CommentToken:
   611  		p.addChild(&Node{
   612  			Type: CommentNode,
   613  			Data: p.tok.Data,
   614  		})
   615  		return true
   616  	case DoctypeToken:
   617  		// Ignore the token.
   618  		return true
   619  	}
   620  
   621  	p.parseImpliedToken(EndTagToken, a.Head, a.Head.String())
   622  	return false
   623  }
   624  
   625  // Section 12.2.5.4.6.
   626  func afterHeadIM(p *parser) bool {
   627  	switch p.tok.Type {
   628  	case TextToken:
   629  		s := strings.TrimLeft(p.tok.Data, whitespace)
   630  		if len(s) < len(p.tok.Data) {
   631  			// Add the initial whitespace to the current node.
   632  			p.addText(p.tok.Data[:len(p.tok.Data)-len(s)])
   633  			if s == "" {
   634  				return true
   635  			}
   636  			p.tok.Data = s
   637  		}
   638  	case StartTagToken:
   639  		switch p.tok.DataAtom {
   640  		case a.Html:
   641  			return inBodyIM(p)
   642  		case a.Body:
   643  			p.addElement()
   644  			p.framesetOK = false
   645  			p.im = inBodyIM
   646  			return true
   647  		case a.Frameset:
   648  			p.addElement()
   649  			p.im = inFramesetIM
   650  			return true
   651  		case a.Base, a.Basefont, a.Bgsound, a.Link, a.Meta, a.Noframes, a.Script, a.Style, a.Title:
   652  			p.oe = append(p.oe, p.head)
   653  			defer p.oe.remove(p.head)
   654  			return inHeadIM(p)
   655  		case a.Head:
   656  			// Ignore the token.
   657  			return true
   658  		}
   659  	case EndTagToken:
   660  		switch p.tok.DataAtom {
   661  		case a.Body, a.Html, a.Br:
   662  			// Drop down to creating an implied <body> tag.
   663  		default:
   664  			// Ignore the token.
   665  			return true
   666  		}
   667  	case CommentToken:
   668  		p.addChild(&Node{
   669  			Type: CommentNode,
   670  			Data: p.tok.Data,
   671  		})
   672  		return true
   673  	case DoctypeToken:
   674  		// Ignore the token.
   675  		return true
   676  	}
   677  
   678  	p.parseImpliedToken(StartTagToken, a.Body, a.Body.String())
   679  	p.framesetOK = true
   680  	return false
   681  }
   682  
   683  // copyAttributes copies attributes of src not found on dst to dst.
   684  func copyAttributes(dst *Node, src Token) {
   685  	if len(src.Attr) == 0 {
   686  		return
   687  	}
   688  	attr := map[string]string{}
   689  	for _, t := range dst.Attr {
   690  		attr[t.Key] = t.Val
   691  	}
   692  	for _, t := range src.Attr {
   693  		if _, ok := attr[t.Key]; !ok {
   694  			dst.Attr = append(dst.Attr, t)
   695  			attr[t.Key] = t.Val
   696  		}
   697  	}
   698  }
   699  
// inBodyIM implements the "in body" insertion mode for the current token.
// It returns true when the token was consumed. It returns false when the
// token has been rewritten in place (<image> becomes <img>, </br> becomes
// <br>) or when an implied </body> has been processed ahead of </html>, so
// that the token is left unconsumed.
// Section 12.2.5.4.7.
func inBodyIM(p *parser) bool {
	switch p.tok.Type {
	case TextToken:
		d := p.tok.Data
		switch n := p.oe.top(); n.DataAtom {
		case a.Pre, a.Listing:
			if n.FirstChild == nil {
				// Ignore a newline at the start of a <pre> block.
				if d != "" && d[0] == '\r' {
					d = d[1:]
				}
				if d != "" && d[0] == '\n' {
					d = d[1:]
				}
			}
		}
		// NUL bytes are dropped from body text.
		d = strings.Replace(d, "\x00", "", -1)
		if d == "" {
			return true
		}
		p.reconstructActiveFormattingElements()
		p.addText(d)
		if p.framesetOK && strings.TrimLeft(d, whitespace) != "" {
			// There were non-whitespace characters inserted.
			p.framesetOK = false
		}
	case StartTagToken:
		switch p.tok.DataAtom {
		case a.Html:
			// A repeated <html> only contributes attributes the root element
			// (bottom of the stack) does not have yet.
			copyAttributes(p.oe[0], p.tok)
		case a.Base, a.Basefont, a.Bgsound, a.Command, a.Link, a.Meta, a.Noframes, a.Script, a.Style, a.Title:
			// Head-level elements are processed with the in-head rules.
			return inHeadIM(p)
		case a.Body:
			// A repeated <body> only contributes new attributes to the body
			// element, which is expected as the second entry on the stack.
			if len(p.oe) >= 2 {
				body := p.oe[1]
				if body.Type == ElementNode && body.DataAtom == a.Body {
					p.framesetOK = false
					copyAttributes(body, p.tok)
				}
			}
		case a.Frameset:
			if !p.framesetOK || len(p.oe) < 2 || p.oe[1].DataAtom != a.Body {
				// Ignore the token.
				return true
			}
			// Replace the body element with the frameset: detach the body
			// from the tree and cut the stack back to its first entry.
			body := p.oe[1]
			if body.Parent != nil {
				body.Parent.RemoveChild(body)
			}
			p.oe = p.oe[:1]
			p.addElement()
			p.im = inFramesetIM
			return true
		case a.Address, a.Article, a.Aside, a.Blockquote, a.Center, a.Details, a.Dir, a.Div, a.Dl, a.Fieldset, a.Figcaption, a.Figure, a.Footer, a.Header, a.Hgroup, a.Menu, a.Nav, a.Ol, a.P, a.Section, a.Summary, a.Ul:
			// Close an open <p> in button scope before opening the element.
			p.popUntil(buttonScope, a.P)
			p.addElement()
		case a.H1, a.H2, a.H3, a.H4, a.H5, a.H6:
			p.popUntil(buttonScope, a.P)
			// Headings do not nest: a heading on top of the stack is popped
			// before the new one is added.
			switch n := p.top(); n.DataAtom {
			case a.H1, a.H2, a.H3, a.H4, a.H5, a.H6:
				p.oe.pop()
			}
			p.addElement()
		case a.Pre, a.Listing:
			p.popUntil(buttonScope, a.P)
			p.addElement()
			// The newline, if any, will be dealt with by the TextToken case.
			p.framesetOK = false
		case a.Form:
			// A <form> start tag is only honored while no form element
			// pointer is set.
			if p.form == nil {
				p.popUntil(buttonScope, a.P)
				p.addElement()
				p.form = p.top()
			}
		case a.Li:
			p.framesetOK = false
			// Walk down the stack: an open <li> is popped together with
			// everything above it; address, div, p and non-special elements
			// are skipped; any other element ends the walk.
			for i := len(p.oe) - 1; i >= 0; i-- {
				node := p.oe[i]
				switch node.DataAtom {
				case a.Li:
					p.oe = p.oe[:i]
				case a.Address, a.Div, a.P:
					continue
				default:
					if !isSpecialElement(node) {
						continue
					}
				}
				break
			}
			p.popUntil(buttonScope, a.P)
			p.addElement()
		case a.Dd, a.Dt:
			p.framesetOK = false
			// Same walk as for <li>, but looking for an open <dd> or <dt>.
			for i := len(p.oe) - 1; i >= 0; i-- {
				node := p.oe[i]
				switch node.DataAtom {
				case a.Dd, a.Dt:
					p.oe = p.oe[:i]
				case a.Address, a.Div, a.P:
					continue
				default:
					if !isSpecialElement(node) {
						continue
					}
				}
				break
			}
			p.popUntil(buttonScope, a.P)
			p.addElement()
		case a.Plaintext:
			p.popUntil(buttonScope, a.P)
			p.addElement()
		case a.Button:
			// An open <button> in scope is closed before the new one opens.
			p.popUntil(defaultScope, a.Button)
			p.reconstructActiveFormattingElements()
			p.addElement()
			p.framesetOK = false
		case a.A:
			// If an <a> is already among the active formatting elements
			// (after the last scope marker), end it as a formatting element
			// and remove it from both lists before adding the new one.
			for i := len(p.afe) - 1; i >= 0 && p.afe[i].Type != scopeMarkerNode; i-- {
				if n := p.afe[i]; n.Type == ElementNode && n.DataAtom == a.A {
					p.inBodyEndTagFormatting(a.A)
					p.oe.remove(n)
					p.afe.remove(n)
					break
				}
			}
			p.reconstructActiveFormattingElements()
			p.addFormattingElement()
		case a.B, a.Big, a.Code, a.Em, a.Font, a.I, a.S, a.Small, a.Strike, a.Strong, a.Tt, a.U:
			p.reconstructActiveFormattingElements()
			p.addFormattingElement()
		case a.Nobr:
			p.reconstructActiveFormattingElements()
			// A <nobr> already in scope is ended first.
			if p.elementInScope(defaultScope, a.Nobr) {
				p.inBodyEndTagFormatting(a.Nobr)
				p.reconstructActiveFormattingElements()
			}
			p.addFormattingElement()
		case a.Applet, a.Marquee, a.Object:
			p.reconstructActiveFormattingElements()
			p.addElement()
			// Push a scope marker onto the active formatting elements.
			p.afe = append(p.afe, &scopeMarker)
			p.framesetOK = false
		case a.Table:
			// Outside quirks mode, an open <p> is closed before the table.
			if !p.quirks {
				p.popUntil(buttonScope, a.P)
			}
			p.addElement()
			p.framesetOK = false
			p.im = inTableIM
			return true
		case a.Area, a.Br, a.Embed, a.Img, a.Input, a.Keygen, a.Wbr:
			// These elements are added and immediately popped again.
			p.reconstructActiveFormattingElements()
			p.addElement()
			p.oe.pop()
			p.acknowledgeSelfClosingTag()
			if p.tok.DataAtom == a.Input {
				for _, t := range p.tok.Attr {
					if t.Key == "type" {
						if strings.ToLower(t.Val) == "hidden" {
							// Skip setting framesetOK = false
							return true
						}
					}
				}
			}
			p.framesetOK = false
		case a.Param, a.Source, a.Track:
			p.addElement()
			p.oe.pop()
			p.acknowledgeSelfClosingTag()
		case a.Hr:
			p.popUntil(buttonScope, a.P)
			p.addElement()
			p.oe.pop()
			p.acknowledgeSelfClosingTag()
			p.framesetOK = false
		case a.Image:
			// Rewrite <image> as <img> and leave the token unconsumed.
			p.tok.DataAtom = a.Img
			p.tok.Data = a.Img.String()
			return false
		case a.Isindex:
			if p.form != nil {
				// Ignore the token.
				return true
			}
			// Expand <isindex> into a form containing hr, a label with
			// prompt text and an input named "isindex", and a second hr.
			action := ""
			prompt := "This is a searchable index. Enter search keywords: "
			attr := []Attribute{{Key: "name", Val: "isindex"}}
			for _, t := range p.tok.Attr {
				switch t.Key {
				case "action":
					action = t.Val
				case "name":
					// Ignore the attribute.
				case "prompt":
					prompt = t.Val
				default:
					// All remaining attributes are passed on to the input.
					attr = append(attr, t)
				}
			}
			p.acknowledgeSelfClosingTag()
			p.popUntil(buttonScope, a.P)
			p.parseImpliedToken(StartTagToken, a.Form, a.Form.String())
			if action != "" {
				p.form.Attr = []Attribute{{Key: "action", Val: action}}
			}
			p.parseImpliedToken(StartTagToken, a.Hr, a.Hr.String())
			p.parseImpliedToken(StartTagToken, a.Label, a.Label.String())
			p.addText(prompt)
			p.addChild(&Node{
				Type:     ElementNode,
				DataAtom: a.Input,
				Data:     a.Input.String(),
				Attr:     attr,
			})
			p.oe.pop()
			p.parseImpliedToken(EndTagToken, a.Label, a.Label.String())
			p.parseImpliedToken(StartTagToken, a.Hr, a.Hr.String())
			p.parseImpliedToken(EndTagToken, a.Form, a.Form.String())
		case a.Textarea:
			// The element's content is handled by the text insertion mode;
			// the current mode is remembered as the one to return to.
			p.addElement()
			p.setOriginalIM()
			p.framesetOK = false
			p.im = textIM
		case a.Xmp:
			p.popUntil(buttonScope, a.P)
			p.reconstructActiveFormattingElements()
			p.framesetOK = false
			p.addElement()
			p.setOriginalIM()
			p.im = textIM
		case a.Iframe:
			p.framesetOK = false
			p.addElement()
			p.setOriginalIM()
			p.im = textIM
		case a.Noembed, a.Noscript:
			p.addElement()
			p.setOriginalIM()
			p.im = textIM
		case a.Select:
			p.reconstructActiveFormattingElements()
			p.addElement()
			p.framesetOK = false
			p.im = inSelectIM
			return true
		case a.Optgroup, a.Option:
			// An <option> on top of the stack is closed first.
			if p.top().DataAtom == a.Option {
				p.oe.pop()
			}
			p.reconstructActiveFormattingElements()
			p.addElement()
		case a.Rp, a.Rt:
			if p.elementInScope(defaultScope, a.Ruby) {
				p.generateImpliedEndTags()
			}
			p.addElement()
		case a.Math, a.Svg:
			// Foreign content: adjust attribute names for the target
			// vocabulary, then tag the new element with the token's name
			// as its namespace.
			p.reconstructActiveFormattingElements()
			if p.tok.DataAtom == a.Math {
				adjustAttributeNames(p.tok.Attr, mathMLAttributeAdjustments)
			} else {
				adjustAttributeNames(p.tok.Attr, svgAttributeAdjustments)
			}
			adjustForeignAttributes(p.tok.Attr)
			p.addElement()
			p.top().Namespace = p.tok.Data
			if p.hasSelfClosingToken {
				p.oe.pop()
				p.acknowledgeSelfClosingTag()
			}
			return true
		case a.Caption, a.Col, a.Colgroup, a.Frame, a.Head, a.Tbody, a.Td, a.Tfoot, a.Th, a.Thead, a.Tr:
			// Ignore the token.
		default:
			// Any other start tag opens an ordinary element.
			p.reconstructActiveFormattingElements()
			p.addElement()
		}
	case EndTagToken:
		switch p.tok.DataAtom {
		case a.Body:
			if p.elementInScope(defaultScope, a.Body) {
				p.im = afterBodyIM
			}
		case a.Html:
			// With a body in scope, process an implied </body> first and
			// leave the </html> token unconsumed.
			if p.elementInScope(defaultScope, a.Body) {
				p.parseImpliedToken(EndTagToken, a.Body, a.Body.String())
				return false
			}
			return true
		case a.Address, a.Article, a.Aside, a.Blockquote, a.Button, a.Center, a.Details, a.Dir, a.Div, a.Dl, a.Fieldset, a.Figcaption, a.Figure, a.Footer, a.Header, a.Hgroup, a.Listing, a.Menu, a.Nav, a.Ol, a.Pre, a.Section, a.Summary, a.Ul:
			p.popUntil(defaultScope, p.tok.DataAtom)
		case a.Form:
			// The form element pointer is cleared whether or not the end tag
			// ends up being honored.
			node := p.form
			p.form = nil
			i := p.indexOfElementInScope(defaultScope, a.Form)
			if node == nil || i == -1 || p.oe[i] != node {
				// Ignore the token.
				return true
			}
			p.generateImpliedEndTags()
			p.oe.remove(node)
		case a.P:
			// A </p> with no <p> in button scope first opens an implied <p>,
			// which is then closed.
			if !p.elementInScope(buttonScope, a.P) {
				p.parseImpliedToken(StartTagToken, a.P, a.P.String())
			}
			p.popUntil(buttonScope, a.P)
		case a.Li:
			p.popUntil(listItemScope, a.Li)
		case a.Dd, a.Dt:
			p.popUntil(defaultScope, p.tok.DataAtom)
		case a.H1, a.H2, a.H3, a.H4, a.H5, a.H6:
			// Any heading end tag closes whichever heading is in scope.
			p.popUntil(defaultScope, a.H1, a.H2, a.H3, a.H4, a.H5, a.H6)
		case a.A, a.B, a.Big, a.Code, a.Em, a.Font, a.I, a.Nobr, a.S, a.Small, a.Strike, a.Strong, a.Tt, a.U:
			p.inBodyEndTagFormatting(p.tok.DataAtom)
		case a.Applet, a.Marquee, a.Object:
			// Closing one of these also clears the active formatting
			// elements back to the last scope marker.
			if p.popUntil(defaultScope, p.tok.DataAtom) {
				p.clearActiveFormattingElements()
			}
		case a.Br:
			// Rewrite </br> as a <br> start tag and leave it unconsumed.
			p.tok.Type = StartTagToken
			return false
		default:
			p.inBodyEndTagOther(p.tok.DataAtom)
		}
	case CommentToken:
		p.addChild(&Node{
			Type: CommentNode,
			Data: p.tok.Data,
		})
	}

	return true
}
  1037  
// inBodyEndTagFormatting handles an end tag whose atom is tagAtom when that
// tag names a formatting element (inBodyIM routes a, b, big, code, em, font,
// i, nobr, s, small, strike, strong, tt and u here). It repairs misnested
// formatting markup by rearranging the stack of open elements (p.oe), the
// list of active formatting elements (p.afe) and the affected tree nodes.
func (p *parser) inBodyEndTagFormatting(tagAtom a.Atom) {
	// This is the "adoption agency" algorithm, described at
	// https://html.spec.whatwg.org/multipage/syntax.html#adoptionAgency

	// TODO: this is a fairly literal line-by-line translation of that algorithm.
	// Once the code successfully parses the comprehensive test suite, we should
	// refactor this code to be more idiomatic.

	// Steps 1-4. The outer loop. It runs at most eight times; every early
	// return below ends the whole algorithm.
	for i := 0; i < 8; i++ {
		// Step 5. Find the formatting element: the most recently added entry
		// of p.afe with a matching atom, searching no further back than the
		// last scope marker.
		var formattingElement *Node
		for j := len(p.afe) - 1; j >= 0; j-- {
			if p.afe[j].Type == scopeMarkerNode {
				break
			}
			if p.afe[j].DataAtom == tagAtom {
				formattingElement = p.afe[j]
				break
			}
		}
		if formattingElement == nil {
			// No candidate: treat the token like any other end tag.
			p.inBodyEndTagOther(tagAtom)
			return
		}
		feIndex := p.oe.index(formattingElement)
		if feIndex == -1 {
			// The element is no longer open; just forget about it.
			p.afe.remove(formattingElement)
			return
		}
		if !p.elementInScope(defaultScope, tagAtom) {
			// Ignore the tag.
			return
		}

		// Steps 9-10. Find the furthest block: the first special element
		// encountered when scanning p.oe from the formatting element's
		// position towards the top of the stack.
		var furthestBlock *Node
		for _, e := range p.oe[feIndex:] {
			if isSpecialElement(e) {
				furthestBlock = e
				break
			}
		}
		if furthestBlock == nil {
			// Simple case: pop the stack up to and including the formatting
			// element, then drop it from the active formatting elements.
			e := p.oe.pop()
			for e != formattingElement {
				e = p.oe.pop()
			}
			p.afe.remove(e)
			return
		}

		// Steps 11-12. Find the common ancestor and bookmark node.
		// The common ancestor is the stack entry immediately below the
		// formatting element; the bookmark is an index into p.afe at which
		// the replacement formatting element is inserted in step 18.
		commonAncestor := p.oe[feIndex-1]
		bookmark := p.afe.index(formattingElement)

		// Step 13. The inner loop. Find the lastNode to reparent.
		lastNode := furthestBlock
		node := furthestBlock
		x := p.oe.index(node)
		// Steps 13.1-13.2. The inner loop runs at most three times.
		for j := 0; j < 3; j++ {
			// Step 13.3. Move one entry down the stack of open elements.
			x--
			node = p.oe[x]
			// Step 13.4 - 13.5. Nodes that are not active formatting elements
			// are simply removed from the stack of open elements.
			if p.afe.index(node) == -1 {
				p.oe.remove(node)
				continue
			}
			// Step 13.6.
			if node == formattingElement {
				break
			}
			// Step 13.7. Replace node with a clone in both lists.
			clone := node.clone()
			p.afe[p.afe.index(node)] = clone
			p.oe[p.oe.index(node)] = clone
			node = clone
			// Step 13.8. On the first reparenting, move the bookmark to just
			// after the clone in p.afe.
			if lastNode == furthestBlock {
				bookmark = p.afe.index(node) + 1
			}
			// Step 13.9. Make lastNode a child of the clone.
			if lastNode.Parent != nil {
				lastNode.Parent.RemoveChild(lastNode)
			}
			node.AppendChild(lastNode)
			// Step 13.10.
			lastNode = node
		}

		// Step 14. Reparent lastNode to the common ancestor,
		// or for misnested table nodes, to the foster parent.
		if lastNode.Parent != nil {
			lastNode.Parent.RemoveChild(lastNode)
		}
		switch commonAncestor.DataAtom {
		case a.Table, a.Tbody, a.Tfoot, a.Thead, a.Tr:
			p.fosterParent(lastNode)
		default:
			commonAncestor.AppendChild(lastNode)
		}

		// Steps 15-17. Reparent nodes from the furthest block's children
		// to a clone of the formatting element.
		clone := formattingElement.clone()
		reparentChildren(clone, furthestBlock)
		furthestBlock.AppendChild(clone)

		// Step 18. Fix up the list of active formatting elements.
		if oldLoc := p.afe.index(formattingElement); oldLoc != -1 && oldLoc < bookmark {
			// Move the bookmark with the rest of the list.
			bookmark--
		}
		p.afe.remove(formattingElement)
		p.afe.insert(bookmark, clone)

		// Step 19. Fix up the stack of open elements: the clone takes the
		// slot directly after the furthest block.
		p.oe.remove(formattingElement)
		p.oe.insert(p.oe.index(furthestBlock)+1, clone)
	}
}
  1161  
  1162  // inBodyEndTagOther performs the "any other end tag" algorithm for inBodyIM.
  1163  // "Any other end tag" handling from 12.2.5.5 The rules for parsing tokens in foreign content
  1164  // https://html.spec.whatwg.org/multipage/syntax.html#parsing-main-inforeign
  1165  func (p *parser) inBodyEndTagOther(tagAtom a.Atom) {
  1166  	for i := len(p.oe) - 1; i >= 0; i-- {
  1167  		if p.oe[i].DataAtom == tagAtom {
  1168  			p.oe = p.oe[:i]
  1169  			break
  1170  		}
  1171  		if isSpecialElement(p.oe[i]) {
  1172  			break
  1173  		}
  1174  	}
  1175  }
  1176  
  1177  // Section 12.2.5.4.8.
  1178  func textIM(p *parser) bool {
  1179  	switch p.tok.Type {
  1180  	case ErrorToken:
  1181  		p.oe.pop()
  1182  	case TextToken:
  1183  		d := p.tok.Data
  1184  		if n := p.oe.top(); n.DataAtom == a.Textarea && n.FirstChild == nil {
  1185  			// Ignore a newline at the start of a <textarea> block.
  1186  			if d != "" && d[0] == '\r' {
  1187  				d = d[1:]
  1188  			}
  1189  			if d != "" && d[0] == '\n' {
  1190  				d = d[1:]
  1191  			}
  1192  		}
  1193  		if d == "" {
  1194  			return true
  1195  		}
  1196  		p.addText(d)
  1197  		return true
  1198  	case EndTagToken:
  1199  		p.oe.pop()
  1200  	}
  1201  	p.im = p.originalIM
  1202  	p.originalIM = nil
  1203  	return p.tok.Type == EndTagToken
  1204  }
  1205  
  1206  // Section 12.2.5.4.9.
  1207  func inTableIM(p *parser) bool {
  1208  	switch p.tok.Type {
  1209  	case ErrorToken:
  1210  		// Stop parsing.
  1211  		return true
  1212  	case TextToken:
  1213  		p.tok.Data = strings.Replace(p.tok.Data, "\x00", "", -1)
  1214  		switch p.oe.top().DataAtom {
  1215  		case a.Table, a.Tbody, a.Tfoot, a.Thead, a.Tr:
  1216  			if strings.Trim(p.tok.Data, whitespace) == "" {
  1217  				p.addText(p.tok.Data)
  1218  				return true
  1219  			}
  1220  		}
  1221  	case StartTagToken:
  1222  		switch p.tok.DataAtom {
  1223  		case a.Caption:
  1224  			p.clearStackToContext(tableScope)
  1225  			p.afe = append(p.afe, &scopeMarker)
  1226  			p.addElement()
  1227  			p.im = inCaptionIM
  1228  			return true
  1229  		case a.Colgroup:
  1230  			p.clearStackToContext(tableScope)
  1231  			p.addElement()
  1232  			p.im = inColumnGroupIM
  1233  			return true
  1234  		case a.Col:
  1235  			p.parseImpliedToken(StartTagToken, a.Colgroup, a.Colgroup.String())
  1236  			return false
  1237  		case a.Tbody, a.Tfoot, a.Thead:
  1238  			p.clearStackToContext(tableScope)
  1239  			p.addElement()
  1240  			p.im = inTableBodyIM
  1241  			return true
  1242  		case a.Td, a.Th, a.Tr:
  1243  			p.parseImpliedToken(StartTagToken, a.Tbody, a.Tbody.String())
  1244  			return false
  1245  		case a.Table:
  1246  			if p.popUntil(tableScope, a.Table) {
  1247  				p.resetInsertionMode()
  1248  				return false
  1249  			}
  1250  			// Ignore the token.
  1251  			return true
  1252  		case a.Style, a.Script:
  1253  			return inHeadIM(p)
  1254  		case a.Input:
  1255  			for _, t := range p.tok.Attr {
  1256  				if t.Key == "type" && strings.ToLower(t.Val) == "hidden" {
  1257  					p.addElement()
  1258  					p.oe.pop()
  1259  					return true
  1260  				}
  1261  			}
  1262  			// Otherwise drop down to the default action.
  1263  		case a.Form:
  1264  			if p.form != nil {
  1265  				// Ignore the token.
  1266  				return true
  1267  			}
  1268  			p.addElement()
  1269  			p.form = p.oe.pop()
  1270  		case a.Select:
  1271  			p.reconstructActiveFormattingElements()
  1272  			switch p.top().DataAtom {
  1273  			case a.Table, a.Tbody, a.Tfoot, a.Thead, a.Tr:
  1274  				p.fosterParenting = true
  1275  			}
  1276  			p.addElement()
  1277  			p.fosterParenting = false
  1278  			p.framesetOK = false
  1279  			p.im = inSelectInTableIM
  1280  			return true
  1281  		}
  1282  	case EndTagToken:
  1283  		switch p.tok.DataAtom {
  1284  		case a.Table:
  1285  			if p.popUntil(tableScope, a.Table) {
  1286  				p.resetInsertionMode()
  1287  				return true
  1288  			}
  1289  			// Ignore the token.
  1290  			return true
  1291  		case a.Body, a.Caption, a.Col, a.Colgroup, a.Html, a.Tbody, a.Td, a.Tfoot, a.Th, a.Thead, a.Tr:
  1292  			// Ignore the token.
  1293  			return true
  1294  		}
  1295  	case CommentToken:
  1296  		p.addChild(&Node{
  1297  			Type: CommentNode,
  1298  			Data: p.tok.Data,
  1299  		})
  1300  		return true
  1301  	case DoctypeToken:
  1302  		// Ignore the token.
  1303  		return true
  1304  	}
  1305  
  1306  	p.fosterParenting = true
  1307  	defer func() { p.fosterParenting = false }()
  1308  
  1309  	return inBodyIM(p)
  1310  }
  1311  
  1312  // Section 12.2.5.4.11.
  1313  func inCaptionIM(p *parser) bool {
  1314  	switch p.tok.Type {
  1315  	case StartTagToken:
  1316  		switch p.tok.DataAtom {
  1317  		case a.Caption, a.Col, a.Colgroup, a.Tbody, a.Td, a.Tfoot, a.Thead, a.Tr:
  1318  			if p.popUntil(tableScope, a.Caption) {
  1319  				p.clearActiveFormattingElements()
  1320  				p.im = inTableIM
  1321  				return false
  1322  			} else {
  1323  				// Ignore the token.
  1324  				return true
  1325  			}
  1326  		case a.Select:
  1327  			p.reconstructActiveFormattingElements()
  1328  			p.addElement()
  1329  			p.framesetOK = false
  1330  			p.im = inSelectInTableIM
  1331  			return true
  1332  		}
  1333  	case EndTagToken:
  1334  		switch p.tok.DataAtom {
  1335  		case a.Caption:
  1336  			if p.popUntil(tableScope, a.Caption) {
  1337  				p.clearActiveFormattingElements()
  1338  				p.im = inTableIM
  1339  			}
  1340  			return true
  1341  		case a.Table:
  1342  			if p.popUntil(tableScope, a.Caption) {
  1343  				p.clearActiveFormattingElements()
  1344  				p.im = inTableIM
  1345  				return false
  1346  			} else {
  1347  				// Ignore the token.
  1348  				return true
  1349  			}
  1350  		case a.Body, a.Col, a.Colgroup, a.Html, a.Tbody, a.Td, a.Tfoot, a.Th, a.Thead, a.Tr:
  1351  			// Ignore the token.
  1352  			return true
  1353  		}
  1354  	}
  1355  	return inBodyIM(p)
  1356  }
  1357  
  1358  // Section 12.2.5.4.12.
  1359  func inColumnGroupIM(p *parser) bool {
  1360  	switch p.tok.Type {
  1361  	case TextToken:
  1362  		s := strings.TrimLeft(p.tok.Data, whitespace)
  1363  		if len(s) < len(p.tok.Data) {
  1364  			// Add the initial whitespace to the current node.
  1365  			p.addText(p.tok.Data[:len(p.tok.Data)-len(s)])
  1366  			if s == "" {
  1367  				return true
  1368  			}
  1369  			p.tok.Data = s
  1370  		}
  1371  	case CommentToken:
  1372  		p.addChild(&Node{
  1373  			Type: CommentNode,
  1374  			Data: p.tok.Data,
  1375  		})
  1376  		return true
  1377  	case DoctypeToken:
  1378  		// Ignore the token.
  1379  		return true
  1380  	case StartTagToken:
  1381  		switch p.tok.DataAtom {
  1382  		case a.Html:
  1383  			return inBodyIM(p)
  1384  		case a.Col:
  1385  			p.addElement()
  1386  			p.oe.pop()
  1387  			p.acknowledgeSelfClosingTag()
  1388  			return true
  1389  		}
  1390  	case EndTagToken:
  1391  		switch p.tok.DataAtom {
  1392  		case a.Colgroup:
  1393  			if p.oe.top().DataAtom != a.Html {
  1394  				p.oe.pop()
  1395  				p.im = inTableIM
  1396  			}
  1397  			return true
  1398  		case a.Col:
  1399  			// Ignore the token.
  1400  			return true
  1401  		}
  1402  	}
  1403  	if p.oe.top().DataAtom != a.Html {
  1404  		p.oe.pop()
  1405  		p.im = inTableIM
  1406  		return false
  1407  	}
  1408  	return true
  1409  }
  1410  
  1411  // Section 12.2.5.4.13.
  1412  func inTableBodyIM(p *parser) bool {
  1413  	switch p.tok.Type {
  1414  	case StartTagToken:
  1415  		switch p.tok.DataAtom {
  1416  		case a.Tr:
  1417  			p.clearStackToContext(tableBodyScope)
  1418  			p.addElement()
  1419  			p.im = inRowIM
  1420  			return true
  1421  		case a.Td, a.Th:
  1422  			p.parseImpliedToken(StartTagToken, a.Tr, a.Tr.String())
  1423  			return false
  1424  		case a.Caption, a.Col, a.Colgroup, a.Tbody, a.Tfoot, a.Thead:
  1425  			if p.popUntil(tableScope, a.Tbody, a.Thead, a.Tfoot) {
  1426  				p.im = inTableIM
  1427  				return false
  1428  			}
  1429  			// Ignore the token.
  1430  			return true
  1431  		}
  1432  	case EndTagToken:
  1433  		switch p.tok.DataAtom {
  1434  		case a.Tbody, a.Tfoot, a.Thead:
  1435  			if p.elementInScope(tableScope, p.tok.DataAtom) {
  1436  				p.clearStackToContext(tableBodyScope)
  1437  				p.oe.pop()
  1438  				p.im = inTableIM
  1439  			}
  1440  			return true
  1441  		case a.Table:
  1442  			if p.popUntil(tableScope, a.Tbody, a.Thead, a.Tfoot) {
  1443  				p.im = inTableIM
  1444  				return false
  1445  			}
  1446  			// Ignore the token.
  1447  			return true
  1448  		case a.Body, a.Caption, a.Col, a.Colgroup, a.Html, a.Td, a.Th, a.Tr:
  1449  			// Ignore the token.
  1450  			return true
  1451  		}
  1452  	case CommentToken:
  1453  		p.addChild(&Node{
  1454  			Type: CommentNode,
  1455  			Data: p.tok.Data,
  1456  		})
  1457  		return true
  1458  	}
  1459  
  1460  	return inTableIM(p)
  1461  }
  1462  
  1463  // Section 12.2.5.4.14.
  1464  func inRowIM(p *parser) bool {
  1465  	switch p.tok.Type {
  1466  	case StartTagToken:
  1467  		switch p.tok.DataAtom {
  1468  		case a.Td, a.Th:
  1469  			p.clearStackToContext(tableRowScope)
  1470  			p.addElement()
  1471  			p.afe = append(p.afe, &scopeMarker)
  1472  			p.im = inCellIM
  1473  			return true
  1474  		case a.Caption, a.Col, a.Colgroup, a.Tbody, a.Tfoot, a.Thead, a.Tr:
  1475  			if p.popUntil(tableScope, a.Tr) {
  1476  				p.im = inTableBodyIM
  1477  				return false
  1478  			}
  1479  			// Ignore the token.
  1480  			return true
  1481  		}
  1482  	case EndTagToken:
  1483  		switch p.tok.DataAtom {
  1484  		case a.Tr:
  1485  			if p.popUntil(tableScope, a.Tr) {
  1486  				p.im = inTableBodyIM
  1487  				return true
  1488  			}
  1489  			// Ignore the token.
  1490  			return true
  1491  		case a.Table:
  1492  			if p.popUntil(tableScope, a.Tr) {
  1493  				p.im = inTableBodyIM
  1494  				return false
  1495  			}
  1496  			// Ignore the token.
  1497  			return true
  1498  		case a.Tbody, a.Tfoot, a.Thead:
  1499  			if p.elementInScope(tableScope, p.tok.DataAtom) {
  1500  				p.parseImpliedToken(EndTagToken, a.Tr, a.Tr.String())
  1501  				return false
  1502  			}
  1503  			// Ignore the token.
  1504  			return true
  1505  		case a.Body, a.Caption, a.Col, a.Colgroup, a.Html, a.Td, a.Th:
  1506  			// Ignore the token.
  1507  			return true
  1508  		}
  1509  	}
  1510  
  1511  	return inTableIM(p)
  1512  }
  1513  
  1514  // Section 12.2.5.4.15.
  1515  func inCellIM(p *parser) bool {
  1516  	switch p.tok.Type {
  1517  	case StartTagToken:
  1518  		switch p.tok.DataAtom {
  1519  		case a.Caption, a.Col, a.Colgroup, a.Tbody, a.Td, a.Tfoot, a.Th, a.Thead, a.Tr:
  1520  			if p.popUntil(tableScope, a.Td, a.Th) {
  1521  				// Close the cell and reprocess.
  1522  				p.clearActiveFormattingElements()
  1523  				p.im = inRowIM
  1524  				return false
  1525  			}
  1526  			// Ignore the token.
  1527  			return true
  1528  		case a.Select:
  1529  			p.reconstructActiveFormattingElements()
  1530  			p.addElement()
  1531  			p.framesetOK = false
  1532  			p.im = inSelectInTableIM
  1533  			return true
  1534  		}
  1535  	case EndTagToken:
  1536  		switch p.tok.DataAtom {
  1537  		case a.Td, a.Th:
  1538  			if !p.popUntil(tableScope, p.tok.DataAtom) {
  1539  				// Ignore the token.
  1540  				return true
  1541  			}
  1542  			p.clearActiveFormattingElements()
  1543  			p.im = inRowIM
  1544  			return true
  1545  		case a.Body, a.Caption, a.Col, a.Colgroup, a.Html:
  1546  			// Ignore the token.
  1547  			return true
  1548  		case a.Table, a.Tbody, a.Tfoot, a.Thead, a.Tr:
  1549  			if !p.elementInScope(tableScope, p.tok.DataAtom) {
  1550  				// Ignore the token.
  1551  				return true
  1552  			}
  1553  			// Close the cell and reprocess.
  1554  			p.popUntil(tableScope, a.Td, a.Th)
  1555  			p.clearActiveFormattingElements()
  1556  			p.im = inRowIM
  1557  			return false
  1558  		}
  1559  	}
  1560  	return inBodyIM(p)
  1561  }
  1562  
  1563  // Section 12.2.5.4.16.
  1564  func inSelectIM(p *parser) bool {
  1565  	switch p.tok.Type {
  1566  	case ErrorToken:
  1567  		// Stop parsing.
  1568  		return true
  1569  	case TextToken:
  1570  		p.addText(strings.Replace(p.tok.Data, "\x00", "", -1))
  1571  	case StartTagToken:
  1572  		switch p.tok.DataAtom {
  1573  		case a.Html:
  1574  			return inBodyIM(p)
  1575  		case a.Option:
  1576  			if p.top().DataAtom == a.Option {
  1577  				p.oe.pop()
  1578  			}
  1579  			p.addElement()
  1580  		case a.Optgroup:
  1581  			if p.top().DataAtom == a.Option {
  1582  				p.oe.pop()
  1583  			}
  1584  			if p.top().DataAtom == a.Optgroup {
  1585  				p.oe.pop()
  1586  			}
  1587  			p.addElement()
  1588  		case a.Select:
  1589  			p.tok.Type = EndTagToken
  1590  			return false
  1591  		case a.Input, a.Keygen, a.Textarea:
  1592  			if p.elementInScope(selectScope, a.Select) {
  1593  				p.parseImpliedToken(EndTagToken, a.Select, a.Select.String())
  1594  				return false
  1595  			}
  1596  			// In order to properly ignore <textarea>, we need to change the tokenizer mode.
  1597  			p.tokenizer.NextIsNotRawText()
  1598  			// Ignore the token.
  1599  			return true
  1600  		case a.Script:
  1601  			return inHeadIM(p)
  1602  		}
  1603  	case EndTagToken:
  1604  		switch p.tok.DataAtom {
  1605  		case a.Option:
  1606  			if p.top().DataAtom == a.Option {
  1607  				p.oe.pop()
  1608  			}
  1609  		case a.Optgroup:
  1610  			i := len(p.oe) - 1
  1611  			if p.oe[i].DataAtom == a.Option {
  1612  				i--
  1613  			}
  1614  			if p.oe[i].DataAtom == a.Optgroup {
  1615  				p.oe = p.oe[:i]
  1616  			}
  1617  		case a.Select:
  1618  			if p.popUntil(selectScope, a.Select) {
  1619  				p.resetInsertionMode()
  1620  			}
  1621  		}
  1622  	case CommentToken:
  1623  		p.addChild(&Node{
  1624  			Type: CommentNode,
  1625  			Data: p.tok.Data,
  1626  		})
  1627  	case DoctypeToken:
  1628  		// Ignore the token.
  1629  		return true
  1630  	}
  1631  
  1632  	return true
  1633  }
  1634  
  1635  // Section 12.2.5.4.17.
  1636  func inSelectInTableIM(p *parser) bool {
  1637  	switch p.tok.Type {
  1638  	case StartTagToken, EndTagToken:
  1639  		switch p.tok.DataAtom {
  1640  		case a.Caption, a.Table, a.Tbody, a.Tfoot, a.Thead, a.Tr, a.Td, a.Th:
  1641  			if p.tok.Type == StartTagToken || p.elementInScope(tableScope, p.tok.DataAtom) {
  1642  				p.parseImpliedToken(EndTagToken, a.Select, a.Select.String())
  1643  				return false
  1644  			} else {
  1645  				// Ignore the token.
  1646  				return true
  1647  			}
  1648  		}
  1649  	}
  1650  	return inSelectIM(p)
  1651  }
  1652  
  1653  // Section 12.2.5.4.18.
  1654  func afterBodyIM(p *parser) bool {
  1655  	switch p.tok.Type {
  1656  	case ErrorToken:
  1657  		// Stop parsing.
  1658  		return true
  1659  	case TextToken:
  1660  		s := strings.TrimLeft(p.tok.Data, whitespace)
  1661  		if len(s) == 0 {
  1662  			// It was all whitespace.
  1663  			return inBodyIM(p)
  1664  		}
  1665  	case StartTagToken:
  1666  		if p.tok.DataAtom == a.Html {
  1667  			return inBodyIM(p)
  1668  		}
  1669  	case EndTagToken:
  1670  		if p.tok.DataAtom == a.Html {
  1671  			if !p.fragment {
  1672  				p.im = afterAfterBodyIM
  1673  			}
  1674  			return true
  1675  		}
  1676  	case CommentToken:
  1677  		// The comment is attached to the <html> element.
  1678  		if len(p.oe) < 1 || p.oe[0].DataAtom != a.Html {
  1679  			panic("html: bad parser state: <html> element not found, in the after-body insertion mode")
  1680  		}
  1681  		p.oe[0].AppendChild(&Node{
  1682  			Type: CommentNode,
  1683  			Data: p.tok.Data,
  1684  		})
  1685  		return true
  1686  	}
  1687  	p.im = inBodyIM
  1688  	return false
  1689  }
  1690  
  1691  // Section 12.2.5.4.19.
  1692  func inFramesetIM(p *parser) bool {
  1693  	switch p.tok.Type {
  1694  	case CommentToken:
  1695  		p.addChild(&Node{
  1696  			Type: CommentNode,
  1697  			Data: p.tok.Data,
  1698  		})
  1699  	case TextToken:
  1700  		// Ignore all text but whitespace.
  1701  		s := strings.Map(func(c rune) rune {
  1702  			switch c {
  1703  			case ' ', '\t', '\n', '\f', '\r':
  1704  				return c
  1705  			}
  1706  			return -1
  1707  		}, p.tok.Data)
  1708  		if s != "" {
  1709  			p.addText(s)
  1710  		}
  1711  	case StartTagToken:
  1712  		switch p.tok.DataAtom {
  1713  		case a.Html:
  1714  			return inBodyIM(p)
  1715  		case a.Frameset:
  1716  			p.addElement()
  1717  		case a.Frame:
  1718  			p.addElement()
  1719  			p.oe.pop()
  1720  			p.acknowledgeSelfClosingTag()
  1721  		case a.Noframes:
  1722  			return inHeadIM(p)
  1723  		}
  1724  	case EndTagToken:
  1725  		switch p.tok.DataAtom {
  1726  		case a.Frameset:
  1727  			if p.oe.top().DataAtom != a.Html {
  1728  				p.oe.pop()
  1729  				if p.oe.top().DataAtom != a.Frameset {
  1730  					p.im = afterFramesetIM
  1731  					return true
  1732  				}
  1733  			}
  1734  		}
  1735  	default:
  1736  		// Ignore the token.
  1737  	}
  1738  	return true
  1739  }
  1740  
  1741  // Section 12.2.5.4.20.
  1742  func afterFramesetIM(p *parser) bool {
  1743  	switch p.tok.Type {
  1744  	case CommentToken:
  1745  		p.addChild(&Node{
  1746  			Type: CommentNode,
  1747  			Data: p.tok.Data,
  1748  		})
  1749  	case TextToken:
  1750  		// Ignore all text but whitespace.
  1751  		s := strings.Map(func(c rune) rune {
  1752  			switch c {
  1753  			case ' ', '\t', '\n', '\f', '\r':
  1754  				return c
  1755  			}
  1756  			return -1
  1757  		}, p.tok.Data)
  1758  		if s != "" {
  1759  			p.addText(s)
  1760  		}
  1761  	case StartTagToken:
  1762  		switch p.tok.DataAtom {
  1763  		case a.Html:
  1764  			return inBodyIM(p)
  1765  		case a.Noframes:
  1766  			return inHeadIM(p)
  1767  		}
  1768  	case EndTagToken:
  1769  		switch p.tok.DataAtom {
  1770  		case a.Html:
  1771  			p.im = afterAfterFramesetIM
  1772  			return true
  1773  		}
  1774  	default:
  1775  		// Ignore the token.
  1776  	}
  1777  	return true
  1778  }
  1779  
  1780  // Section 12.2.5.4.21.
  1781  func afterAfterBodyIM(p *parser) bool {
  1782  	switch p.tok.Type {
  1783  	case ErrorToken:
  1784  		// Stop parsing.
  1785  		return true
  1786  	case TextToken:
  1787  		s := strings.TrimLeft(p.tok.Data, whitespace)
  1788  		if len(s) == 0 {
  1789  			// It was all whitespace.
  1790  			return inBodyIM(p)
  1791  		}
  1792  	case StartTagToken:
  1793  		if p.tok.DataAtom == a.Html {
  1794  			return inBodyIM(p)
  1795  		}
  1796  	case CommentToken:
  1797  		p.doc.AppendChild(&Node{
  1798  			Type: CommentNode,
  1799  			Data: p.tok.Data,
  1800  		})
  1801  		return true
  1802  	case DoctypeToken:
  1803  		return inBodyIM(p)
  1804  	}
  1805  	p.im = inBodyIM
  1806  	return false
  1807  }
  1808  
  1809  // Section 12.2.5.4.22.
  1810  func afterAfterFramesetIM(p *parser) bool {
  1811  	switch p.tok.Type {
  1812  	case CommentToken:
  1813  		p.doc.AppendChild(&Node{
  1814  			Type: CommentNode,
  1815  			Data: p.tok.Data,
  1816  		})
  1817  	case TextToken:
  1818  		// Ignore all text but whitespace.
  1819  		s := strings.Map(func(c rune) rune {
  1820  			switch c {
  1821  			case ' ', '\t', '\n', '\f', '\r':
  1822  				return c
  1823  			}
  1824  			return -1
  1825  		}, p.tok.Data)
  1826  		if s != "" {
  1827  			p.tok.Data = s
  1828  			return inBodyIM(p)
  1829  		}
  1830  	case StartTagToken:
  1831  		switch p.tok.DataAtom {
  1832  		case a.Html:
  1833  			return inBodyIM(p)
  1834  		case a.Noframes:
  1835  			return inHeadIM(p)
  1836  		}
  1837  	case DoctypeToken:
  1838  		return inBodyIM(p)
  1839  	default:
  1840  		// Ignore the token.
  1841  	}
  1842  	return true
  1843  }
  1844  
  1845  const whitespaceOrNUL = whitespace + "\x00"
  1846  
  1847  // Section 12.2.5.5.
  1848  func parseForeignContent(p *parser) bool {
  1849  	switch p.tok.Type {
  1850  	case TextToken:
  1851  		if p.framesetOK {
  1852  			p.framesetOK = strings.TrimLeft(p.tok.Data, whitespaceOrNUL) == ""
  1853  		}
  1854  		p.tok.Data = strings.Replace(p.tok.Data, "\x00", "\ufffd", -1)
  1855  		p.addText(p.tok.Data)
  1856  	case CommentToken:
  1857  		p.addChild(&Node{
  1858  			Type: CommentNode,
  1859  			Data: p.tok.Data,
  1860  		})
  1861  	case StartTagToken:
  1862  		b := breakout[p.tok.Data]
  1863  		if p.tok.DataAtom == a.Font {
  1864  		loop:
  1865  			for _, attr := range p.tok.Attr {
  1866  				switch attr.Key {
  1867  				case "color", "face", "size":
  1868  					b = true
  1869  					break loop
  1870  				}
  1871  			}
  1872  		}
  1873  		if b {
  1874  			for i := len(p.oe) - 1; i >= 0; i-- {
  1875  				n := p.oe[i]
  1876  				if n.Namespace == "" || htmlIntegrationPoint(n) || mathMLTextIntegrationPoint(n) {
  1877  					p.oe = p.oe[:i+1]
  1878  					break
  1879  				}
  1880  			}
  1881  			return false
  1882  		}
  1883  		switch p.top().Namespace {
  1884  		case "math":
  1885  			adjustAttributeNames(p.tok.Attr, mathMLAttributeAdjustments)
  1886  		case "svg":
  1887  			// Adjust SVG tag names. The tokenizer lower-cases tag names, but
  1888  			// SVG wants e.g. "foreignObject" with a capital second "O".
  1889  			if x := svgTagNameAdjustments[p.tok.Data]; x != "" {
  1890  				p.tok.DataAtom = a.Lookup([]byte(x))
  1891  				p.tok.Data = x
  1892  			}
  1893  			adjustAttributeNames(p.tok.Attr, svgAttributeAdjustments)
  1894  		default:
  1895  			panic("html: bad parser state: unexpected namespace")
  1896  		}
  1897  		adjustForeignAttributes(p.tok.Attr)
  1898  		namespace := p.top().Namespace
  1899  		p.addElement()
  1900  		p.top().Namespace = namespace
  1901  		if namespace != "" {
  1902  			// Don't let the tokenizer go into raw text mode in foreign content
  1903  			// (e.g. in an SVG <title> tag).
  1904  			p.tokenizer.NextIsNotRawText()
  1905  		}
  1906  		if p.hasSelfClosingToken {
  1907  			p.oe.pop()
  1908  			p.acknowledgeSelfClosingTag()
  1909  		}
  1910  	case EndTagToken:
  1911  		for i := len(p.oe) - 1; i >= 0; i-- {
  1912  			if p.oe[i].Namespace == "" {
  1913  				return p.im(p)
  1914  			}
  1915  			if strings.EqualFold(p.oe[i].Data, p.tok.Data) {
  1916  				p.oe = p.oe[:i]
  1917  				break
  1918  			}
  1919  		}
  1920  		return true
  1921  	default:
  1922  		// Ignore the token.
  1923  	}
  1924  	return true
  1925  }
  1926  
  1927  // Section 12.2.5.
  1928  func (p *parser) inForeignContent() bool {
  1929  	if len(p.oe) == 0 {
  1930  		return false
  1931  	}
  1932  	n := p.oe[len(p.oe)-1]
  1933  	if n.Namespace == "" {
  1934  		return false
  1935  	}
  1936  	if mathMLTextIntegrationPoint(n) {
  1937  		if p.tok.Type == StartTagToken && p.tok.DataAtom != a.Mglyph && p.tok.DataAtom != a.Malignmark {
  1938  			return false
  1939  		}
  1940  		if p.tok.Type == TextToken {
  1941  			return false
  1942  		}
  1943  	}
  1944  	if n.Namespace == "math" && n.DataAtom == a.AnnotationXml && p.tok.Type == StartTagToken && p.tok.DataAtom == a.Svg {
  1945  		return false
  1946  	}
  1947  	if htmlIntegrationPoint(n) && (p.tok.Type == StartTagToken || p.tok.Type == TextToken) {
  1948  		return false
  1949  	}
  1950  	if p.tok.Type == ErrorToken {
  1951  		return false
  1952  	}
  1953  	return true
  1954  }
  1955  
  1956  // parseImpliedToken parses a token as though it had appeared in the parser's
  1957  // input.
  1958  func (p *parser) parseImpliedToken(t TokenType, dataAtom a.Atom, data string) {
  1959  	realToken, selfClosing := p.tok, p.hasSelfClosingToken
  1960  	p.tok = Token{
  1961  		Type:     t,
  1962  		DataAtom: dataAtom,
  1963  		Data:     data,
  1964  	}
  1965  	p.hasSelfClosingToken = false
  1966  	p.parseCurrentToken()
  1967  	p.tok, p.hasSelfClosingToken = realToken, selfClosing
  1968  }
  1969  
  1970  // parseCurrentToken runs the current token through the parsing routines
  1971  // until it is consumed.
  1972  func (p *parser) parseCurrentToken() {
  1973  	if p.tok.Type == SelfClosingTagToken {
  1974  		p.hasSelfClosingToken = true
  1975  		p.tok.Type = StartTagToken
  1976  	}
  1977  
  1978  	consumed := false
  1979  	for !consumed {
  1980  		if p.inForeignContent() {
  1981  			consumed = parseForeignContent(p)
  1982  		} else {
  1983  			consumed = p.im(p)
  1984  		}
  1985  	}
  1986  
  1987  	if p.hasSelfClosingToken {
  1988  		// This is a parse error, but ignore it.
  1989  		p.hasSelfClosingToken = false
  1990  	}
  1991  }
  1992  
  1993  func (p *parser) parse() error {
  1994  	// Iterate until EOF. Any other error will cause an early return.
  1995  	var err error
  1996  	for err != io.EOF {
  1997  		// CDATA sections are allowed only in foreign content.
  1998  		n := p.oe.top()
  1999  		p.tokenizer.AllowCDATA(n != nil && n.Namespace != "")
  2000  		// Read and parse the next token.
  2001  		p.tokenizer.Next()
  2002  		p.tok = p.tokenizer.Token()
  2003  		if p.tok.Type == ErrorToken {
  2004  			err = p.tokenizer.Err()
  2005  			if err != nil && err != io.EOF {
  2006  				return err
  2007  			}
  2008  		}
  2009  		p.parseCurrentToken()
  2010  	}
  2011  	return nil
  2012  }
  2013  
  2014  // Parse returns the parse tree for the HTML from the given Reader.
  2015  // The input is assumed to be UTF-8 encoded.
  2016  func Parse(r io.Reader) (*Node, error) {
  2017  	p := &parser{
  2018  		tokenizer: NewTokenizer(r),
  2019  		doc: &Node{
  2020  			Type: DocumentNode,
  2021  		},
  2022  		scripting:  true,
  2023  		framesetOK: true,
  2024  		im:         initialIM,
  2025  	}
  2026  	err := p.parse()
  2027  	if err != nil {
  2028  		return nil, err
  2029  	}
  2030  	return p.doc, nil
  2031  }
  2032  
  2033  // ParseFragment parses a fragment of HTML and returns the nodes that were
  2034  // found. If the fragment is the InnerHTML for an existing element, pass that
  2035  // element in context.
  2036  func ParseFragment(r io.Reader, context *Node) ([]*Node, error) {
  2037  	contextTag := ""
  2038  	if context != nil {
  2039  		if context.Type != ElementNode {
  2040  			return nil, errors.New("html: ParseFragment of non-element Node")
  2041  		}
  2042  		// The next check isn't just context.DataAtom.String() == context.Data because
  2043  		// it is valid to pass an element whose tag isn't a known atom. For example,
  2044  		// DataAtom == 0 and Data = "tagfromthefuture" is perfectly consistent.
  2045  		if context.DataAtom != a.Lookup([]byte(context.Data)) {
  2046  			return nil, fmt.Errorf("html: inconsistent Node: DataAtom=%q, Data=%q", context.DataAtom, context.Data)
  2047  		}
  2048  		contextTag = context.DataAtom.String()
  2049  	}
  2050  	p := &parser{
  2051  		tokenizer: NewTokenizerFragment(r, contextTag),
  2052  		doc: &Node{
  2053  			Type: DocumentNode,
  2054  		},
  2055  		scripting: true,
  2056  		fragment:  true,
  2057  		context:   context,
  2058  	}
  2059  
  2060  	root := &Node{
  2061  		Type:     ElementNode,
  2062  		DataAtom: a.Html,
  2063  		Data:     a.Html.String(),
  2064  	}
  2065  	p.doc.AppendChild(root)
  2066  	p.oe = nodeStack{root}
  2067  	p.resetInsertionMode()
  2068  
  2069  	for n := context; n != nil; n = n.Parent {
  2070  		if n.Type == ElementNode && n.DataAtom == a.Form {
  2071  			p.form = n
  2072  			break
  2073  		}
  2074  	}
  2075  
  2076  	err := p.parse()
  2077  	if err != nil {
  2078  		return nil, err
  2079  	}
  2080  
  2081  	parent := p.doc
  2082  	if context != nil {
  2083  		parent = root
  2084  	}
  2085  
  2086  	var result []*Node
  2087  	for c := parent.FirstChild; c != nil; {
  2088  		next := c.NextSibling
  2089  		parent.RemoveChild(c)
  2090  		result = append(result, c)
  2091  		c = next
  2092  	}
  2093  	return result, nil
  2094  }