github.com/slspeek/camlistore_namedsearch@v0.0.0-20140519202248-ed6f70f7721a/third_party/code.google.com/p/go.net/html/parse.go

github.com/slspeek/camlistore_namedsearch@v0.0.0-20140519202248-ed6f70f7721a/third_party/code.google.com/p/go.net/html/parse.go (about)

     1  // Copyright 2010 The Go Authors. All rights reserved.
     2  // Use of this source code is governed by a BSD-style
     3  // license that can be found in the LICENSE file.
     4  
     5  package html
     6  
     7  import (
     8  	"errors"
     9  	"fmt"
    10  	"io"
    11  	"strings"
    12  
    13  	a "camlistore.org/third_party/code.google.com/p/go.net/html/atom"
    14  )
    15  
    16  // A parser implements the HTML5 parsing algorithm:
    17  // http://www.whatwg.org/specs/web-apps/current-work/multipage/tokenization.html#tree-construction
    18  type parser struct {
    19  	// tokenizer provides the tokens for the parser.
    20  	tokenizer *Tokenizer
    21  	// tok is the most recently read token.
    22  	tok Token
    23  	// Self-closing tags like <hr/> are treated as start tags, except that
    24  	// hasSelfClosingToken is set while they are being processed.
    25  	hasSelfClosingToken bool
    26  	// doc is the document root element.
    27  	doc *Node
    28  	// The stack of open elements (section 12.2.3.2) and active formatting
    29  	// elements (section 12.2.3.3).
    30  	oe, afe nodeStack
    31  	// Element pointers (section 12.2.3.4).
    32  	head, form *Node
    33  	// Other parsing state flags (section 12.2.3.5).
    34  	scripting, framesetOK bool
    35  	// im is the current insertion mode.
    36  	im insertionMode
    37  	// originalIM is the insertion mode to go back to after completing a text
    38  	// or inTableText insertion mode.
    39  	originalIM insertionMode
    40  	// fosterParenting is whether new elements should be inserted according to
    41  	// the foster parenting rules (section 12.2.5.3).
    42  	fosterParenting bool
    43  	// quirks is whether the parser is operating in "quirks mode."
    44  	quirks bool
    45  	// fragment is whether the parser is parsing an HTML fragment.
    46  	fragment bool
    47  	// context is the context element when parsing an HTML fragment
    48  	// (section 12.4).
    49  	context *Node
    50  }
    51  
    52  func (p *parser) top() *Node {
    53  	if n := p.oe.top(); n != nil {
    54  		return n
    55  	}
    56  	return p.doc
    57  }
    58  
    59  // Stop tags for use in popUntil. These come from section 12.2.3.2.
    60  var (
    61  	defaultScopeStopTags = map[string][]a.Atom{
    62  		"":     {a.Applet, a.Caption, a.Html, a.Table, a.Td, a.Th, a.Marquee, a.Object},
    63  		"math": {a.AnnotationXml, a.Mi, a.Mn, a.Mo, a.Ms, a.Mtext},
    64  		"svg":  {a.Desc, a.ForeignObject, a.Title},
    65  	}
    66  )
    67  
    68  type scope int
    69  
    70  const (
    71  	defaultScope scope = iota
    72  	listItemScope
    73  	buttonScope
    74  	tableScope
    75  	tableRowScope
    76  	tableBodyScope
    77  	selectScope
    78  )
    79  
    80  // popUntil pops the stack of open elements at the highest element whose tag
    81  // is in matchTags, provided there is no higher element in the scope's stop
    82  // tags (as defined in section 12.2.3.2). It returns whether or not there was
    83  // such an element. If there was not, popUntil leaves the stack unchanged.
    84  //
    85  // For example, the set of stop tags for table scope is: "html", "table". If
    86  // the stack was:
    87  // ["html", "body", "font", "table", "b", "i", "u"]
    88  // then popUntil(tableScope, "font") would return false, but
    89  // popUntil(tableScope, "i") would return true and the stack would become:
    90  // ["html", "body", "font", "table", "b"]
    91  //
    92  // If an element's tag is in both the stop tags and matchTags, then the stack
    93  // will be popped and the function returns true (provided, of course, there was
    94  // no higher element in the stack that was also in the stop tags). For example,
    95  // popUntil(tableScope, "table") returns true and leaves:
    96  // ["html", "body", "font"]
    97  func (p *parser) popUntil(s scope, matchTags ...a.Atom) bool {
    98  	if i := p.indexOfElementInScope(s, matchTags...); i != -1 {
    99  		p.oe = p.oe[:i]
   100  		return true
   101  	}
   102  	return false
   103  }
   104  
   105  // indexOfElementInScope returns the index in p.oe of the highest element whose
   106  // tag is in matchTags that is in scope. If no matching element is in scope, it
   107  // returns -1.
   108  func (p *parser) indexOfElementInScope(s scope, matchTags ...a.Atom) int {
   109  	for i := len(p.oe) - 1; i >= 0; i-- {
   110  		tagAtom := p.oe[i].DataAtom
   111  		if p.oe[i].Namespace == "" {
   112  			for _, t := range matchTags {
   113  				if t == tagAtom {
   114  					return i
   115  				}
   116  			}
   117  			switch s {
   118  			case defaultScope:
   119  				// No-op.
   120  			case listItemScope:
   121  				if tagAtom == a.Ol || tagAtom == a.Ul {
   122  					return -1
   123  				}
   124  			case buttonScope:
   125  				if tagAtom == a.Button {
   126  					return -1
   127  				}
   128  			case tableScope:
   129  				if tagAtom == a.Html || tagAtom == a.Table {
   130  					return -1
   131  				}
   132  			case selectScope:
   133  				if tagAtom != a.Optgroup && tagAtom != a.Option {
   134  					return -1
   135  				}
   136  			default:
   137  				panic("unreachable")
   138  			}
   139  		}
   140  		switch s {
   141  		case defaultScope, listItemScope, buttonScope:
   142  			for _, t := range defaultScopeStopTags[p.oe[i].Namespace] {
   143  				if t == tagAtom {
   144  					return -1
   145  				}
   146  			}
   147  		}
   148  	}
   149  	return -1
   150  }
   151  
   152  // elementInScope is like popUntil, except that it doesn't modify the stack of
   153  // open elements.
   154  func (p *parser) elementInScope(s scope, matchTags ...a.Atom) bool {
   155  	return p.indexOfElementInScope(s, matchTags...) != -1
   156  }
   157  
   158  // clearStackToContext pops elements off the stack of open elements until a
   159  // scope-defined element is found.
   160  func (p *parser) clearStackToContext(s scope) {
   161  	for i := len(p.oe) - 1; i >= 0; i-- {
   162  		tagAtom := p.oe[i].DataAtom
   163  		switch s {
   164  		case tableScope:
   165  			if tagAtom == a.Html || tagAtom == a.Table {
   166  				p.oe = p.oe[:i+1]
   167  				return
   168  			}
   169  		case tableRowScope:
   170  			if tagAtom == a.Html || tagAtom == a.Tr {
   171  				p.oe = p.oe[:i+1]
   172  				return
   173  			}
   174  		case tableBodyScope:
   175  			if tagAtom == a.Html || tagAtom == a.Tbody || tagAtom == a.Tfoot || tagAtom == a.Thead {
   176  				p.oe = p.oe[:i+1]
   177  				return
   178  			}
   179  		default:
   180  			panic("unreachable")
   181  		}
   182  	}
   183  }
   184  
   185  // generateImpliedEndTags pops nodes off the stack of open elements as long as
   186  // the top node has a tag name of dd, dt, li, option, optgroup, p, rp, or rt.
   187  // If exceptions are specified, nodes with that name will not be popped off.
   188  func (p *parser) generateImpliedEndTags(exceptions ...string) {
   189  	var i int
   190  loop:
   191  	for i = len(p.oe) - 1; i >= 0; i-- {
   192  		n := p.oe[i]
   193  		if n.Type == ElementNode {
   194  			switch n.DataAtom {
   195  			case a.Dd, a.Dt, a.Li, a.Option, a.Optgroup, a.P, a.Rp, a.Rt:
   196  				for _, except := range exceptions {
   197  					if n.Data == except {
   198  						break loop
   199  					}
   200  				}
   201  				continue
   202  			}
   203  		}
   204  		break
   205  	}
   206  
   207  	p.oe = p.oe[:i+1]
   208  }
   209  
   210  // addChild adds a child node n to the top element, and pushes n onto the stack
   211  // of open elements if it is an element node.
   212  func (p *parser) addChild(n *Node) {
   213  	if p.shouldFosterParent() {
   214  		p.fosterParent(n)
   215  	} else {
   216  		p.top().AppendChild(n)
   217  	}
   218  
   219  	if n.Type == ElementNode {
   220  		p.oe = append(p.oe, n)
   221  	}
   222  }
   223  
   224  // shouldFosterParent returns whether the next node to be added should be
   225  // foster parented.
   226  func (p *parser) shouldFosterParent() bool {
   227  	if p.fosterParenting {
   228  		switch p.top().DataAtom {
   229  		case a.Table, a.Tbody, a.Tfoot, a.Thead, a.Tr:
   230  			return true
   231  		}
   232  	}
   233  	return false
   234  }
   235  
   236  // fosterParent adds a child node according to the foster parenting rules.
   237  // Section 12.2.5.3, "foster parenting".
   238  func (p *parser) fosterParent(n *Node) {
   239  	var table, parent, prev *Node
   240  	var i int
   241  	for i = len(p.oe) - 1; i >= 0; i-- {
   242  		if p.oe[i].DataAtom == a.Table {
   243  			table = p.oe[i]
   244  			break
   245  		}
   246  	}
   247  
   248  	if table == nil {
   249  		// The foster parent is the html element.
   250  		parent = p.oe[0]
   251  	} else {
   252  		parent = table.Parent
   253  	}
   254  	if parent == nil {
   255  		parent = p.oe[i-1]
   256  	}
   257  
   258  	if table != nil {
   259  		prev = table.PrevSibling
   260  	} else {
   261  		prev = parent.LastChild
   262  	}
   263  	if prev != nil && prev.Type == TextNode && n.Type == TextNode {
   264  		prev.Data += n.Data
   265  		return
   266  	}
   267  
   268  	parent.InsertBefore(n, table)
   269  }
   270  
   271  // addText adds text to the preceding node if it is a text node, or else it
   272  // calls addChild with a new text node.
   273  func (p *parser) addText(text string) {
   274  	if text == "" {
   275  		return
   276  	}
   277  
   278  	if p.shouldFosterParent() {
   279  		p.fosterParent(&Node{
   280  			Type: TextNode,
   281  			Data: text,
   282  		})
   283  		return
   284  	}
   285  
   286  	t := p.top()
   287  	if n := t.LastChild; n != nil && n.Type == TextNode {
   288  		n.Data += text
   289  		return
   290  	}
   291  	p.addChild(&Node{
   292  		Type: TextNode,
   293  		Data: text,
   294  	})
   295  }
   296  
   297  // addElement adds a child element based on the current token.
   298  func (p *parser) addElement() {
   299  	p.addChild(&Node{
   300  		Type:     ElementNode,
   301  		DataAtom: p.tok.DataAtom,
   302  		Data:     p.tok.Data,
   303  		Attr:     p.tok.Attr,
   304  	})
   305  }
   306  
   307  // Section 12.2.3.3.
   308  func (p *parser) addFormattingElement() {
   309  	tagAtom, attr := p.tok.DataAtom, p.tok.Attr
   310  	p.addElement()
   311  
   312  	// Implement the Noah's Ark clause, but with three per family instead of two.
   313  	identicalElements := 0
   314  findIdenticalElements:
   315  	for i := len(p.afe) - 1; i >= 0; i-- {
   316  		n := p.afe[i]
   317  		if n.Type == scopeMarkerNode {
   318  			break
   319  		}
   320  		if n.Type != ElementNode {
   321  			continue
   322  		}
   323  		if n.Namespace != "" {
   324  			continue
   325  		}
   326  		if n.DataAtom != tagAtom {
   327  			continue
   328  		}
   329  		if len(n.Attr) != len(attr) {
   330  			continue
   331  		}
   332  	compareAttributes:
   333  		for _, t0 := range n.Attr {
   334  			for _, t1 := range attr {
   335  				if t0.Key == t1.Key && t0.Namespace == t1.Namespace && t0.Val == t1.Val {
   336  					// Found a match for this attribute, continue with the next attribute.
   337  					continue compareAttributes
   338  				}
   339  			}
   340  			// If we get here, there is no attribute that matches a.
   341  			// Therefore the element is not identical to the new one.
   342  			continue findIdenticalElements
   343  		}
   344  
   345  		identicalElements++
   346  		if identicalElements >= 3 {
   347  			p.afe.remove(n)
   348  		}
   349  	}
   350  
   351  	p.afe = append(p.afe, p.top())
   352  }
   353  
   354  // Section 12.2.3.3.
   355  func (p *parser) clearActiveFormattingElements() {
   356  	for {
   357  		n := p.afe.pop()
   358  		if len(p.afe) == 0 || n.Type == scopeMarkerNode {
   359  			return
   360  		}
   361  	}
   362  }
   363  
   364  // Section 12.2.3.3.
   365  func (p *parser) reconstructActiveFormattingElements() {
   366  	n := p.afe.top()
   367  	if n == nil {
   368  		return
   369  	}
   370  	if n.Type == scopeMarkerNode || p.oe.index(n) != -1 {
   371  		return
   372  	}
   373  	i := len(p.afe) - 1
   374  	for n.Type != scopeMarkerNode && p.oe.index(n) == -1 {
   375  		if i == 0 {
   376  			i = -1
   377  			break
   378  		}
   379  		i--
   380  		n = p.afe[i]
   381  	}
   382  	for {
   383  		i++
   384  		clone := p.afe[i].clone()
   385  		p.addChild(clone)
   386  		p.afe[i] = clone
   387  		if i == len(p.afe)-1 {
   388  			break
   389  		}
   390  	}
   391  }
   392  
// acknowledgeSelfClosingTag clears hasSelfClosingToken, the flag that is set
// while a self-closing tag is being processed.
// Section 12.2.4.
func (p *parser) acknowledgeSelfClosingTag() {
	p.hasSelfClosingToken = false
}
   397  
   398  // An insertion mode (section 12.2.3.1) is the state transition function from
   399  // a particular state in the HTML5 parser's state machine. It updates the
   400  // parser's fields depending on parser.tok (where ErrorToken means EOF).
   401  // It returns whether the token was consumed.
   402  type insertionMode func(*parser) bool
   403  
   404  // setOriginalIM sets the insertion mode to return to after completing a text or
   405  // inTableText insertion mode.
   406  // Section 12.2.3.1, "using the rules for".
   407  func (p *parser) setOriginalIM() {
   408  	if p.originalIM != nil {
   409  		panic("html: bad parser state: originalIM was set twice")
   410  	}
   411  	p.originalIM = p.im
   412  }
   413  
   414  // Section 12.2.3.1, "reset the insertion mode".
   415  func (p *parser) resetInsertionMode() {
   416  	for i := len(p.oe) - 1; i >= 0; i-- {
   417  		n := p.oe[i]
   418  		if i == 0 && p.context != nil {
   419  			n = p.context
   420  		}
   421  
   422  		switch n.DataAtom {
   423  		case a.Select:
   424  			p.im = inSelectIM
   425  		case a.Td, a.Th:
   426  			p.im = inCellIM
   427  		case a.Tr:
   428  			p.im = inRowIM
   429  		case a.Tbody, a.Thead, a.Tfoot:
   430  			p.im = inTableBodyIM
   431  		case a.Caption:
   432  			p.im = inCaptionIM
   433  		case a.Colgroup:
   434  			p.im = inColumnGroupIM
   435  		case a.Table:
   436  			p.im = inTableIM
   437  		case a.Head:
   438  			p.im = inBodyIM
   439  		case a.Body:
   440  			p.im = inBodyIM
   441  		case a.Frameset:
   442  			p.im = inFramesetIM
   443  		case a.Html:
   444  			p.im = beforeHeadIM
   445  		default:
   446  			continue
   447  		}
   448  		return
   449  	}
   450  	p.im = inBodyIM
   451  }
   452  
// whitespace is the cutset of characters (space, tab, carriage return,
// newline and form feed) that the insertion modes trim from the start of
// text tokens.
const whitespace = " \t\r\n\f"
   454  
   455  // Section 12.2.5.4.1.
   456  func initialIM(p *parser) bool {
   457  	switch p.tok.Type {
   458  	case TextToken:
   459  		p.tok.Data = strings.TrimLeft(p.tok.Data, whitespace)
   460  		if len(p.tok.Data) == 0 {
   461  			// It was all whitespace, so ignore it.
   462  			return true
   463  		}
   464  	case CommentToken:
   465  		p.doc.AppendChild(&Node{
   466  			Type: CommentNode,
   467  			Data: p.tok.Data,
   468  		})
   469  		return true
   470  	case DoctypeToken:
   471  		n, quirks := parseDoctype(p.tok.Data)
   472  		p.doc.AppendChild(n)
   473  		p.quirks = quirks
   474  		p.im = beforeHTMLIM
   475  		return true
   476  	}
   477  	p.quirks = true
   478  	p.im = beforeHTMLIM
   479  	return false
   480  }
   481  
   482  // Section 12.2.5.4.2.
   483  func beforeHTMLIM(p *parser) bool {
   484  	switch p.tok.Type {
   485  	case DoctypeToken:
   486  		// Ignore the token.
   487  		return true
   488  	case TextToken:
   489  		p.tok.Data = strings.TrimLeft(p.tok.Data, whitespace)
   490  		if len(p.tok.Data) == 0 {
   491  			// It was all whitespace, so ignore it.
   492  			return true
   493  		}
   494  	case StartTagToken:
   495  		if p.tok.DataAtom == a.Html {
   496  			p.addElement()
   497  			p.im = beforeHeadIM
   498  			return true
   499  		}
   500  	case EndTagToken:
   501  		switch p.tok.DataAtom {
   502  		case a.Head, a.Body, a.Html, a.Br:
   503  			p.parseImpliedToken(StartTagToken, a.Html, a.Html.String())
   504  			return false
   505  		default:
   506  			// Ignore the token.
   507  			return true
   508  		}
   509  	case CommentToken:
   510  		p.doc.AppendChild(&Node{
   511  			Type: CommentNode,
   512  			Data: p.tok.Data,
   513  		})
   514  		return true
   515  	}
   516  	p.parseImpliedToken(StartTagToken, a.Html, a.Html.String())
   517  	return false
   518  }
   519  
   520  // Section 12.2.5.4.3.
   521  func beforeHeadIM(p *parser) bool {
   522  	switch p.tok.Type {
   523  	case TextToken:
   524  		p.tok.Data = strings.TrimLeft(p.tok.Data, whitespace)
   525  		if len(p.tok.Data) == 0 {
   526  			// It was all whitespace, so ignore it.
   527  			return true
   528  		}
   529  	case StartTagToken:
   530  		switch p.tok.DataAtom {
   531  		case a.Head:
   532  			p.addElement()
   533  			p.head = p.top()
   534  			p.im = inHeadIM
   535  			return true
   536  		case a.Html:
   537  			return inBodyIM(p)
   538  		}
   539  	case EndTagToken:
   540  		switch p.tok.DataAtom {
   541  		case a.Head, a.Body, a.Html, a.Br:
   542  			p.parseImpliedToken(StartTagToken, a.Head, a.Head.String())
   543  			return false
   544  		default:
   545  			// Ignore the token.
   546  			return true
   547  		}
   548  	case CommentToken:
   549  		p.addChild(&Node{
   550  			Type: CommentNode,
   551  			Data: p.tok.Data,
   552  		})
   553  		return true
   554  	case DoctypeToken:
   555  		// Ignore the token.
   556  		return true
   557  	}
   558  
   559  	p.parseImpliedToken(StartTagToken, a.Head, a.Head.String())
   560  	return false
   561  }
   562  
   563  // Section 12.2.5.4.4.
   564  func inHeadIM(p *parser) bool {
   565  	switch p.tok.Type {
   566  	case TextToken:
   567  		s := strings.TrimLeft(p.tok.Data, whitespace)
   568  		if len(s) < len(p.tok.Data) {
   569  			// Add the initial whitespace to the current node.
   570  			p.addText(p.tok.Data[:len(p.tok.Data)-len(s)])
   571  			if s == "" {
   572  				return true
   573  			}
   574  			p.tok.Data = s
   575  		}
   576  	case StartTagToken:
   577  		switch p.tok.DataAtom {
   578  		case a.Html:
   579  			return inBodyIM(p)
   580  		case a.Base, a.Basefont, a.Bgsound, a.Command, a.Link, a.Meta:
   581  			p.addElement()
   582  			p.oe.pop()
   583  			p.acknowledgeSelfClosingTag()
   584  			return true
   585  		case a.Script, a.Title, a.Noscript, a.Noframes, a.Style:
   586  			p.addElement()
   587  			p.setOriginalIM()
   588  			p.im = textIM
   589  			return true
   590  		case a.Head:
   591  			// Ignore the token.
   592  			return true
   593  		}
   594  	case EndTagToken:
   595  		switch p.tok.DataAtom {
   596  		case a.Head:
   597  			n := p.oe.pop()
   598  			if n.DataAtom != a.Head {
   599  				panic("html: bad parser state: <head> element not found, in the in-head insertion mode")
   600  			}
   601  			p.im = afterHeadIM
   602  			return true
   603  		case a.Body, a.Html, a.Br:
   604  			p.parseImpliedToken(EndTagToken, a.Head, a.Head.String())
   605  			return false
   606  		default:
   607  			// Ignore the token.
   608  			return true
   609  		}
   610  	case CommentToken:
   611  		p.addChild(&Node{
   612  			Type: CommentNode,
   613  			Data: p.tok.Data,
   614  		})
   615  		return true
   616  	case DoctypeToken:
   617  		// Ignore the token.
   618  		return true
   619  	}
   620  
   621  	p.parseImpliedToken(EndTagToken, a.Head, a.Head.String())
   622  	return false
   623  }
   624  
   625  // Section 12.2.5.4.6.
   626  func afterHeadIM(p *parser) bool {
   627  	switch p.tok.Type {
   628  	case TextToken:
   629  		s := strings.TrimLeft(p.tok.Data, whitespace)
   630  		if len(s) < len(p.tok.Data) {
   631  			// Add the initial whitespace to the current node.
   632  			p.addText(p.tok.Data[:len(p.tok.Data)-len(s)])
   633  			if s == "" {
   634  				return true
   635  			}
   636  			p.tok.Data = s
   637  		}
   638  	case StartTagToken:
   639  		switch p.tok.DataAtom {
   640  		case a.Html:
   641  			return inBodyIM(p)
   642  		case a.Body:
   643  			p.addElement()
   644  			p.framesetOK = false
   645  			p.im = inBodyIM
   646  			return true
   647  		case a.Frameset:
   648  			p.addElement()
   649  			p.im = inFramesetIM
   650  			return true
   651  		case a.Base, a.Basefont, a.Bgsound, a.Link, a.Meta, a.Noframes, a.Script, a.Style, a.Title:
   652  			p.oe = append(p.oe, p.head)
   653  			defer p.oe.remove(p.head)
   654  			return inHeadIM(p)
   655  		case a.Head:
   656  			// Ignore the token.
   657  			return true
   658  		}
   659  	case EndTagToken:
   660  		switch p.tok.DataAtom {
   661  		case a.Body, a.Html, a.Br:
   662  			// Drop down to creating an implied <body> tag.
   663  		default:
   664  			// Ignore the token.
   665  			return true
   666  		}
   667  	case CommentToken:
   668  		p.addChild(&Node{
   669  			Type: CommentNode,
   670  			Data: p.tok.Data,
   671  		})
   672  		return true
   673  	case DoctypeToken:
   674  		// Ignore the token.
   675  		return true
   676  	}
   677  
   678  	p.parseImpliedToken(StartTagToken, a.Body, a.Body.String())
   679  	p.framesetOK = true
   680  	return false
   681  }
   682  
   683  // copyAttributes copies attributes of src not found on dst to dst.
   684  func copyAttributes(dst *Node, src Token) {
   685  	if len(src.Attr) == 0 {
   686  		return
   687  	}
   688  	attr := map[string]string{}
   689  	for _, t := range dst.Attr {
   690  		attr[t.Key] = t.Val
   691  	}
   692  	for _, t := range src.Attr {
   693  		if _, ok := attr[t.Key]; !ok {
   694  			dst.Attr = append(dst.Attr, t)
   695  			attr[t.Key] = t.Val
   696  		}
   697  	}
   698  }
   699  
   700  // Section 12.2.5.4.7.
   701  func inBodyIM(p *parser) bool {
   702  	switch p.tok.Type {
   703  	case TextToken:
   704  		d := p.tok.Data
   705  		switch n := p.oe.top(); n.DataAtom {
   706  		case a.Pre, a.Listing:
   707  			if n.FirstChild == nil {
   708  				// Ignore a newline at the start of a <pre> block.
   709  				if d != "" && d[0] == '\r' {
   710  					d = d[1:]
   711  				}
   712  				if d != "" && d[0] == '\n' {
   713  					d = d[1:]
   714  				}
   715  			}
   716  		}
   717  		d = strings.Replace(d, "\x00", "", -1)
   718  		if d == "" {
   719  			return true
   720  		}
   721  		p.reconstructActiveFormattingElements()
   722  		p.addText(d)
   723  		if p.framesetOK && strings.TrimLeft(d, whitespace) != "" {
   724  			// There were non-whitespace characters inserted.
   725  			p.framesetOK = false
   726  		}
   727  	case StartTagToken:
   728  		switch p.tok.DataAtom {
   729  		case a.Html:
   730  			copyAttributes(p.oe[0], p.tok)
   731  		case a.Base, a.Basefont, a.Bgsound, a.Command, a.Link, a.Meta, a.Noframes, a.Script, a.Style, a.Title:
   732  			return inHeadIM(p)
   733  		case a.Body:
   734  			if len(p.oe) >= 2 {
   735  				body := p.oe[1]
   736  				if body.Type == ElementNode && body.DataAtom == a.Body {
   737  					p.framesetOK = false
   738  					copyAttributes(body, p.tok)
   739  				}
   740  			}
   741  		case a.Frameset:
   742  			if !p.framesetOK || len(p.oe) < 2 || p.oe[1].DataAtom != a.Body {
   743  				// Ignore the token.
   744  				return true
   745  			}
   746  			body := p.oe[1]
   747  			if body.Parent != nil {
   748  				body.Parent.RemoveChild(body)
   749  			}
   750  			p.oe = p.oe[:1]
   751  			p.addElement()
   752  			p.im = inFramesetIM
   753  			return true
   754  		case a.Address, a.Article, a.Aside, a.Blockquote, a.Center, a.Details, a.Dir, a.Div, a.Dl, a.Fieldset, a.Figcaption, a.Figure, a.Footer, a.Header, a.Hgroup, a.Menu, a.Nav, a.Ol, a.P, a.Section, a.Summary, a.Ul:
   755  			p.popUntil(buttonScope, a.P)
   756  			p.addElement()
   757  		case a.H1, a.H2, a.H3, a.H4, a.H5, a.H6:
   758  			p.popUntil(buttonScope, a.P)
   759  			switch n := p.top(); n.DataAtom {
   760  			case a.H1, a.H2, a.H3, a.H4, a.H5, a.H6:
   761  				p.oe.pop()
   762  			}
   763  			p.addElement()
   764  		case a.Pre, a.Listing:
   765  			p.popUntil(buttonScope, a.P)
   766  			p.addElement()
   767  			// The newline, if any, will be dealt with by the TextToken case.
   768  			p.framesetOK = false
   769  		case a.Form:
   770  			if p.form == nil {
   771  				p.popUntil(buttonScope, a.P)
   772  				p.addElement()
   773  				p.form = p.top()
   774  			}
   775  		case a.Li:
   776  			p.framesetOK = false
   777  			for i := len(p.oe) - 1; i >= 0; i-- {
   778  				node := p.oe[i]
   779  				switch node.DataAtom {
   780  				case a.Li:
   781  					p.oe = p.oe[:i]
   782  				case a.Address, a.Div, a.P:
   783  					continue
   784  				default:
   785  					if !isSpecialElement(node) {
   786  						continue
   787  					}
   788  				}
   789  				break
   790  			}
   791  			p.popUntil(buttonScope, a.P)
   792  			p.addElement()
   793  		case a.Dd, a.Dt:
   794  			p.framesetOK = false
   795  			for i := len(p.oe) - 1; i >= 0; i-- {
   796  				node := p.oe[i]
   797  				switch node.DataAtom {
   798  				case a.Dd, a.Dt:
   799  					p.oe = p.oe[:i]
   800  				case a.Address, a.Div, a.P:
   801  					continue
   802  				default:
   803  					if !isSpecialElement(node) {
   804  						continue
   805  					}
   806  				}
   807  				break
   808  			}
   809  			p.popUntil(buttonScope, a.P)
   810  			p.addElement()
   811  		case a.Plaintext:
   812  			p.popUntil(buttonScope, a.P)
   813  			p.addElement()
   814  		case a.Button:
   815  			p.popUntil(defaultScope, a.Button)
   816  			p.reconstructActiveFormattingElements()
   817  			p.addElement()
   818  			p.framesetOK = false
   819  		case a.A:
   820  			for i := len(p.afe) - 1; i >= 0 && p.afe[i].Type != scopeMarkerNode; i-- {
   821  				if n := p.afe[i]; n.Type == ElementNode && n.DataAtom == a.A {
   822  					p.inBodyEndTagFormatting(a.A)
   823  					p.oe.remove(n)
   824  					p.afe.remove(n)
   825  					break
   826  				}
   827  			}
   828  			p.reconstructActiveFormattingElements()
   829  			p.addFormattingElement()
   830  		case a.B, a.Big, a.Code, a.Em, a.Font, a.I, a.S, a.Small, a.Strike, a.Strong, a.Tt, a.U:
   831  			p.reconstructActiveFormattingElements()
   832  			p.addFormattingElement()
   833  		case a.Nobr:
   834  			p.reconstructActiveFormattingElements()
   835  			if p.elementInScope(defaultScope, a.Nobr) {
   836  				p.inBodyEndTagFormatting(a.Nobr)
   837  				p.reconstructActiveFormattingElements()
   838  			}
   839  			p.addFormattingElement()
   840  		case a.Applet, a.Marquee, a.Object:
   841  			p.reconstructActiveFormattingElements()
   842  			p.addElement()
   843  			p.afe = append(p.afe, &scopeMarker)
   844  			p.framesetOK = false
   845  		case a.Table:
   846  			if !p.quirks {
   847  				p.popUntil(buttonScope, a.P)
   848  			}
   849  			p.addElement()
   850  			p.framesetOK = false
   851  			p.im = inTableIM
   852  			return true
   853  		case a.Area, a.Br, a.Embed, a.Img, a.Input, a.Keygen, a.Wbr:
   854  			p.reconstructActiveFormattingElements()
   855  			p.addElement()
   856  			p.oe.pop()
   857  			p.acknowledgeSelfClosingTag()
   858  			if p.tok.DataAtom == a.Input {
   859  				for _, t := range p.tok.Attr {
   860  					if t.Key == "type" {
   861  						if strings.ToLower(t.Val) == "hidden" {
   862  							// Skip setting framesetOK = false
   863  							return true
   864  						}
   865  					}
   866  				}
   867  			}
   868  			p.framesetOK = false
   869  		case a.Param, a.Source, a.Track:
   870  			p.addElement()
   871  			p.oe.pop()
   872  			p.acknowledgeSelfClosingTag()
   873  		case a.Hr:
   874  			p.popUntil(buttonScope, a.P)
   875  			p.addElement()
   876  			p.oe.pop()
   877  			p.acknowledgeSelfClosingTag()
   878  			p.framesetOK = false
   879  		case a.Image:
   880  			p.tok.DataAtom = a.Img
   881  			p.tok.Data = a.Img.String()
   882  			return false
   883  		case a.Isindex:
   884  			if p.form != nil {
   885  				// Ignore the token.
   886  				return true
   887  			}
   888  			action := ""
   889  			prompt := "This is a searchable index. Enter search keywords: "
   890  			attr := []Attribute{{Key: "name", Val: "isindex"}}
   891  			for _, t := range p.tok.Attr {
   892  				switch t.Key {
   893  				case "action":
   894  					action = t.Val
   895  				case "name":
   896  					// Ignore the attribute.
   897  				case "prompt":
   898  					prompt = t.Val
   899  				default:
   900  					attr = append(attr, t)
   901  				}
   902  			}
   903  			p.acknowledgeSelfClosingTag()
   904  			p.popUntil(buttonScope, a.P)
   905  			p.parseImpliedToken(StartTagToken, a.Form, a.Form.String())
   906  			if action != "" {
   907  				p.form.Attr = []Attribute{{Key: "action", Val: action}}
   908  			}
   909  			p.parseImpliedToken(StartTagToken, a.Hr, a.Hr.String())
   910  			p.parseImpliedToken(StartTagToken, a.Label, a.Label.String())
   911  			p.addText(prompt)
   912  			p.addChild(&Node{
   913  				Type:     ElementNode,
   914  				DataAtom: a.Input,
   915  				Data:     a.Input.String(),
   916  				Attr:     attr,
   917  			})
   918  			p.oe.pop()
   919  			p.parseImpliedToken(EndTagToken, a.Label, a.Label.String())
   920  			p.parseImpliedToken(StartTagToken, a.Hr, a.Hr.String())
   921  			p.parseImpliedToken(EndTagToken, a.Form, a.Form.String())
   922  		case a.Textarea:
   923  			p.addElement()
   924  			p.setOriginalIM()
   925  			p.framesetOK = false
   926  			p.im = textIM
   927  		case a.Xmp:
   928  			p.popUntil(buttonScope, a.P)
   929  			p.reconstructActiveFormattingElements()
   930  			p.framesetOK = false
   931  			p.addElement()
   932  			p.setOriginalIM()
   933  			p.im = textIM
   934  		case a.Iframe:
   935  			p.framesetOK = false
   936  			p.addElement()
   937  			p.setOriginalIM()
   938  			p.im = textIM
   939  		case a.Noembed, a.Noscript:
   940  			p.addElement()
   941  			p.setOriginalIM()
   942  			p.im = textIM
   943  		case a.Select:
   944  			p.reconstructActiveFormattingElements()
   945  			p.addElement()
   946  			p.framesetOK = false
   947  			p.im = inSelectIM
   948  			return true
   949  		case a.Optgroup, a.Option:
   950  			if p.top().DataAtom == a.Option {
   951  				p.oe.pop()
   952  			}
   953  			p.reconstructActiveFormattingElements()
   954  			p.addElement()
   955  		case a.Rp, a.Rt:
   956  			if p.elementInScope(defaultScope, a.Ruby) {
   957  				p.generateImpliedEndTags()
   958  			}
   959  			p.addElement()
   960  		case a.Math, a.Svg:
   961  			p.reconstructActiveFormattingElements()
   962  			if p.tok.DataAtom == a.Math {
   963  				adjustAttributeNames(p.tok.Attr, mathMLAttributeAdjustments)
   964  			} else {
   965  				adjustAttributeNames(p.tok.Attr, svgAttributeAdjustments)
   966  			}
   967  			adjustForeignAttributes(p.tok.Attr)
   968  			p.addElement()
   969  			p.top().Namespace = p.tok.Data
   970  			if p.hasSelfClosingToken {
   971  				p.oe.pop()
   972  				p.acknowledgeSelfClosingTag()
   973  			}
   974  			return true
   975  		case a.Caption, a.Col, a.Colgroup, a.Frame, a.Head, a.Tbody, a.Td, a.Tfoot, a.Th, a.Thead, a.Tr:
   976  			// Ignore the token.
   977  		default:
   978  			p.reconstructActiveFormattingElements()
   979  			p.addElement()
   980  		}
   981  	case EndTagToken:
   982  		switch p.tok.DataAtom {
   983  		case a.Body:
   984  			if p.elementInScope(defaultScope, a.Body) {
   985  				p.im = afterBodyIM
   986  			}
   987  		case a.Html:
   988  			if p.elementInScope(defaultScope, a.Body) {
   989  				p.parseImpliedToken(EndTagToken, a.Body, a.Body.String())
   990  				return false
   991  			}
   992  			return true
   993  		case a.Address, a.Article, a.Aside, a.Blockquote, a.Button, a.Center, a.Details, a.Dir, a.Div, a.Dl, a.Fieldset, a.Figcaption, a.Figure, a.Footer, a.Header, a.Hgroup, a.Listing, a.Menu, a.Nav, a.Ol, a.Pre, a.Section, a.Summary, a.Ul:
   994  			p.popUntil(defaultScope, p.tok.DataAtom)
   995  		case a.Form:
   996  			node := p.form
   997  			p.form = nil
   998  			i := p.indexOfElementInScope(defaultScope, a.Form)
   999  			if node == nil || i == -1 || p.oe[i] != node {
  1000  				// Ignore the token.
  1001  				return true
  1002  			}
  1003  			p.generateImpliedEndTags()
  1004  			p.oe.remove(node)
  1005  		case a.P:
  1006  			if !p.elementInScope(buttonScope, a.P) {
  1007  				p.parseImpliedToken(StartTagToken, a.P, a.P.String())
  1008  			}
  1009  			p.popUntil(buttonScope, a.P)
  1010  		case a.Li:
  1011  			p.popUntil(listItemScope, a.Li)
  1012  		case a.Dd, a.Dt:
  1013  			p.popUntil(defaultScope, p.tok.DataAtom)
  1014  		case a.H1, a.H2, a.H3, a.H4, a.H5, a.H6:
  1015  			p.popUntil(defaultScope, a.H1, a.H2, a.H3, a.H4, a.H5, a.H6)
  1016  		case a.A, a.B, a.Big, a.Code, a.Em, a.Font, a.I, a.Nobr, a.S, a.Small, a.Strike, a.Strong, a.Tt, a.U:
  1017  			p.inBodyEndTagFormatting(p.tok.DataAtom)
  1018  		case a.Applet, a.Marquee, a.Object:
  1019  			if p.popUntil(defaultScope, p.tok.DataAtom) {
  1020  				p.clearActiveFormattingElements()
  1021  			}
  1022  		case a.Br:
  1023  			p.tok.Type = StartTagToken
  1024  			return false
  1025  		default:
  1026  			p.inBodyEndTagOther(p.tok.DataAtom)
  1027  		}
  1028  	case CommentToken:
  1029  		p.addChild(&Node{
  1030  			Type: CommentNode,
  1031  			Data: p.tok.Data,
  1032  		})
  1033  	}
  1034  
  1035  	return true
  1036  }
  1037  
  1038  func (p *parser) inBodyEndTagFormatting(tagAtom a.Atom) {
  1039  	// This is the "adoption agency" algorithm, described at
  1040  	// http://www.whatwg.org/specs/web-apps/current-work/multipage/tokenization.html#adoptionAgency
  1041  
  1042  	// TODO: this is a fairly literal line-by-line translation of that algorithm.
  1043  	// Once the code successfully parses the comprehensive test suite, we should
  1044  	// refactor this code to be more idiomatic.
  1045  
  1046  	// Steps 1-3. The outer loop.
  1047  	for i := 0; i < 8; i++ {
  1048  		// Step 4. Find the formatting element.
  1049  		var formattingElement *Node
  1050  		for j := len(p.afe) - 1; j >= 0; j-- {
  1051  			if p.afe[j].Type == scopeMarkerNode {
  1052  				break
  1053  			}
  1054  			if p.afe[j].DataAtom == tagAtom {
  1055  				formattingElement = p.afe[j]
  1056  				break
  1057  			}
  1058  		}
  1059  		if formattingElement == nil {
  1060  			p.inBodyEndTagOther(tagAtom)
  1061  			return
  1062  		}
  1063  		feIndex := p.oe.index(formattingElement)
  1064  		if feIndex == -1 {
  1065  			p.afe.remove(formattingElement)
  1066  			return
  1067  		}
  1068  		if !p.elementInScope(defaultScope, tagAtom) {
  1069  			// Ignore the tag.
  1070  			return
  1071  		}
  1072  
  1073  		// Steps 5-6. Find the furthest block.
  1074  		var furthestBlock *Node
  1075  		for _, e := range p.oe[feIndex:] {
  1076  			if isSpecialElement(e) {
  1077  				furthestBlock = e
  1078  				break
  1079  			}
  1080  		}
  1081  		if furthestBlock == nil {
  1082  			e := p.oe.pop()
  1083  			for e != formattingElement {
  1084  				e = p.oe.pop()
  1085  			}
  1086  			p.afe.remove(e)
  1087  			return
  1088  		}
  1089  
  1090  		// Steps 7-8. Find the common ancestor and bookmark node.
  1091  		commonAncestor := p.oe[feIndex-1]
  1092  		bookmark := p.afe.index(formattingElement)
  1093  
  1094  		// Step 9. The inner loop. Find the lastNode to reparent.
  1095  		lastNode := furthestBlock
  1096  		node := furthestBlock
  1097  		x := p.oe.index(node)
  1098  		// Steps 9.1-9.3.
  1099  		for j := 0; j < 3; j++ {
  1100  			// Step 9.4.
  1101  			x--
  1102  			node = p.oe[x]
  1103  			// Step 9.5.
  1104  			if p.afe.index(node) == -1 {
  1105  				p.oe.remove(node)
  1106  				continue
  1107  			}
  1108  			// Step 9.6.
  1109  			if node == formattingElement {
  1110  				break
  1111  			}
  1112  			// Step 9.7.
  1113  			clone := node.clone()
  1114  			p.afe[p.afe.index(node)] = clone
  1115  			p.oe[p.oe.index(node)] = clone
  1116  			node = clone
  1117  			// Step 9.8.
  1118  			if lastNode == furthestBlock {
  1119  				bookmark = p.afe.index(node) + 1
  1120  			}
  1121  			// Step 9.9.
  1122  			if lastNode.Parent != nil {
  1123  				lastNode.Parent.RemoveChild(lastNode)
  1124  			}
  1125  			node.AppendChild(lastNode)
  1126  			// Step 9.10.
  1127  			lastNode = node
  1128  		}
  1129  
  1130  		// Step 10. Reparent lastNode to the common ancestor,
  1131  		// or for misnested table nodes, to the foster parent.
  1132  		if lastNode.Parent != nil {
  1133  			lastNode.Parent.RemoveChild(lastNode)
  1134  		}
  1135  		switch commonAncestor.DataAtom {
  1136  		case a.Table, a.Tbody, a.Tfoot, a.Thead, a.Tr:
  1137  			p.fosterParent(lastNode)
  1138  		default:
  1139  			commonAncestor.AppendChild(lastNode)
  1140  		}
  1141  
  1142  		// Steps 11-13. Reparent nodes from the furthest block's children
  1143  		// to a clone of the formatting element.
  1144  		clone := formattingElement.clone()
  1145  		reparentChildren(clone, furthestBlock)
  1146  		furthestBlock.AppendChild(clone)
  1147  
  1148  		// Step 14. Fix up the list of active formatting elements.
  1149  		if oldLoc := p.afe.index(formattingElement); oldLoc != -1 && oldLoc < bookmark {
  1150  			// Move the bookmark with the rest of the list.
  1151  			bookmark--
  1152  		}
  1153  		p.afe.remove(formattingElement)
  1154  		p.afe.insert(bookmark, clone)
  1155  
  1156  		// Step 15. Fix up the stack of open elements.
  1157  		p.oe.remove(formattingElement)
  1158  		p.oe.insert(p.oe.index(furthestBlock)+1, clone)
  1159  	}
  1160  }
  1161  
  1162  // inBodyEndTagOther performs the "any other end tag" algorithm for inBodyIM.
  1163  func (p *parser) inBodyEndTagOther(tagAtom a.Atom) {
  1164  	for i := len(p.oe) - 1; i >= 0; i-- {
  1165  		if p.oe[i].DataAtom == tagAtom {
  1166  			p.oe = p.oe[:i]
  1167  			break
  1168  		}
  1169  		if isSpecialElement(p.oe[i]) {
  1170  			break
  1171  		}
  1172  	}
  1173  }
  1174  
  1175  // Section 12.2.5.4.8.
  1176  func textIM(p *parser) bool {
  1177  	switch p.tok.Type {
  1178  	case ErrorToken:
  1179  		p.oe.pop()
  1180  	case TextToken:
  1181  		d := p.tok.Data
  1182  		if n := p.oe.top(); n.DataAtom == a.Textarea && n.FirstChild == nil {
  1183  			// Ignore a newline at the start of a <textarea> block.
  1184  			if d != "" && d[0] == '\r' {
  1185  				d = d[1:]
  1186  			}
  1187  			if d != "" && d[0] == '\n' {
  1188  				d = d[1:]
  1189  			}
  1190  		}
  1191  		if d == "" {
  1192  			return true
  1193  		}
  1194  		p.addText(d)
  1195  		return true
  1196  	case EndTagToken:
  1197  		p.oe.pop()
  1198  	}
  1199  	p.im = p.originalIM
  1200  	p.originalIM = nil
  1201  	return p.tok.Type == EndTagToken
  1202  }
  1203  
  1204  // Section 12.2.5.4.9.
  1205  func inTableIM(p *parser) bool {
  1206  	switch p.tok.Type {
  1207  	case ErrorToken:
  1208  		// Stop parsing.
  1209  		return true
  1210  	case TextToken:
  1211  		p.tok.Data = strings.Replace(p.tok.Data, "\x00", "", -1)
  1212  		switch p.oe.top().DataAtom {
  1213  		case a.Table, a.Tbody, a.Tfoot, a.Thead, a.Tr:
  1214  			if strings.Trim(p.tok.Data, whitespace) == "" {
  1215  				p.addText(p.tok.Data)
  1216  				return true
  1217  			}
  1218  		}
  1219  	case StartTagToken:
  1220  		switch p.tok.DataAtom {
  1221  		case a.Caption:
  1222  			p.clearStackToContext(tableScope)
  1223  			p.afe = append(p.afe, &scopeMarker)
  1224  			p.addElement()
  1225  			p.im = inCaptionIM
  1226  			return true
  1227  		case a.Colgroup:
  1228  			p.clearStackToContext(tableScope)
  1229  			p.addElement()
  1230  			p.im = inColumnGroupIM
  1231  			return true
  1232  		case a.Col:
  1233  			p.parseImpliedToken(StartTagToken, a.Colgroup, a.Colgroup.String())
  1234  			return false
  1235  		case a.Tbody, a.Tfoot, a.Thead:
  1236  			p.clearStackToContext(tableScope)
  1237  			p.addElement()
  1238  			p.im = inTableBodyIM
  1239  			return true
  1240  		case a.Td, a.Th, a.Tr:
  1241  			p.parseImpliedToken(StartTagToken, a.Tbody, a.Tbody.String())
  1242  			return false
  1243  		case a.Table:
  1244  			if p.popUntil(tableScope, a.Table) {
  1245  				p.resetInsertionMode()
  1246  				return false
  1247  			}
  1248  			// Ignore the token.
  1249  			return true
  1250  		case a.Style, a.Script:
  1251  			return inHeadIM(p)
  1252  		case a.Input:
  1253  			for _, t := range p.tok.Attr {
  1254  				if t.Key == "type" && strings.ToLower(t.Val) == "hidden" {
  1255  					p.addElement()
  1256  					p.oe.pop()
  1257  					return true
  1258  				}
  1259  			}
  1260  			// Otherwise drop down to the default action.
  1261  		case a.Form:
  1262  			if p.form != nil {
  1263  				// Ignore the token.
  1264  				return true
  1265  			}
  1266  			p.addElement()
  1267  			p.form = p.oe.pop()
  1268  		case a.Select:
  1269  			p.reconstructActiveFormattingElements()
  1270  			switch p.top().DataAtom {
  1271  			case a.Table, a.Tbody, a.Tfoot, a.Thead, a.Tr:
  1272  				p.fosterParenting = true
  1273  			}
  1274  			p.addElement()
  1275  			p.fosterParenting = false
  1276  			p.framesetOK = false
  1277  			p.im = inSelectInTableIM
  1278  			return true
  1279  		}
  1280  	case EndTagToken:
  1281  		switch p.tok.DataAtom {
  1282  		case a.Table:
  1283  			if p.popUntil(tableScope, a.Table) {
  1284  				p.resetInsertionMode()
  1285  				return true
  1286  			}
  1287  			// Ignore the token.
  1288  			return true
  1289  		case a.Body, a.Caption, a.Col, a.Colgroup, a.Html, a.Tbody, a.Td, a.Tfoot, a.Th, a.Thead, a.Tr:
  1290  			// Ignore the token.
  1291  			return true
  1292  		}
  1293  	case CommentToken:
  1294  		p.addChild(&Node{
  1295  			Type: CommentNode,
  1296  			Data: p.tok.Data,
  1297  		})
  1298  		return true
  1299  	case DoctypeToken:
  1300  		// Ignore the token.
  1301  		return true
  1302  	}
  1303  
  1304  	p.fosterParenting = true
  1305  	defer func() { p.fosterParenting = false }()
  1306  
  1307  	return inBodyIM(p)
  1308  }
  1309  
  1310  // Section 12.2.5.4.11.
  1311  func inCaptionIM(p *parser) bool {
  1312  	switch p.tok.Type {
  1313  	case StartTagToken:
  1314  		switch p.tok.DataAtom {
  1315  		case a.Caption, a.Col, a.Colgroup, a.Tbody, a.Td, a.Tfoot, a.Thead, a.Tr:
  1316  			if p.popUntil(tableScope, a.Caption) {
  1317  				p.clearActiveFormattingElements()
  1318  				p.im = inTableIM
  1319  				return false
  1320  			} else {
  1321  				// Ignore the token.
  1322  				return true
  1323  			}
  1324  		case a.Select:
  1325  			p.reconstructActiveFormattingElements()
  1326  			p.addElement()
  1327  			p.framesetOK = false
  1328  			p.im = inSelectInTableIM
  1329  			return true
  1330  		}
  1331  	case EndTagToken:
  1332  		switch p.tok.DataAtom {
  1333  		case a.Caption:
  1334  			if p.popUntil(tableScope, a.Caption) {
  1335  				p.clearActiveFormattingElements()
  1336  				p.im = inTableIM
  1337  			}
  1338  			return true
  1339  		case a.Table:
  1340  			if p.popUntil(tableScope, a.Caption) {
  1341  				p.clearActiveFormattingElements()
  1342  				p.im = inTableIM
  1343  				return false
  1344  			} else {
  1345  				// Ignore the token.
  1346  				return true
  1347  			}
  1348  		case a.Body, a.Col, a.Colgroup, a.Html, a.Tbody, a.Td, a.Tfoot, a.Th, a.Thead, a.Tr:
  1349  			// Ignore the token.
  1350  			return true
  1351  		}
  1352  	}
  1353  	return inBodyIM(p)
  1354  }
  1355  
  1356  // Section 12.2.5.4.12.
  1357  func inColumnGroupIM(p *parser) bool {
  1358  	switch p.tok.Type {
  1359  	case TextToken:
  1360  		s := strings.TrimLeft(p.tok.Data, whitespace)
  1361  		if len(s) < len(p.tok.Data) {
  1362  			// Add the initial whitespace to the current node.
  1363  			p.addText(p.tok.Data[:len(p.tok.Data)-len(s)])
  1364  			if s == "" {
  1365  				return true
  1366  			}
  1367  			p.tok.Data = s
  1368  		}
  1369  	case CommentToken:
  1370  		p.addChild(&Node{
  1371  			Type: CommentNode,
  1372  			Data: p.tok.Data,
  1373  		})
  1374  		return true
  1375  	case DoctypeToken:
  1376  		// Ignore the token.
  1377  		return true
  1378  	case StartTagToken:
  1379  		switch p.tok.DataAtom {
  1380  		case a.Html:
  1381  			return inBodyIM(p)
  1382  		case a.Col:
  1383  			p.addElement()
  1384  			p.oe.pop()
  1385  			p.acknowledgeSelfClosingTag()
  1386  			return true
  1387  		}
  1388  	case EndTagToken:
  1389  		switch p.tok.DataAtom {
  1390  		case a.Colgroup:
  1391  			if p.oe.top().DataAtom != a.Html {
  1392  				p.oe.pop()
  1393  				p.im = inTableIM
  1394  			}
  1395  			return true
  1396  		case a.Col:
  1397  			// Ignore the token.
  1398  			return true
  1399  		}
  1400  	}
  1401  	if p.oe.top().DataAtom != a.Html {
  1402  		p.oe.pop()
  1403  		p.im = inTableIM
  1404  		return false
  1405  	}
  1406  	return true
  1407  }
  1408  
  1409  // Section 12.2.5.4.13.
  1410  func inTableBodyIM(p *parser) bool {
  1411  	switch p.tok.Type {
  1412  	case StartTagToken:
  1413  		switch p.tok.DataAtom {
  1414  		case a.Tr:
  1415  			p.clearStackToContext(tableBodyScope)
  1416  			p.addElement()
  1417  			p.im = inRowIM
  1418  			return true
  1419  		case a.Td, a.Th:
  1420  			p.parseImpliedToken(StartTagToken, a.Tr, a.Tr.String())
  1421  			return false
  1422  		case a.Caption, a.Col, a.Colgroup, a.Tbody, a.Tfoot, a.Thead:
  1423  			if p.popUntil(tableScope, a.Tbody, a.Thead, a.Tfoot) {
  1424  				p.im = inTableIM
  1425  				return false
  1426  			}
  1427  			// Ignore the token.
  1428  			return true
  1429  		}
  1430  	case EndTagToken:
  1431  		switch p.tok.DataAtom {
  1432  		case a.Tbody, a.Tfoot, a.Thead:
  1433  			if p.elementInScope(tableScope, p.tok.DataAtom) {
  1434  				p.clearStackToContext(tableBodyScope)
  1435  				p.oe.pop()
  1436  				p.im = inTableIM
  1437  			}
  1438  			return true
  1439  		case a.Table:
  1440  			if p.popUntil(tableScope, a.Tbody, a.Thead, a.Tfoot) {
  1441  				p.im = inTableIM
  1442  				return false
  1443  			}
  1444  			// Ignore the token.
  1445  			return true
  1446  		case a.Body, a.Caption, a.Col, a.Colgroup, a.Html, a.Td, a.Th, a.Tr:
  1447  			// Ignore the token.
  1448  			return true
  1449  		}
  1450  	case CommentToken:
  1451  		p.addChild(&Node{
  1452  			Type: CommentNode,
  1453  			Data: p.tok.Data,
  1454  		})
  1455  		return true
  1456  	}
  1457  
  1458  	return inTableIM(p)
  1459  }
  1460  
  1461  // Section 12.2.5.4.14.
  1462  func inRowIM(p *parser) bool {
  1463  	switch p.tok.Type {
  1464  	case StartTagToken:
  1465  		switch p.tok.DataAtom {
  1466  		case a.Td, a.Th:
  1467  			p.clearStackToContext(tableRowScope)
  1468  			p.addElement()
  1469  			p.afe = append(p.afe, &scopeMarker)
  1470  			p.im = inCellIM
  1471  			return true
  1472  		case a.Caption, a.Col, a.Colgroup, a.Tbody, a.Tfoot, a.Thead, a.Tr:
  1473  			if p.popUntil(tableScope, a.Tr) {
  1474  				p.im = inTableBodyIM
  1475  				return false
  1476  			}
  1477  			// Ignore the token.
  1478  			return true
  1479  		}
  1480  	case EndTagToken:
  1481  		switch p.tok.DataAtom {
  1482  		case a.Tr:
  1483  			if p.popUntil(tableScope, a.Tr) {
  1484  				p.im = inTableBodyIM
  1485  				return true
  1486  			}
  1487  			// Ignore the token.
  1488  			return true
  1489  		case a.Table:
  1490  			if p.popUntil(tableScope, a.Tr) {
  1491  				p.im = inTableBodyIM
  1492  				return false
  1493  			}
  1494  			// Ignore the token.
  1495  			return true
  1496  		case a.Tbody, a.Tfoot, a.Thead:
  1497  			if p.elementInScope(tableScope, p.tok.DataAtom) {
  1498  				p.parseImpliedToken(EndTagToken, a.Tr, a.Tr.String())
  1499  				return false
  1500  			}
  1501  			// Ignore the token.
  1502  			return true
  1503  		case a.Body, a.Caption, a.Col, a.Colgroup, a.Html, a.Td, a.Th:
  1504  			// Ignore the token.
  1505  			return true
  1506  		}
  1507  	}
  1508  
  1509  	return inTableIM(p)
  1510  }
  1511  
  1512  // Section 12.2.5.4.15.
  1513  func inCellIM(p *parser) bool {
  1514  	switch p.tok.Type {
  1515  	case StartTagToken:
  1516  		switch p.tok.DataAtom {
  1517  		case a.Caption, a.Col, a.Colgroup, a.Tbody, a.Td, a.Tfoot, a.Th, a.Thead, a.Tr:
  1518  			if p.popUntil(tableScope, a.Td, a.Th) {
  1519  				// Close the cell and reprocess.
  1520  				p.clearActiveFormattingElements()
  1521  				p.im = inRowIM
  1522  				return false
  1523  			}
  1524  			// Ignore the token.
  1525  			return true
  1526  		case a.Select:
  1527  			p.reconstructActiveFormattingElements()
  1528  			p.addElement()
  1529  			p.framesetOK = false
  1530  			p.im = inSelectInTableIM
  1531  			return true
  1532  		}
  1533  	case EndTagToken:
  1534  		switch p.tok.DataAtom {
  1535  		case a.Td, a.Th:
  1536  			if !p.popUntil(tableScope, p.tok.DataAtom) {
  1537  				// Ignore the token.
  1538  				return true
  1539  			}
  1540  			p.clearActiveFormattingElements()
  1541  			p.im = inRowIM
  1542  			return true
  1543  		case a.Body, a.Caption, a.Col, a.Colgroup, a.Html:
  1544  			// Ignore the token.
  1545  			return true
  1546  		case a.Table, a.Tbody, a.Tfoot, a.Thead, a.Tr:
  1547  			if !p.elementInScope(tableScope, p.tok.DataAtom) {
  1548  				// Ignore the token.
  1549  				return true
  1550  			}
  1551  			// Close the cell and reprocess.
  1552  			p.popUntil(tableScope, a.Td, a.Th)
  1553  			p.clearActiveFormattingElements()
  1554  			p.im = inRowIM
  1555  			return false
  1556  		}
  1557  	}
  1558  	return inBodyIM(p)
  1559  }
  1560  
  1561  // Section 12.2.5.4.16.
  1562  func inSelectIM(p *parser) bool {
  1563  	switch p.tok.Type {
  1564  	case ErrorToken:
  1565  		// Stop parsing.
  1566  		return true
  1567  	case TextToken:
  1568  		p.addText(strings.Replace(p.tok.Data, "\x00", "", -1))
  1569  	case StartTagToken:
  1570  		switch p.tok.DataAtom {
  1571  		case a.Html:
  1572  			return inBodyIM(p)
  1573  		case a.Option:
  1574  			if p.top().DataAtom == a.Option {
  1575  				p.oe.pop()
  1576  			}
  1577  			p.addElement()
  1578  		case a.Optgroup:
  1579  			if p.top().DataAtom == a.Option {
  1580  				p.oe.pop()
  1581  			}
  1582  			if p.top().DataAtom == a.Optgroup {
  1583  				p.oe.pop()
  1584  			}
  1585  			p.addElement()
  1586  		case a.Select:
  1587  			p.tok.Type = EndTagToken
  1588  			return false
  1589  		case a.Input, a.Keygen, a.Textarea:
  1590  			if p.elementInScope(selectScope, a.Select) {
  1591  				p.parseImpliedToken(EndTagToken, a.Select, a.Select.String())
  1592  				return false
  1593  			}
  1594  			// In order to properly ignore <textarea>, we need to change the tokenizer mode.
  1595  			p.tokenizer.NextIsNotRawText()
  1596  			// Ignore the token.
  1597  			return true
  1598  		case a.Script:
  1599  			return inHeadIM(p)
  1600  		}
  1601  	case EndTagToken:
  1602  		switch p.tok.DataAtom {
  1603  		case a.Option:
  1604  			if p.top().DataAtom == a.Option {
  1605  				p.oe.pop()
  1606  			}
  1607  		case a.Optgroup:
  1608  			i := len(p.oe) - 1
  1609  			if p.oe[i].DataAtom == a.Option {
  1610  				i--
  1611  			}
  1612  			if p.oe[i].DataAtom == a.Optgroup {
  1613  				p.oe = p.oe[:i]
  1614  			}
  1615  		case a.Select:
  1616  			if p.popUntil(selectScope, a.Select) {
  1617  				p.resetInsertionMode()
  1618  			}
  1619  		}
  1620  	case CommentToken:
  1621  		p.doc.AppendChild(&Node{
  1622  			Type: CommentNode,
  1623  			Data: p.tok.Data,
  1624  		})
  1625  	case DoctypeToken:
  1626  		// Ignore the token.
  1627  		return true
  1628  	}
  1629  
  1630  	return true
  1631  }
  1632  
  1633  // Section 12.2.5.4.17.
  1634  func inSelectInTableIM(p *parser) bool {
  1635  	switch p.tok.Type {
  1636  	case StartTagToken, EndTagToken:
  1637  		switch p.tok.DataAtom {
  1638  		case a.Caption, a.Table, a.Tbody, a.Tfoot, a.Thead, a.Tr, a.Td, a.Th:
  1639  			if p.tok.Type == StartTagToken || p.elementInScope(tableScope, p.tok.DataAtom) {
  1640  				p.parseImpliedToken(EndTagToken, a.Select, a.Select.String())
  1641  				return false
  1642  			} else {
  1643  				// Ignore the token.
  1644  				return true
  1645  			}
  1646  		}
  1647  	}
  1648  	return inSelectIM(p)
  1649  }
  1650  
  1651  // Section 12.2.5.4.18.
  1652  func afterBodyIM(p *parser) bool {
  1653  	switch p.tok.Type {
  1654  	case ErrorToken:
  1655  		// Stop parsing.
  1656  		return true
  1657  	case TextToken:
  1658  		s := strings.TrimLeft(p.tok.Data, whitespace)
  1659  		if len(s) == 0 {
  1660  			// It was all whitespace.
  1661  			return inBodyIM(p)
  1662  		}
  1663  	case StartTagToken:
  1664  		if p.tok.DataAtom == a.Html {
  1665  			return inBodyIM(p)
  1666  		}
  1667  	case EndTagToken:
  1668  		if p.tok.DataAtom == a.Html {
  1669  			if !p.fragment {
  1670  				p.im = afterAfterBodyIM
  1671  			}
  1672  			return true
  1673  		}
  1674  	case CommentToken:
  1675  		// The comment is attached to the <html> element.
  1676  		if len(p.oe) < 1 || p.oe[0].DataAtom != a.Html {
  1677  			panic("html: bad parser state: <html> element not found, in the after-body insertion mode")
  1678  		}
  1679  		p.oe[0].AppendChild(&Node{
  1680  			Type: CommentNode,
  1681  			Data: p.tok.Data,
  1682  		})
  1683  		return true
  1684  	}
  1685  	p.im = inBodyIM
  1686  	return false
  1687  }
  1688  
  1689  // Section 12.2.5.4.19.
  1690  func inFramesetIM(p *parser) bool {
  1691  	switch p.tok.Type {
  1692  	case CommentToken:
  1693  		p.addChild(&Node{
  1694  			Type: CommentNode,
  1695  			Data: p.tok.Data,
  1696  		})
  1697  	case TextToken:
  1698  		// Ignore all text but whitespace.
  1699  		s := strings.Map(func(c rune) rune {
  1700  			switch c {
  1701  			case ' ', '\t', '\n', '\f', '\r':
  1702  				return c
  1703  			}
  1704  			return -1
  1705  		}, p.tok.Data)
  1706  		if s != "" {
  1707  			p.addText(s)
  1708  		}
  1709  	case StartTagToken:
  1710  		switch p.tok.DataAtom {
  1711  		case a.Html:
  1712  			return inBodyIM(p)
  1713  		case a.Frameset:
  1714  			p.addElement()
  1715  		case a.Frame:
  1716  			p.addElement()
  1717  			p.oe.pop()
  1718  			p.acknowledgeSelfClosingTag()
  1719  		case a.Noframes:
  1720  			return inHeadIM(p)
  1721  		}
  1722  	case EndTagToken:
  1723  		switch p.tok.DataAtom {
  1724  		case a.Frameset:
  1725  			if p.oe.top().DataAtom != a.Html {
  1726  				p.oe.pop()
  1727  				if p.oe.top().DataAtom != a.Frameset {
  1728  					p.im = afterFramesetIM
  1729  					return true
  1730  				}
  1731  			}
  1732  		}
  1733  	default:
  1734  		// Ignore the token.
  1735  	}
  1736  	return true
  1737  }
  1738  
  1739  // Section 12.2.5.4.20.
  1740  func afterFramesetIM(p *parser) bool {
  1741  	switch p.tok.Type {
  1742  	case CommentToken:
  1743  		p.addChild(&Node{
  1744  			Type: CommentNode,
  1745  			Data: p.tok.Data,
  1746  		})
  1747  	case TextToken:
  1748  		// Ignore all text but whitespace.
  1749  		s := strings.Map(func(c rune) rune {
  1750  			switch c {
  1751  			case ' ', '\t', '\n', '\f', '\r':
  1752  				return c
  1753  			}
  1754  			return -1
  1755  		}, p.tok.Data)
  1756  		if s != "" {
  1757  			p.addText(s)
  1758  		}
  1759  	case StartTagToken:
  1760  		switch p.tok.DataAtom {
  1761  		case a.Html:
  1762  			return inBodyIM(p)
  1763  		case a.Noframes:
  1764  			return inHeadIM(p)
  1765  		}
  1766  	case EndTagToken:
  1767  		switch p.tok.DataAtom {
  1768  		case a.Html:
  1769  			p.im = afterAfterFramesetIM
  1770  			return true
  1771  		}
  1772  	default:
  1773  		// Ignore the token.
  1774  	}
  1775  	return true
  1776  }
  1777  
  1778  // Section 12.2.5.4.21.
  1779  func afterAfterBodyIM(p *parser) bool {
  1780  	switch p.tok.Type {
  1781  	case ErrorToken:
  1782  		// Stop parsing.
  1783  		return true
  1784  	case TextToken:
  1785  		s := strings.TrimLeft(p.tok.Data, whitespace)
  1786  		if len(s) == 0 {
  1787  			// It was all whitespace.
  1788  			return inBodyIM(p)
  1789  		}
  1790  	case StartTagToken:
  1791  		if p.tok.DataAtom == a.Html {
  1792  			return inBodyIM(p)
  1793  		}
  1794  	case CommentToken:
  1795  		p.doc.AppendChild(&Node{
  1796  			Type: CommentNode,
  1797  			Data: p.tok.Data,
  1798  		})
  1799  		return true
  1800  	case DoctypeToken:
  1801  		return inBodyIM(p)
  1802  	}
  1803  	p.im = inBodyIM
  1804  	return false
  1805  }
  1806  
  1807  // Section 12.2.5.4.22.
  1808  func afterAfterFramesetIM(p *parser) bool {
  1809  	switch p.tok.Type {
  1810  	case CommentToken:
  1811  		p.doc.AppendChild(&Node{
  1812  			Type: CommentNode,
  1813  			Data: p.tok.Data,
  1814  		})
  1815  	case TextToken:
  1816  		// Ignore all text but whitespace.
  1817  		s := strings.Map(func(c rune) rune {
  1818  			switch c {
  1819  			case ' ', '\t', '\n', '\f', '\r':
  1820  				return c
  1821  			}
  1822  			return -1
  1823  		}, p.tok.Data)
  1824  		if s != "" {
  1825  			p.tok.Data = s
  1826  			return inBodyIM(p)
  1827  		}
  1828  	case StartTagToken:
  1829  		switch p.tok.DataAtom {
  1830  		case a.Html:
  1831  			return inBodyIM(p)
  1832  		case a.Noframes:
  1833  			return inHeadIM(p)
  1834  		}
  1835  	case DoctypeToken:
  1836  		return inBodyIM(p)
  1837  	default:
  1838  		// Ignore the token.
  1839  	}
  1840  	return true
  1841  }
  1842  
  1843  const whitespaceOrNUL = whitespace + "\x00"
  1844  
  1845  // Section 12.2.5.5.
  1846  func parseForeignContent(p *parser) bool {
  1847  	switch p.tok.Type {
  1848  	case TextToken:
  1849  		if p.framesetOK {
  1850  			p.framesetOK = strings.TrimLeft(p.tok.Data, whitespaceOrNUL) == ""
  1851  		}
  1852  		p.tok.Data = strings.Replace(p.tok.Data, "\x00", "\ufffd", -1)
  1853  		p.addText(p.tok.Data)
  1854  	case CommentToken:
  1855  		p.addChild(&Node{
  1856  			Type: CommentNode,
  1857  			Data: p.tok.Data,
  1858  		})
  1859  	case StartTagToken:
  1860  		b := breakout[p.tok.Data]
  1861  		if p.tok.DataAtom == a.Font {
  1862  		loop:
  1863  			for _, attr := range p.tok.Attr {
  1864  				switch attr.Key {
  1865  				case "color", "face", "size":
  1866  					b = true
  1867  					break loop
  1868  				}
  1869  			}
  1870  		}
  1871  		if b {
  1872  			for i := len(p.oe) - 1; i >= 0; i-- {
  1873  				n := p.oe[i]
  1874  				if n.Namespace == "" || htmlIntegrationPoint(n) || mathMLTextIntegrationPoint(n) {
  1875  					p.oe = p.oe[:i+1]
  1876  					break
  1877  				}
  1878  			}
  1879  			return false
  1880  		}
  1881  		switch p.top().Namespace {
  1882  		case "math":
  1883  			adjustAttributeNames(p.tok.Attr, mathMLAttributeAdjustments)
  1884  		case "svg":
  1885  			// Adjust SVG tag names. The tokenizer lower-cases tag names, but
  1886  			// SVG wants e.g. "foreignObject" with a capital second "O".
  1887  			if x := svgTagNameAdjustments[p.tok.Data]; x != "" {
  1888  				p.tok.DataAtom = a.Lookup([]byte(x))
  1889  				p.tok.Data = x
  1890  			}
  1891  			adjustAttributeNames(p.tok.Attr, svgAttributeAdjustments)
  1892  		default:
  1893  			panic("html: bad parser state: unexpected namespace")
  1894  		}
  1895  		adjustForeignAttributes(p.tok.Attr)
  1896  		namespace := p.top().Namespace
  1897  		p.addElement()
  1898  		p.top().Namespace = namespace
  1899  		if namespace != "" {
  1900  			// Don't let the tokenizer go into raw text mode in foreign content
  1901  			// (e.g. in an SVG <title> tag).
  1902  			p.tokenizer.NextIsNotRawText()
  1903  		}
  1904  		if p.hasSelfClosingToken {
  1905  			p.oe.pop()
  1906  			p.acknowledgeSelfClosingTag()
  1907  		}
  1908  	case EndTagToken:
  1909  		for i := len(p.oe) - 1; i >= 0; i-- {
  1910  			if p.oe[i].Namespace == "" {
  1911  				return p.im(p)
  1912  			}
  1913  			if strings.EqualFold(p.oe[i].Data, p.tok.Data) {
  1914  				p.oe = p.oe[:i]
  1915  				break
  1916  			}
  1917  		}
  1918  		return true
  1919  	default:
  1920  		// Ignore the token.
  1921  	}
  1922  	return true
  1923  }
  1924  
  1925  // Section 12.2.5.
  1926  func (p *parser) inForeignContent() bool {
  1927  	if len(p.oe) == 0 {
  1928  		return false
  1929  	}
  1930  	n := p.oe[len(p.oe)-1]
  1931  	if n.Namespace == "" {
  1932  		return false
  1933  	}
  1934  	if mathMLTextIntegrationPoint(n) {
  1935  		if p.tok.Type == StartTagToken && p.tok.DataAtom != a.Mglyph && p.tok.DataAtom != a.Malignmark {
  1936  			return false
  1937  		}
  1938  		if p.tok.Type == TextToken {
  1939  			return false
  1940  		}
  1941  	}
  1942  	if n.Namespace == "math" && n.DataAtom == a.AnnotationXml && p.tok.Type == StartTagToken && p.tok.DataAtom == a.Svg {
  1943  		return false
  1944  	}
  1945  	if htmlIntegrationPoint(n) && (p.tok.Type == StartTagToken || p.tok.Type == TextToken) {
  1946  		return false
  1947  	}
  1948  	if p.tok.Type == ErrorToken {
  1949  		return false
  1950  	}
  1951  	return true
  1952  }
  1953  
  1954  // parseImpliedToken parses a token as though it had appeared in the parser's
  1955  // input.
  1956  func (p *parser) parseImpliedToken(t TokenType, dataAtom a.Atom, data string) {
  1957  	realToken, selfClosing := p.tok, p.hasSelfClosingToken
  1958  	p.tok = Token{
  1959  		Type:     t,
  1960  		DataAtom: dataAtom,
  1961  		Data:     data,
  1962  	}
  1963  	p.hasSelfClosingToken = false
  1964  	p.parseCurrentToken()
  1965  	p.tok, p.hasSelfClosingToken = realToken, selfClosing
  1966  }
  1967  
  1968  // parseCurrentToken runs the current token through the parsing routines
  1969  // until it is consumed.
  1970  func (p *parser) parseCurrentToken() {
  1971  	if p.tok.Type == SelfClosingTagToken {
  1972  		p.hasSelfClosingToken = true
  1973  		p.tok.Type = StartTagToken
  1974  	}
  1975  
  1976  	consumed := false
  1977  	for !consumed {
  1978  		if p.inForeignContent() {
  1979  			consumed = parseForeignContent(p)
  1980  		} else {
  1981  			consumed = p.im(p)
  1982  		}
  1983  	}
  1984  
  1985  	if p.hasSelfClosingToken {
  1986  		// This is a parse error, but ignore it.
  1987  		p.hasSelfClosingToken = false
  1988  	}
  1989  }
  1990  
  1991  func (p *parser) parse() error {
  1992  	// Iterate until EOF. Any other error will cause an early return.
  1993  	var err error
  1994  	for err != io.EOF {
  1995  		// CDATA sections are allowed only in foreign content.
  1996  		n := p.oe.top()
  1997  		p.tokenizer.AllowCDATA(n != nil && n.Namespace != "")
  1998  		// Read and parse the next token.
  1999  		p.tokenizer.Next()
  2000  		p.tok = p.tokenizer.Token()
  2001  		if p.tok.Type == ErrorToken {
  2002  			err = p.tokenizer.Err()
  2003  			if err != nil && err != io.EOF {
  2004  				return err
  2005  			}
  2006  		}
  2007  		p.parseCurrentToken()
  2008  	}
  2009  	return nil
  2010  }
  2011  
  2012  // Parse returns the parse tree for the HTML from the given Reader.
  2013  // The input is assumed to be UTF-8 encoded.
  2014  func Parse(r io.Reader) (*Node, error) {
  2015  	p := &parser{
  2016  		tokenizer: NewTokenizer(r),
  2017  		doc: &Node{
  2018  			Type: DocumentNode,
  2019  		},
  2020  		scripting:  true,
  2021  		framesetOK: true,
  2022  		im:         initialIM,
  2023  	}
  2024  	err := p.parse()
  2025  	if err != nil {
  2026  		return nil, err
  2027  	}
  2028  	return p.doc, nil
  2029  }
  2030  
  2031  // ParseFragment parses a fragment of HTML and returns the nodes that were
  2032  // found. If the fragment is the InnerHTML for an existing element, pass that
  2033  // element in context.
  2034  func ParseFragment(r io.Reader, context *Node) ([]*Node, error) {
  2035  	contextTag := ""
  2036  	if context != nil {
  2037  		if context.Type != ElementNode {
  2038  			return nil, errors.New("html: ParseFragment of non-element Node")
  2039  		}
  2040  		// The next check isn't just context.DataAtom.String() == context.Data because
  2041  		// it is valid to pass an element whose tag isn't a known atom. For example,
  2042  		// DataAtom == 0 and Data = "tagfromthefuture" is perfectly consistent.
  2043  		if context.DataAtom != a.Lookup([]byte(context.Data)) {
  2044  			return nil, fmt.Errorf("html: inconsistent Node: DataAtom=%q, Data=%q", context.DataAtom, context.Data)
  2045  		}
  2046  		contextTag = context.DataAtom.String()
  2047  	}
  2048  	p := &parser{
  2049  		tokenizer: NewTokenizerFragment(r, contextTag),
  2050  		doc: &Node{
  2051  			Type: DocumentNode,
  2052  		},
  2053  		scripting: true,
  2054  		fragment:  true,
  2055  		context:   context,
  2056  	}
  2057  
  2058  	root := &Node{
  2059  		Type:     ElementNode,
  2060  		DataAtom: a.Html,
  2061  		Data:     a.Html.String(),
  2062  	}
  2063  	p.doc.AppendChild(root)
  2064  	p.oe = nodeStack{root}
  2065  	p.resetInsertionMode()
  2066  
  2067  	for n := context; n != nil; n = n.Parent {
  2068  		if n.Type == ElementNode && n.DataAtom == a.Form {
  2069  			p.form = n
  2070  			break
  2071  		}
  2072  	}
  2073  
  2074  	err := p.parse()
  2075  	if err != nil {
  2076  		return nil, err
  2077  	}
  2078  
  2079  	parent := p.doc
  2080  	if context != nil {
  2081  		parent = root
  2082  	}
  2083  
  2084  	var result []*Node
  2085  	for c := parent.FirstChild; c != nil; {
  2086  		next := c.NextSibling
  2087  		parent.RemoveChild(c)
  2088  		result = append(result, c)
  2089  		c = next
  2090  	}
  2091  	return result, nil
  2092  }