github.com/hxx258456/ccgo@v0.0.5-0.20230213014102-48b35f46f66f/net/html/token_test.go (about)

     1  // Copyright 2010 The Go Authors. All rights reserved.
     2  // Use of this source code is governed by a BSD-style
     3  // license that can be found in the LICENSE file.
     4  
     5  package html
     6  
     7  import (
     8  	"bytes"
     9  	"io"
    10  	"io/ioutil"
    11  	"reflect"
    12  	"runtime"
    13  	"strings"
    14  	"testing"
    15  )
    16  
// tokenTest describes a single tokenizer test case: an HTML input and the
// serialized tokens it is expected to produce.
type tokenTest struct {
	// A short description of the test case.
	desc string
	// The HTML to parse.
	html string
	// The string representations of the expected tokens, joined by '$'.
	golden string
}
    25  
// tokenTests is the shared test-case table driven by TestTokenizer,
// TestMaxBufferReconstruction and TestPassthrough. Each golden string lists
// the expected token serializations joined by '$'; an empty golden means no
// tokens are expected before EOF.
var tokenTests = []tokenTest{
	{
		"empty",
		"",
		"",
	},
	// A single text node. The tokenizer should not break text nodes on whitespace,
	// nor should it normalize whitespace within a text node.
	{
		"text",
		"foo  bar",
		"foo  bar",
	},
	// An entity.
	{
		"entity",
		"one < two",
		"one < two",
	},
	// A start, self-closing and end tag. The tokenizer does not care if the start
	// and end tokens don't match; that is the job of the parser.
	{
		"tags",
		"<a>b<c/>d</e>",
		"<a>$b$<c/>$d$</e>",
	},
	// Angle brackets that aren't a tag.
	{
		"not a tag #0",
		"<",
		"&lt;",
	},
	{
		"not a tag #1",
		"</",
		"&lt;/",
	},
	{
		"not a tag #2",
		"</>",
		"<!---->",
	},
	{
		"not a tag #3",
		"a</>b",
		"a$<!---->$b",
	},
	{
		"not a tag #4",
		"</ >",
		"<!-- -->",
	},
	{
		"not a tag #5",
		"</.",
		"<!--.-->",
	},
	{
		"not a tag #6",
		"</.>",
		"<!--.-->",
	},
	{
		"not a tag #7",
		"a < b",
		"a &lt; b",
	},
	{
		"not a tag #8",
		"<.>",
		"&lt;.&gt;",
	},
	{
		"not a tag #9",
		"a<<<b>>>c",
		"a&lt;&lt;$<b>$&gt;&gt;c",
	},
	{
		"not a tag #10",
		"if x<0 and y < 0 then x*y>0",
		"if x&lt;0 and y &lt; 0 then x*y&gt;0",
	},
	{
		"not a tag #11",
		"<<p>",
		"&lt;$<p>",
	},
	// EOF in a tag name.
	{
		"tag name eof #0",
		"<a",
		"",
	},
	{
		"tag name eof #1",
		"<a ",
		"",
	},
	{
		"tag name eof #2",
		"a<b",
		"a",
	},
	{
		"tag name eof #3",
		"<a><b",
		"<a>",
	},
	{
		"tag name eof #4",
		`<a x`,
		``,
	},
	// Some malformed tags that are missing a '>'.
	{
		"malformed tag #0",
		`<p</p>`,
		`<p< p="">`,
	},
	{
		"malformed tag #1",
		`<p </p>`,
		`<p <="" p="">`,
	},
	{
		"malformed tag #2",
		`<p id`,
		``,
	},
	{
		"malformed tag #3",
		`<p id=`,
		``,
	},
	{
		"malformed tag #4",
		`<p id=>`,
		`<p id="">`,
	},
	{
		"malformed tag #5",
		`<p id=0`,
		``,
	},
	{
		"malformed tag #6",
		`<p id=0</p>`,
		`<p id="0&lt;/p">`,
	},
	{
		"malformed tag #7",
		`<p id="0</p>`,
		``,
	},
	{
		"malformed tag #8",
		`<p id="0"</p>`,
		`<p id="0" <="" p="">`,
	},
	{
		"malformed tag #9",
		`<p></p id`,
		`<p>`,
	},
	// Raw text and RCDATA.
	{
		"basic raw text",
		"<script><a></b></script>",
		"<script>$&lt;a&gt;&lt;/b&gt;$</script>",
	},
	{
		"unfinished script end tag",
		"<SCRIPT>a</SCR",
		"<script>$a&lt;/SCR",
	},
	{
		"broken script end tag",
		"<SCRIPT>a</SCR ipt>",
		"<script>$a&lt;/SCR ipt&gt;",
	},
	{
		"EOF in script end tag",
		"<SCRIPT>a</SCRipt",
		"<script>$a&lt;/SCRipt",
	},
	{
		"scriptx end tag",
		"<SCRIPT>a</SCRiptx",
		"<script>$a&lt;/SCRiptx",
	},
	{
		"' ' completes script end tag",
		"<SCRIPT>a</SCRipt ",
		"<script>$a",
	},
	{
		"'>' completes script end tag",
		"<SCRIPT>a</SCRipt>",
		"<script>$a$</script>",
	},
	{
		"self-closing script end tag",
		"<SCRIPT>a</SCRipt/>",
		"<script>$a$</script>",
	},
	{
		"nested script tag",
		"<SCRIPT>a</SCRipt<script>",
		"<script>$a&lt;/SCRipt&lt;script&gt;",
	},
	{
		"script end tag after unfinished",
		"<SCRIPT>a</SCRipt</script>",
		"<script>$a&lt;/SCRipt$</script>",
	},
	{
		"script/style mismatched tags",
		"<script>a</style>",
		"<script>$a&lt;/style&gt;",
	},
	{
		"style element with entity",
		"<style>&apos;",
		"<style>$&amp;apos;",
	},
	{
		"textarea with tag",
		"<textarea><div></textarea>",
		"<textarea>$&lt;div&gt;$</textarea>",
	},
	{
		"title with tag and entity",
		"<title><b>K&amp;R C</b></title>",
		"<title>$&lt;b&gt;K&amp;R C&lt;/b&gt;$</title>",
	},
	{
		"title with trailing '&lt;' entity",
		"<title>foobar<</title>",
		"<title>$foobar&lt;$</title>",
	},
	// DOCTYPE tests.
	{
		"Proper DOCTYPE",
		"<!DOCTYPE html>",
		"<!DOCTYPE html>",
	},
	{
		"DOCTYPE with no space",
		"<!doctypehtml>",
		"<!DOCTYPE html>",
	},
	{
		"DOCTYPE with two spaces",
		"<!doctype  html>",
		"<!DOCTYPE html>",
	},
	{
		"looks like DOCTYPE but isn't",
		"<!DOCUMENT html>",
		"<!--DOCUMENT html-->",
	},
	{
		"DOCTYPE at EOF",
		"<!DOCtype",
		"<!DOCTYPE >",
	},
	// XML processing instructions.
	{
		"XML processing instruction",
		"<?xml?>",
		"<!--?xml?-->",
	},
	// Comments.
	{
		"comment0",
		"abc<b><!-- skipme --></b>def",
		"abc$<b>$<!-- skipme -->$</b>$def",
	},
	{
		"comment1",
		"a<!-->z",
		"a$<!---->$z",
	},
	{
		"comment2",
		"a<!--->z",
		"a$<!---->$z",
	},
	{
		"comment3",
		"a<!--x>-->z",
		"a$<!--x>-->$z",
	},
	{
		"comment4",
		"a<!--x->-->z",
		"a$<!--x->-->$z",
	},
	{
		"comment5",
		"a<!>z",
		"a$<!---->$z",
	},
	{
		"comment6",
		"a<!->z",
		"a$<!----->$z",
	},
	{
		"comment7",
		"a<!---<>z",
		"a$<!---<>z-->",
	},
	{
		"comment8",
		"a<!--z",
		"a$<!--z-->",
	},
	{
		"comment9",
		"a<!--z-",
		"a$<!--z-->",
	},
	{
		"comment10",
		"a<!--z--",
		"a$<!--z-->",
	},
	{
		"comment11",
		"a<!--z---",
		"a$<!--z--->",
	},
	{
		"comment12",
		"a<!--z----",
		"a$<!--z---->",
	},
	{
		"comment13",
		"a<!--x--!>z",
		"a$<!--x-->$z",
	},
	// An attribute with a backslash.
	{
		"backslash",
		`<p id="a\"b">`,
		`<p id="a\" b"="">`,
	},
	// Entities, tag name and attribute key lower-casing, and whitespace
	// normalization within a tag.
	{
		"tricky",
		"<p \t\n iD=\"a&quot;B\"  foo=\"bar\"><EM>te&lt;&amp;;xt</em></p>",
		`<p id="a&#34;B" foo="bar">$<em>$te&lt;&amp;;xt$</em>$</p>`,
	},
	// A nonexistent entity. Tokenizing and converting back to a string should
	// escape the "&" to become "&amp;".
	{
		"noSuchEntity",
		`<a b="c&noSuchEntity;d">&lt;&alsoDoesntExist;&`,
		`<a b="c&amp;noSuchEntity;d">$&lt;&amp;alsoDoesntExist;&amp;`,
	},
	{
		"entity without semicolon",
		`&notit;&notin;<a b="q=z&amp=5&notice=hello&not;=world">`,
		`¬it;∉$<a b="q=z&amp;amp=5&amp;notice=hello¬=world">`,
	},
	{
		"entity with digits",
		"&frac12;",
		"½",
	},
	// Attribute tests:
	// http://dev.w3.org/html5/pf-summary/Overview.html#attributes
	{
		"Empty attribute",
		`<input disabled FOO>`,
		`<input disabled="" foo="">`,
	},
	{
		"Empty attribute, whitespace",
		`<input disabled FOO >`,
		`<input disabled="" foo="">`,
	},
	{
		"Unquoted attribute value",
		`<input value=yes FOO=BAR>`,
		`<input value="yes" foo="BAR">`,
	},
	{
		"Unquoted attribute value, spaces",
		`<input value = yes FOO = BAR>`,
		`<input value="yes" foo="BAR">`,
	},
	{
		"Unquoted attribute value, trailing space",
		`<input value=yes FOO=BAR >`,
		`<input value="yes" foo="BAR">`,
	},
	{
		"Single-quoted attribute value",
		`<input value='yes' FOO='BAR'>`,
		`<input value="yes" foo="BAR">`,
	},
	{
		"Single-quoted attribute value, trailing space",
		`<input value='yes' FOO='BAR' >`,
		`<input value="yes" foo="BAR">`,
	},
	{
		"Double-quoted attribute value",
		`<input value="I'm an attribute" FOO="BAR">`,
		`<input value="I&#39;m an attribute" foo="BAR">`,
	},
	{
		"Attribute name characters",
		`<meta http-equiv="content-type">`,
		`<meta http-equiv="content-type">`,
	},
	{
		"Mixed attributes",
		`a<P V="0 1" w='2' X=3 y>z`,
		`a$<p v="0 1" w="2" x="3" y="">$z`,
	},
	{
		"Attributes with a solitary single quote",
		`<p id=can't><p id=won't>`,
		`<p id="can&#39;t">$<p id="won&#39;t">`,
	},
}
   457  
   458  func TestTokenizer(t *testing.T) {
   459  loop:
   460  	for _, tt := range tokenTests {
   461  		z := NewTokenizer(strings.NewReader(tt.html))
   462  		if tt.golden != "" {
   463  			for i, s := range strings.Split(tt.golden, "$") {
   464  				if z.Next() == ErrorToken {
   465  					t.Errorf("%s token %d: want %q got error %v", tt.desc, i, s, z.Err())
   466  					continue loop
   467  				}
   468  				actual := z.Token().String()
   469  				if s != actual {
   470  					t.Errorf("%s token %d: want %q got %q", tt.desc, i, s, actual)
   471  					continue loop
   472  				}
   473  			}
   474  		}
   475  		z.Next()
   476  		if z.Err() != io.EOF {
   477  			t.Errorf("%s: want EOF got %q", tt.desc, z.Err())
   478  		}
   479  	}
   480  }
   481  
   482  func TestMaxBuffer(t *testing.T) {
   483  	// Exceeding the maximum buffer size generates ErrBufferExceeded.
   484  	z := NewTokenizer(strings.NewReader("<" + strings.Repeat("t", 10)))
   485  	z.SetMaxBuf(5)
   486  	tt := z.Next()
   487  	if got, want := tt, ErrorToken; got != want {
   488  		t.Fatalf("token type: got: %v want: %v", got, want)
   489  	}
   490  	if got, want := z.Err(), ErrBufferExceeded; got != want {
   491  		t.Errorf("error type: got: %v want: %v", got, want)
   492  	}
   493  	if got, want := string(z.Raw()), "<tttt"; got != want {
   494  		t.Fatalf("buffered before overflow: got: %q want: %q", got, want)
   495  	}
   496  }
   497  
   498  func TestMaxBufferReconstruction(t *testing.T) {
   499  	// Exceeding the maximum buffer size at any point while tokenizing permits
   500  	// reconstructing the original input.
   501  tests:
   502  	for _, test := range tokenTests {
   503  		for maxBuf := 1; ; maxBuf++ {
   504  			r := strings.NewReader(test.html)
   505  			z := NewTokenizer(r)
   506  			z.SetMaxBuf(maxBuf)
   507  			var tokenized bytes.Buffer
   508  			for {
   509  				tt := z.Next()
   510  				tokenized.Write(z.Raw())
   511  				if tt == ErrorToken {
   512  					if err := z.Err(); err != io.EOF && err != ErrBufferExceeded {
   513  						t.Errorf("%s: unexpected error: %v", test.desc, err)
   514  					}
   515  					break
   516  				}
   517  			}
   518  			// Anything tokenized along with untokenized input or data left in the reader.
   519  			assembled, err := ioutil.ReadAll(io.MultiReader(&tokenized, bytes.NewReader(z.Buffered()), r))
   520  			if err != nil {
   521  				t.Errorf("%s: ReadAll: %v", test.desc, err)
   522  				continue tests
   523  			}
   524  			if got, want := string(assembled), test.html; got != want {
   525  				t.Errorf("%s: reassembled html:\n got: %q\nwant: %q", test.desc, got, want)
   526  				continue tests
   527  			}
   528  			// EOF indicates that we completed tokenization and hence found the max
   529  			// maxBuf that generates ErrBufferExceeded, so continue to the next test.
   530  			if z.Err() == io.EOF {
   531  				break
   532  			}
   533  		} // buffer sizes
   534  	} // tests
   535  }
   536  
   537  func TestPassthrough(t *testing.T) {
   538  	// Accumulating the raw output for each parse event should reconstruct the
   539  	// original input.
   540  	for _, test := range tokenTests {
   541  		z := NewTokenizer(strings.NewReader(test.html))
   542  		var parsed bytes.Buffer
   543  		for {
   544  			tt := z.Next()
   545  			parsed.Write(z.Raw())
   546  			if tt == ErrorToken {
   547  				break
   548  			}
   549  		}
   550  		if got, want := parsed.String(), test.html; got != want {
   551  			t.Errorf("%s: parsed output:\n got: %q\nwant: %q", test.desc, got, want)
   552  		}
   553  	}
   554  }
   555  
   556  func TestBufAPI(t *testing.T) {
   557  	s := "0<a>1</a>2<b>3<a>4<a>5</a>6</b>7</a>8<a/>9"
   558  	z := NewTokenizer(bytes.NewBufferString(s))
   559  	var result bytes.Buffer
   560  	depth := 0
   561  loop:
   562  	for {
   563  		tt := z.Next()
   564  		switch tt {
   565  		case ErrorToken:
   566  			if z.Err() != io.EOF {
   567  				t.Error(z.Err())
   568  			}
   569  			break loop
   570  		case TextToken:
   571  			if depth > 0 {
   572  				result.Write(z.Text())
   573  			}
   574  		case StartTagToken, EndTagToken:
   575  			tn, _ := z.TagName()
   576  			if len(tn) == 1 && tn[0] == 'a' {
   577  				if tt == StartTagToken {
   578  					depth++
   579  				} else {
   580  					depth--
   581  				}
   582  			}
   583  		}
   584  	}
   585  	u := "14567"
   586  	v := string(result.Bytes())
   587  	if u != v {
   588  		t.Errorf("TestBufAPI: want %q got %q", u, v)
   589  	}
   590  }
   591  
   592  func TestConvertNewlines(t *testing.T) {
   593  	testCases := map[string]string{
   594  		"Mac\rDOS\r\nUnix\n":    "Mac\nDOS\nUnix\n",
   595  		"Unix\nMac\rDOS\r\n":    "Unix\nMac\nDOS\n",
   596  		"DOS\r\nDOS\r\nDOS\r\n": "DOS\nDOS\nDOS\n",
   597  		"":                      "",
   598  		"\n":                    "\n",
   599  		"\n\r":                  "\n\n",
   600  		"\r":                    "\n",
   601  		"\r\n":                  "\n",
   602  		"\r\n\n":                "\n\n",
   603  		"\r\n\r":                "\n\n",
   604  		"\r\n\r\n":              "\n\n",
   605  		"\r\r":                  "\n\n",
   606  		"\r\r\n":                "\n\n",
   607  		"\r\r\n\n":              "\n\n\n",
   608  		"\r\r\r\n":              "\n\n\n",
   609  		"\r \n":                 "\n \n",
   610  		"xyz":                   "xyz",
   611  	}
   612  	for in, want := range testCases {
   613  		if got := string(convertNewlines([]byte(in))); got != want {
   614  			t.Errorf("input %q: got %q, want %q", in, got, want)
   615  		}
   616  	}
   617  }
   618  
   619  func TestReaderEdgeCases(t *testing.T) {
   620  	const s = "<p>An io.Reader can return (0, nil) or (n, io.EOF).</p>"
   621  	testCases := []io.Reader{
   622  		&zeroOneByteReader{s: s},
   623  		&eofStringsReader{s: s},
   624  		&stuckReader{},
   625  	}
   626  	for i, tc := range testCases {
   627  		got := []TokenType{}
   628  		z := NewTokenizer(tc)
   629  		for {
   630  			tt := z.Next()
   631  			if tt == ErrorToken {
   632  				break
   633  			}
   634  			got = append(got, tt)
   635  		}
   636  		if err := z.Err(); err != nil && err != io.EOF {
   637  			if err != io.ErrNoProgress {
   638  				t.Errorf("i=%d: %v", i, err)
   639  			}
   640  			continue
   641  		}
   642  		want := []TokenType{
   643  			StartTagToken,
   644  			TextToken,
   645  			EndTagToken,
   646  		}
   647  		if !reflect.DeepEqual(got, want) {
   648  			t.Errorf("i=%d: got %v, want %v", i, got, want)
   649  			continue
   650  		}
   651  	}
   652  }
   653  
   654  // zeroOneByteReader is like a strings.Reader that alternates between
   655  // returning 0 bytes and 1 byte at a time.
   656  type zeroOneByteReader struct {
   657  	s string
   658  	n int
   659  }
   660  
   661  func (r *zeroOneByteReader) Read(p []byte) (int, error) {
   662  	if len(p) == 0 {
   663  		return 0, nil
   664  	}
   665  	if len(r.s) == 0 {
   666  		return 0, io.EOF
   667  	}
   668  	r.n++
   669  	if r.n%2 != 0 {
   670  		return 0, nil
   671  	}
   672  	p[0], r.s = r.s[0], r.s[1:]
   673  	return 1, nil
   674  }
   675  
   676  // eofStringsReader is like a strings.Reader but can return an (n, err) where
   677  // n > 0 && err != nil.
   678  type eofStringsReader struct {
   679  	s string
   680  }
   681  
   682  func (r *eofStringsReader) Read(p []byte) (int, error) {
   683  	n := copy(p, r.s)
   684  	r.s = r.s[n:]
   685  	if r.s != "" {
   686  		return n, nil
   687  	}
   688  	return n, io.EOF
   689  }
   690  
   691  // stuckReader is an io.Reader that always returns no data and no error.
   692  type stuckReader struct{}
   693  
   694  func (*stuckReader) Read(p []byte) (int, error) {
   695  	return 0, nil
   696  }
   697  
// Benchmark levels controlling how much per-token work benchmarkTokenizer
// performs beyond calling z.Next.
const (
	rawLevel = iota // only z.Raw: raw token bytes, no unescaping or lower-casing
	lowLevel        // z.Text/z.TagName/z.TagAttr: []byte views valid only until the next z.Next
	highLevel       // z.Token: converts to strings that remain valid beyond z.Next
)
   703  
// benchmarkTokenizer tokenizes testdata/go1.html b.N times, exercising the
// tokenizer API at the given level of per-token detail (rawLevel, lowLevel
// or highLevel).
func benchmarkTokenizer(b *testing.B, level int) {
	buf, err := ioutil.ReadFile("testdata/go1.html")
	if err != nil {
		b.Fatalf("could not read testdata/go1.html: %v", err)
	}
	b.SetBytes(int64(len(buf)))
	runtime.GC()
	b.ReportAllocs()
	b.ResetTimer()
	for i := 0; i < b.N; i++ {
		z := NewTokenizer(bytes.NewBuffer(buf))
		for {
			tt := z.Next()
			if tt == ErrorToken {
				// io.EOF is the expected way to finish; anything else is a bug.
				if err := z.Err(); err != nil && err != io.EOF {
					b.Fatalf("tokenizer error: %v", err)
				}
				break
			}
			switch level {
			case rawLevel:
				// Calling z.Raw just returns the raw bytes of the token. It does
				// not unescape &lt; to <, or lower-case tag names and attribute keys.
				z.Raw()
			case lowLevel:
				// Calling z.Text, z.TagName and z.TagAttr returns []byte values
				// whose contents may change on the next call to z.Next.
				switch tt {
				case TextToken, CommentToken, DoctypeToken:
					z.Text()
				case StartTagToken, SelfClosingTagToken:
					_, more := z.TagName()
					for more {
						_, _, more = z.TagAttr()
					}
				case EndTagToken:
					z.TagName()
				}
			case highLevel:
				// Calling z.Token converts []byte values to strings whose validity
				// extend beyond the next call to z.Next.
				z.Token()
			}
		}
	}
}
   750  
// The benchmarks below measure tokenization cost at increasing levels of
// per-token processing; see benchmarkTokenizer for what each level does.
func BenchmarkRawLevelTokenizer(b *testing.B)  { benchmarkTokenizer(b, rawLevel) }
func BenchmarkLowLevelTokenizer(b *testing.B)  { benchmarkTokenizer(b, lowLevel) }
func BenchmarkHighLevelTokenizer(b *testing.B) { benchmarkTokenizer(b, highLevel) }