github.com/twelsh-aw/go/src@v0.0.0-20230516233729-a56fe86a7c81/encoding/xml/xml_test.go (about)

     1  // Copyright 2009 The Go Authors. All rights reserved.
     2  // Use of this source code is governed by a BSD-style
     3  // license that can be found in the LICENSE file.
     4  
     5  package xml
     6  
     7  import (
     8  	"bytes"
     9  	"fmt"
    10  	"io"
    11  	"reflect"
    12  	"strings"
    13  	"testing"
    14  	"unicode/utf8"
    15  )
    16  
    17  type toks struct {
    18  	earlyEOF bool
    19  	t        []Token
    20  }
    21  
    22  func (t *toks) Token() (Token, error) {
    23  	if len(t.t) == 0 {
    24  		return nil, io.EOF
    25  	}
    26  	var tok Token
    27  	tok, t.t = t.t[0], t.t[1:]
    28  	if t.earlyEOF && len(t.t) == 0 {
    29  		return tok, io.EOF
    30  	}
    31  	return tok, nil
    32  }
    33  
    34  func TestDecodeEOF(t *testing.T) {
    35  	start := StartElement{Name: Name{Local: "test"}}
    36  	tests := []struct {
    37  		name   string
    38  		tokens []Token
    39  		ok     bool
    40  	}{
    41  		{
    42  			name: "OK",
    43  			tokens: []Token{
    44  				start,
    45  				start.End(),
    46  			},
    47  			ok: true,
    48  		},
    49  		{
    50  			name: "Malformed",
    51  			tokens: []Token{
    52  				start,
    53  				StartElement{Name: Name{Local: "bad"}},
    54  				start.End(),
    55  			},
    56  			ok: false,
    57  		},
    58  	}
    59  	for _, tc := range tests {
    60  		for _, eof := range []bool{true, false} {
    61  			name := fmt.Sprintf("%s/earlyEOF=%v", tc.name, eof)
    62  			t.Run(name, func(t *testing.T) {
    63  				d := NewTokenDecoder(&toks{
    64  					earlyEOF: eof,
    65  					t:        tc.tokens,
    66  				})
    67  				err := d.Decode(&struct {
    68  					XMLName Name `xml:"test"`
    69  				}{})
    70  				if tc.ok && err != nil {
    71  					t.Fatalf("d.Decode: expected nil error, got %v", err)
    72  				}
    73  				if _, ok := err.(*SyntaxError); !tc.ok && !ok {
    74  					t.Errorf("d.Decode: expected syntax error, got %v", err)
    75  				}
    76  			})
    77  		}
    78  	}
    79  }
    80  
    81  type toksNil struct {
    82  	returnEOF bool
    83  	t         []Token
    84  }
    85  
    86  func (t *toksNil) Token() (Token, error) {
    87  	if len(t.t) == 0 {
    88  		if !t.returnEOF {
    89  			// Return nil, nil before returning an EOF. It's legal, but
    90  			// discouraged.
    91  			t.returnEOF = true
    92  			return nil, nil
    93  		}
    94  		return nil, io.EOF
    95  	}
    96  	var tok Token
    97  	tok, t.t = t.t[0], t.t[1:]
    98  	return tok, nil
    99  }
   100  
   101  func TestDecodeNilToken(t *testing.T) {
   102  	for _, strict := range []bool{true, false} {
   103  		name := fmt.Sprintf("Strict=%v", strict)
   104  		t.Run(name, func(t *testing.T) {
   105  			start := StartElement{Name: Name{Local: "test"}}
   106  			bad := StartElement{Name: Name{Local: "bad"}}
   107  			d := NewTokenDecoder(&toksNil{
   108  				// Malformed
   109  				t: []Token{start, bad, start.End()},
   110  			})
   111  			d.Strict = strict
   112  			err := d.Decode(&struct {
   113  				XMLName Name `xml:"test"`
   114  			}{})
   115  			if _, ok := err.(*SyntaxError); !ok {
   116  				t.Errorf("d.Decode: expected syntax error, got %v", err)
   117  			}
   118  		})
   119  	}
   120  }
   121  
// testInput is the XML document tokenized by TestRawToken and TestToken.
// It contains a processing instruction, a DOCTYPE directive, namespace
// declarations, predefined/numeric/custom entities, a self-closing element,
// a CDATA section and a trailing comment with no final newline. The "\r\n\t"
// splice places CR, LF and TAB inside the <body> start tag.
const testInput = `
<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN"
  "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
<body xmlns:foo="ns1" xmlns="ns2" xmlns:tag="ns3" ` +
	"\r\n\t" + `  >
  <hello lang="en">World &lt;&gt;&apos;&quot; &#x767d;&#40300;翔</hello>
  <query>&何; &is-it;</query>
  <goodbye />
  <outer foo:attr="value" xmlns:tag="ns4">
    <inner/>
  </outer>
  <tag:name>
    <![CDATA[Some text here.]]>
  </tag:name>
</body><!-- missing final newline -->`

// testEntity supplies replacements for the two custom entities referenced in
// the <query> element of testInput; tests assign it to Decoder.Entity.
var testEntity = map[string]string{"何": "What", "is-it": "is it?"}
   140  
// rawTokens is the token sequence TestRawToken expects from RawToken over
// testInput with testEntity installed. Element and attribute names keep the
// prefixes exactly as written in the input (for example Name{"tag", "name"}).
var rawTokens = []Token{
	CharData("\n"),
	ProcInst{"xml", []byte(`version="1.0" encoding="UTF-8"`)},
	CharData("\n"),
	Directive(`DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN"
  "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd"`),
	CharData("\n"),
	StartElement{Name{"", "body"}, []Attr{{Name{"xmlns", "foo"}, "ns1"}, {Name{"", "xmlns"}, "ns2"}, {Name{"xmlns", "tag"}, "ns3"}}},
	CharData("\n  "),
	StartElement{Name{"", "hello"}, []Attr{{Name{"", "lang"}, "en"}}},
	CharData("World <>'\" 白鵬翔"),
	EndElement{Name{"", "hello"}},
	CharData("\n  "),
	StartElement{Name{"", "query"}, []Attr{}},
	CharData("What is it?"),
	EndElement{Name{"", "query"}},
	CharData("\n  "),
	StartElement{Name{"", "goodbye"}, []Attr{}},
	EndElement{Name{"", "goodbye"}},
	CharData("\n  "),
	StartElement{Name{"", "outer"}, []Attr{{Name{"foo", "attr"}, "value"}, {Name{"xmlns", "tag"}, "ns4"}}},
	CharData("\n    "),
	StartElement{Name{"", "inner"}, []Attr{}},
	EndElement{Name{"", "inner"}},
	CharData("\n  "),
	EndElement{Name{"", "outer"}},
	CharData("\n  "),
	StartElement{Name{"tag", "name"}, []Attr{}},
	CharData("\n    "),
	CharData("Some text here."),
	CharData("\n  "),
	EndElement{Name{"tag", "name"}},
	CharData("\n"),
	EndElement{Name{"", "body"}},
	Comment(" missing final newline "),
}
   177  
// cookedTokens is the token sequence TestToken expects from Token over
// testInput with testEntity installed. Compared with rawTokens, the Space of
// each element and prefixed attribute holds the namespace value declared in
// the input ("ns1", "ns2", "ns3") rather than the literal prefix.
var cookedTokens = []Token{
	CharData("\n"),
	ProcInst{"xml", []byte(`version="1.0" encoding="UTF-8"`)},
	CharData("\n"),
	Directive(`DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN"
  "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd"`),
	CharData("\n"),
	StartElement{Name{"ns2", "body"}, []Attr{{Name{"xmlns", "foo"}, "ns1"}, {Name{"", "xmlns"}, "ns2"}, {Name{"xmlns", "tag"}, "ns3"}}},
	CharData("\n  "),
	StartElement{Name{"ns2", "hello"}, []Attr{{Name{"", "lang"}, "en"}}},
	CharData("World <>'\" 白鵬翔"),
	EndElement{Name{"ns2", "hello"}},
	CharData("\n  "),
	StartElement{Name{"ns2", "query"}, []Attr{}},
	CharData("What is it?"),
	EndElement{Name{"ns2", "query"}},
	CharData("\n  "),
	StartElement{Name{"ns2", "goodbye"}, []Attr{}},
	EndElement{Name{"ns2", "goodbye"}},
	CharData("\n  "),
	StartElement{Name{"ns2", "outer"}, []Attr{{Name{"ns1", "attr"}, "value"}, {Name{"xmlns", "tag"}, "ns4"}}},
	CharData("\n    "),
	StartElement{Name{"ns2", "inner"}, []Attr{}},
	EndElement{Name{"ns2", "inner"}},
	CharData("\n  "),
	EndElement{Name{"ns2", "outer"}},
	CharData("\n  "),
	StartElement{Name{"ns3", "name"}, []Attr{}},
	CharData("\n    "),
	CharData("Some text here."),
	CharData("\n  "),
	EndElement{Name{"ns3", "name"}},
	CharData("\n"),
	EndElement{Name{"ns2", "body"}},
	Comment(" missing final newline "),
}
   214  
// testInputAltEncoding is a small document whose XML declaration names the
// made-up encoding "x-testing-uppercase"; its markup and text are upper case.
const testInputAltEncoding = `
<?xml version="1.0" encoding="x-testing-uppercase"?>
<TAG>VALUE</TAG>`

// rawTokensAltEncoding is the token sequence TestRawTokenAltEncoding expects
// from testInputAltEncoding: the element name and text after the declaration
// appear in lower case.
var rawTokensAltEncoding = []Token{
	CharData("\n"),
	ProcInst{"xml", []byte(`version="1.0" encoding="x-testing-uppercase"`)},
	CharData("\n"),
	StartElement{Name{"", "tag"}, []Attr{}},
	CharData("value"),
	EndElement{Name{"", "tag"}},
}
   227  
// xmlInput lists malformed documents. TestSyntax tokenizes each one and
// requires the first error returned to be a *SyntaxError. The first group
// are inputs cut off mid-construct; the second are other syntax errors.
var xmlInput = []string{
	// unexpected EOF cases
	"<",
	"<t",
	"<t ",
	"<t/",
	"<!",
	"<!-",
	"<!--",
	"<!--c-",
	"<!--c--",
	"<!d",
	"<t></",
	"<t></t",
	"<?",
	"<?p",
	"<t a",
	"<t a=",
	"<t a='",
	"<t a=''",
	"<t/><![",
	"<t/><![C",
	"<t/><![CDATA[d",
	"<t/><![CDATA[d]",
	"<t/><![CDATA[d]]",

	// other Syntax errors
	"<>",
	"<t/a",
	"<0 />",
	"<?0 >",
	//	"<!0 >",	// let the Token() caller handle
	"</0>",
	"<t 0=''>",
	"<t a='&'>",
	"<t a='<'>",
	"<t>&nbspc;</t>",
	"<t a>",
	"<t a=>",
	"<t a=v>",
	//	"<![CDATA[d]]>",	// let the Token() caller handle
	"<t></e>",
	"<t></>",
	"<t></t!",
	"<t>cdata]]></t>",
}
   274  
   275  func TestRawToken(t *testing.T) {
   276  	d := NewDecoder(strings.NewReader(testInput))
   277  	d.Entity = testEntity
   278  	testRawToken(t, d, testInput, rawTokens)
   279  }
   280  
// nonStrictInput holds elements whose text contains malformed or unknown
// entity references (missing semicolon, unknown name, bad numeric form,
// empty name, and so on). TestNonStrictRawToken reads it with Strict disabled.
const nonStrictInput = `
<tag>non&entity</tag>
<tag>&unknown;entity</tag>
<tag>&#123</tag>
<tag>&#zzz;</tag>
<tag>&なまえ3;</tag>
<tag>&lt-gt;</tag>
<tag>&;</tag>
<tag>&0a;</tag>
`
   291  
// nonStrictTokens is the token sequence TestNonStrictRawToken expects from
// nonStrictInput: each malformed entity reference is expected to come back
// verbatim as character data.
var nonStrictTokens = []Token{
	CharData("\n"),
	StartElement{Name{"", "tag"}, []Attr{}},
	CharData("non&entity"),
	EndElement{Name{"", "tag"}},
	CharData("\n"),
	StartElement{Name{"", "tag"}, []Attr{}},
	CharData("&unknown;entity"),
	EndElement{Name{"", "tag"}},
	CharData("\n"),
	StartElement{Name{"", "tag"}, []Attr{}},
	CharData("&#123"),
	EndElement{Name{"", "tag"}},
	CharData("\n"),
	StartElement{Name{"", "tag"}, []Attr{}},
	CharData("&#zzz;"),
	EndElement{Name{"", "tag"}},
	CharData("\n"),
	StartElement{Name{"", "tag"}, []Attr{}},
	CharData("&なまえ3;"),
	EndElement{Name{"", "tag"}},
	CharData("\n"),
	StartElement{Name{"", "tag"}, []Attr{}},
	CharData("&lt-gt;"),
	EndElement{Name{"", "tag"}},
	CharData("\n"),
	StartElement{Name{"", "tag"}, []Attr{}},
	CharData("&;"),
	EndElement{Name{"", "tag"}},
	CharData("\n"),
	StartElement{Name{"", "tag"}, []Attr{}},
	CharData("&0a;"),
	EndElement{Name{"", "tag"}},
	CharData("\n"),
}
   327  
   328  func TestNonStrictRawToken(t *testing.T) {
   329  	d := NewDecoder(strings.NewReader(nonStrictInput))
   330  	d.Strict = false
   331  	testRawToken(t, d, nonStrictInput, nonStrictTokens)
   332  }
   333  
   334  type downCaser struct {
   335  	t *testing.T
   336  	r io.ByteReader
   337  }
   338  
   339  func (d *downCaser) ReadByte() (c byte, err error) {
   340  	c, err = d.r.ReadByte()
   341  	if c >= 'A' && c <= 'Z' {
   342  		c += 'a' - 'A'
   343  	}
   344  	return
   345  }
   346  
   347  func (d *downCaser) Read(p []byte) (int, error) {
   348  	d.t.Fatalf("unexpected Read call on downCaser reader")
   349  	panic("unreachable")
   350  }
   351  
   352  func TestRawTokenAltEncoding(t *testing.T) {
   353  	d := NewDecoder(strings.NewReader(testInputAltEncoding))
   354  	d.CharsetReader = func(charset string, input io.Reader) (io.Reader, error) {
   355  		if charset != "x-testing-uppercase" {
   356  			t.Fatalf("unexpected charset %q", charset)
   357  		}
   358  		return &downCaser{t, input.(io.ByteReader)}, nil
   359  	}
   360  	testRawToken(t, d, testInputAltEncoding, rawTokensAltEncoding)
   361  }
   362  
   363  func TestRawTokenAltEncodingNoConverter(t *testing.T) {
   364  	d := NewDecoder(strings.NewReader(testInputAltEncoding))
   365  	token, err := d.RawToken()
   366  	if token == nil {
   367  		t.Fatalf("expected a token on first RawToken call")
   368  	}
   369  	if err != nil {
   370  		t.Fatal(err)
   371  	}
   372  	token, err = d.RawToken()
   373  	if token != nil {
   374  		t.Errorf("expected a nil token; got %#v", token)
   375  	}
   376  	if err == nil {
   377  		t.Fatalf("expected an error on second RawToken call")
   378  	}
   379  	const encoding = "x-testing-uppercase"
   380  	if !strings.Contains(err.Error(), encoding) {
   381  		t.Errorf("expected error to contain %q; got error: %v",
   382  			encoding, err)
   383  	}
   384  }
   385  
   386  func testRawToken(t *testing.T, d *Decoder, raw string, rawTokens []Token) {
   387  	lastEnd := int64(0)
   388  	for i, want := range rawTokens {
   389  		start := d.InputOffset()
   390  		have, err := d.RawToken()
   391  		end := d.InputOffset()
   392  		if err != nil {
   393  			t.Fatalf("token %d: unexpected error: %s", i, err)
   394  		}
   395  		if !reflect.DeepEqual(have, want) {
   396  			var shave, swant string
   397  			if _, ok := have.(CharData); ok {
   398  				shave = fmt.Sprintf("CharData(%q)", have)
   399  			} else {
   400  				shave = fmt.Sprintf("%#v", have)
   401  			}
   402  			if _, ok := want.(CharData); ok {
   403  				swant = fmt.Sprintf("CharData(%q)", want)
   404  			} else {
   405  				swant = fmt.Sprintf("%#v", want)
   406  			}
   407  			t.Errorf("token %d = %s, want %s", i, shave, swant)
   408  		}
   409  
   410  		// Check that InputOffset returned actual token.
   411  		switch {
   412  		case start < lastEnd:
   413  			t.Errorf("token %d: position [%d,%d) for %T is before previous token", i, start, end, have)
   414  		case start >= end:
   415  			// Special case: EndElement can be synthesized.
   416  			if start == end && end == lastEnd {
   417  				break
   418  			}
   419  			t.Errorf("token %d: position [%d,%d) for %T is empty", i, start, end, have)
   420  		case end > int64(len(raw)):
   421  			t.Errorf("token %d: position [%d,%d) for %T extends beyond input", i, start, end, have)
   422  		default:
   423  			text := raw[start:end]
   424  			if strings.ContainsAny(text, "<>") && (!strings.HasPrefix(text, "<") || !strings.HasSuffix(text, ">")) {
   425  				t.Errorf("token %d: misaligned raw token %#q for %T", i, text, have)
   426  			}
   427  		}
   428  		lastEnd = end
   429  	}
   430  }
   431  
// Ensure that directives (specifically !DOCTYPE) include the complete
// text of any nested directives, noting that < and > do not change
// nesting depth if they are in single or double quotes.

// nestedDirectivesInput is a series of DOCTYPE directives, each containing a
// nested <!ENTITY ...> declaration; several place '<' or '>' inside single
// or double quotes.
var nestedDirectivesInput = `
<!DOCTYPE [<!ENTITY rdf "http://www.w3.org/1999/02/22-rdf-syntax-ns#">]>
<!DOCTYPE [<!ENTITY xlt ">">]>
<!DOCTYPE [<!ENTITY xlt "<">]>
<!DOCTYPE [<!ENTITY xlt '>'>]>
<!DOCTYPE [<!ENTITY xlt '<'>]>
<!DOCTYPE [<!ENTITY xlt '">'>]>
<!DOCTYPE [<!ENTITY xlt "'<">]>
`

// nestedDirectivesTokens is the token sequence TestNestedDirectives expects
// from nestedDirectivesInput: one Directive per line carrying the full text
// between "<!" and the closing ">", separated by newline character data.
var nestedDirectivesTokens = []Token{
	CharData("\n"),
	Directive(`DOCTYPE [<!ENTITY rdf "http://www.w3.org/1999/02/22-rdf-syntax-ns#">]`),
	CharData("\n"),
	Directive(`DOCTYPE [<!ENTITY xlt ">">]`),
	CharData("\n"),
	Directive(`DOCTYPE [<!ENTITY xlt "<">]`),
	CharData("\n"),
	Directive(`DOCTYPE [<!ENTITY xlt '>'>]`),
	CharData("\n"),
	Directive(`DOCTYPE [<!ENTITY xlt '<'>]`),
	CharData("\n"),
	Directive(`DOCTYPE [<!ENTITY xlt '">'>]`),
	CharData("\n"),
	Directive(`DOCTYPE [<!ENTITY xlt "'<">]`),
	CharData("\n"),
}
   463  
   464  func TestNestedDirectives(t *testing.T) {
   465  	d := NewDecoder(strings.NewReader(nestedDirectivesInput))
   466  
   467  	for i, want := range nestedDirectivesTokens {
   468  		have, err := d.Token()
   469  		if err != nil {
   470  			t.Fatalf("token %d: unexpected error: %s", i, err)
   471  		}
   472  		if !reflect.DeepEqual(have, want) {
   473  			t.Errorf("token %d = %#v want %#v", i, have, want)
   474  		}
   475  	}
   476  }
   477  
   478  func TestToken(t *testing.T) {
   479  	d := NewDecoder(strings.NewReader(testInput))
   480  	d.Entity = testEntity
   481  
   482  	for i, want := range cookedTokens {
   483  		have, err := d.Token()
   484  		if err != nil {
   485  			t.Fatalf("token %d: unexpected error: %s", i, err)
   486  		}
   487  		if !reflect.DeepEqual(have, want) {
   488  			t.Errorf("token %d = %#v want %#v", i, have, want)
   489  		}
   490  	}
   491  }
   492  
   493  func TestSyntax(t *testing.T) {
   494  	for i := range xmlInput {
   495  		d := NewDecoder(strings.NewReader(xmlInput[i]))
   496  		var err error
   497  		for _, err = d.Token(); err == nil; _, err = d.Token() {
   498  		}
   499  		if _, ok := err.(*SyntaxError); !ok {
   500  			t.Fatalf(`xmlInput "%s": expected SyntaxError not received`, xmlInput[i])
   501  		}
   502  	}
   503  }
   504  
   505  func TestInputLinePos(t *testing.T) {
   506  	testInput := `<root>
   507  <?pi
   508   ?>  <elt
   509  att
   510  =
   511  "val">
   512  <![CDATA[
   513  ]]><!--
   514  
   515  --></elt>
   516  </root>`
   517  	linePos := [][]int{
   518  		{1, 7},
   519  		{2, 1},
   520  		{3, 4},
   521  		{3, 6},
   522  		{6, 7},
   523  		{7, 1},
   524  		{8, 4},
   525  		{10, 4},
   526  		{10, 10},
   527  		{11, 1},
   528  		{11, 8},
   529  	}
   530  	dec := NewDecoder(strings.NewReader(testInput))
   531  	for _, want := range linePos {
   532  		if _, err := dec.Token(); err != nil {
   533  			t.Errorf("Unexpected error: %v", err)
   534  			continue
   535  		}
   536  
   537  		gotLine, gotCol := dec.InputPos()
   538  		if gotLine != want[0] || gotCol != want[1] {
   539  			t.Errorf("dec.InputPos() = %d,%d, want %d,%d", gotLine, gotCol, want[0], want[1])
   540  		}
   541  	}
   542  }
   543  
   544  type allScalars struct {
   545  	True1     bool
   546  	True2     bool
   547  	False1    bool
   548  	False2    bool
   549  	Int       int
   550  	Int8      int8
   551  	Int16     int16
   552  	Int32     int32
   553  	Int64     int64
   554  	Uint      int
   555  	Uint8     uint8
   556  	Uint16    uint16
   557  	Uint32    uint32
   558  	Uint64    uint64
   559  	Uintptr   uintptr
   560  	Float32   float32
   561  	Float64   float64
   562  	String    string
   563  	PtrString *string
   564  }
   565  
// all is the allScalars value that TestAllScalars expects after decoding
// testScalarsInput.
var all = allScalars{
	True1:     true,
	True2:     true,
	False1:    false,
	False2:    false,
	Int:       1,
	Int8:      -2,
	Int16:     3,
	Int32:     -4,
	Int64:     5,
	Uint:      6,
	Uint8:     7,
	Uint16:    8,
	Uint32:    9,
	Uint64:    10,
	Uintptr:   11,
	Float32:   13.0,
	Float64:   14.0,
	String:    "15",
	PtrString: &sixteen,
}

// sixteen backs the PtrString field of all, which needs an addressable string.
var sixteen = "16"
   589  
// testScalarsInput is the document TestAllScalars unmarshals into allScalars.
// Booleans appear both as true/false and as 1/0. The <Float> element has no
// matching field in allScalars.
const testScalarsInput = `<allscalars>
	<True1>true</True1>
	<True2>1</True2>
	<False1>false</False1>
	<False2>0</False2>
	<Int>1</Int>
	<Int8>-2</Int8>
	<Int16>3</Int16>
	<Int32>-4</Int32>
	<Int64>5</Int64>
	<Uint>6</Uint>
	<Uint8>7</Uint8>
	<Uint16>8</Uint16>
	<Uint32>9</Uint32>
	<Uint64>10</Uint64>
	<Uintptr>11</Uintptr>
	<Float>12.0</Float>
	<Float32>13.0</Float32>
	<Float64>14.0</Float64>
	<String>15</String>
	<PtrString>16</PtrString>
</allscalars>`
   612  
   613  func TestAllScalars(t *testing.T) {
   614  	var a allScalars
   615  	err := Unmarshal([]byte(testScalarsInput), &a)
   616  
   617  	if err != nil {
   618  		t.Fatal(err)
   619  	}
   620  	if !reflect.DeepEqual(a, all) {
   621  		t.Errorf("have %+v want %+v", a, all)
   622  	}
   623  }
   624  
   625  type item struct {
   626  	FieldA string
   627  }
   628  
   629  func TestIssue569(t *testing.T) {
   630  	data := `<item><FieldA>abcd</FieldA></item>`
   631  	var i item
   632  	err := Unmarshal([]byte(data), &i)
   633  
   634  	if err != nil || i.FieldA != "abcd" {
   635  		t.Fatal("Expecting abcd")
   636  	}
   637  }
   638  
   639  func TestUnquotedAttrs(t *testing.T) {
   640  	data := "<tag attr=azAZ09:-_\t>"
   641  	d := NewDecoder(strings.NewReader(data))
   642  	d.Strict = false
   643  	token, err := d.Token()
   644  	if _, ok := err.(*SyntaxError); ok {
   645  		t.Errorf("Unexpected error: %v", err)
   646  	}
   647  	if token.(StartElement).Name.Local != "tag" {
   648  		t.Errorf("Unexpected tag name: %v", token.(StartElement).Name.Local)
   649  	}
   650  	attr := token.(StartElement).Attr[0]
   651  	if attr.Value != "azAZ09:-_" {
   652  		t.Errorf("Unexpected attribute value: %v", attr.Value)
   653  	}
   654  	if attr.Name.Local != "attr" {
   655  		t.Errorf("Unexpected attribute name: %v", attr.Name.Local)
   656  	}
   657  }
   658  
   659  func TestValuelessAttrs(t *testing.T) {
   660  	tests := [][3]string{
   661  		{"<p nowrap>", "p", "nowrap"},
   662  		{"<p nowrap >", "p", "nowrap"},
   663  		{"<input checked/>", "input", "checked"},
   664  		{"<input checked />", "input", "checked"},
   665  	}
   666  	for _, test := range tests {
   667  		d := NewDecoder(strings.NewReader(test[0]))
   668  		d.Strict = false
   669  		token, err := d.Token()
   670  		if _, ok := err.(*SyntaxError); ok {
   671  			t.Errorf("Unexpected error: %v", err)
   672  		}
   673  		if token.(StartElement).Name.Local != test[1] {
   674  			t.Errorf("Unexpected tag name: %v", token.(StartElement).Name.Local)
   675  		}
   676  		attr := token.(StartElement).Attr[0]
   677  		if attr.Value != test[2] {
   678  			t.Errorf("Unexpected attribute value: %v", attr.Value)
   679  		}
   680  		if attr.Name.Local != test[2] {
   681  			t.Errorf("Unexpected attribute name: %v", attr.Name.Local)
   682  		}
   683  	}
   684  }
   685  
   686  func TestCopyTokenCharData(t *testing.T) {
   687  	data := []byte("same data")
   688  	var tok1 Token = CharData(data)
   689  	tok2 := CopyToken(tok1)
   690  	if !reflect.DeepEqual(tok1, tok2) {
   691  		t.Error("CopyToken(CharData) != CharData")
   692  	}
   693  	data[1] = 'o'
   694  	if reflect.DeepEqual(tok1, tok2) {
   695  		t.Error("CopyToken(CharData) uses same buffer.")
   696  	}
   697  }
   698  
   699  func TestCopyTokenStartElement(t *testing.T) {
   700  	elt := StartElement{Name{"", "hello"}, []Attr{{Name{"", "lang"}, "en"}}}
   701  	var tok1 Token = elt
   702  	tok2 := CopyToken(tok1)
   703  	if tok1.(StartElement).Attr[0].Value != "en" {
   704  		t.Error("CopyToken overwrote Attr[0]")
   705  	}
   706  	if !reflect.DeepEqual(tok1, tok2) {
   707  		t.Error("CopyToken(StartElement) != StartElement")
   708  	}
   709  	tok1.(StartElement).Attr[0] = Attr{Name{"", "lang"}, "de"}
   710  	if reflect.DeepEqual(tok1, tok2) {
   711  		t.Error("CopyToken(CharData) uses same buffer.")
   712  	}
   713  }
   714  
   715  func TestCopyTokenComment(t *testing.T) {
   716  	data := []byte("<!-- some comment -->")
   717  	var tok1 Token = Comment(data)
   718  	tok2 := CopyToken(tok1)
   719  	if !reflect.DeepEqual(tok1, tok2) {
   720  		t.Error("CopyToken(Comment) != Comment")
   721  	}
   722  	data[1] = 'o'
   723  	if reflect.DeepEqual(tok1, tok2) {
   724  		t.Error("CopyToken(Comment) uses same buffer.")
   725  	}
   726  }
   727  
   728  func TestSyntaxErrorLineNum(t *testing.T) {
   729  	testInput := "<P>Foo<P>\n\n<P>Bar</>\n"
   730  	d := NewDecoder(strings.NewReader(testInput))
   731  	var err error
   732  	for _, err = d.Token(); err == nil; _, err = d.Token() {
   733  	}
   734  	synerr, ok := err.(*SyntaxError)
   735  	if !ok {
   736  		t.Error("Expected SyntaxError.")
   737  	}
   738  	if synerr.Line != 3 {
   739  		t.Error("SyntaxError didn't have correct line number.")
   740  	}
   741  }
   742  
   743  func TestTrailingRawToken(t *testing.T) {
   744  	input := `<FOO></FOO>  `
   745  	d := NewDecoder(strings.NewReader(input))
   746  	var err error
   747  	for _, err = d.RawToken(); err == nil; _, err = d.RawToken() {
   748  	}
   749  	if err != io.EOF {
   750  		t.Fatalf("d.RawToken() = _, %v, want _, io.EOF", err)
   751  	}
   752  }
   753  
   754  func TestTrailingToken(t *testing.T) {
   755  	input := `<FOO></FOO>  `
   756  	d := NewDecoder(strings.NewReader(input))
   757  	var err error
   758  	for _, err = d.Token(); err == nil; _, err = d.Token() {
   759  	}
   760  	if err != io.EOF {
   761  		t.Fatalf("d.Token() = _, %v, want _, io.EOF", err)
   762  	}
   763  }
   764  
   765  func TestEntityInsideCDATA(t *testing.T) {
   766  	input := `<test><![CDATA[ &val=foo ]]></test>`
   767  	d := NewDecoder(strings.NewReader(input))
   768  	var err error
   769  	for _, err = d.Token(); err == nil; _, err = d.Token() {
   770  	}
   771  	if err != io.EOF {
   772  		t.Fatalf("d.Token() = _, %v, want _, io.EOF", err)
   773  	}
   774  }
   775  
// characterTests pairs inputs containing disallowed characters or invalid
// entity references with the SyntaxError.Msg that TestDisallowedCharacters
// expects for each.
var characterTests = []struct {
	in  string
	err string
}{
	{"\x12<doc/>", "illegal character code U+0012"},
	{"<?xml version=\"1.0\"?>\x0b<doc/>", "illegal character code U+000B"},
	{"\xef\xbf\xbe<doc/>", "illegal character code U+FFFE"},
	{"<?xml version=\"1.0\"?><doc>\r\n<hiya/>\x07<toots/></doc>", "illegal character code U+0007"},
	{"<?xml version=\"1.0\"?><doc \x12='value'>what's up</doc>", "expected attribute name in element"},
	{"<doc>&abc\x01;</doc>", "invalid character entity &abc (no semicolon)"},
	{"<doc>&\x01;</doc>", "invalid character entity & (no semicolon)"},
	{"<doc>&\xef\xbf\xbe;</doc>", "invalid character entity &\uFFFE;"},
	{"<doc>&hello;</doc>", "invalid character entity &hello;"},
}
   790  
   791  func TestDisallowedCharacters(t *testing.T) {
   792  
   793  	for i, tt := range characterTests {
   794  		d := NewDecoder(strings.NewReader(tt.in))
   795  		var err error
   796  
   797  		for err == nil {
   798  			_, err = d.Token()
   799  		}
   800  		synerr, ok := err.(*SyntaxError)
   801  		if !ok {
   802  			t.Fatalf("input %d d.Token() = _, %v, want _, *SyntaxError", i, err)
   803  		}
   804  		if synerr.Msg != tt.err {
   805  			t.Fatalf("input %d synerr.Msg wrong: want %q, got %q", i, tt.err, synerr.Msg)
   806  		}
   807  	}
   808  }
   809  
   810  func TestIsInCharacterRange(t *testing.T) {
   811  	invalid := []rune{
   812  		utf8.MaxRune + 1,
   813  		0xD800, // surrogate min
   814  		0xDFFF, // surrogate max
   815  		-1,
   816  	}
   817  	for _, r := range invalid {
   818  		if isInCharacterRange(r) {
   819  			t.Errorf("rune %U considered valid", r)
   820  		}
   821  	}
   822  }
   823  
// procInstTests pairs the body of an XML declaration with the values
// TestProcInstEncoding expects procInst to extract for "version" (expect[0])
// and "encoding" (expect[1]). An empty string means no value is expected,
// as in the row with an unquoted encoding.
var procInstTests = []struct {
	input  string
	expect [2]string
}{
	{`version="1.0" encoding="utf-8"`, [2]string{"1.0", "utf-8"}},
	{`version="1.0" encoding='utf-8'`, [2]string{"1.0", "utf-8"}},
	{`version="1.0" encoding='utf-8' `, [2]string{"1.0", "utf-8"}},
	{`version="1.0" encoding=utf-8`, [2]string{"1.0", ""}},
	{`encoding="FOO" `, [2]string{"", "FOO"}},
}
   834  
   835  func TestProcInstEncoding(t *testing.T) {
   836  	for _, test := range procInstTests {
   837  		if got := procInst("version", test.input); got != test.expect[0] {
   838  			t.Errorf("procInst(version, %q) = %q; want %q", test.input, got, test.expect[0])
   839  		}
   840  		if got := procInst("encoding", test.input); got != test.expect[1] {
   841  			t.Errorf("procInst(encoding, %q) = %q; want %q", test.input, got, test.expect[1])
   842  		}
   843  	}
   844  }
   845  
// Ensure that directives with comments include the complete
// text of any nested directives.

// directivesWithCommentsInput is a series of DOCTYPE directives that embed
// comments, including the edge cases "<!->", "<!>", "<!---->", "<!-->-->"
// and "<!--->-->".
var directivesWithCommentsInput = `
<!DOCTYPE [<!-- a comment --><!ENTITY rdf "http://www.w3.org/1999/02/22-rdf-syntax-ns#">]>
<!DOCTYPE [<!ENTITY go "Golang"><!-- a comment-->]>
<!DOCTYPE <!-> <!> <!----> <!-->--> <!--->--> [<!ENTITY go "Golang"><!-- a comment-->]>
`

// directivesWithCommentsTokens is the token sequence
// TestDirectivesWithComments expects from directivesWithCommentsInput. In the
// expected directive text each embedded comment appears as a single space,
// while "<!->" and "<!>" are kept as written.
var directivesWithCommentsTokens = []Token{
	CharData("\n"),
	Directive(`DOCTYPE [ <!ENTITY rdf "http://www.w3.org/1999/02/22-rdf-syntax-ns#">]`),
	CharData("\n"),
	Directive(`DOCTYPE [<!ENTITY go "Golang"> ]`),
	CharData("\n"),
	Directive(`DOCTYPE <!-> <!>       [<!ENTITY go "Golang"> ]`),
	CharData("\n"),
}
   864  
   865  func TestDirectivesWithComments(t *testing.T) {
   866  	d := NewDecoder(strings.NewReader(directivesWithCommentsInput))
   867  
   868  	for i, want := range directivesWithCommentsTokens {
   869  		have, err := d.Token()
   870  		if err != nil {
   871  			t.Fatalf("token %d: unexpected error: %s", i, err)
   872  		}
   873  		if !reflect.DeepEqual(have, want) {
   874  			t.Errorf("token %d = %#v want %#v", i, have, want)
   875  		}
   876  	}
   877  }
   878  
   879  // Writer whose Write method always returns an error.
   880  type errWriter struct{}
   881  
   882  func (errWriter) Write(p []byte) (n int, err error) { return 0, fmt.Errorf("unwritable") }
   883  
   884  func TestEscapeTextIOErrors(t *testing.T) {
   885  	expectErr := "unwritable"
   886  	err := EscapeText(errWriter{}, []byte{'A'})
   887  
   888  	if err == nil || err.Error() != expectErr {
   889  		t.Errorf("have %v, want %v", err, expectErr)
   890  	}
   891  }
   892  
   893  func TestEscapeTextInvalidChar(t *testing.T) {
   894  	input := []byte("A \x00 terminated string.")
   895  	expected := "A \uFFFD terminated string."
   896  
   897  	buff := new(strings.Builder)
   898  	if err := EscapeText(buff, input); err != nil {
   899  		t.Fatalf("have %v, want nil", err)
   900  	}
   901  	text := buff.String()
   902  
   903  	if text != expected {
   904  		t.Errorf("have %v, want %v", text, expected)
   905  	}
   906  }
   907  
   908  func TestIssue5880(t *testing.T) {
   909  	type T []byte
   910  	data, err := Marshal(T{192, 168, 0, 1})
   911  	if err != nil {
   912  		t.Errorf("Marshal error: %v", err)
   913  	}
   914  	if !utf8.Valid(data) {
   915  		t.Errorf("Marshal generated invalid UTF-8: %x", data)
   916  	}
   917  }
   918  
   919  func TestIssue8535(t *testing.T) {
   920  
   921  	type ExampleConflict struct {
   922  		XMLName  Name   `xml:"example"`
   923  		Link     string `xml:"link"`
   924  		AtomLink string `xml:"http://www.w3.org/2005/Atom link"` // Same name in a different name space
   925  	}
   926  	testCase := `<example>
   927  			<title>Example</title>
   928  			<link>http://example.com/default</link> <!-- not assigned -->
   929  			<link>http://example.com/home</link> <!-- not assigned -->
   930  			<ns:link xmlns:ns="http://www.w3.org/2005/Atom">http://example.com/ns</ns:link>
   931  		</example>`
   932  
   933  	var dest ExampleConflict
   934  	d := NewDecoder(strings.NewReader(testCase))
   935  	if err := d.Decode(&dest); err != nil {
   936  		t.Fatal(err)
   937  	}
   938  }
   939  
   940  func TestEncodeXMLNS(t *testing.T) {
   941  	testCases := []struct {
   942  		f    func() ([]byte, error)
   943  		want string
   944  		ok   bool
   945  	}{
   946  		{encodeXMLNS1, `<Test xmlns="http://example.com/ns"><Body>hello world</Body></Test>`, true},
   947  		{encodeXMLNS2, `<Test><body xmlns="http://example.com/ns">hello world</body></Test>`, true},
   948  		{encodeXMLNS3, `<Test xmlns="http://example.com/ns"><Body>hello world</Body></Test>`, true},
   949  		{encodeXMLNS4, `<Test xmlns="http://example.com/ns"><Body>hello world</Body></Test>`, false},
   950  	}
   951  
   952  	for i, tc := range testCases {
   953  		if b, err := tc.f(); err == nil {
   954  			if got, want := string(b), tc.want; got != want {
   955  				t.Errorf("%d: got %s, want %s \n", i, got, want)
   956  			}
   957  		} else {
   958  			t.Errorf("%d: marshal failed with %s", i, err)
   959  		}
   960  	}
   961  }
   962  
   963  func encodeXMLNS1() ([]byte, error) {
   964  
   965  	type T struct {
   966  		XMLName Name   `xml:"Test"`
   967  		Ns      string `xml:"xmlns,attr"`
   968  		Body    string
   969  	}
   970  
   971  	s := &T{Ns: "http://example.com/ns", Body: "hello world"}
   972  	return Marshal(s)
   973  }
   974  
   975  func encodeXMLNS2() ([]byte, error) {
   976  
   977  	type Test struct {
   978  		Body string `xml:"http://example.com/ns body"`
   979  	}
   980  
   981  	s := &Test{Body: "hello world"}
   982  	return Marshal(s)
   983  }
   984  
   985  func encodeXMLNS3() ([]byte, error) {
   986  
   987  	type Test struct {
   988  		XMLName Name `xml:"http://example.com/ns Test"`
   989  		Body    string
   990  	}
   991  
   992  	//s := &Test{XMLName: Name{"http://example.com/ns",""}, Body: "hello world"} is unusable as the "-" is missing
   993  	// as documentation states
   994  	s := &Test{Body: "hello world"}
   995  	return Marshal(s)
   996  }
   997  
   998  func encodeXMLNS4() ([]byte, error) {
   999  
  1000  	type Test struct {
  1001  		Ns   string `xml:"xmlns,attr"`
  1002  		Body string
  1003  	}
  1004  
  1005  	s := &Test{Ns: "http://example.com/ns", Body: "hello world"}
  1006  	return Marshal(s)
  1007  }
  1008  
  1009  func TestIssue11405(t *testing.T) {
  1010  	testCases := []string{
  1011  		"<root>",
  1012  		"<root><foo>",
  1013  		"<root><foo></foo>",
  1014  	}
  1015  	for _, tc := range testCases {
  1016  		d := NewDecoder(strings.NewReader(tc))
  1017  		var err error
  1018  		for {
  1019  			_, err = d.Token()
  1020  			if err != nil {
  1021  				break
  1022  			}
  1023  		}
  1024  		if _, ok := err.(*SyntaxError); !ok {
  1025  			t.Errorf("%s: Token: Got error %v, want SyntaxError", tc, err)
  1026  		}
  1027  	}
  1028  }
  1029  
  1030  func TestIssue12417(t *testing.T) {
  1031  	testCases := []struct {
  1032  		s  string
  1033  		ok bool
  1034  	}{
  1035  		{`<?xml encoding="UtF-8" version="1.0"?><root/>`, true},
  1036  		{`<?xml encoding="UTF-8" version="1.0"?><root/>`, true},
  1037  		{`<?xml encoding="utf-8" version="1.0"?><root/>`, true},
  1038  		{`<?xml encoding="uuu-9" version="1.0"?><root/>`, false},
  1039  	}
  1040  	for _, tc := range testCases {
  1041  		d := NewDecoder(strings.NewReader(tc.s))
  1042  		var err error
  1043  		for {
  1044  			_, err = d.Token()
  1045  			if err != nil {
  1046  				if err == io.EOF {
  1047  					err = nil
  1048  				}
  1049  				break
  1050  			}
  1051  		}
  1052  		if err != nil && tc.ok {
  1053  			t.Errorf("%q: Encoding charset: expected no error, got %s", tc.s, err)
  1054  			continue
  1055  		}
  1056  		if err == nil && !tc.ok {
  1057  			t.Errorf("%q: Encoding charset: expected error, got nil", tc.s)
  1058  		}
  1059  	}
  1060  }
  1061  
  1062  func TestIssue7113(t *testing.T) {
  1063  	type C struct {
  1064  		XMLName Name `xml:""` // Sets empty namespace
  1065  	}
  1066  
  1067  	type A struct {
  1068  		XMLName Name `xml:""`
  1069  		C       C    `xml:""`
  1070  	}
  1071  
  1072  	var a A
  1073  	structSpace := "b"
  1074  	xmlTest := `<A xmlns="` + structSpace + `"><C xmlns=""></C></A>`
  1075  	t.Log(xmlTest)
  1076  	err := Unmarshal([]byte(xmlTest), &a)
  1077  	if err != nil {
  1078  		t.Fatal(err)
  1079  	}
  1080  
  1081  	if a.XMLName.Space != structSpace {
  1082  		t.Errorf("overidding with empty namespace: unmarshalling, got %s, want %s\n", a.XMLName.Space, structSpace)
  1083  	}
  1084  	if len(a.C.XMLName.Space) != 0 {
  1085  		t.Fatalf("overidding with empty namespace: unmarshalling, got %s, want empty\n", a.C.XMLName.Space)
  1086  	}
  1087  
  1088  	var b []byte
  1089  	b, err = Marshal(&a)
  1090  	if err != nil {
  1091  		t.Fatal(err)
  1092  	}
  1093  	if len(a.C.XMLName.Space) != 0 {
  1094  		t.Errorf("overidding with empty namespace: marshaling, got %s in C tag which should be empty\n", a.C.XMLName.Space)
  1095  	}
  1096  	if string(b) != xmlTest {
  1097  		t.Fatalf("overidding with empty namespace: marshalling, got %s, want %s\n", b, xmlTest)
  1098  	}
  1099  	var c A
  1100  	err = Unmarshal(b, &c)
  1101  	if err != nil {
  1102  		t.Fatalf("second Unmarshal failed: %s", err)
  1103  	}
  1104  	if c.XMLName.Space != "b" {
  1105  		t.Errorf("overidding with empty namespace: after marshaling & unmarshaling, XML name space: got %s, want %s\n", a.XMLName.Space, structSpace)
  1106  	}
  1107  	if len(c.C.XMLName.Space) != 0 {
  1108  		t.Errorf("overidding with empty namespace: after marshaling & unmarshaling, got %s, want empty\n", a.C.XMLName.Space)
  1109  	}
  1110  }
  1111  
  1112  func TestIssue20396(t *testing.T) {
  1113  
  1114  	var attrError = UnmarshalError("XML syntax error on line 1: expected attribute name in element")
  1115  
  1116  	testCases := []struct {
  1117  		s       string
  1118  		wantErr error
  1119  	}{
  1120  		{`<a:te:st xmlns:a="abcd"/>`, // Issue 20396
  1121  			UnmarshalError("XML syntax error on line 1: expected element name after <")},
  1122  		{`<a:te=st xmlns:a="abcd"/>`, attrError},
  1123  		{`<a:te&st xmlns:a="abcd"/>`, attrError},
  1124  		{`<a:test xmlns:a="abcd"/>`, nil},
  1125  		{`<a:te:st xmlns:a="abcd">1</a:te:st>`,
  1126  			UnmarshalError("XML syntax error on line 1: expected element name after <")},
  1127  		{`<a:te=st xmlns:a="abcd">1</a:te=st>`, attrError},
  1128  		{`<a:te&st xmlns:a="abcd">1</a:te&st>`, attrError},
  1129  		{`<a:test xmlns:a="abcd">1</a:test>`, nil},
  1130  	}
  1131  
  1132  	var dest string
  1133  	for _, tc := range testCases {
  1134  		if got, want := Unmarshal([]byte(tc.s), &dest), tc.wantErr; got != want {
  1135  			if got == nil {
  1136  				t.Errorf("%s: Unexpected success, want %v", tc.s, want)
  1137  			} else if want == nil {
  1138  				t.Errorf("%s: Unexpected error, got %v", tc.s, got)
  1139  			} else if got.Error() != want.Error() {
  1140  				t.Errorf("%s: got %v, want %v", tc.s, got, want)
  1141  			}
  1142  		}
  1143  	}
  1144  }
  1145  
  1146  func TestIssue20685(t *testing.T) {
  1147  	testCases := []struct {
  1148  		s  string
  1149  		ok bool
  1150  	}{
  1151  		{`<x:book xmlns:x="abcd" xmlns:y="abcd"><unclosetag>one</x:book>`, false},
  1152  		{`<x:book xmlns:x="abcd" xmlns:y="abcd">one</x:book>`, true},
  1153  		{`<x:book xmlns:x="abcd" xmlns:y="abcd">one</y:book>`, false},
  1154  		{`<x:book xmlns:y="abcd" xmlns:x="abcd">one</y:book>`, false},
  1155  		{`<x:book xmlns:x="abcd">one</y:book>`, false},
  1156  		{`<x:book>one</y:book>`, false},
  1157  		{`<xbook>one</ybook>`, false},
  1158  	}
  1159  	for _, tc := range testCases {
  1160  		d := NewDecoder(strings.NewReader(tc.s))
  1161  		var err error
  1162  		for {
  1163  			_, err = d.Token()
  1164  			if err != nil {
  1165  				if err == io.EOF {
  1166  					err = nil
  1167  				}
  1168  				break
  1169  			}
  1170  		}
  1171  		if err != nil && tc.ok {
  1172  			t.Errorf("%q: Closing tag with namespace : expected no error, got %s", tc.s, err)
  1173  			continue
  1174  		}
  1175  		if err == nil && !tc.ok {
  1176  			t.Errorf("%q: Closing tag with namespace : expected error, got nil", tc.s)
  1177  		}
  1178  	}
  1179  }
  1180  
  1181  func tokenMap(mapping func(t Token) Token) func(TokenReader) TokenReader {
  1182  	return func(src TokenReader) TokenReader {
  1183  		return mapper{
  1184  			t: src,
  1185  			f: mapping,
  1186  		}
  1187  	}
  1188  }
  1189  
  1190  type mapper struct {
  1191  	t TokenReader
  1192  	f func(Token) Token
  1193  }
  1194  
  1195  func (m mapper) Token() (Token, error) {
  1196  	tok, err := m.t.Token()
  1197  	if err != nil {
  1198  		return nil, err
  1199  	}
  1200  	return m.f(tok), nil
  1201  }
  1202  
  1203  func TestNewTokenDecoderIdempotent(t *testing.T) {
  1204  	d := NewDecoder(strings.NewReader(`<br>`))
  1205  	d2 := NewTokenDecoder(d)
  1206  	if d != d2 {
  1207  		t.Error("NewTokenDecoder did not detect underlying Decoder")
  1208  	}
  1209  }
  1210  
  1211  func TestWrapDecoder(t *testing.T) {
  1212  	d := NewDecoder(strings.NewReader(`<quote>[Re-enter Clown with a letter, and FABIAN]</quote>`))
  1213  	m := tokenMap(func(t Token) Token {
  1214  		switch tok := t.(type) {
  1215  		case StartElement:
  1216  			if tok.Name.Local == "quote" {
  1217  				tok.Name.Local = "blocking"
  1218  				return tok
  1219  			}
  1220  		case EndElement:
  1221  			if tok.Name.Local == "quote" {
  1222  				tok.Name.Local = "blocking"
  1223  				return tok
  1224  			}
  1225  		}
  1226  		return t
  1227  	})
  1228  
  1229  	d = NewTokenDecoder(m(d))
  1230  
  1231  	o := struct {
  1232  		XMLName  Name   `xml:"blocking"`
  1233  		Chardata string `xml:",chardata"`
  1234  	}{}
  1235  
  1236  	if err := d.Decode(&o); err != nil {
  1237  		t.Fatal("Got unexpected error while decoding:", err)
  1238  	}
  1239  
  1240  	if o.Chardata != "[Re-enter Clown with a letter, and FABIAN]" {
  1241  		t.Fatalf("Got unexpected chardata: `%s`\n", o.Chardata)
  1242  	}
  1243  }
  1244  
  1245  type tokReader struct{}
  1246  
  1247  func (tokReader) Token() (Token, error) {
  1248  	return StartElement{}, nil
  1249  }
  1250  
  1251  type Failure struct{}
  1252  
  1253  func (Failure) UnmarshalXML(*Decoder, StartElement) error {
  1254  	return nil
  1255  }
  1256  
  1257  func TestTokenUnmarshaler(t *testing.T) {
  1258  	defer func() {
  1259  		if r := recover(); r != nil {
  1260  			t.Error("Unexpected panic using custom token unmarshaler")
  1261  		}
  1262  	}()
  1263  
  1264  	d := NewTokenDecoder(tokReader{})
  1265  	d.Decode(&Failure{})
  1266  }
  1267  
  1268  func testRoundTrip(t *testing.T, input string) {
  1269  	d := NewDecoder(strings.NewReader(input))
  1270  	var tokens []Token
  1271  	var buf bytes.Buffer
  1272  	e := NewEncoder(&buf)
  1273  	for {
  1274  		tok, err := d.Token()
  1275  		if err == io.EOF {
  1276  			break
  1277  		}
  1278  		if err != nil {
  1279  			t.Fatalf("invalid input: %v", err)
  1280  		}
  1281  		if err := e.EncodeToken(tok); err != nil {
  1282  			t.Fatalf("failed to re-encode input: %v", err)
  1283  		}
  1284  		tokens = append(tokens, CopyToken(tok))
  1285  	}
  1286  	if err := e.Flush(); err != nil {
  1287  		t.Fatal(err)
  1288  	}
  1289  
  1290  	d = NewDecoder(&buf)
  1291  	for {
  1292  		tok, err := d.Token()
  1293  		if err == io.EOF {
  1294  			break
  1295  		}
  1296  		if err != nil {
  1297  			t.Fatalf("failed to decode output: %v", err)
  1298  		}
  1299  		if len(tokens) == 0 {
  1300  			t.Fatalf("unexpected token: %#v", tok)
  1301  		}
  1302  		a, b := tokens[0], tok
  1303  		if !reflect.DeepEqual(a, b) {
  1304  			t.Fatalf("token mismatch: %#v vs %#v", a, b)
  1305  		}
  1306  		tokens = tokens[1:]
  1307  	}
  1308  	if len(tokens) > 0 {
  1309  		t.Fatalf("lost tokens: %#v", tokens)
  1310  	}
  1311  }
  1312  
  1313  func TestRoundTrip(t *testing.T) {
  1314  	tests := map[string]string{
  1315  		"trailing colon":         `<foo abc:="x"></foo>`,
  1316  		"comments in directives": `<!ENTITY x<!<!-- c1 [ " -->--x --> > <e></e> <!DOCTYPE xxx [ x<!-- c2 " -->--x ]>`,
  1317  	}
  1318  	for name, input := range tests {
  1319  		t.Run(name, func(t *testing.T) { testRoundTrip(t, input) })
  1320  	}
  1321  }
  1322  
  1323  func TestParseErrors(t *testing.T) {
  1324  	withDefaultHeader := func(s string) string {
  1325  		return `<?xml version="1.0" encoding="UTF-8"?>` + s
  1326  	}
  1327  	tests := []struct {
  1328  		src string
  1329  		err string
  1330  	}{
  1331  		{withDefaultHeader(`</foo>`), `unexpected end element </foo>`},
  1332  		{withDefaultHeader(`<x:foo></y:foo>`), `element <foo> in space x closed by </foo> in space y`},
  1333  		{withDefaultHeader(`<? not ok ?>`), `expected target name after <?`},
  1334  		{withDefaultHeader(`<!- not ok -->`), `invalid sequence <!- not part of <!--`},
  1335  		{withDefaultHeader(`<!-? not ok -->`), `invalid sequence <!- not part of <!--`},
  1336  		{withDefaultHeader(`<![not ok]>`), `invalid <![ sequence`},
  1337  		{withDefaultHeader("\xf1"), `invalid UTF-8`},
  1338  
  1339  		// Header-related errors.
  1340  		{`<?xml version="1.1" encoding="UTF-8"?>`, `unsupported version "1.1"; only version 1.0 is supported`},
  1341  
  1342  		// Cases below are for "no errors".
  1343  		{withDefaultHeader(`<?ok?>`), ``},
  1344  		{withDefaultHeader(`<?ok version="ok"?>`), ``},
  1345  	}
  1346  
  1347  	for _, test := range tests {
  1348  		d := NewDecoder(strings.NewReader(test.src))
  1349  		var err error
  1350  		for {
  1351  			_, err = d.Token()
  1352  			if err != nil {
  1353  				break
  1354  			}
  1355  		}
  1356  		if test.err == "" {
  1357  			if err != io.EOF {
  1358  				t.Errorf("parse %s: have %q error, expected none", test.src, err)
  1359  			}
  1360  			continue
  1361  		}
  1362  		// Inv: err != nil
  1363  		if err == io.EOF {
  1364  			t.Errorf("parse %s: unexpected EOF", test.src)
  1365  			continue
  1366  		}
  1367  		if !strings.Contains(err.Error(), test.err) {
  1368  			t.Errorf("parse %s: can't find %q error sudbstring\nerror: %q", test.src, test.err, err)
  1369  			continue
  1370  		}
  1371  	}
  1372  }
  1373  
// testInputHTMLAutoClose is the input shared by BenchmarkHTMLAutoClose and
// TestHTMLAutoClose: an XML declaration followed by <br> elements written
// unclosed, self-closed and explicitly closed, in several letter cases, plus
// one <span> element carrying an attribute and text.
const testInputHTMLAutoClose = `<?xml version="1.0" encoding="UTF-8"?>
<br>
<br/><br/>
<br><br>
<br></br>
<BR>
<BR/><BR/>
<Br></Br>
<BR><span id="test">abc</span><br/><br/>`
  1383  
  1384  func BenchmarkHTMLAutoClose(b *testing.B) {
  1385  	b.RunParallel(func(p *testing.PB) {
  1386  		for p.Next() {
  1387  			d := NewDecoder(strings.NewReader(testInputHTMLAutoClose))
  1388  			d.Strict = false
  1389  			d.AutoClose = HTMLAutoClose
  1390  			d.Entity = HTMLEntity
  1391  			for {
  1392  				_, err := d.Token()
  1393  				if err != nil {
  1394  					if err == io.EOF {
  1395  						break
  1396  					}
  1397  					b.Fatalf("unexpected error: %v", err)
  1398  				}
  1399  			}
  1400  		}
  1401  	})
  1402  }
  1403  
  1404  func TestHTMLAutoClose(t *testing.T) {
  1405  	wantTokens := []Token{
  1406  		ProcInst{"xml", []byte(`version="1.0" encoding="UTF-8"`)},
  1407  		CharData("\n"),
  1408  		StartElement{Name{"", "br"}, []Attr{}},
  1409  		EndElement{Name{"", "br"}},
  1410  		CharData("\n"),
  1411  		StartElement{Name{"", "br"}, []Attr{}},
  1412  		EndElement{Name{"", "br"}},
  1413  		StartElement{Name{"", "br"}, []Attr{}},
  1414  		EndElement{Name{"", "br"}},
  1415  		CharData("\n"),
  1416  		StartElement{Name{"", "br"}, []Attr{}},
  1417  		EndElement{Name{"", "br"}},
  1418  		StartElement{Name{"", "br"}, []Attr{}},
  1419  		EndElement{Name{"", "br"}},
  1420  		CharData("\n"),
  1421  		StartElement{Name{"", "br"}, []Attr{}},
  1422  		EndElement{Name{"", "br"}},
  1423  		CharData("\n"),
  1424  		StartElement{Name{"", "BR"}, []Attr{}},
  1425  		EndElement{Name{"", "BR"}},
  1426  		CharData("\n"),
  1427  		StartElement{Name{"", "BR"}, []Attr{}},
  1428  		EndElement{Name{"", "BR"}},
  1429  		StartElement{Name{"", "BR"}, []Attr{}},
  1430  		EndElement{Name{"", "BR"}},
  1431  		CharData("\n"),
  1432  		StartElement{Name{"", "Br"}, []Attr{}},
  1433  		EndElement{Name{"", "Br"}},
  1434  		CharData("\n"),
  1435  		StartElement{Name{"", "BR"}, []Attr{}},
  1436  		EndElement{Name{"", "BR"}},
  1437  		StartElement{Name{"", "span"}, []Attr{{Name: Name{"", "id"}, Value: "test"}}},
  1438  		CharData("abc"),
  1439  		EndElement{Name{"", "span"}},
  1440  		StartElement{Name{"", "br"}, []Attr{}},
  1441  		EndElement{Name{"", "br"}},
  1442  		StartElement{Name{"", "br"}, []Attr{}},
  1443  		EndElement{Name{"", "br"}},
  1444  	}
  1445  
  1446  	d := NewDecoder(strings.NewReader(testInputHTMLAutoClose))
  1447  	d.Strict = false
  1448  	d.AutoClose = HTMLAutoClose
  1449  	d.Entity = HTMLEntity
  1450  	var haveTokens []Token
  1451  	for {
  1452  		tok, err := d.Token()
  1453  		if err != nil {
  1454  			if err == io.EOF {
  1455  				break
  1456  			}
  1457  			t.Fatalf("unexpected error: %v", err)
  1458  		}
  1459  		haveTokens = append(haveTokens, CopyToken(tok))
  1460  	}
  1461  	if len(haveTokens) != len(wantTokens) {
  1462  		t.Errorf("tokens count mismatch: have %d, want %d", len(haveTokens), len(wantTokens))
  1463  	}
  1464  	for i, want := range wantTokens {
  1465  		if i >= len(haveTokens) {
  1466  			t.Errorf("token[%d] expected %#v, have no token", i, want)
  1467  		} else {
  1468  			have := haveTokens[i]
  1469  			if !reflect.DeepEqual(have, want) {
  1470  				t.Errorf("token[%d] mismatch:\nhave: %#v\nwant: %#v", i, have, want)
  1471  			}
  1472  		}
  1473  	}
  1474  }