github.com/kdevb0x/go@v0.0.0-20180115030120-39687051e9e7/src/encoding/xml/xml_test.go (about)

     1  // Copyright 2009 The Go Authors. All rights reserved.
     2  // Use of this source code is governed by a BSD-style
     3  // license that can be found in the LICENSE file.
     4  
     5  package xml
     6  
     7  import (
     8  	"bytes"
     9  	"fmt"
    10  	"io"
    11  	"reflect"
    12  	"strings"
    13  	"testing"
    14  	"unicode/utf8"
    15  )
    16  
// testInput is the XML document decoded by TestRawToken and TestToken.
// It contains a processing instruction, a DOCTYPE directive, namespace
// declarations, predefined and numeric character references, two custom
// entities (see testEntity), a self-closing element, a CDATA section and
// a trailing comment with no final newline.
const testInput = `
<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN"
  "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
<body xmlns:foo="ns1" xmlns="ns2" xmlns:tag="ns3" ` +
	"\r\n\t" + `  >
  <hello lang="en">World &lt;&gt;&apos;&quot; &#x767d;&#40300;翔</hello>
  <query>&何; &is-it;</query>
  <goodbye />
  <outer foo:attr="value" xmlns:tag="ns4">
    <inner/>
  </outer>
  <tag:name>
    <![CDATA[Some text here.]]>
  </tag:name>
</body><!-- missing final newline -->`

// testEntity maps the custom entity names used in testInput's <query>
// element to their replacement text. The tests assign it to Decoder.Entity.
var testEntity = map[string]string{"何": "What", "is-it": "is it?"}
    35  
// rawTokens is the token sequence TestRawToken expects RawToken to return
// for testInput. Element and attribute names keep their literal prefixes
// ("foo", "tag", "xmlns") in Name.Space, and the self-closing elements
// appear as a StartElement followed by an EndElement.
var rawTokens = []Token{
	CharData("\n"),
	ProcInst{"xml", []byte(`version="1.0" encoding="UTF-8"`)},
	CharData("\n"),
	Directive(`DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN"
  "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd"`),
	CharData("\n"),
	StartElement{Name{"", "body"}, []Attr{{Name{"xmlns", "foo"}, "ns1"}, {Name{"", "xmlns"}, "ns2"}, {Name{"xmlns", "tag"}, "ns3"}}},
	CharData("\n  "),
	StartElement{Name{"", "hello"}, []Attr{{Name{"", "lang"}, "en"}}},
	CharData("World <>'\" 白鵬翔"),
	EndElement{Name{"", "hello"}},
	CharData("\n  "),
	StartElement{Name{"", "query"}, []Attr{}},
	CharData("What is it?"),
	EndElement{Name{"", "query"}},
	CharData("\n  "),
	StartElement{Name{"", "goodbye"}, []Attr{}},
	EndElement{Name{"", "goodbye"}},
	CharData("\n  "),
	StartElement{Name{"", "outer"}, []Attr{{Name{"foo", "attr"}, "value"}, {Name{"xmlns", "tag"}, "ns4"}}},
	CharData("\n    "),
	StartElement{Name{"", "inner"}, []Attr{}},
	EndElement{Name{"", "inner"}},
	CharData("\n  "),
	EndElement{Name{"", "outer"}},
	CharData("\n  "),
	StartElement{Name{"tag", "name"}, []Attr{}},
	CharData("\n    "),
	CharData("Some text here."),
	CharData("\n  "),
	EndElement{Name{"tag", "name"}},
	CharData("\n"),
	EndElement{Name{"", "body"}},
	Comment(" missing final newline "),
}
    72  
// cookedTokens is the token sequence TestToken expects Token to return for
// testInput. It mirrors rawTokens, except that Name.Space holds the
// namespace values declared in testInput ("ns1", "ns2", "ns3") rather than
// the literal prefixes; the xmlns declarations themselves keep "xmlns".
var cookedTokens = []Token{
	CharData("\n"),
	ProcInst{"xml", []byte(`version="1.0" encoding="UTF-8"`)},
	CharData("\n"),
	Directive(`DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN"
  "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd"`),
	CharData("\n"),
	StartElement{Name{"ns2", "body"}, []Attr{{Name{"xmlns", "foo"}, "ns1"}, {Name{"", "xmlns"}, "ns2"}, {Name{"xmlns", "tag"}, "ns3"}}},
	CharData("\n  "),
	StartElement{Name{"ns2", "hello"}, []Attr{{Name{"", "lang"}, "en"}}},
	CharData("World <>'\" 白鵬翔"),
	EndElement{Name{"ns2", "hello"}},
	CharData("\n  "),
	StartElement{Name{"ns2", "query"}, []Attr{}},
	CharData("What is it?"),
	EndElement{Name{"ns2", "query"}},
	CharData("\n  "),
	StartElement{Name{"ns2", "goodbye"}, []Attr{}},
	EndElement{Name{"ns2", "goodbye"}},
	CharData("\n  "),
	StartElement{Name{"ns2", "outer"}, []Attr{{Name{"ns1", "attr"}, "value"}, {Name{"xmlns", "tag"}, "ns4"}}},
	CharData("\n    "),
	StartElement{Name{"ns2", "inner"}, []Attr{}},
	EndElement{Name{"ns2", "inner"}},
	CharData("\n  "),
	EndElement{Name{"ns2", "outer"}},
	CharData("\n  "),
	StartElement{Name{"ns3", "name"}, []Attr{}},
	CharData("\n    "),
	CharData("Some text here."),
	CharData("\n  "),
	EndElement{Name{"ns3", "name"}},
	CharData("\n"),
	EndElement{Name{"ns2", "body"}},
	Comment(" missing final newline "),
}
   109  
// testInputAltEncoding is a document whose XML declaration names the
// made-up encoding "x-testing-uppercase". It is used by the alternate
// encoding tests.
const testInputAltEncoding = `
<?xml version="1.0" encoding="x-testing-uppercase"?>
<TAG>VALUE</TAG>`

// rawTokensAltEncoding is the token sequence TestRawTokenAltEncoding
// expects for testInputAltEncoding. The element name and text are
// lower-case because that test reads the input through a downCaser.
var rawTokensAltEncoding = []Token{
	CharData("\n"),
	ProcInst{"xml", []byte(`version="1.0" encoding="x-testing-uppercase"`)},
	CharData("\n"),
	StartElement{Name{"", "tag"}, []Attr{}},
	CharData("value"),
	EndElement{Name{"", "tag"}},
}
   122  
// xmlInput lists malformed documents. TestSyntax reads tokens from each one
// until an error occurs and requires that error to be a *SyntaxError.
var xmlInput = []string{
	// unexpected EOF cases
	"<",
	"<t",
	"<t ",
	"<t/",
	"<!",
	"<!-",
	"<!--",
	"<!--c-",
	"<!--c--",
	"<!d",
	"<t></",
	"<t></t",
	"<?",
	"<?p",
	"<t a",
	"<t a=",
	"<t a='",
	"<t a=''",
	"<t/><![",
	"<t/><![C",
	"<t/><![CDATA[d",
	"<t/><![CDATA[d]",
	"<t/><![CDATA[d]]",

	// other Syntax errors
	"<>",
	"<t/a",
	"<0 />",
	"<?0 >",
	//	"<!0 >",	// let the Token() caller handle
	"</0>",
	"<t 0=''>",
	"<t a='&'>",
	"<t a='<'>",
	"<t>&nbspc;</t>",
	"<t a>",
	"<t a=>",
	"<t a=v>",
	//	"<![CDATA[d]]>",	// let the Token() caller handle
	"<t></e>",
	"<t></>",
	"<t></t!",
	"<t>cdata]]></t>",
}
   169  
   170  func TestRawToken(t *testing.T) {
   171  	d := NewDecoder(strings.NewReader(testInput))
   172  	d.Entity = testEntity
   173  	testRawToken(t, d, testInput, rawTokens)
   174  }
   175  
// nonStrictInput holds elements whose text contains malformed or unknown
// entity references. TestNonStrictRawToken decodes it with Strict disabled.
const nonStrictInput = `
<tag>non&entity</tag>
<tag>&unknown;entity</tag>
<tag>&#123</tag>
<tag>&#zzz;</tag>
<tag>&なまえ3;</tag>
<tag>&lt-gt;</tag>
<tag>&;</tag>
<tag>&0a;</tag>
`

// nonStrictTokens is the token sequence expected for nonStrictInput: every
// malformed reference is expected to come back verbatim as CharData.
var nonStrictTokens = []Token{
	CharData("\n"),
	StartElement{Name{"", "tag"}, []Attr{}},
	CharData("non&entity"),
	EndElement{Name{"", "tag"}},
	CharData("\n"),
	StartElement{Name{"", "tag"}, []Attr{}},
	CharData("&unknown;entity"),
	EndElement{Name{"", "tag"}},
	CharData("\n"),
	StartElement{Name{"", "tag"}, []Attr{}},
	CharData("&#123"),
	EndElement{Name{"", "tag"}},
	CharData("\n"),
	StartElement{Name{"", "tag"}, []Attr{}},
	CharData("&#zzz;"),
	EndElement{Name{"", "tag"}},
	CharData("\n"),
	StartElement{Name{"", "tag"}, []Attr{}},
	CharData("&なまえ3;"),
	EndElement{Name{"", "tag"}},
	CharData("\n"),
	StartElement{Name{"", "tag"}, []Attr{}},
	CharData("&lt-gt;"),
	EndElement{Name{"", "tag"}},
	CharData("\n"),
	StartElement{Name{"", "tag"}, []Attr{}},
	CharData("&;"),
	EndElement{Name{"", "tag"}},
	CharData("\n"),
	StartElement{Name{"", "tag"}, []Attr{}},
	CharData("&0a;"),
	EndElement{Name{"", "tag"}},
	CharData("\n"),
}
   222  
   223  func TestNonStrictRawToken(t *testing.T) {
   224  	d := NewDecoder(strings.NewReader(nonStrictInput))
   225  	d.Strict = false
   226  	testRawToken(t, d, nonStrictInput, nonStrictTokens)
   227  }
   228  
   229  type downCaser struct {
   230  	t *testing.T
   231  	r io.ByteReader
   232  }
   233  
   234  func (d *downCaser) ReadByte() (c byte, err error) {
   235  	c, err = d.r.ReadByte()
   236  	if c >= 'A' && c <= 'Z' {
   237  		c += 'a' - 'A'
   238  	}
   239  	return
   240  }
   241  
   242  func (d *downCaser) Read(p []byte) (int, error) {
   243  	d.t.Fatalf("unexpected Read call on downCaser reader")
   244  	panic("unreachable")
   245  }
   246  
   247  func TestRawTokenAltEncoding(t *testing.T) {
   248  	d := NewDecoder(strings.NewReader(testInputAltEncoding))
   249  	d.CharsetReader = func(charset string, input io.Reader) (io.Reader, error) {
   250  		if charset != "x-testing-uppercase" {
   251  			t.Fatalf("unexpected charset %q", charset)
   252  		}
   253  		return &downCaser{t, input.(io.ByteReader)}, nil
   254  	}
   255  	testRawToken(t, d, testInputAltEncoding, rawTokensAltEncoding)
   256  }
   257  
   258  func TestRawTokenAltEncodingNoConverter(t *testing.T) {
   259  	d := NewDecoder(strings.NewReader(testInputAltEncoding))
   260  	token, err := d.RawToken()
   261  	if token == nil {
   262  		t.Fatalf("expected a token on first RawToken call")
   263  	}
   264  	if err != nil {
   265  		t.Fatal(err)
   266  	}
   267  	token, err = d.RawToken()
   268  	if token != nil {
   269  		t.Errorf("expected a nil token; got %#v", token)
   270  	}
   271  	if err == nil {
   272  		t.Fatalf("expected an error on second RawToken call")
   273  	}
   274  	const encoding = "x-testing-uppercase"
   275  	if !strings.Contains(err.Error(), encoding) {
   276  		t.Errorf("expected error to contain %q; got error: %v",
   277  			encoding, err)
   278  	}
   279  }
   280  
   281  func testRawToken(t *testing.T, d *Decoder, raw string, rawTokens []Token) {
   282  	lastEnd := int64(0)
   283  	for i, want := range rawTokens {
   284  		start := d.InputOffset()
   285  		have, err := d.RawToken()
   286  		end := d.InputOffset()
   287  		if err != nil {
   288  			t.Fatalf("token %d: unexpected error: %s", i, err)
   289  		}
   290  		if !reflect.DeepEqual(have, want) {
   291  			var shave, swant string
   292  			if _, ok := have.(CharData); ok {
   293  				shave = fmt.Sprintf("CharData(%q)", have)
   294  			} else {
   295  				shave = fmt.Sprintf("%#v", have)
   296  			}
   297  			if _, ok := want.(CharData); ok {
   298  				swant = fmt.Sprintf("CharData(%q)", want)
   299  			} else {
   300  				swant = fmt.Sprintf("%#v", want)
   301  			}
   302  			t.Errorf("token %d = %s, want %s", i, shave, swant)
   303  		}
   304  
   305  		// Check that InputOffset returned actual token.
   306  		switch {
   307  		case start < lastEnd:
   308  			t.Errorf("token %d: position [%d,%d) for %T is before previous token", i, start, end, have)
   309  		case start >= end:
   310  			// Special case: EndElement can be synthesized.
   311  			if start == end && end == lastEnd {
   312  				break
   313  			}
   314  			t.Errorf("token %d: position [%d,%d) for %T is empty", i, start, end, have)
   315  		case end > int64(len(raw)):
   316  			t.Errorf("token %d: position [%d,%d) for %T extends beyond input", i, start, end, have)
   317  		default:
   318  			text := raw[start:end]
   319  			if strings.ContainsAny(text, "<>") && (!strings.HasPrefix(text, "<") || !strings.HasSuffix(text, ">")) {
   320  				t.Errorf("token %d: misaligned raw token %#q for %T", i, text, have)
   321  			}
   322  		}
   323  		lastEnd = end
   324  	}
   325  }
   326  
// Ensure that directives (specifically !DOCTYPE) include the complete
// text of any nested directives, noting that < and > do not change
// nesting depth if they are in single or double quotes.

// nestedDirectivesInput is the document decoded by TestNestedDirectives.
// Each line is a DOCTYPE directive with a nested ENTITY directive whose
// quoted value may itself contain '<', '>' or the other quote character.
var nestedDirectivesInput = `
<!DOCTYPE [<!ENTITY rdf "http://www.w3.org/1999/02/22-rdf-syntax-ns#">]>
<!DOCTYPE [<!ENTITY xlt ">">]>
<!DOCTYPE [<!ENTITY xlt "<">]>
<!DOCTYPE [<!ENTITY xlt '>'>]>
<!DOCTYPE [<!ENTITY xlt '<'>]>
<!DOCTYPE [<!ENTITY xlt '">'>]>
<!DOCTYPE [<!ENTITY xlt "'<">]>
`

// nestedDirectivesTokens is the token sequence expected for
// nestedDirectivesInput: one Directive per input line, holding the full
// nested text, separated by newline CharData.
var nestedDirectivesTokens = []Token{
	CharData("\n"),
	Directive(`DOCTYPE [<!ENTITY rdf "http://www.w3.org/1999/02/22-rdf-syntax-ns#">]`),
	CharData("\n"),
	Directive(`DOCTYPE [<!ENTITY xlt ">">]`),
	CharData("\n"),
	Directive(`DOCTYPE [<!ENTITY xlt "<">]`),
	CharData("\n"),
	Directive(`DOCTYPE [<!ENTITY xlt '>'>]`),
	CharData("\n"),
	Directive(`DOCTYPE [<!ENTITY xlt '<'>]`),
	CharData("\n"),
	Directive(`DOCTYPE [<!ENTITY xlt '">'>]`),
	CharData("\n"),
	Directive(`DOCTYPE [<!ENTITY xlt "'<">]`),
	CharData("\n"),
}
   358  
   359  func TestNestedDirectives(t *testing.T) {
   360  	d := NewDecoder(strings.NewReader(nestedDirectivesInput))
   361  
   362  	for i, want := range nestedDirectivesTokens {
   363  		have, err := d.Token()
   364  		if err != nil {
   365  			t.Fatalf("token %d: unexpected error: %s", i, err)
   366  		}
   367  		if !reflect.DeepEqual(have, want) {
   368  			t.Errorf("token %d = %#v want %#v", i, have, want)
   369  		}
   370  	}
   371  }
   372  
   373  func TestToken(t *testing.T) {
   374  	d := NewDecoder(strings.NewReader(testInput))
   375  	d.Entity = testEntity
   376  
   377  	for i, want := range cookedTokens {
   378  		have, err := d.Token()
   379  		if err != nil {
   380  			t.Fatalf("token %d: unexpected error: %s", i, err)
   381  		}
   382  		if !reflect.DeepEqual(have, want) {
   383  			t.Errorf("token %d = %#v want %#v", i, have, want)
   384  		}
   385  	}
   386  }
   387  
   388  func TestSyntax(t *testing.T) {
   389  	for i := range xmlInput {
   390  		d := NewDecoder(strings.NewReader(xmlInput[i]))
   391  		var err error
   392  		for _, err = d.Token(); err == nil; _, err = d.Token() {
   393  		}
   394  		if _, ok := err.(*SyntaxError); !ok {
   395  			t.Fatalf(`xmlInput "%s": expected SyntaxError not received`, xmlInput[i])
   396  		}
   397  	}
   398  }
   399  
   400  type allScalars struct {
   401  	True1     bool
   402  	True2     bool
   403  	False1    bool
   404  	False2    bool
   405  	Int       int
   406  	Int8      int8
   407  	Int16     int16
   408  	Int32     int32
   409  	Int64     int64
   410  	Uint      int
   411  	Uint8     uint8
   412  	Uint16    uint16
   413  	Uint32    uint32
   414  	Uint64    uint64
   415  	Uintptr   uintptr
   416  	Float32   float32
   417  	Float64   float64
   418  	String    string
   419  	PtrString *string
   420  }
   421  
// all is the value TestAllScalars expects after unmarshaling
// testScalarsInput into an allScalars.
var all = allScalars{
	True1:     true,
	True2:     true,
	False1:    false,
	False2:    false,
	Int:       1,
	Int8:      -2,
	Int16:     3,
	Int32:     -4,
	Int64:     5,
	Uint:      6,
	Uint8:     7,
	Uint16:    8,
	Uint32:    9,
	Uint64:    10,
	Uintptr:   11,
	Float32:   13.0,
	Float64:   14.0,
	String:    "15",
	PtrString: &sixteen,
}

// sixteen is the string that all.PtrString points to.
var sixteen = "16"
   445  
// testScalarsInput is the document TestAllScalars unmarshals into an
// allScalars. Booleans appear both as true/false and as 1/0. The <Float>
// element has no field of that name in allScalars.
const testScalarsInput = `<allscalars>
	<True1>true</True1>
	<True2>1</True2>
	<False1>false</False1>
	<False2>0</False2>
	<Int>1</Int>
	<Int8>-2</Int8>
	<Int16>3</Int16>
	<Int32>-4</Int32>
	<Int64>5</Int64>
	<Uint>6</Uint>
	<Uint8>7</Uint8>
	<Uint16>8</Uint16>
	<Uint32>9</Uint32>
	<Uint64>10</Uint64>
	<Uintptr>11</Uintptr>
	<Float>12.0</Float>
	<Float32>13.0</Float32>
	<Float64>14.0</Float64>
	<String>15</String>
	<PtrString>16</PtrString>
</allscalars>`
   468  
   469  func TestAllScalars(t *testing.T) {
   470  	var a allScalars
   471  	err := Unmarshal([]byte(testScalarsInput), &a)
   472  
   473  	if err != nil {
   474  		t.Fatal(err)
   475  	}
   476  	if !reflect.DeepEqual(a, all) {
   477  		t.Errorf("have %+v want %+v", a, all)
   478  	}
   479  }
   480  
// item is the unmarshal target used by TestIssue569.
type item struct {
	FieldA string // filled from the <FieldA> child element
}
   484  
   485  func TestIssue569(t *testing.T) {
   486  	data := `<item><FieldA>abcd</FieldA></item>`
   487  	var i item
   488  	err := Unmarshal([]byte(data), &i)
   489  
   490  	if err != nil || i.FieldA != "abcd" {
   491  		t.Fatal("Expecting abcd")
   492  	}
   493  }
   494  
   495  func TestUnquotedAttrs(t *testing.T) {
   496  	data := "<tag attr=azAZ09:-_\t>"
   497  	d := NewDecoder(strings.NewReader(data))
   498  	d.Strict = false
   499  	token, err := d.Token()
   500  	if _, ok := err.(*SyntaxError); ok {
   501  		t.Errorf("Unexpected error: %v", err)
   502  	}
   503  	if token.(StartElement).Name.Local != "tag" {
   504  		t.Errorf("Unexpected tag name: %v", token.(StartElement).Name.Local)
   505  	}
   506  	attr := token.(StartElement).Attr[0]
   507  	if attr.Value != "azAZ09:-_" {
   508  		t.Errorf("Unexpected attribute value: %v", attr.Value)
   509  	}
   510  	if attr.Name.Local != "attr" {
   511  		t.Errorf("Unexpected attribute name: %v", attr.Name.Local)
   512  	}
   513  }
   514  
   515  func TestValuelessAttrs(t *testing.T) {
   516  	tests := [][3]string{
   517  		{"<p nowrap>", "p", "nowrap"},
   518  		{"<p nowrap >", "p", "nowrap"},
   519  		{"<input checked/>", "input", "checked"},
   520  		{"<input checked />", "input", "checked"},
   521  	}
   522  	for _, test := range tests {
   523  		d := NewDecoder(strings.NewReader(test[0]))
   524  		d.Strict = false
   525  		token, err := d.Token()
   526  		if _, ok := err.(*SyntaxError); ok {
   527  			t.Errorf("Unexpected error: %v", err)
   528  		}
   529  		if token.(StartElement).Name.Local != test[1] {
   530  			t.Errorf("Unexpected tag name: %v", token.(StartElement).Name.Local)
   531  		}
   532  		attr := token.(StartElement).Attr[0]
   533  		if attr.Value != test[2] {
   534  			t.Errorf("Unexpected attribute value: %v", attr.Value)
   535  		}
   536  		if attr.Name.Local != test[2] {
   537  			t.Errorf("Unexpected attribute name: %v", attr.Name.Local)
   538  		}
   539  	}
   540  }
   541  
   542  func TestCopyTokenCharData(t *testing.T) {
   543  	data := []byte("same data")
   544  	var tok1 Token = CharData(data)
   545  	tok2 := CopyToken(tok1)
   546  	if !reflect.DeepEqual(tok1, tok2) {
   547  		t.Error("CopyToken(CharData) != CharData")
   548  	}
   549  	data[1] = 'o'
   550  	if reflect.DeepEqual(tok1, tok2) {
   551  		t.Error("CopyToken(CharData) uses same buffer.")
   552  	}
   553  }
   554  
   555  func TestCopyTokenStartElement(t *testing.T) {
   556  	elt := StartElement{Name{"", "hello"}, []Attr{{Name{"", "lang"}, "en"}}}
   557  	var tok1 Token = elt
   558  	tok2 := CopyToken(tok1)
   559  	if tok1.(StartElement).Attr[0].Value != "en" {
   560  		t.Error("CopyToken overwrote Attr[0]")
   561  	}
   562  	if !reflect.DeepEqual(tok1, tok2) {
   563  		t.Error("CopyToken(StartElement) != StartElement")
   564  	}
   565  	tok1.(StartElement).Attr[0] = Attr{Name{"", "lang"}, "de"}
   566  	if reflect.DeepEqual(tok1, tok2) {
   567  		t.Error("CopyToken(CharData) uses same buffer.")
   568  	}
   569  }
   570  
   571  func TestSyntaxErrorLineNum(t *testing.T) {
   572  	testInput := "<P>Foo<P>\n\n<P>Bar</>\n"
   573  	d := NewDecoder(strings.NewReader(testInput))
   574  	var err error
   575  	for _, err = d.Token(); err == nil; _, err = d.Token() {
   576  	}
   577  	synerr, ok := err.(*SyntaxError)
   578  	if !ok {
   579  		t.Error("Expected SyntaxError.")
   580  	}
   581  	if synerr.Line != 3 {
   582  		t.Error("SyntaxError didn't have correct line number.")
   583  	}
   584  }
   585  
   586  func TestTrailingRawToken(t *testing.T) {
   587  	input := `<FOO></FOO>  `
   588  	d := NewDecoder(strings.NewReader(input))
   589  	var err error
   590  	for _, err = d.RawToken(); err == nil; _, err = d.RawToken() {
   591  	}
   592  	if err != io.EOF {
   593  		t.Fatalf("d.RawToken() = _, %v, want _, io.EOF", err)
   594  	}
   595  }
   596  
   597  func TestTrailingToken(t *testing.T) {
   598  	input := `<FOO></FOO>  `
   599  	d := NewDecoder(strings.NewReader(input))
   600  	var err error
   601  	for _, err = d.Token(); err == nil; _, err = d.Token() {
   602  	}
   603  	if err != io.EOF {
   604  		t.Fatalf("d.Token() = _, %v, want _, io.EOF", err)
   605  	}
   606  }
   607  
   608  func TestEntityInsideCDATA(t *testing.T) {
   609  	input := `<test><![CDATA[ &val=foo ]]></test>`
   610  	d := NewDecoder(strings.NewReader(input))
   611  	var err error
   612  	for _, err = d.Token(); err == nil; _, err = d.Token() {
   613  	}
   614  	if err != io.EOF {
   615  		t.Fatalf("d.Token() = _, %v, want _, io.EOF", err)
   616  	}
   617  }
   618  
// characterTests pairs documents containing disallowed characters or
// invalid entity references (in) with the SyntaxError.Msg that
// TestDisallowedCharacters expects for them (err).
var characterTests = []struct {
	in  string
	err string
}{
	{"\x12<doc/>", "illegal character code U+0012"},
	{"<?xml version=\"1.0\"?>\x0b<doc/>", "illegal character code U+000B"},
	{"\xef\xbf\xbe<doc/>", "illegal character code U+FFFE"},
	{"<?xml version=\"1.0\"?><doc>\r\n<hiya/>\x07<toots/></doc>", "illegal character code U+0007"},
	{"<?xml version=\"1.0\"?><doc \x12='value'>what's up</doc>", "expected attribute name in element"},
	{"<doc>&abc\x01;</doc>", "invalid character entity &abc (no semicolon)"},
	{"<doc>&\x01;</doc>", "invalid character entity & (no semicolon)"},
	{"<doc>&\xef\xbf\xbe;</doc>", "invalid character entity &\uFFFE;"},
	{"<doc>&hello;</doc>", "invalid character entity &hello;"},
}
   633  
   634  func TestDisallowedCharacters(t *testing.T) {
   635  
   636  	for i, tt := range characterTests {
   637  		d := NewDecoder(strings.NewReader(tt.in))
   638  		var err error
   639  
   640  		for err == nil {
   641  			_, err = d.Token()
   642  		}
   643  		synerr, ok := err.(*SyntaxError)
   644  		if !ok {
   645  			t.Fatalf("input %d d.Token() = _, %v, want _, *SyntaxError", i, err)
   646  		}
   647  		if synerr.Msg != tt.err {
   648  			t.Fatalf("input %d synerr.Msg wrong: want %q, got %q", i, tt.err, synerr.Msg)
   649  		}
   650  	}
   651  }
   652  
// procInstTests pairs processing-instruction text (input) with the values
// TestProcInstEncoding expects procInst to extract from it: expect[0] for
// "version" and expect[1] for "encoding". An empty string is the expected
// result where a parameter is missing or its value is unquoted.
var procInstTests = []struct {
	input  string
	expect [2]string
}{
	{`version="1.0" encoding="utf-8"`, [2]string{"1.0", "utf-8"}},
	{`version="1.0" encoding='utf-8'`, [2]string{"1.0", "utf-8"}},
	{`version="1.0" encoding='utf-8' `, [2]string{"1.0", "utf-8"}},
	{`version="1.0" encoding=utf-8`, [2]string{"1.0", ""}},
	{`encoding="FOO" `, [2]string{"", "FOO"}},
}
   663  
   664  func TestProcInstEncoding(t *testing.T) {
   665  	for _, test := range procInstTests {
   666  		if got := procInst("version", test.input); got != test.expect[0] {
   667  			t.Errorf("procInst(version, %q) = %q; want %q", test.input, got, test.expect[0])
   668  		}
   669  		if got := procInst("encoding", test.input); got != test.expect[1] {
   670  			t.Errorf("procInst(encoding, %q) = %q; want %q", test.input, got, test.expect[1])
   671  		}
   672  	}
   673  }
   674  
// Ensure that directives with comments include the complete
// text of any nested directives.

// directivesWithCommentsInput is the document decoded by
// TestDirectivesWithComments. Each line is a DOCTYPE directive containing
// one or more comments next to nested directives; the third line also has
// comment-like fragments such as "<!->" and "<!>".
var directivesWithCommentsInput = `
<!DOCTYPE [<!-- a comment --><!ENTITY rdf "http://www.w3.org/1999/02/22-rdf-syntax-ns#">]>
<!DOCTYPE [<!ENTITY go "Golang"><!-- a comment-->]>
<!DOCTYPE <!-> <!> <!----> <!-->--> <!--->--> [<!ENTITY go "Golang"><!-- a comment-->]>
`

// directivesWithCommentsTokens is the token sequence expected for
// directivesWithCommentsInput. The expected Directive text keeps the nested
// directives but contains none of the comments.
var directivesWithCommentsTokens = []Token{
	CharData("\n"),
	Directive(`DOCTYPE [<!ENTITY rdf "http://www.w3.org/1999/02/22-rdf-syntax-ns#">]`),
	CharData("\n"),
	Directive(`DOCTYPE [<!ENTITY go "Golang">]`),
	CharData("\n"),
	Directive(`DOCTYPE <!-> <!>    [<!ENTITY go "Golang">]`),
	CharData("\n"),
}
   693  
   694  func TestDirectivesWithComments(t *testing.T) {
   695  	d := NewDecoder(strings.NewReader(directivesWithCommentsInput))
   696  
   697  	for i, want := range directivesWithCommentsTokens {
   698  		have, err := d.Token()
   699  		if err != nil {
   700  			t.Fatalf("token %d: unexpected error: %s", i, err)
   701  		}
   702  		if !reflect.DeepEqual(have, want) {
   703  			t.Errorf("token %d = %#v want %#v", i, have, want)
   704  		}
   705  	}
   706  }
   707  
   708  // Writer whose Write method always returns an error.
   709  type errWriter struct{}
   710  
   711  func (errWriter) Write(p []byte) (n int, err error) { return 0, fmt.Errorf("unwritable") }
   712  
   713  func TestEscapeTextIOErrors(t *testing.T) {
   714  	expectErr := "unwritable"
   715  	err := EscapeText(errWriter{}, []byte{'A'})
   716  
   717  	if err == nil || err.Error() != expectErr {
   718  		t.Errorf("have %v, want %v", err, expectErr)
   719  	}
   720  }
   721  
   722  func TestEscapeTextInvalidChar(t *testing.T) {
   723  	input := []byte("A \x00 terminated string.")
   724  	expected := "A \uFFFD terminated string."
   725  
   726  	buff := new(bytes.Buffer)
   727  	if err := EscapeText(buff, input); err != nil {
   728  		t.Fatalf("have %v, want nil", err)
   729  	}
   730  	text := buff.String()
   731  
   732  	if text != expected {
   733  		t.Errorf("have %v, want %v", text, expected)
   734  	}
   735  }
   736  
   737  func TestIssue5880(t *testing.T) {
   738  	type T []byte
   739  	data, err := Marshal(T{192, 168, 0, 1})
   740  	if err != nil {
   741  		t.Errorf("Marshal error: %v", err)
   742  	}
   743  	if !utf8.Valid(data) {
   744  		t.Errorf("Marshal generated invalid UTF-8: %x", data)
   745  	}
   746  }
   747  
   748  func TestIssue11405(t *testing.T) {
   749  	testCases := []string{
   750  		"<root>",
   751  		"<root><foo>",
   752  		"<root><foo></foo>",
   753  	}
   754  	for _, tc := range testCases {
   755  		d := NewDecoder(strings.NewReader(tc))
   756  		var err error
   757  		for {
   758  			_, err = d.Token()
   759  			if err != nil {
   760  				break
   761  			}
   762  		}
   763  		if _, ok := err.(*SyntaxError); !ok {
   764  			t.Errorf("%s: Token: Got error %v, want SyntaxError", tc, err)
   765  		}
   766  	}
   767  }
   768  
   769  func TestIssue12417(t *testing.T) {
   770  	testCases := []struct {
   771  		s  string
   772  		ok bool
   773  	}{
   774  		{`<?xml encoding="UtF-8" version="1.0"?><root/>`, true},
   775  		{`<?xml encoding="UTF-8" version="1.0"?><root/>`, true},
   776  		{`<?xml encoding="utf-8" version="1.0"?><root/>`, true},
   777  		{`<?xml encoding="uuu-9" version="1.0"?><root/>`, false},
   778  	}
   779  	for _, tc := range testCases {
   780  		d := NewDecoder(strings.NewReader(tc.s))
   781  		var err error
   782  		for {
   783  			_, err = d.Token()
   784  			if err != nil {
   785  				if err == io.EOF {
   786  					err = nil
   787  				}
   788  				break
   789  			}
   790  		}
   791  		if err != nil && tc.ok {
   792  			t.Errorf("%q: Encoding charset: expected no error, got %s", tc.s, err)
   793  			continue
   794  		}
   795  		if err == nil && !tc.ok {
   796  			t.Errorf("%q: Encoding charset: expected error, got nil", tc.s)
   797  		}
   798  	}
   799  }
   800  
   801  func tokenMap(mapping func(t Token) Token) func(TokenReader) TokenReader {
   802  	return func(src TokenReader) TokenReader {
   803  		return mapper{
   804  			t: src,
   805  			f: mapping,
   806  		}
   807  	}
   808  }
   809  
   810  type mapper struct {
   811  	t TokenReader
   812  	f func(Token) Token
   813  }
   814  
   815  func (m mapper) Token() (Token, error) {
   816  	tok, err := m.t.Token()
   817  	if err != nil {
   818  		return nil, err
   819  	}
   820  	return m.f(tok), nil
   821  }
   822  
   823  func TestNewTokenDecoderIdempotent(t *testing.T) {
   824  	d := NewDecoder(strings.NewReader(`<br/>`))
   825  	d2 := NewTokenDecoder(d)
   826  	if d != d2 {
   827  		t.Error("NewTokenDecoder did not detect underlying Decoder")
   828  	}
   829  }
   830  
   831  func TestWrapDecoder(t *testing.T) {
   832  	d := NewDecoder(strings.NewReader(`<quote>[Re-enter Clown with a letter, and FABIAN]</quote>`))
   833  	m := tokenMap(func(t Token) Token {
   834  		switch tok := t.(type) {
   835  		case StartElement:
   836  			if tok.Name.Local == "quote" {
   837  				tok.Name.Local = "blocking"
   838  				return tok
   839  			}
   840  		case EndElement:
   841  			if tok.Name.Local == "quote" {
   842  				tok.Name.Local = "blocking"
   843  				return tok
   844  			}
   845  		}
   846  		return t
   847  	})
   848  
   849  	d = NewTokenDecoder(m(d))
   850  
   851  	o := struct {
   852  		XMLName  Name   `xml:"blocking"`
   853  		Chardata string `xml:",chardata"`
   854  	}{}
   855  
   856  	if err := d.Decode(&o); err != nil {
   857  		t.Fatal("Got unexpected error while decoding:", err)
   858  	}
   859  
   860  	if o.Chardata != "[Re-enter Clown with a letter, and FABIAN]" {
   861  		t.Fatalf("Got unexpected chardata: `%s`\n", o.Chardata)
   862  	}
   863  }
   864  
   865  type tokReader struct{}
   866  
   867  func (tokReader) Token() (Token, error) {
   868  	return StartElement{}, nil
   869  }
   870  
   871  type Failure struct{}
   872  
   873  func (Failure) UnmarshalXML(*Decoder, StartElement) error {
   874  	return nil
   875  }
   876  
   877  func TestTokenUnmarshaler(t *testing.T) {
   878  	defer func() {
   879  		if r := recover(); r != nil {
   880  			t.Error("Unexpected panic using custom token unmarshaler")
   881  		}
   882  	}()
   883  
   884  	d := NewTokenDecoder(tokReader{})
   885  	d.Decode(&Failure{})
   886  }