github.com/ader1990/go@v0.0.0-20140630135419-8c24447fa791/src/pkg/encoding/xml/xml_test.go (about)

     1  // Copyright 2009 The Go Authors.  All rights reserved.
     2  // Use of this source code is governed by a BSD-style
     3  // license that can be found in the LICENSE file.
     4  
     5  package xml
     6  
     7  import (
     8  	"bytes"
     9  	"fmt"
    10  	"io"
    11  	"reflect"
    12  	"strings"
    13  	"testing"
    14  	"unicode/utf8"
    15  )
    16  
// testInput is the XML document shared by TestRawToken and TestToken.
// It contains a processing instruction, a DOCTYPE directive, prefixed and
// default namespace declarations, "\r\n\t" whitespace inside a start tag,
// predefined and numeric character references, two custom entities
// (resolved through testEntity), a self-closing element, a CDATA section,
// and a trailing comment with no final newline.
const testInput = `
<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN"
  "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
<body xmlns:foo="ns1" xmlns="ns2" xmlns:tag="ns3" ` +
	"\r\n\t" + `  >
  <hello lang="en">World &lt;&gt;&apos;&quot; &#x767d;&#40300;翔</hello>
  <query>&何; &is-it;</query>
  <goodbye />
  <outer foo:attr="value" xmlns:tag="ns4">
    <inner/>
  </outer>
  <tag:name>
    <![CDATA[Some text here.]]>
  </tag:name>
</body><!-- missing final newline -->`

// testEntity maps the two custom entity names used inside the <query>
// element of testInput to their replacement text.
var testEntity = map[string]string{"何": "What", "is-it": "is it?"}
    35  
// rawTokens is the token sequence TestRawToken expects RawToken to produce
// for testInput. Names keep their literal prefix in Name.Space (for example
// {"tag", "name"} and {"foo", "attr"}); unprefixed names have an empty Space.
var rawTokens = []Token{
	CharData("\n"),
	ProcInst{"xml", []byte(`version="1.0" encoding="UTF-8"`)},
	CharData("\n"),
	Directive(`DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN"
  "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd"`),
	CharData("\n"),
	StartElement{Name{"", "body"}, []Attr{{Name{"xmlns", "foo"}, "ns1"}, {Name{"", "xmlns"}, "ns2"}, {Name{"xmlns", "tag"}, "ns3"}}},
	CharData("\n  "),
	StartElement{Name{"", "hello"}, []Attr{{Name{"", "lang"}, "en"}}},
	CharData("World <>'\" 白鵬翔"),
	EndElement{Name{"", "hello"}},
	CharData("\n  "),
	StartElement{Name{"", "query"}, []Attr{}},
	CharData("What is it?"),
	EndElement{Name{"", "query"}},
	CharData("\n  "),
	StartElement{Name{"", "goodbye"}, []Attr{}},
	EndElement{Name{"", "goodbye"}},
	CharData("\n  "),
	StartElement{Name{"", "outer"}, []Attr{{Name{"foo", "attr"}, "value"}, {Name{"xmlns", "tag"}, "ns4"}}},
	CharData("\n    "),
	StartElement{Name{"", "inner"}, []Attr{}},
	EndElement{Name{"", "inner"}},
	CharData("\n  "),
	EndElement{Name{"", "outer"}},
	CharData("\n  "),
	StartElement{Name{"tag", "name"}, []Attr{}},
	CharData("\n    "),
	CharData("Some text here."),
	CharData("\n  "),
	EndElement{Name{"tag", "name"}},
	CharData("\n"),
	EndElement{Name{"", "body"}},
	Comment(" missing final newline "),
}
    72  
// cookedTokens is the token sequence TestToken expects Token to produce for
// testInput. It mirrors rawTokens except that Name.Space holds the namespace
// value declared in the document: "ns2" for unprefixed elements, "ns1" for
// the foo: attribute and "ns3" for the tag: element. The xmlns declaration
// attributes themselves are listed unchanged.
var cookedTokens = []Token{
	CharData("\n"),
	ProcInst{"xml", []byte(`version="1.0" encoding="UTF-8"`)},
	CharData("\n"),
	Directive(`DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN"
  "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd"`),
	CharData("\n"),
	StartElement{Name{"ns2", "body"}, []Attr{{Name{"xmlns", "foo"}, "ns1"}, {Name{"", "xmlns"}, "ns2"}, {Name{"xmlns", "tag"}, "ns3"}}},
	CharData("\n  "),
	StartElement{Name{"ns2", "hello"}, []Attr{{Name{"", "lang"}, "en"}}},
	CharData("World <>'\" 白鵬翔"),
	EndElement{Name{"ns2", "hello"}},
	CharData("\n  "),
	StartElement{Name{"ns2", "query"}, []Attr{}},
	CharData("What is it?"),
	EndElement{Name{"ns2", "query"}},
	CharData("\n  "),
	StartElement{Name{"ns2", "goodbye"}, []Attr{}},
	EndElement{Name{"ns2", "goodbye"}},
	CharData("\n  "),
	StartElement{Name{"ns2", "outer"}, []Attr{{Name{"ns1", "attr"}, "value"}, {Name{"xmlns", "tag"}, "ns4"}}},
	CharData("\n    "),
	StartElement{Name{"ns2", "inner"}, []Attr{}},
	EndElement{Name{"ns2", "inner"}},
	CharData("\n  "),
	EndElement{Name{"ns2", "outer"}},
	CharData("\n  "),
	StartElement{Name{"ns3", "name"}, []Attr{}},
	CharData("\n    "),
	CharData("Some text here."),
	CharData("\n  "),
	EndElement{Name{"ns3", "name"}},
	CharData("\n"),
	EndElement{Name{"ns2", "body"}},
	Comment(" missing final newline "),
}
   109  
// testInputAltEncoding is a small document whose XML declaration names the
// made-up encoding "x-testing-uppercase"; the element name and text are
// written in upper case.
const testInputAltEncoding = `
<?xml version="1.0" encoding="x-testing-uppercase"?>
<TAG>VALUE</TAG>`

// rawTokensAltEncoding is the RawToken output expected for
// testInputAltEncoding when TestRawTokenAltEncoding installs its
// lower-casing CharsetReader: the element name and text appear in lower case.
var rawTokensAltEncoding = []Token{
	CharData("\n"),
	ProcInst{"xml", []byte(`version="1.0" encoding="x-testing-uppercase"`)},
	CharData("\n"),
	StartElement{Name{"", "tag"}, []Attr{}},
	CharData("value"),
	EndElement{Name{"", "tag"}},
}
   122  
// xmlInput lists malformed documents. TestSyntax feeds each one to a
// Decoder and requires that reading tokens ends in a *SyntaxError. The
// first group is input truncated mid-construct; the second group is
// complete but invalid syntax. Two entries are commented out because, per
// their notes, handling is left to the caller of Token.
var xmlInput = []string{
	// unexpected EOF cases
	"<",
	"<t",
	"<t ",
	"<t/",
	"<!",
	"<!-",
	"<!--",
	"<!--c-",
	"<!--c--",
	"<!d",
	"<t></",
	"<t></t",
	"<?",
	"<?p",
	"<t a",
	"<t a=",
	"<t a='",
	"<t a=''",
	"<t/><![",
	"<t/><![C",
	"<t/><![CDATA[d",
	"<t/><![CDATA[d]",
	"<t/><![CDATA[d]]",

	// other Syntax errors
	"<>",
	"<t/a",
	"<0 />",
	"<?0 >",
	//	"<!0 >",	// let the Token() caller handle
	"</0>",
	"<t 0=''>",
	"<t a='&'>",
	"<t a='<'>",
	"<t>&nbspc;</t>",
	"<t a>",
	"<t a=>",
	"<t a=v>",
	//	"<![CDATA[d]]>",	// let the Token() caller handle
	"<t></e>",
	"<t></>",
	"<t></t!",
	"<t>cdata]]></t>",
}
   169  
   170  func TestRawToken(t *testing.T) {
   171  	d := NewDecoder(strings.NewReader(testInput))
   172  	d.Entity = testEntity
   173  	testRawToken(t, d, rawTokens)
   174  }
   175  
// nonStrictInput holds eight <tag> elements whose text contains a malformed
// or unknown entity reference: a bare ampersand, an unknown name, a numeric
// reference without a semicolon, a non-numeric "numeric" reference, a
// non-ASCII name, a name continuing past a predefined one, an empty name,
// and a name starting with a digit.
const nonStrictInput = `
<tag>non&entity</tag>
<tag>&unknown;entity</tag>
<tag>&#123</tag>
<tag>&#zzz;</tag>
<tag>&なまえ3;</tag>
<tag>&lt-gt;</tag>
<tag>&;</tag>
<tag>&0a;</tag>
`

// nonStringEntity maps the empty name and "0a" to "oops!".
// NOTE(review): nothing in the visible code references this variable;
// presumably it was meant to be installed as the Decoder's Entity map in
// TestNonStrictRawToken — confirm before relying on it.
var nonStringEntity = map[string]string{"": "oops!", "0a": "oops!"}

// nonStrictTokens is the RawToken output TestNonStrictRawToken expects for
// nonStrictInput with Strict disabled: every malformed reference is passed
// through verbatim as character data.
var nonStrictTokens = []Token{
	CharData("\n"),
	StartElement{Name{"", "tag"}, []Attr{}},
	CharData("non&entity"),
	EndElement{Name{"", "tag"}},
	CharData("\n"),
	StartElement{Name{"", "tag"}, []Attr{}},
	CharData("&unknown;entity"),
	EndElement{Name{"", "tag"}},
	CharData("\n"),
	StartElement{Name{"", "tag"}, []Attr{}},
	CharData("&#123"),
	EndElement{Name{"", "tag"}},
	CharData("\n"),
	StartElement{Name{"", "tag"}, []Attr{}},
	CharData("&#zzz;"),
	EndElement{Name{"", "tag"}},
	CharData("\n"),
	StartElement{Name{"", "tag"}, []Attr{}},
	CharData("&なまえ3;"),
	EndElement{Name{"", "tag"}},
	CharData("\n"),
	StartElement{Name{"", "tag"}, []Attr{}},
	CharData("&lt-gt;"),
	EndElement{Name{"", "tag"}},
	CharData("\n"),
	StartElement{Name{"", "tag"}, []Attr{}},
	CharData("&;"),
	EndElement{Name{"", "tag"}},
	CharData("\n"),
	StartElement{Name{"", "tag"}, []Attr{}},
	CharData("&0a;"),
	EndElement{Name{"", "tag"}},
	CharData("\n"),
}
   224  
   225  func TestNonStrictRawToken(t *testing.T) {
   226  	d := NewDecoder(strings.NewReader(nonStrictInput))
   227  	d.Strict = false
   228  	testRawToken(t, d, nonStrictTokens)
   229  }
   230  
   231  type downCaser struct {
   232  	t *testing.T
   233  	r io.ByteReader
   234  }
   235  
   236  func (d *downCaser) ReadByte() (c byte, err error) {
   237  	c, err = d.r.ReadByte()
   238  	if c >= 'A' && c <= 'Z' {
   239  		c += 'a' - 'A'
   240  	}
   241  	return
   242  }
   243  
   244  func (d *downCaser) Read(p []byte) (int, error) {
   245  	d.t.Fatalf("unexpected Read call on downCaser reader")
   246  	panic("unreachable")
   247  }
   248  
   249  func TestRawTokenAltEncoding(t *testing.T) {
   250  	d := NewDecoder(strings.NewReader(testInputAltEncoding))
   251  	d.CharsetReader = func(charset string, input io.Reader) (io.Reader, error) {
   252  		if charset != "x-testing-uppercase" {
   253  			t.Fatalf("unexpected charset %q", charset)
   254  		}
   255  		return &downCaser{t, input.(io.ByteReader)}, nil
   256  	}
   257  	testRawToken(t, d, rawTokensAltEncoding)
   258  }
   259  
   260  func TestRawTokenAltEncodingNoConverter(t *testing.T) {
   261  	d := NewDecoder(strings.NewReader(testInputAltEncoding))
   262  	token, err := d.RawToken()
   263  	if token == nil {
   264  		t.Fatalf("expected a token on first RawToken call")
   265  	}
   266  	if err != nil {
   267  		t.Fatal(err)
   268  	}
   269  	token, err = d.RawToken()
   270  	if token != nil {
   271  		t.Errorf("expected a nil token; got %#v", token)
   272  	}
   273  	if err == nil {
   274  		t.Fatalf("expected an error on second RawToken call")
   275  	}
   276  	const encoding = "x-testing-uppercase"
   277  	if !strings.Contains(err.Error(), encoding) {
   278  		t.Errorf("expected error to contain %q; got error: %v",
   279  			encoding, err)
   280  	}
   281  }
   282  
   283  func testRawToken(t *testing.T, d *Decoder, rawTokens []Token) {
   284  	for i, want := range rawTokens {
   285  		have, err := d.RawToken()
   286  		if err != nil {
   287  			t.Fatalf("token %d: unexpected error: %s", i, err)
   288  		}
   289  		if !reflect.DeepEqual(have, want) {
   290  			var shave, swant string
   291  			if _, ok := have.(CharData); ok {
   292  				shave = fmt.Sprintf("CharData(%q)", have)
   293  			} else {
   294  				shave = fmt.Sprintf("%#v", have)
   295  			}
   296  			if _, ok := want.(CharData); ok {
   297  				swant = fmt.Sprintf("CharData(%q)", want)
   298  			} else {
   299  				swant = fmt.Sprintf("%#v", want)
   300  			}
   301  			t.Errorf("token %d = %s, want %s", i, shave, swant)
   302  		}
   303  	}
   304  }
   305  
// Ensure that directives (specifically !DOCTYPE) include the complete
// text of any nested directives, noting that < and > do not change
// nesting depth if they are in single or double quotes.
//
// nestedDirectivesInput holds seven DOCTYPE directives, each with a nested
// ENTITY declaration whose quoted value contains a URL, an angle bracket,
// or a quote character of the other kind next to an angle bracket.

var nestedDirectivesInput = `
<!DOCTYPE [<!ENTITY rdf "http://www.w3.org/1999/02/22-rdf-syntax-ns#">]>
<!DOCTYPE [<!ENTITY xlt ">">]>
<!DOCTYPE [<!ENTITY xlt "<">]>
<!DOCTYPE [<!ENTITY xlt '>'>]>
<!DOCTYPE [<!ENTITY xlt '<'>]>
<!DOCTYPE [<!ENTITY xlt '">'>]>
<!DOCTYPE [<!ENTITY xlt "'<">]>
`

// nestedDirectivesTokens is the Token output expected for
// nestedDirectivesInput: each line becomes one Directive carrying the full
// nested text, separated by newline CharData.
var nestedDirectivesTokens = []Token{
	CharData("\n"),
	Directive(`DOCTYPE [<!ENTITY rdf "http://www.w3.org/1999/02/22-rdf-syntax-ns#">]`),
	CharData("\n"),
	Directive(`DOCTYPE [<!ENTITY xlt ">">]`),
	CharData("\n"),
	Directive(`DOCTYPE [<!ENTITY xlt "<">]`),
	CharData("\n"),
	Directive(`DOCTYPE [<!ENTITY xlt '>'>]`),
	CharData("\n"),
	Directive(`DOCTYPE [<!ENTITY xlt '<'>]`),
	CharData("\n"),
	Directive(`DOCTYPE [<!ENTITY xlt '">'>]`),
	CharData("\n"),
	Directive(`DOCTYPE [<!ENTITY xlt "'<">]`),
	CharData("\n"),
}
   337  
   338  func TestNestedDirectives(t *testing.T) {
   339  	d := NewDecoder(strings.NewReader(nestedDirectivesInput))
   340  
   341  	for i, want := range nestedDirectivesTokens {
   342  		have, err := d.Token()
   343  		if err != nil {
   344  			t.Fatalf("token %d: unexpected error: %s", i, err)
   345  		}
   346  		if !reflect.DeepEqual(have, want) {
   347  			t.Errorf("token %d = %#v want %#v", i, have, want)
   348  		}
   349  	}
   350  }
   351  
   352  func TestToken(t *testing.T) {
   353  	d := NewDecoder(strings.NewReader(testInput))
   354  	d.Entity = testEntity
   355  
   356  	for i, want := range cookedTokens {
   357  		have, err := d.Token()
   358  		if err != nil {
   359  			t.Fatalf("token %d: unexpected error: %s", i, err)
   360  		}
   361  		if !reflect.DeepEqual(have, want) {
   362  			t.Errorf("token %d = %#v want %#v", i, have, want)
   363  		}
   364  	}
   365  }
   366  
   367  func TestSyntax(t *testing.T) {
   368  	for i := range xmlInput {
   369  		d := NewDecoder(strings.NewReader(xmlInput[i]))
   370  		var err error
   371  		for _, err = d.Token(); err == nil; _, err = d.Token() {
   372  		}
   373  		if _, ok := err.(*SyntaxError); !ok {
   374  			t.Fatalf(`xmlInput "%s": expected SyntaxError not received`, xmlInput[i])
   375  		}
   376  	}
   377  }
   378  
   379  type allScalars struct {
   380  	True1     bool
   381  	True2     bool
   382  	False1    bool
   383  	False2    bool
   384  	Int       int
   385  	Int8      int8
   386  	Int16     int16
   387  	Int32     int32
   388  	Int64     int64
   389  	Uint      int
   390  	Uint8     uint8
   391  	Uint16    uint16
   392  	Uint32    uint32
   393  	Uint64    uint64
   394  	Uintptr   uintptr
   395  	Float32   float32
   396  	Float64   float64
   397  	String    string
   398  	PtrString *string
   399  }
   400  
// all is the value TestAllScalars expects after unmarshalling
// testScalarsInput into an allScalars. PtrString points at the
// package-level variable sixteen declared just below.
var all = allScalars{
	True1:     true,
	True2:     true,
	False1:    false,
	False2:    false,
	Int:       1,
	Int8:      -2,
	Int16:     3,
	Int32:     -4,
	Int64:     5,
	Uint:      6,
	Uint8:     7,
	Uint16:    8,
	Uint32:    9,
	Uint64:    10,
	Uintptr:   11,
	Float32:   13.0,
	Float64:   14.0,
	String:    "15",
	PtrString: &sixteen,
}

// sixteen is the string that all.PtrString points to.
var sixteen = "16"
   424  
// testScalarsInput is the document TestAllScalars unmarshals into an
// allScalars. Booleans appear both as words and as 1/0. The <Float>
// element has no field of that name in allScalars.
const testScalarsInput = `<allscalars>
	<True1>true</True1>
	<True2>1</True2>
	<False1>false</False1>
	<False2>0</False2>
	<Int>1</Int>
	<Int8>-2</Int8>
	<Int16>3</Int16>
	<Int32>-4</Int32>
	<Int64>5</Int64>
	<Uint>6</Uint>
	<Uint8>7</Uint8>
	<Uint16>8</Uint16>
	<Uint32>9</Uint32>
	<Uint64>10</Uint64>
	<Uintptr>11</Uintptr>
	<Float>12.0</Float>
	<Float32>13.0</Float32>
	<Float64>14.0</Float64>
	<String>15</String>
	<PtrString>16</PtrString>
</allscalars>`
   447  
   448  func TestAllScalars(t *testing.T) {
   449  	var a allScalars
   450  	err := Unmarshal([]byte(testScalarsInput), &a)
   451  
   452  	if err != nil {
   453  		t.Fatal(err)
   454  	}
   455  	if !reflect.DeepEqual(a, all) {
   456  		t.Errorf("have %+v want %+v", a, all)
   457  	}
   458  }
   459  
// item is the Unmarshal target used by TestIssue569; its single field has
// an underscore in its name.
type item struct {
	Field_a string
}
   463  
   464  func TestIssue569(t *testing.T) {
   465  	data := `<item><Field_a>abcd</Field_a></item>`
   466  	var i item
   467  	err := Unmarshal([]byte(data), &i)
   468  
   469  	if err != nil || i.Field_a != "abcd" {
   470  		t.Fatal("Expecting abcd")
   471  	}
   472  }
   473  
   474  func TestUnquotedAttrs(t *testing.T) {
   475  	data := "<tag attr=azAZ09:-_\t>"
   476  	d := NewDecoder(strings.NewReader(data))
   477  	d.Strict = false
   478  	token, err := d.Token()
   479  	if _, ok := err.(*SyntaxError); ok {
   480  		t.Errorf("Unexpected error: %v", err)
   481  	}
   482  	if token.(StartElement).Name.Local != "tag" {
   483  		t.Errorf("Unexpected tag name: %v", token.(StartElement).Name.Local)
   484  	}
   485  	attr := token.(StartElement).Attr[0]
   486  	if attr.Value != "azAZ09:-_" {
   487  		t.Errorf("Unexpected attribute value: %v", attr.Value)
   488  	}
   489  	if attr.Name.Local != "attr" {
   490  		t.Errorf("Unexpected attribute name: %v", attr.Name.Local)
   491  	}
   492  }
   493  
   494  func TestValuelessAttrs(t *testing.T) {
   495  	tests := [][3]string{
   496  		{"<p nowrap>", "p", "nowrap"},
   497  		{"<p nowrap >", "p", "nowrap"},
   498  		{"<input checked/>", "input", "checked"},
   499  		{"<input checked />", "input", "checked"},
   500  	}
   501  	for _, test := range tests {
   502  		d := NewDecoder(strings.NewReader(test[0]))
   503  		d.Strict = false
   504  		token, err := d.Token()
   505  		if _, ok := err.(*SyntaxError); ok {
   506  			t.Errorf("Unexpected error: %v", err)
   507  		}
   508  		if token.(StartElement).Name.Local != test[1] {
   509  			t.Errorf("Unexpected tag name: %v", token.(StartElement).Name.Local)
   510  		}
   511  		attr := token.(StartElement).Attr[0]
   512  		if attr.Value != test[2] {
   513  			t.Errorf("Unexpected attribute value: %v", attr.Value)
   514  		}
   515  		if attr.Name.Local != test[2] {
   516  			t.Errorf("Unexpected attribute name: %v", attr.Name.Local)
   517  		}
   518  	}
   519  }
   520  
   521  func TestCopyTokenCharData(t *testing.T) {
   522  	data := []byte("same data")
   523  	var tok1 Token = CharData(data)
   524  	tok2 := CopyToken(tok1)
   525  	if !reflect.DeepEqual(tok1, tok2) {
   526  		t.Error("CopyToken(CharData) != CharData")
   527  	}
   528  	data[1] = 'o'
   529  	if reflect.DeepEqual(tok1, tok2) {
   530  		t.Error("CopyToken(CharData) uses same buffer.")
   531  	}
   532  }
   533  
   534  func TestCopyTokenStartElement(t *testing.T) {
   535  	elt := StartElement{Name{"", "hello"}, []Attr{{Name{"", "lang"}, "en"}}}
   536  	var tok1 Token = elt
   537  	tok2 := CopyToken(tok1)
   538  	if tok1.(StartElement).Attr[0].Value != "en" {
   539  		t.Error("CopyToken overwrote Attr[0]")
   540  	}
   541  	if !reflect.DeepEqual(tok1, tok2) {
   542  		t.Error("CopyToken(StartElement) != StartElement")
   543  	}
   544  	tok1.(StartElement).Attr[0] = Attr{Name{"", "lang"}, "de"}
   545  	if reflect.DeepEqual(tok1, tok2) {
   546  		t.Error("CopyToken(CharData) uses same buffer.")
   547  	}
   548  }
   549  
   550  func TestSyntaxErrorLineNum(t *testing.T) {
   551  	testInput := "<P>Foo<P>\n\n<P>Bar</>\n"
   552  	d := NewDecoder(strings.NewReader(testInput))
   553  	var err error
   554  	for _, err = d.Token(); err == nil; _, err = d.Token() {
   555  	}
   556  	synerr, ok := err.(*SyntaxError)
   557  	if !ok {
   558  		t.Error("Expected SyntaxError.")
   559  	}
   560  	if synerr.Line != 3 {
   561  		t.Error("SyntaxError didn't have correct line number.")
   562  	}
   563  }
   564  
   565  func TestTrailingRawToken(t *testing.T) {
   566  	input := `<FOO></FOO>  `
   567  	d := NewDecoder(strings.NewReader(input))
   568  	var err error
   569  	for _, err = d.RawToken(); err == nil; _, err = d.RawToken() {
   570  	}
   571  	if err != io.EOF {
   572  		t.Fatalf("d.RawToken() = _, %v, want _, io.EOF", err)
   573  	}
   574  }
   575  
   576  func TestTrailingToken(t *testing.T) {
   577  	input := `<FOO></FOO>  `
   578  	d := NewDecoder(strings.NewReader(input))
   579  	var err error
   580  	for _, err = d.Token(); err == nil; _, err = d.Token() {
   581  	}
   582  	if err != io.EOF {
   583  		t.Fatalf("d.Token() = _, %v, want _, io.EOF", err)
   584  	}
   585  }
   586  
   587  func TestEntityInsideCDATA(t *testing.T) {
   588  	input := `<test><![CDATA[ &val=foo ]]></test>`
   589  	d := NewDecoder(strings.NewReader(input))
   590  	var err error
   591  	for _, err = d.Token(); err == nil; _, err = d.Token() {
   592  	}
   593  	if err != io.EOF {
   594  		t.Fatalf("d.Token() = _, %v, want _, io.EOF", err)
   595  	}
   596  }
   597  
// characterTests pairs documents containing a disallowed character or an
// invalid entity reference (in) with the exact SyntaxError message that
// TestDisallowedCharacters expects (err).
var characterTests = []struct {
	in  string
	err string
}{
	{"\x12<doc/>", "illegal character code U+0012"},
	{"<?xml version=\"1.0\"?>\x0b<doc/>", "illegal character code U+000B"},
	{"\xef\xbf\xbe<doc/>", "illegal character code U+FFFE"},
	{"<?xml version=\"1.0\"?><doc>\r\n<hiya/>\x07<toots/></doc>", "illegal character code U+0007"},
	{"<?xml version=\"1.0\"?><doc \x12='value'>what's up</doc>", "expected attribute name in element"},
	{"<doc>&abc\x01;</doc>", "invalid character entity &abc (no semicolon)"},
	{"<doc>&\x01;</doc>", "invalid character entity & (no semicolon)"},
	{"<doc>&\xef\xbf\xbe;</doc>", "invalid character entity &\uFFFE;"},
	{"<doc>&hello;</doc>", "invalid character entity &hello;"},
}
   612  
   613  func TestDisallowedCharacters(t *testing.T) {
   614  
   615  	for i, tt := range characterTests {
   616  		d := NewDecoder(strings.NewReader(tt.in))
   617  		var err error
   618  
   619  		for err == nil {
   620  			_, err = d.Token()
   621  		}
   622  		synerr, ok := err.(*SyntaxError)
   623  		if !ok {
   624  			t.Fatalf("input %d d.Token() = _, %v, want _, *SyntaxError", i, err)
   625  		}
   626  		if synerr.Msg != tt.err {
   627  			t.Fatalf("input %d synerr.Msg wrong: want %q, got %q", i, tt.err, synerr.Msg)
   628  		}
   629  	}
   630  }
   631  
// procInstEncodingTest pairs an expected and an obtained string.
// NOTE(review): nothing in the visible code uses this type; procInstTests
// below declares its own anonymous struct — confirm whether it can go.
type procInstEncodingTest struct {
	expect, got string
}

// procInstTests pairs processing-instruction content (input) with the
// encoding name TestProcInstEncoding expects procInstEncoding to return
// (expect). Values in double or single quotes are extracted; an unquoted
// value yields the empty string.
var procInstTests = []struct {
	input, expect string
}{
	{`version="1.0" encoding="utf-8"`, "utf-8"},
	{`version="1.0" encoding='utf-8'`, "utf-8"},
	{`version="1.0" encoding='utf-8' `, "utf-8"},
	{`version="1.0" encoding=utf-8`, ""},
	{`encoding="FOO" `, "FOO"},
}
   645  
   646  func TestProcInstEncoding(t *testing.T) {
   647  	for _, test := range procInstTests {
   648  		got := procInstEncoding(test.input)
   649  		if got != test.expect {
   650  			t.Errorf("procInstEncoding(%q) = %q; want %q", test.input, got, test.expect)
   651  		}
   652  	}
   653  }
   654  
// Ensure that directives with comments include the complete
// text of any nested directives.
//
// directivesWithCommentsInput holds three DOCTYPE directives with comments
// embedded before, after, and between nested content, including the
// near-comment sequences "<!->" and "<!>".

var directivesWithCommentsInput = `
<!DOCTYPE [<!-- a comment --><!ENTITY rdf "http://www.w3.org/1999/02/22-rdf-syntax-ns#">]>
<!DOCTYPE [<!ENTITY go "Golang"><!-- a comment-->]>
<!DOCTYPE <!-> <!> <!----> <!-->--> <!--->--> [<!ENTITY go "Golang"><!-- a comment-->]>
`

// directivesWithCommentsTokens is the Token output expected for
// directivesWithCommentsInput: the comments are absent from the Directive
// text, "<!->" and "<!>" are kept, and in the third directive the spaces
// that separated the removed comments remain.
var directivesWithCommentsTokens = []Token{
	CharData("\n"),
	Directive(`DOCTYPE [<!ENTITY rdf "http://www.w3.org/1999/02/22-rdf-syntax-ns#">]`),
	CharData("\n"),
	Directive(`DOCTYPE [<!ENTITY go "Golang">]`),
	CharData("\n"),
	Directive(`DOCTYPE <!-> <!>    [<!ENTITY go "Golang">]`),
	CharData("\n"),
}
   673  
   674  func TestDirectivesWithComments(t *testing.T) {
   675  	d := NewDecoder(strings.NewReader(directivesWithCommentsInput))
   676  
   677  	for i, want := range directivesWithCommentsTokens {
   678  		have, err := d.Token()
   679  		if err != nil {
   680  			t.Fatalf("token %d: unexpected error: %s", i, err)
   681  		}
   682  		if !reflect.DeepEqual(have, want) {
   683  			t.Errorf("token %d = %#v want %#v", i, have, want)
   684  		}
   685  	}
   686  }
   687  
   688  // Writer whose Write method always returns an error.
   689  type errWriter struct{}
   690  
   691  func (errWriter) Write(p []byte) (n int, err error) { return 0, fmt.Errorf("unwritable") }
   692  
   693  func TestEscapeTextIOErrors(t *testing.T) {
   694  	expectErr := "unwritable"
   695  	err := EscapeText(errWriter{}, []byte{'A'})
   696  
   697  	if err == nil || err.Error() != expectErr {
   698  		t.Errorf("have %v, want %v", err, expectErr)
   699  	}
   700  }
   701  
   702  func TestEscapeTextInvalidChar(t *testing.T) {
   703  	input := []byte("A \x00 terminated string.")
   704  	expected := "A \uFFFD terminated string."
   705  
   706  	buff := new(bytes.Buffer)
   707  	if err := EscapeText(buff, input); err != nil {
   708  		t.Fatalf("have %v, want nil", err)
   709  	}
   710  	text := buff.String()
   711  
   712  	if text != expected {
   713  		t.Errorf("have %v, want %v", text, expected)
   714  	}
   715  }
   716  
   717  func TestIssue5880(t *testing.T) {
   718  	type T []byte
   719  	data, err := Marshal(T{192, 168, 0, 1})
   720  	if err != nil {
   721  		t.Errorf("Marshal error: %v", err)
   722  	}
   723  	if !utf8.Valid(data) {
   724  		t.Errorf("Marshal generated invalid UTF-8: %x", data)
   725  	}
   726  }