github.com/xushiwei/go@v0.0.0-20130601165731-2b9d83f45bc9/src/pkg/encoding/xml/xml_test.go (about)

     1  // Copyright 2009 The Go Authors.  All rights reserved.
     2  // Use of this source code is governed by a BSD-style
     3  // license that can be found in the LICENSE file.
     4  
     5  package xml
     6  
     7  import (
     8  	"bytes"
     9  	"fmt"
    10  	"io"
    11  	"reflect"
    12  	"strings"
    13  	"testing"
    14  )
    15  
// testInput is the XML document tokenized by TestRawToken and TestToken.
// It contains a processing instruction, a DOCTYPE directive, namespace
// declarations (with "\r\n\t" whitespace inside the <body> start tag),
// predefined, numeric and custom entity references, a self-closing
// element, a CDATA section, and a trailing comment with no final newline.
const testInput = `
<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN"
  "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
<body xmlns:foo="ns1" xmlns="ns2" xmlns:tag="ns3" ` +
	"\r\n\t" + `  >
  <hello lang="en">World &lt;&gt;&apos;&quot; &#x767d;&#40300;翔</hello>
  <query>&何; &is-it;</query>
  <goodbye />
  <outer foo:attr="value" xmlns:tag="ns4">
    <inner/>
  </outer>
  <tag:name>
    <![CDATA[Some text here.]]>
  </tag:name>
</body><!-- missing final newline -->`
    32  
// testEntity supplies replacement text for the two custom entities
// referenced in testInput's <query> element. TestRawToken and TestToken
// install it as the decoder's Entity map.
var testEntity = map[string]string{"何": "What", "is-it": "is it?"}
    34  
// rawTokens is the token sequence TestRawToken expects RawToken to produce
// for testInput. Names keep the literal prefix from the document in
// Name.Space (for example "tag", "foo", "xmlns") rather than a namespace
// value, and entity references appear already expanded in the CharData.
var rawTokens = []Token{
	CharData("\n"),
	ProcInst{"xml", []byte(`version="1.0" encoding="UTF-8"`)},
	CharData("\n"),
	Directive(`DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN"
  "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd"`),
	CharData("\n"),
	StartElement{Name{"", "body"}, []Attr{{Name{"xmlns", "foo"}, "ns1"}, {Name{"", "xmlns"}, "ns2"}, {Name{"xmlns", "tag"}, "ns3"}}},
	CharData("\n  "),
	StartElement{Name{"", "hello"}, []Attr{{Name{"", "lang"}, "en"}}},
	CharData("World <>'\" 白鵬翔"),
	EndElement{Name{"", "hello"}},
	CharData("\n  "),
	StartElement{Name{"", "query"}, []Attr{}},
	CharData("What is it?"),
	EndElement{Name{"", "query"}},
	CharData("\n  "),
	StartElement{Name{"", "goodbye"}, []Attr{}},
	EndElement{Name{"", "goodbye"}},
	CharData("\n  "),
	StartElement{Name{"", "outer"}, []Attr{{Name{"foo", "attr"}, "value"}, {Name{"xmlns", "tag"}, "ns4"}}},
	CharData("\n    "),
	StartElement{Name{"", "inner"}, []Attr{}},
	EndElement{Name{"", "inner"}},
	CharData("\n  "),
	EndElement{Name{"", "outer"}},
	CharData("\n  "),
	StartElement{Name{"tag", "name"}, []Attr{}},
	CharData("\n    "),
	CharData("Some text here."),
	CharData("\n  "),
	EndElement{Name{"tag", "name"}},
	CharData("\n"),
	EndElement{Name{"", "body"}},
	Comment(" missing final newline "),
}
    71  
// cookedTokens is the token sequence TestToken expects Token to produce for
// testInput. Compared with rawTokens, Name.Space holds the declared
// namespace value instead of the prefix: unprefixed elements carry "ns2",
// the foo: attribute carries "ns1" and the tag: element carries "ns3".
// The xmlns declaration attributes and the unprefixed lang attribute are
// listed unchanged.
var cookedTokens = []Token{
	CharData("\n"),
	ProcInst{"xml", []byte(`version="1.0" encoding="UTF-8"`)},
	CharData("\n"),
	Directive(`DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN"
  "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd"`),
	CharData("\n"),
	StartElement{Name{"ns2", "body"}, []Attr{{Name{"xmlns", "foo"}, "ns1"}, {Name{"", "xmlns"}, "ns2"}, {Name{"xmlns", "tag"}, "ns3"}}},
	CharData("\n  "),
	StartElement{Name{"ns2", "hello"}, []Attr{{Name{"", "lang"}, "en"}}},
	CharData("World <>'\" 白鵬翔"),
	EndElement{Name{"ns2", "hello"}},
	CharData("\n  "),
	StartElement{Name{"ns2", "query"}, []Attr{}},
	CharData("What is it?"),
	EndElement{Name{"ns2", "query"}},
	CharData("\n  "),
	StartElement{Name{"ns2", "goodbye"}, []Attr{}},
	EndElement{Name{"ns2", "goodbye"}},
	CharData("\n  "),
	StartElement{Name{"ns2", "outer"}, []Attr{{Name{"ns1", "attr"}, "value"}, {Name{"xmlns", "tag"}, "ns4"}}},
	CharData("\n    "),
	StartElement{Name{"ns2", "inner"}, []Attr{}},
	EndElement{Name{"ns2", "inner"}},
	CharData("\n  "),
	EndElement{Name{"ns2", "outer"}},
	CharData("\n  "),
	StartElement{Name{"ns3", "name"}, []Attr{}},
	CharData("\n    "),
	CharData("Some text here."),
	CharData("\n  "),
	EndElement{Name{"ns3", "name"}},
	CharData("\n"),
	EndElement{Name{"ns2", "body"}},
	Comment(" missing final newline "),
}
   108  
// testInputAltEncoding is a document whose XML declaration names the
// made-up encoding "x-testing-uppercase" and whose element name and text
// are written in upper case.
const testInputAltEncoding = `
<?xml version="1.0" encoding="x-testing-uppercase"?>
<TAG>VALUE</TAG>`
   112  
// rawTokensAltEncoding is the token sequence TestRawTokenAltEncoding
// expects for testInputAltEncoding: the element name and its text appear
// lower-cased, as produced by the downCaser reader that test installs.
var rawTokensAltEncoding = []Token{
	CharData("\n"),
	ProcInst{"xml", []byte(`version="1.0" encoding="x-testing-uppercase"`)},
	CharData("\n"),
	StartElement{Name{"", "tag"}, []Attr{}},
	CharData("value"),
	EndElement{Name{"", "tag"}},
}
   121  
// xmlInput lists malformed documents. TestSyntax reads tokens from each one
// until an error occurs and expects that error to be a *SyntaxError.
// The first group consists of inputs that stop in the middle of a
// construct; the second group holds other syntax violations. Two entries
// are intentionally disabled (see their trailing comments).
var xmlInput = []string{
	// unexpected EOF cases
	"<",
	"<t",
	"<t ",
	"<t/",
	"<!",
	"<!-",
	"<!--",
	"<!--c-",
	"<!--c--",
	"<!d",
	"<t></",
	"<t></t",
	"<?",
	"<?p",
	"<t a",
	"<t a=",
	"<t a='",
	"<t a=''",
	"<t/><![",
	"<t/><![C",
	"<t/><![CDATA[d",
	"<t/><![CDATA[d]",
	"<t/><![CDATA[d]]",

	// other Syntax errors
	"<>",
	"<t/a",
	"<0 />",
	"<?0 >",
	//	"<!0 >",	// let the Token() caller handle
	"</0>",
	"<t 0=''>",
	"<t a='&'>",
	"<t a='<'>",
	"<t>&nbspc;</t>",
	"<t a>",
	"<t a=>",
	"<t a=v>",
	//	"<![CDATA[d]]>",	// let the Token() caller handle
	"<t></e>",
	"<t></>",
	"<t></t!",
	"<t>cdata]]></t>",
}
   168  
   169  func TestRawToken(t *testing.T) {
   170  	d := NewDecoder(strings.NewReader(testInput))
   171  	d.Entity = testEntity
   172  	testRawToken(t, d, rawTokens)
   173  }
   174  
// nonStrictInput holds one <tag> element per line, each containing a
// malformed or unknown entity reference. TestNonStrictRawToken decodes it
// with Strict set to false.
const nonStrictInput = `
<tag>non&entity</tag>
<tag>&unknown;entity</tag>
<tag>&#123</tag>
<tag>&#zzz;</tag>
<tag>&なまえ3;</tag>
<tag>&lt-gt;</tag>
<tag>&;</tag>
<tag>&0a;</tag>
`
   185  
// nonStringEntity is an entity map whose keys ("" and "0a") correspond to
// the last two references in nonStrictInput. Its replacement text "oops!"
// does not occur anywhere in nonStrictTokens: the expected output keeps
// those references verbatim.
var nonStringEntity = map[string]string{"": "oops!", "0a": "oops!"}
   187  
// nonStrictTokens is the token sequence TestNonStrictRawToken expects for
// nonStrictInput: every entity reference is passed through unchanged as
// literal character data.
var nonStrictTokens = []Token{
	CharData("\n"),
	StartElement{Name{"", "tag"}, []Attr{}},
	CharData("non&entity"),
	EndElement{Name{"", "tag"}},
	CharData("\n"),
	StartElement{Name{"", "tag"}, []Attr{}},
	CharData("&unknown;entity"),
	EndElement{Name{"", "tag"}},
	CharData("\n"),
	StartElement{Name{"", "tag"}, []Attr{}},
	CharData("&#123"),
	EndElement{Name{"", "tag"}},
	CharData("\n"),
	StartElement{Name{"", "tag"}, []Attr{}},
	CharData("&#zzz;"),
	EndElement{Name{"", "tag"}},
	CharData("\n"),
	StartElement{Name{"", "tag"}, []Attr{}},
	CharData("&なまえ3;"),
	EndElement{Name{"", "tag"}},
	CharData("\n"),
	StartElement{Name{"", "tag"}, []Attr{}},
	CharData("&lt-gt;"),
	EndElement{Name{"", "tag"}},
	CharData("\n"),
	StartElement{Name{"", "tag"}, []Attr{}},
	CharData("&;"),
	EndElement{Name{"", "tag"}},
	CharData("\n"),
	StartElement{Name{"", "tag"}, []Attr{}},
	CharData("&0a;"),
	EndElement{Name{"", "tag"}},
	CharData("\n"),
}
   223  
   224  func TestNonStrictRawToken(t *testing.T) {
   225  	d := NewDecoder(strings.NewReader(nonStrictInput))
   226  	d.Strict = false
   227  	testRawToken(t, d, nonStrictTokens)
   228  }
   229  
   230  type downCaser struct {
   231  	t *testing.T
   232  	r io.ByteReader
   233  }
   234  
   235  func (d *downCaser) ReadByte() (c byte, err error) {
   236  	c, err = d.r.ReadByte()
   237  	if c >= 'A' && c <= 'Z' {
   238  		c += 'a' - 'A'
   239  	}
   240  	return
   241  }
   242  
   243  func (d *downCaser) Read(p []byte) (int, error) {
   244  	d.t.Fatalf("unexpected Read call on downCaser reader")
   245  	panic("unreachable")
   246  }
   247  
   248  func TestRawTokenAltEncoding(t *testing.T) {
   249  	sawEncoding := ""
   250  	d := NewDecoder(strings.NewReader(testInputAltEncoding))
   251  	d.CharsetReader = func(charset string, input io.Reader) (io.Reader, error) {
   252  		sawEncoding = charset
   253  		if charset != "x-testing-uppercase" {
   254  			t.Fatalf("unexpected charset %q", charset)
   255  		}
   256  		return &downCaser{t, input.(io.ByteReader)}, nil
   257  	}
   258  	testRawToken(t, d, rawTokensAltEncoding)
   259  }
   260  
   261  func TestRawTokenAltEncodingNoConverter(t *testing.T) {
   262  	d := NewDecoder(strings.NewReader(testInputAltEncoding))
   263  	token, err := d.RawToken()
   264  	if token == nil {
   265  		t.Fatalf("expected a token on first RawToken call")
   266  	}
   267  	if err != nil {
   268  		t.Fatal(err)
   269  	}
   270  	token, err = d.RawToken()
   271  	if token != nil {
   272  		t.Errorf("expected a nil token; got %#v", token)
   273  	}
   274  	if err == nil {
   275  		t.Fatalf("expected an error on second RawToken call")
   276  	}
   277  	const encoding = "x-testing-uppercase"
   278  	if !strings.Contains(err.Error(), encoding) {
   279  		t.Errorf("expected error to contain %q; got error: %v",
   280  			encoding, err)
   281  	}
   282  }
   283  
   284  func testRawToken(t *testing.T, d *Decoder, rawTokens []Token) {
   285  	for i, want := range rawTokens {
   286  		have, err := d.RawToken()
   287  		if err != nil {
   288  			t.Fatalf("token %d: unexpected error: %s", i, err)
   289  		}
   290  		if !reflect.DeepEqual(have, want) {
   291  			var shave, swant string
   292  			if _, ok := have.(CharData); ok {
   293  				shave = fmt.Sprintf("CharData(%q)", have)
   294  			} else {
   295  				shave = fmt.Sprintf("%#v", have)
   296  			}
   297  			if _, ok := want.(CharData); ok {
   298  				swant = fmt.Sprintf("CharData(%q)", want)
   299  			} else {
   300  				swant = fmt.Sprintf("%#v", want)
   301  			}
   302  			t.Errorf("token %d = %s, want %s", i, shave, swant)
   303  		}
   304  	}
   305  }
   306  
   307  // Ensure that directives (specifically !DOCTYPE) include the complete
   308  // text of any nested directives, noting that < and > do not change
   309  // nesting depth if they are in single or double quotes.
   310  
// nestedDirectivesInput holds one DOCTYPE directive per line, each wrapping
// a nested <!ENTITY ...> declaration. The quoted entity values contain '<',
// '>' and the opposite quote character in various combinations.
var nestedDirectivesInput = `
<!DOCTYPE [<!ENTITY rdf "http://www.w3.org/1999/02/22-rdf-syntax-ns#">]>
<!DOCTYPE [<!ENTITY xlt ">">]>
<!DOCTYPE [<!ENTITY xlt "<">]>
<!DOCTYPE [<!ENTITY xlt '>'>]>
<!DOCTYPE [<!ENTITY xlt '<'>]>
<!DOCTYPE [<!ENTITY xlt '">'>]>
<!DOCTYPE [<!ENTITY xlt "'<">]>
`
   320  
// nestedDirectivesTokens is the token sequence TestNestedDirectives expects
// for nestedDirectivesInput: each Directive carries the full text between
// "<!" and the final ">" of its line, nested declaration included.
var nestedDirectivesTokens = []Token{
	CharData("\n"),
	Directive(`DOCTYPE [<!ENTITY rdf "http://www.w3.org/1999/02/22-rdf-syntax-ns#">]`),
	CharData("\n"),
	Directive(`DOCTYPE [<!ENTITY xlt ">">]`),
	CharData("\n"),
	Directive(`DOCTYPE [<!ENTITY xlt "<">]`),
	CharData("\n"),
	Directive(`DOCTYPE [<!ENTITY xlt '>'>]`),
	CharData("\n"),
	Directive(`DOCTYPE [<!ENTITY xlt '<'>]`),
	CharData("\n"),
	Directive(`DOCTYPE [<!ENTITY xlt '">'>]`),
	CharData("\n"),
	Directive(`DOCTYPE [<!ENTITY xlt "'<">]`),
	CharData("\n"),
}
   338  
   339  func TestNestedDirectives(t *testing.T) {
   340  	d := NewDecoder(strings.NewReader(nestedDirectivesInput))
   341  
   342  	for i, want := range nestedDirectivesTokens {
   343  		have, err := d.Token()
   344  		if err != nil {
   345  			t.Fatalf("token %d: unexpected error: %s", i, err)
   346  		}
   347  		if !reflect.DeepEqual(have, want) {
   348  			t.Errorf("token %d = %#v want %#v", i, have, want)
   349  		}
   350  	}
   351  }
   352  
   353  func TestToken(t *testing.T) {
   354  	d := NewDecoder(strings.NewReader(testInput))
   355  	d.Entity = testEntity
   356  
   357  	for i, want := range cookedTokens {
   358  		have, err := d.Token()
   359  		if err != nil {
   360  			t.Fatalf("token %d: unexpected error: %s", i, err)
   361  		}
   362  		if !reflect.DeepEqual(have, want) {
   363  			t.Errorf("token %d = %#v want %#v", i, have, want)
   364  		}
   365  	}
   366  }
   367  
   368  func TestSyntax(t *testing.T) {
   369  	for i := range xmlInput {
   370  		d := NewDecoder(strings.NewReader(xmlInput[i]))
   371  		var err error
   372  		for _, err = d.Token(); err == nil; _, err = d.Token() {
   373  		}
   374  		if _, ok := err.(*SyntaxError); !ok {
   375  			t.Fatalf(`xmlInput "%s": expected SyntaxError not received`, xmlInput[i])
   376  		}
   377  	}
   378  }
   379  
   380  type allScalars struct {
   381  	True1     bool
   382  	True2     bool
   383  	False1    bool
   384  	False2    bool
   385  	Int       int
   386  	Int8      int8
   387  	Int16     int16
   388  	Int32     int32
   389  	Int64     int64
   390  	Uint      int
   391  	Uint8     uint8
   392  	Uint16    uint16
   393  	Uint32    uint32
   394  	Uint64    uint64
   395  	Uintptr   uintptr
   396  	Float32   float32
   397  	Float64   float64
   398  	String    string
   399  	PtrString *string
   400  }
   401  
// all is the value TestAllScalars expects to obtain after unmarshalling
// testScalarsInput into an allScalars.
var all = allScalars{
	True1:     true,
	True2:     true,
	False1:    false,
	False2:    false,
	Int:       1,
	Int8:      -2,
	Int16:     3,
	Int32:     -4,
	Int64:     5,
	Uint:      6,
	Uint8:     7,
	Uint16:    8,
	Uint32:    9,
	Uint64:    10,
	Uintptr:   11,
	Float32:   13.0,
	Float64:   14.0,
	String:    "15",
	PtrString: &sixteen,
}
   423  
// sixteen is the string that all.PtrString points to.
var sixteen = "16"
   425  
// testScalarsInput is the XML document TestAllScalars unmarshals into an
// allScalars. The booleans are written both as words and as 1/0.
// Note that the <Float> element has no field of that name in allScalars.
const testScalarsInput = `<allscalars>
	<True1>true</True1>
	<True2>1</True2>
	<False1>false</False1>
	<False2>0</False2>
	<Int>1</Int>
	<Int8>-2</Int8>
	<Int16>3</Int16>
	<Int32>-4</Int32>
	<Int64>5</Int64>
	<Uint>6</Uint>
	<Uint8>7</Uint8>
	<Uint16>8</Uint16>
	<Uint32>9</Uint32>
	<Uint64>10</Uint64>
	<Uintptr>11</Uintptr>
	<Float>12.0</Float>
	<Float32>13.0</Float32>
	<Float64>14.0</Float64>
	<String>15</String>
	<PtrString>16</PtrString>
</allscalars>`
   448  
   449  func TestAllScalars(t *testing.T) {
   450  	var a allScalars
   451  	err := Unmarshal([]byte(testScalarsInput), &a)
   452  
   453  	if err != nil {
   454  		t.Fatal(err)
   455  	}
   456  	if !reflect.DeepEqual(a, all) {
   457  		t.Errorf("have %+v want %+v", a, all)
   458  	}
   459  }
   460  
// item is the unmarshal target used by TestIssue569. Its only field has an
// underscore in its name, matching the <Field_a> element in that test.
type item struct {
	Field_a string
}
   464  
   465  func TestIssue569(t *testing.T) {
   466  	data := `<item><Field_a>abcd</Field_a></item>`
   467  	var i item
   468  	err := Unmarshal([]byte(data), &i)
   469  
   470  	if err != nil || i.Field_a != "abcd" {
   471  		t.Fatal("Expecting abcd")
   472  	}
   473  }
   474  
   475  func TestUnquotedAttrs(t *testing.T) {
   476  	data := "<tag attr=azAZ09:-_\t>"
   477  	d := NewDecoder(strings.NewReader(data))
   478  	d.Strict = false
   479  	token, err := d.Token()
   480  	if _, ok := err.(*SyntaxError); ok {
   481  		t.Errorf("Unexpected error: %v", err)
   482  	}
   483  	if token.(StartElement).Name.Local != "tag" {
   484  		t.Errorf("Unexpected tag name: %v", token.(StartElement).Name.Local)
   485  	}
   486  	attr := token.(StartElement).Attr[0]
   487  	if attr.Value != "azAZ09:-_" {
   488  		t.Errorf("Unexpected attribute value: %v", attr.Value)
   489  	}
   490  	if attr.Name.Local != "attr" {
   491  		t.Errorf("Unexpected attribute name: %v", attr.Name.Local)
   492  	}
   493  }
   494  
   495  func TestValuelessAttrs(t *testing.T) {
   496  	tests := [][3]string{
   497  		{"<p nowrap>", "p", "nowrap"},
   498  		{"<p nowrap >", "p", "nowrap"},
   499  		{"<input checked/>", "input", "checked"},
   500  		{"<input checked />", "input", "checked"},
   501  	}
   502  	for _, test := range tests {
   503  		d := NewDecoder(strings.NewReader(test[0]))
   504  		d.Strict = false
   505  		token, err := d.Token()
   506  		if _, ok := err.(*SyntaxError); ok {
   507  			t.Errorf("Unexpected error: %v", err)
   508  		}
   509  		if token.(StartElement).Name.Local != test[1] {
   510  			t.Errorf("Unexpected tag name: %v", token.(StartElement).Name.Local)
   511  		}
   512  		attr := token.(StartElement).Attr[0]
   513  		if attr.Value != test[2] {
   514  			t.Errorf("Unexpected attribute value: %v", attr.Value)
   515  		}
   516  		if attr.Name.Local != test[2] {
   517  			t.Errorf("Unexpected attribute name: %v", attr.Name.Local)
   518  		}
   519  	}
   520  }
   521  
   522  func TestCopyTokenCharData(t *testing.T) {
   523  	data := []byte("same data")
   524  	var tok1 Token = CharData(data)
   525  	tok2 := CopyToken(tok1)
   526  	if !reflect.DeepEqual(tok1, tok2) {
   527  		t.Error("CopyToken(CharData) != CharData")
   528  	}
   529  	data[1] = 'o'
   530  	if reflect.DeepEqual(tok1, tok2) {
   531  		t.Error("CopyToken(CharData) uses same buffer.")
   532  	}
   533  }
   534  
   535  func TestCopyTokenStartElement(t *testing.T) {
   536  	elt := StartElement{Name{"", "hello"}, []Attr{{Name{"", "lang"}, "en"}}}
   537  	var tok1 Token = elt
   538  	tok2 := CopyToken(tok1)
   539  	if tok1.(StartElement).Attr[0].Value != "en" {
   540  		t.Error("CopyToken overwrote Attr[0]")
   541  	}
   542  	if !reflect.DeepEqual(tok1, tok2) {
   543  		t.Error("CopyToken(StartElement) != StartElement")
   544  	}
   545  	tok1.(StartElement).Attr[0] = Attr{Name{"", "lang"}, "de"}
   546  	if reflect.DeepEqual(tok1, tok2) {
   547  		t.Error("CopyToken(CharData) uses same buffer.")
   548  	}
   549  }
   550  
   551  func TestSyntaxErrorLineNum(t *testing.T) {
   552  	testInput := "<P>Foo<P>\n\n<P>Bar</>\n"
   553  	d := NewDecoder(strings.NewReader(testInput))
   554  	var err error
   555  	for _, err = d.Token(); err == nil; _, err = d.Token() {
   556  	}
   557  	synerr, ok := err.(*SyntaxError)
   558  	if !ok {
   559  		t.Error("Expected SyntaxError.")
   560  	}
   561  	if synerr.Line != 3 {
   562  		t.Error("SyntaxError didn't have correct line number.")
   563  	}
   564  }
   565  
   566  func TestTrailingRawToken(t *testing.T) {
   567  	input := `<FOO></FOO>  `
   568  	d := NewDecoder(strings.NewReader(input))
   569  	var err error
   570  	for _, err = d.RawToken(); err == nil; _, err = d.RawToken() {
   571  	}
   572  	if err != io.EOF {
   573  		t.Fatalf("d.RawToken() = _, %v, want _, io.EOF", err)
   574  	}
   575  }
   576  
   577  func TestTrailingToken(t *testing.T) {
   578  	input := `<FOO></FOO>  `
   579  	d := NewDecoder(strings.NewReader(input))
   580  	var err error
   581  	for _, err = d.Token(); err == nil; _, err = d.Token() {
   582  	}
   583  	if err != io.EOF {
   584  		t.Fatalf("d.Token() = _, %v, want _, io.EOF", err)
   585  	}
   586  }
   587  
   588  func TestEntityInsideCDATA(t *testing.T) {
   589  	input := `<test><![CDATA[ &val=foo ]]></test>`
   590  	d := NewDecoder(strings.NewReader(input))
   591  	var err error
   592  	for _, err = d.Token(); err == nil; _, err = d.Token() {
   593  	}
   594  	if err != io.EOF {
   595  		t.Fatalf("d.Token() = _, %v, want _, io.EOF", err)
   596  	}
   597  }
   598  
// characterTests pairs an input document (in) with the exact SyntaxError
// message (err) that TestDisallowedCharacters expects decoding it to
// produce. The inputs place control bytes, U+FFFE and unknown entity names
// in text, attribute-name and entity-reference positions.
var characterTests = []struct {
	in  string
	err string
}{
	{"\x12<doc/>", "illegal character code U+0012"},
	{"<?xml version=\"1.0\"?>\x0b<doc/>", "illegal character code U+000B"},
	{"\xef\xbf\xbe<doc/>", "illegal character code U+FFFE"},
	{"<?xml version=\"1.0\"?><doc>\r\n<hiya/>\x07<toots/></doc>", "illegal character code U+0007"},
	{"<?xml version=\"1.0\"?><doc \x12='value'>what's up</doc>", "expected attribute name in element"},
	{"<doc>&abc\x01;</doc>", "invalid character entity &abc (no semicolon)"},
	{"<doc>&\x01;</doc>", "invalid character entity & (no semicolon)"},
	{"<doc>&\xef\xbf\xbe;</doc>", "invalid character entity &\uFFFE;"},
	{"<doc>&hello;</doc>", "invalid character entity &hello;"},
}
   613  
   614  func TestDisallowedCharacters(t *testing.T) {
   615  
   616  	for i, tt := range characterTests {
   617  		d := NewDecoder(strings.NewReader(tt.in))
   618  		var err error
   619  
   620  		for err == nil {
   621  			_, err = d.Token()
   622  		}
   623  		synerr, ok := err.(*SyntaxError)
   624  		if !ok {
   625  			t.Fatalf("input %d d.Token() = _, %v, want _, *SyntaxError", i, err)
   626  		}
   627  		if synerr.Msg != tt.err {
   628  			t.Fatalf("input %d synerr.Msg wrong: want %q, got %q", i, tt.err, synerr.Msg)
   629  		}
   630  	}
   631  }
   632  
// procInstEncodingTest pairs an expected string with an obtained one.
// NOTE(review): nothing in the visible part of this file refers to this
// type (procInstTests uses an anonymous struct) — confirm whether it is
// still needed.
type procInstEncodingTest struct {
	expect, got string
}
   636  
// procInstTests pairs the content of a processing instruction (input) with
// the encoding name TestProcInstEncoding expects procInstEncoding to
// return for it (expect). Both quote styles are covered; the unquoted
// encoding=utf-8 case expects an empty result.
var procInstTests = []struct {
	input, expect string
}{
	{`version="1.0" encoding="utf-8"`, "utf-8"},
	{`version="1.0" encoding='utf-8'`, "utf-8"},
	{`version="1.0" encoding='utf-8' `, "utf-8"},
	{`version="1.0" encoding=utf-8`, ""},
	{`encoding="FOO" `, "FOO"},
}
   646  
   647  func TestProcInstEncoding(t *testing.T) {
   648  	for _, test := range procInstTests {
   649  		got := procInstEncoding(test.input)
   650  		if got != test.expect {
   651  			t.Errorf("procInstEncoding(%q) = %q; want %q", test.input, got, test.expect)
   652  		}
   653  	}
   654  }
   655  
   656  // Ensure that directives with comments include the complete
   657  // text of any nested directives.
   658  
// directivesWithCommentsInput holds DOCTYPE directives with comments placed
// before and after a nested <!ENTITY ...> declaration. The last line also
// contains the near-comment sequences "<!->" and "<!>" alongside the
// comments "<!---->", "<!-->-->" and "<!--->-->".
var directivesWithCommentsInput = `
<!DOCTYPE [<!-- a comment --><!ENTITY rdf "http://www.w3.org/1999/02/22-rdf-syntax-ns#">]>
<!DOCTYPE [<!ENTITY go "Golang"><!-- a comment-->]>
<!DOCTYPE <!-> <!> <!----> <!-->--> <!--->--> [<!ENTITY go "Golang"><!-- a comment-->]>
`
   664  
// directivesWithCommentsTokens is the token sequence
// TestDirectivesWithComments expects for directivesWithCommentsInput.
// The comments are absent from the Directive text, while "<!->", "<!>" and
// the spaces that separated the removed comments remain.
var directivesWithCommentsTokens = []Token{
	CharData("\n"),
	Directive(`DOCTYPE [<!ENTITY rdf "http://www.w3.org/1999/02/22-rdf-syntax-ns#">]`),
	CharData("\n"),
	Directive(`DOCTYPE [<!ENTITY go "Golang">]`),
	CharData("\n"),
	Directive(`DOCTYPE <!-> <!>    [<!ENTITY go "Golang">]`),
	CharData("\n"),
}
   674  
   675  func TestDirectivesWithComments(t *testing.T) {
   676  	d := NewDecoder(strings.NewReader(directivesWithCommentsInput))
   677  
   678  	for i, want := range directivesWithCommentsTokens {
   679  		have, err := d.Token()
   680  		if err != nil {
   681  			t.Fatalf("token %d: unexpected error: %s", i, err)
   682  		}
   683  		if !reflect.DeepEqual(have, want) {
   684  			t.Errorf("token %d = %#v want %#v", i, have, want)
   685  		}
   686  	}
   687  }
   688  
   689  // Writer whose Write method always returns an error.
   690  type errWriter struct{}
   691  
   692  func (errWriter) Write(p []byte) (n int, err error) { return 0, fmt.Errorf("unwritable") }
   693  
   694  func TestEscapeTextIOErrors(t *testing.T) {
   695  	expectErr := "unwritable"
   696  	err := EscapeText(errWriter{}, []byte{'A'})
   697  
   698  	if err == nil || err.Error() != expectErr {
   699  		t.Errorf("have %v, want %v", err, expectErr)
   700  	}
   701  }
   702  
   703  func TestEscapeTextInvalidChar(t *testing.T) {
   704  	input := []byte("A \x00 terminated string.")
   705  	expected := "A \uFFFD terminated string."
   706  
   707  	buff := new(bytes.Buffer)
   708  	if err := EscapeText(buff, input); err != nil {
   709  		t.Fatalf("have %v, want nil", err)
   710  	}
   711  	text := buff.String()
   712  
   713  	if text != expected {
   714  		t.Errorf("have %v, want %v", text, expected)
   715  	}
   716  }