github.com/varialus/godfly@v0.0.0-20130904042352-1934f9f095ab/src/pkg/encoding/xml/xml_test.go (about)

     1  // Copyright 2009 The Go Authors.  All rights reserved.
     2  // Use of this source code is governed by a BSD-style
     3  // license that can be found in the LICENSE file.
     4  
     5  package xml
     6  
     7  import (
     8  	"bytes"
     9  	"fmt"
    10  	"io"
    11  	"reflect"
    12  	"strings"
    13  	"testing"
    14  	"unicode/utf8"
    15  )
    16  
    // testInput is the XML document decoded by TestRawToken and TestToken.
    // It contains an XML declaration, a DOCTYPE directive, namespace
    // declarations (with "\r\n\t" spliced in before the closing '>' of the
    // <body> start tag), predefined, numeric and custom entity references,
    // a self-closing element, a CDATA section, and a trailing comment with
    // no final newline.
    17  const testInput = `
    18  <?xml version="1.0" encoding="UTF-8"?>
    19  <!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN"
    20    "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
    21  <body xmlns:foo="ns1" xmlns="ns2" xmlns:tag="ns3" ` +
    22  	"\r\n\t" + `  >
    23    <hello lang="en">World &lt;&gt;&apos;&quot; &#x767d;&#40300;翔</hello>
    24    <query>&何; &is-it;</query>
    25    <goodbye />
    26    <outer foo:attr="value" xmlns:tag="ns4">
    27      <inner/>
    28    </outer>
    29    <tag:name>
    30      <![CDATA[Some text here.]]>
    31    </tag:name>
    32  </body><!-- missing final newline -->`
    33  
    // testEntity supplies replacement text for the two custom entities
    // referenced in testInput's <query> element; TestRawToken and TestToken
    // install it as the decoder's Entity map.
    34  var testEntity = map[string]string{"何": "What", "is-it": "is it?"}
    35  
    // rawTokens is the token sequence TestRawToken expects RawToken to
    // produce for testInput. Names keep their literal prefixes in Space
    // (for example "tag" or "xmlns"); compare cookedTokens, where the
    // prefixes are replaced by namespace URIs.
    36  var rawTokens = []Token{
    37  	CharData("\n"),
    38  	ProcInst{"xml", []byte(`version="1.0" encoding="UTF-8"`)},
    39  	CharData("\n"),
    40  	Directive(`DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN"
    41    "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd"`),
    42  	CharData("\n"),
    43  	StartElement{Name{"", "body"}, []Attr{{Name{"xmlns", "foo"}, "ns1"}, {Name{"", "xmlns"}, "ns2"}, {Name{"xmlns", "tag"}, "ns3"}}},
    44  	CharData("\n  "),
    45  	StartElement{Name{"", "hello"}, []Attr{{Name{"", "lang"}, "en"}}},
    46  	CharData("World <>'\" 白鵬翔"),
    47  	EndElement{Name{"", "hello"}},
    48  	CharData("\n  "),
    49  	StartElement{Name{"", "query"}, []Attr{}},
    50  	CharData("What is it?"),
    51  	EndElement{Name{"", "query"}},
    52  	CharData("\n  "),
    53  	StartElement{Name{"", "goodbye"}, []Attr{}},
    54  	EndElement{Name{"", "goodbye"}},
    55  	CharData("\n  "),
    56  	StartElement{Name{"", "outer"}, []Attr{{Name{"foo", "attr"}, "value"}, {Name{"xmlns", "tag"}, "ns4"}}},
    57  	CharData("\n    "),
    58  	StartElement{Name{"", "inner"}, []Attr{}},
    59  	EndElement{Name{"", "inner"}},
    60  	CharData("\n  "),
    61  	EndElement{Name{"", "outer"}},
    62  	CharData("\n  "),
    63  	StartElement{Name{"tag", "name"}, []Attr{}},
    64  	CharData("\n    "),
    65  	CharData("Some text here."),
    66  	CharData("\n  "),
    67  	EndElement{Name{"tag", "name"}},
    68  	CharData("\n"),
    69  	EndElement{Name{"", "body"}},
    70  	Comment(" missing final newline "),
    71  }
    72  
    // cookedTokens is the token sequence TestToken expects Token to produce
    // for testInput. Element names carry the default namespace "ns2", the
    // foo:attr attribute carries "ns1", and tag:name carries "ns3" (the
    // xmlns:tag="ns4" declaration appears only on <outer>). The xmlns
    // attributes themselves are listed unchanged.
    73  var cookedTokens = []Token{
    74  	CharData("\n"),
    75  	ProcInst{"xml", []byte(`version="1.0" encoding="UTF-8"`)},
    76  	CharData("\n"),
    77  	Directive(`DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN"
    78    "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd"`),
    79  	CharData("\n"),
    80  	StartElement{Name{"ns2", "body"}, []Attr{{Name{"xmlns", "foo"}, "ns1"}, {Name{"", "xmlns"}, "ns2"}, {Name{"xmlns", "tag"}, "ns3"}}},
    81  	CharData("\n  "),
    82  	StartElement{Name{"ns2", "hello"}, []Attr{{Name{"", "lang"}, "en"}}},
    83  	CharData("World <>'\" 白鵬翔"),
    84  	EndElement{Name{"ns2", "hello"}},
    85  	CharData("\n  "),
    86  	StartElement{Name{"ns2", "query"}, []Attr{}},
    87  	CharData("What is it?"),
    88  	EndElement{Name{"ns2", "query"}},
    89  	CharData("\n  "),
    90  	StartElement{Name{"ns2", "goodbye"}, []Attr{}},
    91  	EndElement{Name{"ns2", "goodbye"}},
    92  	CharData("\n  "),
    93  	StartElement{Name{"ns2", "outer"}, []Attr{{Name{"ns1", "attr"}, "value"}, {Name{"xmlns", "tag"}, "ns4"}}},
    94  	CharData("\n    "),
    95  	StartElement{Name{"ns2", "inner"}, []Attr{}},
    96  	EndElement{Name{"ns2", "inner"}},
    97  	CharData("\n  "),
    98  	EndElement{Name{"ns2", "outer"}},
    99  	CharData("\n  "),
   100  	StartElement{Name{"ns3", "name"}, []Attr{}},
   101  	CharData("\n    "),
   102  	CharData("Some text here."),
   103  	CharData("\n  "),
   104  	EndElement{Name{"ns3", "name"}},
   105  	CharData("\n"),
   106  	EndElement{Name{"ns2", "body"}},
   107  	Comment(" missing final newline "),
   108  }
   109  
   // testInputAltEncoding is a document whose XML declaration names the
   // made-up encoding "x-testing-uppercase". It is used by
   // TestRawTokenAltEncoding (with a CharsetReader installed) and by
   // TestRawTokenAltEncodingNoConverter (without one).
   110  const testInputAltEncoding = `
   111  <?xml version="1.0" encoding="x-testing-uppercase"?>
   112  <TAG>VALUE</TAG>`
   113  
   // rawTokensAltEncoding is the token sequence TestRawTokenAltEncoding
   // expects for testInputAltEncoding: the element name and text that
   // follow the declaration appear in lower case.
   114  var rawTokensAltEncoding = []Token{
   115  	CharData("\n"),
   116  	ProcInst{"xml", []byte(`version="1.0" encoding="x-testing-uppercase"`)},
   117  	CharData("\n"),
   118  	StartElement{Name{"", "tag"}, []Attr{}},
   119  	CharData("value"),
   120  	EndElement{Name{"", "tag"}},
   121  }
   122  
   // xmlInput lists malformed documents. TestSyntax reads tokens from each
   // one until an error occurs and requires that error to be a
   // *SyntaxError. Two entries are commented out because, per their
   // trailing notes, they are left for the Token() caller to handle.
   123  var xmlInput = []string{
   124  	// unexpected EOF cases
   125  	"<",
   126  	"<t",
   127  	"<t ",
   128  	"<t/",
   129  	"<!",
   130  	"<!-",
   131  	"<!--",
   132  	"<!--c-",
   133  	"<!--c--",
   134  	"<!d",
   135  	"<t></",
   136  	"<t></t",
   137  	"<?",
   138  	"<?p",
   139  	"<t a",
   140  	"<t a=",
   141  	"<t a='",
   142  	"<t a=''",
   143  	"<t/><![",
   144  	"<t/><![C",
   145  	"<t/><![CDATA[d",
   146  	"<t/><![CDATA[d]",
   147  	"<t/><![CDATA[d]]",
   148  
   149  	// other Syntax errors
   150  	"<>",
   151  	"<t/a",
   152  	"<0 />",
   153  	"<?0 >",
   154  	//	"<!0 >",	// let the Token() caller handle
   155  	"</0>",
   156  	"<t 0=''>",
   157  	"<t a='&'>",
   158  	"<t a='<'>",
   159  	"<t>&nbspc;</t>",
   160  	"<t a>",
   161  	"<t a=>",
   162  	"<t a=v>",
   163  	//	"<![CDATA[d]]>",	// let the Token() caller handle
   164  	"<t></e>",
   165  	"<t></>",
   166  	"<t></t!",
   167  	"<t>cdata]]></t>",
   168  }
   169  
   170  func TestRawToken(t *testing.T) {
   171  	d := NewDecoder(strings.NewReader(testInput))
   172  	d.Entity = testEntity
   173  	testRawToken(t, d, rawTokens)
   174  }
   175  
   // nonStrictInput holds one <tag> element per malformed or unknown
   // entity reference. TestNonStrictRawToken decodes it with Strict
   // disabled and expects each reference to be returned as literal text.
   176  const nonStrictInput = `
   177  <tag>non&entity</tag>
   178  <tag>&unknown;entity</tag>
   179  <tag>&#123</tag>
   180  <tag>&#zzz;</tag>
   181  <tag>&なまえ3;</tag>
   182  <tag>&lt-gt;</tag>
   183  <tag>&;</tag>
   184  <tag>&0a;</tag>
   185  `
   186  
   // nonStringEntity maps the names "" and "0a" (the last two references in
   // nonStrictInput) to "oops!".
   // NOTE(review): nothing in this file reads this variable; presumably it
   // was meant to be installed as the decoder's Entity map in
   // TestNonStrictRawToken — confirm.
   187  var nonStringEntity = map[string]string{"": "oops!", "0a": "oops!"}
   188  
   // nonStrictTokens is the token sequence TestNonStrictRawToken expects for
   // nonStrictInput: every entity reference is passed through verbatim as
   // CharData.
   189  var nonStrictTokens = []Token{
   190  	CharData("\n"),
   191  	StartElement{Name{"", "tag"}, []Attr{}},
   192  	CharData("non&entity"),
   193  	EndElement{Name{"", "tag"}},
   194  	CharData("\n"),
   195  	StartElement{Name{"", "tag"}, []Attr{}},
   196  	CharData("&unknown;entity"),
   197  	EndElement{Name{"", "tag"}},
   198  	CharData("\n"),
   199  	StartElement{Name{"", "tag"}, []Attr{}},
   200  	CharData("&#123"),
   201  	EndElement{Name{"", "tag"}},
   202  	CharData("\n"),
   203  	StartElement{Name{"", "tag"}, []Attr{}},
   204  	CharData("&#zzz;"),
   205  	EndElement{Name{"", "tag"}},
   206  	CharData("\n"),
   207  	StartElement{Name{"", "tag"}, []Attr{}},
   208  	CharData("&なまえ3;"),
   209  	EndElement{Name{"", "tag"}},
   210  	CharData("\n"),
   211  	StartElement{Name{"", "tag"}, []Attr{}},
   212  	CharData("&lt-gt;"),
   213  	EndElement{Name{"", "tag"}},
   214  	CharData("\n"),
   215  	StartElement{Name{"", "tag"}, []Attr{}},
   216  	CharData("&;"),
   217  	EndElement{Name{"", "tag"}},
   218  	CharData("\n"),
   219  	StartElement{Name{"", "tag"}, []Attr{}},
   220  	CharData("&0a;"),
   221  	EndElement{Name{"", "tag"}},
   222  	CharData("\n"),
   223  }
   224  
   225  func TestNonStrictRawToken(t *testing.T) {
   226  	d := NewDecoder(strings.NewReader(nonStrictInput))
   227  	d.Strict = false
   228  	testRawToken(t, d, nonStrictTokens)
   229  }
   230  
   // downCaser wraps an io.ByteReader and lower-cases every ASCII upper-case
   // letter read through it. It stands in for a charset converter in
   // TestRawTokenAltEncoding.
   type downCaser struct {
   	t *testing.T    // test to fail if Read is ever called
   	r io.ByteReader // underlying byte source
   }

   // ReadByte returns the next byte from the wrapped reader, mapping 'A'..'Z'
   // to 'a'..'z' and passing every other byte through unchanged. The error
   // from the wrapped reader is returned as is.
   func (d *downCaser) ReadByte() (byte, error) {
   	b, err := d.r.ReadByte()
   	if 'A' <= b && b <= 'Z' {
   		b = b - 'A' + 'a'
   	}
   	return b, err
   }

   // Read exists only so that downCaser satisfies io.Reader; the test expects
   // the decoder to use ReadByte exclusively, so any call fails the test.
   func (d *downCaser) Read(p []byte) (int, error) {
   	d.t.Fatalf("unexpected Read call on downCaser reader")
   	panic("unreachable")
   }
   248  
   249  func TestRawTokenAltEncoding(t *testing.T) {
   250  	sawEncoding := ""
   251  	d := NewDecoder(strings.NewReader(testInputAltEncoding))
   252  	d.CharsetReader = func(charset string, input io.Reader) (io.Reader, error) {
   253  		sawEncoding = charset
   254  		if charset != "x-testing-uppercase" {
   255  			t.Fatalf("unexpected charset %q", charset)
   256  		}
   257  		return &downCaser{t, input.(io.ByteReader)}, nil
   258  	}
   259  	testRawToken(t, d, rawTokensAltEncoding)
   260  }
   261  
   262  func TestRawTokenAltEncodingNoConverter(t *testing.T) {
   263  	d := NewDecoder(strings.NewReader(testInputAltEncoding))
   264  	token, err := d.RawToken()
   265  	if token == nil {
   266  		t.Fatalf("expected a token on first RawToken call")
   267  	}
   268  	if err != nil {
   269  		t.Fatal(err)
   270  	}
   271  	token, err = d.RawToken()
   272  	if token != nil {
   273  		t.Errorf("expected a nil token; got %#v", token)
   274  	}
   275  	if err == nil {
   276  		t.Fatalf("expected an error on second RawToken call")
   277  	}
   278  	const encoding = "x-testing-uppercase"
   279  	if !strings.Contains(err.Error(), encoding) {
   280  		t.Errorf("expected error to contain %q; got error: %v",
   281  			encoding, err)
   282  	}
   283  }
   284  
   // testRawToken reads len(rawTokens) tokens from d with RawToken and
   // compares each one to the expected token at the same index. A decoding
   // error aborts the test; a mismatch is reported and the comparison
   // continues with the next token.
   func testRawToken(t *testing.T, d *Decoder, rawTokens []Token) {
   	// render formats a token for a failure message. CharData is shown as a
   	// quoted string; every other token uses Go syntax.
   	render := func(tok Token) string {
   		if _, isText := tok.(CharData); isText {
   			return fmt.Sprintf("CharData(%q)", tok)
   		}
   		return fmt.Sprintf("%#v", tok)
   	}

   	for i := range rawTokens {
   		want := rawTokens[i]
   		have, err := d.RawToken()
   		if err != nil {
   			t.Fatalf("token %d: unexpected error: %s", i, err)
   		}
   		if reflect.DeepEqual(have, want) {
   			continue
   		}
   		t.Errorf("token %d = %s, want %s", i, render(have), render(want))
   	}
   }
   307  
   308  // Ensure that directives (specifically !DOCTYPE) include the complete
   309  // text of any nested directives, noting that < and > do not change
   310  // nesting depth if they are in single or double quotes.
   311  
   // nestedDirectivesInput holds one DOCTYPE per line, each wrapping an
   // <!ENTITY ...> declaration whose quoted value contains '<', '>' or the
   // other kind of quote character.
   312  var nestedDirectivesInput = `
   313  <!DOCTYPE [<!ENTITY rdf "http://www.w3.org/1999/02/22-rdf-syntax-ns#">]>
   314  <!DOCTYPE [<!ENTITY xlt ">">]>
   315  <!DOCTYPE [<!ENTITY xlt "<">]>
   316  <!DOCTYPE [<!ENTITY xlt '>'>]>
   317  <!DOCTYPE [<!ENTITY xlt '<'>]>
   318  <!DOCTYPE [<!ENTITY xlt '">'>]>
   319  <!DOCTYPE [<!ENTITY xlt "'<">]>
   320  `
   321  
   // nestedDirectivesTokens is the token sequence TestNestedDirectives
   // expects for nestedDirectivesInput: each Directive carries the full
   // text between "<!" and the final ">", nested declaration included.
   322  var nestedDirectivesTokens = []Token{
   323  	CharData("\n"),
   324  	Directive(`DOCTYPE [<!ENTITY rdf "http://www.w3.org/1999/02/22-rdf-syntax-ns#">]`),
   325  	CharData("\n"),
   326  	Directive(`DOCTYPE [<!ENTITY xlt ">">]`),
   327  	CharData("\n"),
   328  	Directive(`DOCTYPE [<!ENTITY xlt "<">]`),
   329  	CharData("\n"),
   330  	Directive(`DOCTYPE [<!ENTITY xlt '>'>]`),
   331  	CharData("\n"),
   332  	Directive(`DOCTYPE [<!ENTITY xlt '<'>]`),
   333  	CharData("\n"),
   334  	Directive(`DOCTYPE [<!ENTITY xlt '">'>]`),
   335  	CharData("\n"),
   336  	Directive(`DOCTYPE [<!ENTITY xlt "'<">]`),
   337  	CharData("\n"),
   338  }
   339  
   340  func TestNestedDirectives(t *testing.T) {
   341  	d := NewDecoder(strings.NewReader(nestedDirectivesInput))
   342  
   343  	for i, want := range nestedDirectivesTokens {
   344  		have, err := d.Token()
   345  		if err != nil {
   346  			t.Fatalf("token %d: unexpected error: %s", i, err)
   347  		}
   348  		if !reflect.DeepEqual(have, want) {
   349  			t.Errorf("token %d = %#v want %#v", i, have, want)
   350  		}
   351  	}
   352  }
   353  
   354  func TestToken(t *testing.T) {
   355  	d := NewDecoder(strings.NewReader(testInput))
   356  	d.Entity = testEntity
   357  
   358  	for i, want := range cookedTokens {
   359  		have, err := d.Token()
   360  		if err != nil {
   361  			t.Fatalf("token %d: unexpected error: %s", i, err)
   362  		}
   363  		if !reflect.DeepEqual(have, want) {
   364  			t.Errorf("token %d = %#v want %#v", i, have, want)
   365  		}
   366  	}
   367  }
   368  
   369  func TestSyntax(t *testing.T) {
   370  	for i := range xmlInput {
   371  		d := NewDecoder(strings.NewReader(xmlInput[i]))
   372  		var err error
   373  		for _, err = d.Token(); err == nil; _, err = d.Token() {
   374  		}
   375  		if _, ok := err.(*SyntaxError); !ok {
   376  			t.Fatalf(`xmlInput "%s": expected SyntaxError not received`, xmlInput[i])
   377  		}
   378  	}
   379  }
   380  
   // allScalars has one field per scalar kind exercised by TestAllScalars.
   // Each field is filled from the element of the same name in
   // testScalarsInput.
   type allScalars struct {
   	True1     bool
   	True2     bool
   	False1    bool
   	False2    bool
   	Int       int
   	Int8      int8
   	Int16     int16
   	Int32     int32
   	Int64     int64
   	Uint      uint // was declared int, which left plain uint untested
   	Uint8     uint8
   	Uint16    uint16
   	Uint32    uint32
   	Uint64    uint64
   	Uintptr   uintptr
   	Float32   float32
   	Float64   float64
   	String    string
   	PtrString *string
   }
   402  
   // all is the value TestAllScalars expects after unmarshalling
   // testScalarsInput. PtrString points at the package-level variable
   // sixteen.
   403  var all = allScalars{
   404  	True1:     true,
   405  	True2:     true,
   406  	False1:    false,
   407  	False2:    false,
   408  	Int:       1,
   409  	Int8:      -2,
   410  	Int16:     3,
   411  	Int32:     -4,
   412  	Int64:     5,
   413  	Uint:      6,
   414  	Uint8:     7,
   415  	Uint16:    8,
   416  	Uint32:    9,
   417  	Uint64:    10,
   418  	Uintptr:   11,
   419  	Float32:   13.0,
   420  	Float64:   14.0,
   421  	String:    "15",
   422  	PtrString: &sixteen,
   423  }
   424  
   // sixteen is the string that all.PtrString points to; it matches the
   // text of the <PtrString> element in testScalarsInput.
   425  var sixteen = "16"
   426  
   // testScalarsInput is the document TestAllScalars unmarshals into an
   // allScalars value. Booleans are written both as true/false and as 1/0.
   // The <Float> element has no field of that name in allScalars;
   // presumably Unmarshal skips it — the expected value all contains no
   // 12.0.
   427  const testScalarsInput = `<allscalars>
   428  	<True1>true</True1>
   429  	<True2>1</True2>
   430  	<False1>false</False1>
   431  	<False2>0</False2>
   432  	<Int>1</Int>
   433  	<Int8>-2</Int8>
   434  	<Int16>3</Int16>
   435  	<Int32>-4</Int32>
   436  	<Int64>5</Int64>
   437  	<Uint>6</Uint>
   438  	<Uint8>7</Uint8>
   439  	<Uint16>8</Uint16>
   440  	<Uint32>9</Uint32>
   441  	<Uint64>10</Uint64>
   442  	<Uintptr>11</Uintptr>
   443  	<Float>12.0</Float>
   444  	<Float32>13.0</Float32>
   445  	<Float64>14.0</Float64>
   446  	<String>15</String>
   447  	<PtrString>16</PtrString>
   448  </allscalars>`
   449  
   450  func TestAllScalars(t *testing.T) {
   451  	var a allScalars
   452  	err := Unmarshal([]byte(testScalarsInput), &a)
   453  
   454  	if err != nil {
   455  		t.Fatal(err)
   456  	}
   457  	if !reflect.DeepEqual(a, all) {
   458  		t.Errorf("have %+v want %+v", a, all)
   459  	}
   460  }
   461  
   // item is the target type for TestIssue569; its only field has an
   // underscore in its name.
   type item struct {
   	Field_a string
   }

   // TestIssue569 checks that an element named Field_a is unmarshalled into
   // the struct field of the same name.
   func TestIssue569(t *testing.T) {
   	data := `<item><Field_a>abcd</Field_a></item>`
   	var i item
   	// Report the decoding error and the wrong value separately; the old
   	// combined message hid which of the two had gone wrong.
   	if err := Unmarshal([]byte(data), &i); err != nil {
   		t.Fatalf("Unmarshal(%q): %v", data, err)
   	}
   	if i.Field_a != "abcd" {
   		t.Fatalf("Field_a = %q, expecting %q", i.Field_a, "abcd")
   	}
   }
   475  
   // TestUnquotedAttrs checks that, with Strict disabled, an attribute value
   // written without quotes is accepted and ends at the first character that
   // cannot be part of it (here a tab).
   func TestUnquotedAttrs(t *testing.T) {
   	data := "<tag attr=azAZ09:-_\t>"
   	d := NewDecoder(strings.NewReader(data))
   	d.Strict = false
   	token, err := d.Token()
   	// Any error, not just *SyntaxError, leaves token unusable; stop here
   	// instead of panicking on the type assertion below.
   	if err != nil {
   		t.Fatalf("Unexpected error: %v", err)
   	}
   	start, ok := token.(StartElement)
   	if !ok {
   		t.Fatalf("Unexpected token type: got %T, want StartElement", token)
   	}
   	if start.Name.Local != "tag" {
   		t.Errorf("Unexpected tag name: %v", start.Name.Local)
   	}
   	if len(start.Attr) == 0 {
   		t.Fatalf("Unexpected attribute count: got 0, want at least 1")
   	}
   	attr := start.Attr[0]
   	if attr.Value != "azAZ09:-_" {
   		t.Errorf("Unexpected attribute value: %v", attr.Value)
   	}
   	if attr.Name.Local != "attr" {
   		t.Errorf("Unexpected attribute name: %v", attr.Name.Local)
   	}
   }
   495  
   // TestValuelessAttrs checks that, with Strict disabled, an attribute
   // written without "=value" is accepted and reported with its own name as
   // its value. Each case is {input, expected tag, expected attribute}.
   func TestValuelessAttrs(t *testing.T) {
   	tests := [][3]string{
   		{"<p nowrap>", "p", "nowrap"},
   		{"<p nowrap >", "p", "nowrap"},
   		{"<input checked/>", "input", "checked"},
   		{"<input checked />", "input", "checked"},
   	}
   	for _, test := range tests {
   		d := NewDecoder(strings.NewReader(test[0]))
   		d.Strict = false
   		token, err := d.Token()
   		// Any error, not just *SyntaxError, leaves token unusable; report
   		// it and move on instead of panicking on the assertion below.
   		if err != nil {
   			t.Errorf("%q: Unexpected error: %v", test[0], err)
   			continue
   		}
   		start, ok := token.(StartElement)
   		if !ok {
   			t.Errorf("%q: Unexpected token type: got %T, want StartElement", test[0], token)
   			continue
   		}
   		if start.Name.Local != test[1] {
   			t.Errorf("%q: Unexpected tag name: %v", test[0], start.Name.Local)
   		}
   		if len(start.Attr) == 0 {
   			t.Errorf("%q: Unexpected attribute count: got 0, want at least 1", test[0])
   			continue
   		}
   		attr := start.Attr[0]
   		if attr.Value != test[2] {
   			t.Errorf("%q: Unexpected attribute value: %v", test[0], attr.Value)
   		}
   		if attr.Name.Local != test[2] {
   			t.Errorf("%q: Unexpected attribute name: %v", test[0], attr.Name.Local)
   		}
   	}
   }
   522  
   // TestCopyTokenCharData checks that CopyToken on a CharData token yields
   // an equal token that does not share the original's bytes: after the
   // source buffer is modified the two tokens must differ.
   func TestCopyTokenCharData(t *testing.T) {
   	buf := []byte("same data")
   	var original Token = CharData(buf)
   	duplicate := CopyToken(original)

   	if same := reflect.DeepEqual(original, duplicate); !same {
   		t.Error("CopyToken(CharData) != CharData")
   	}

   	// Mutate the bytes behind the original token only.
   	buf[1] = 'o'
   	if same := reflect.DeepEqual(original, duplicate); same {
   		t.Error("CopyToken(CharData) uses same buffer.")
   	}
   }
   535  
   536  func TestCopyTokenStartElement(t *testing.T) {
   537  	elt := StartElement{Name{"", "hello"}, []Attr{{Name{"", "lang"}, "en"}}}
   538  	var tok1 Token = elt
   539  	tok2 := CopyToken(tok1)
   540  	if tok1.(StartElement).Attr[0].Value != "en" {
   541  		t.Error("CopyToken overwrote Attr[0]")
   542  	}
   543  	if !reflect.DeepEqual(tok1, tok2) {
   544  		t.Error("CopyToken(StartElement) != StartElement")
   545  	}
   546  	tok1.(StartElement).Attr[0] = Attr{Name{"", "lang"}, "de"}
   547  	if reflect.DeepEqual(tok1, tok2) {
   548  		t.Error("CopyToken(CharData) uses same buffer.")
   549  	}
   550  }
   551  
   // TestSyntaxErrorLineNum checks that the SyntaxError produced for a
   // malformed end tag on the third line of the input reports Line 3.
   func TestSyntaxErrorLineNum(t *testing.T) {
   	input := "<P>Foo<P>\n\n<P>Bar</>\n"
   	d := NewDecoder(strings.NewReader(input))
   	var err error
   	for _, err = d.Token(); err == nil; _, err = d.Token() {
   	}
   	synerr, ok := err.(*SyntaxError)
   	if !ok {
   		// Must stop here: synerr is nil and reading synerr.Line below
   		// would panic instead of reporting a test failure.
   		t.Fatalf("Expected SyntaxError, got %T: %v", err, err)
   	}
   	if synerr.Line != 3 {
   		t.Errorf("SyntaxError didn't have correct line number: got %d, want 3", synerr.Line)
   	}
   }
   566  
   // TestTrailingRawToken checks that RawToken ends with io.EOF, not some
   // other error, when whitespace follows the last element.
   func TestTrailingRawToken(t *testing.T) {
   	dec := NewDecoder(strings.NewReader("<FOO></FOO>  "))

   	// Drain the decoder; only the terminating error matters.
   	_, err := dec.RawToken()
   	for err == nil {
   		_, err = dec.RawToken()
   	}

   	if err != io.EOF {
   		t.Fatalf("d.RawToken() = _, %v, want _, io.EOF", err)
   	}
   }
   577  
   // TestTrailingToken checks that Token ends with io.EOF, not some other
   // error, when whitespace follows the last element.
   func TestTrailingToken(t *testing.T) {
   	dec := NewDecoder(strings.NewReader("<FOO></FOO>  "))

   	// Drain the decoder; only the terminating error matters.
   	_, err := dec.Token()
   	for err == nil {
   		_, err = dec.Token()
   	}

   	if err != io.EOF {
   		t.Fatalf("d.Token() = _, %v, want _, io.EOF", err)
   	}
   }
   588  
   589  func TestEntityInsideCDATA(t *testing.T) {
   590  	input := `<test><![CDATA[ &val=foo ]]></test>`
   591  	d := NewDecoder(strings.NewReader(input))
   592  	var err error
   593  	for _, err = d.Token(); err == nil; _, err = d.Token() {
   594  	}
   595  	if err != io.EOF {
   596  		t.Fatalf("d.Token() = _, %v, want _, io.EOF", err)
   597  	}
   598  }
   599  
   // characterTests pairs an input document (in) with the exact
   // SyntaxError.Msg (err) that TestDisallowedCharacters expects decoding
   // it to end with. The inputs contain control characters, U+FFFE, or
   // bad entity references.
   600  var characterTests = []struct {
   601  	in  string
   602  	err string
   603  }{
   604  	{"\x12<doc/>", "illegal character code U+0012"},
   605  	{"<?xml version=\"1.0\"?>\x0b<doc/>", "illegal character code U+000B"},
   606  	{"\xef\xbf\xbe<doc/>", "illegal character code U+FFFE"},
   607  	{"<?xml version=\"1.0\"?><doc>\r\n<hiya/>\x07<toots/></doc>", "illegal character code U+0007"},
   608  	{"<?xml version=\"1.0\"?><doc \x12='value'>what's up</doc>", "expected attribute name in element"},
   609  	{"<doc>&abc\x01;</doc>", "invalid character entity &abc (no semicolon)"},
   610  	{"<doc>&\x01;</doc>", "invalid character entity & (no semicolon)"},
   611  	{"<doc>&\xef\xbf\xbe;</doc>", "invalid character entity &\uFFFE;"},
   612  	{"<doc>&hello;</doc>", "invalid character entity &hello;"},
   613  }
   614  
   615  func TestDisallowedCharacters(t *testing.T) {
   616  
   617  	for i, tt := range characterTests {
   618  		d := NewDecoder(strings.NewReader(tt.in))
   619  		var err error
   620  
   621  		for err == nil {
   622  			_, err = d.Token()
   623  		}
   624  		synerr, ok := err.(*SyntaxError)
   625  		if !ok {
   626  			t.Fatalf("input %d d.Token() = _, %v, want _, *SyntaxError", i, err)
   627  		}
   628  		if synerr.Msg != tt.err {
   629  			t.Fatalf("input %d synerr.Msg wrong: want %q, got %q", i, tt.err, synerr.Msg)
   630  		}
   631  	}
   632  }
   633  
   // procInstEncodingTest holds an expected and an obtained string.
   // NOTE(review): nothing in this file uses this type; procInstTests
   // declares its own anonymous struct — confirm whether it can be removed.
   type procInstEncodingTest struct {
   	expect string
   	got    string
   }
   637  
   // procInstTests pairs the body of an XML declaration (input) with the
   // encoding name TestProcInstEncoding expects procInstEncoding to return
   // for it (expect). The unquoted form encoding=utf-8 is expected to
   // yield the empty string.
   638  var procInstTests = []struct {
   639  	input, expect string
   640  }{
   641  	{`version="1.0" encoding="utf-8"`, "utf-8"},
   642  	{`version="1.0" encoding='utf-8'`, "utf-8"},
   643  	{`version="1.0" encoding='utf-8' `, "utf-8"},
   644  	{`version="1.0" encoding=utf-8`, ""},
   645  	{`encoding="FOO" `, "FOO"},
   646  }
   647  
   648  func TestProcInstEncoding(t *testing.T) {
   649  	for _, test := range procInstTests {
   650  		got := procInstEncoding(test.input)
   651  		if got != test.expect {
   652  			t.Errorf("procInstEncoding(%q) = %q; want %q", test.input, got, test.expect)
   653  		}
   654  	}
   655  }
   656  
   657  // Ensure that directives with comments include the complete
   658  // text of any nested directives.
   659  
   // directivesWithCommentsInput holds three DOCTYPE directives containing
   // comments: one before the nested declaration, one after it, and one
   // line mixing real comments with comment look-alikes such as "<!->" and
   // "<!>".
   660  var directivesWithCommentsInput = `
   661  <!DOCTYPE [<!-- a comment --><!ENTITY rdf "http://www.w3.org/1999/02/22-rdf-syntax-ns#">]>
   662  <!DOCTYPE [<!ENTITY go "Golang"><!-- a comment-->]>
   663  <!DOCTYPE <!-> <!> <!----> <!-->--> <!--->--> [<!ENTITY go "Golang"><!-- a comment-->]>
   664  `
   665  
   // directivesWithCommentsTokens is the token sequence
   // TestDirectivesWithComments expects for directivesWithCommentsInput:
   // the comments are dropped from the directive text, while "<!->", "<!>"
   // and the spaces that separated the removed comments are kept.
   666  var directivesWithCommentsTokens = []Token{
   667  	CharData("\n"),
   668  	Directive(`DOCTYPE [<!ENTITY rdf "http://www.w3.org/1999/02/22-rdf-syntax-ns#">]`),
   669  	CharData("\n"),
   670  	Directive(`DOCTYPE [<!ENTITY go "Golang">]`),
   671  	CharData("\n"),
   672  	Directive(`DOCTYPE <!-> <!>    [<!ENTITY go "Golang">]`),
   673  	CharData("\n"),
   674  }
   675  
   676  func TestDirectivesWithComments(t *testing.T) {
   677  	d := NewDecoder(strings.NewReader(directivesWithCommentsInput))
   678  
   679  	for i, want := range directivesWithCommentsTokens {
   680  		have, err := d.Token()
   681  		if err != nil {
   682  			t.Fatalf("token %d: unexpected error: %s", i, err)
   683  		}
   684  		if !reflect.DeepEqual(have, want) {
   685  			t.Errorf("token %d = %#v want %#v", i, have, want)
   686  		}
   687  	}
   688  }
   689  
   // errWriter is a writer whose Write method never succeeds.
   type errWriter struct{}

   // Write ignores p, reports zero bytes written and always returns an error
   // with the text "unwritable".
   func (errWriter) Write(p []byte) (int, error) {
   	return 0, fmt.Errorf("unwritable")
   }
   694  
   695  func TestEscapeTextIOErrors(t *testing.T) {
   696  	expectErr := "unwritable"
   697  	err := EscapeText(errWriter{}, []byte{'A'})
   698  
   699  	if err == nil || err.Error() != expectErr {
   700  		t.Errorf("have %v, want %v", err, expectErr)
   701  	}
   702  }
   703  
   // TestEscapeTextInvalidChar checks that EscapeText writes U+FFFD in place
   // of a NUL byte in its input.
   func TestEscapeTextInvalidChar(t *testing.T) {
   	const expected = "A \uFFFD terminated string."

   	var out bytes.Buffer
   	if err := EscapeText(&out, []byte("A \x00 terminated string.")); err != nil {
   		t.Fatalf("have %v, want nil", err)
   	}

   	if text := out.String(); text != expected {
   		t.Errorf("have %v, want %v", text, expected)
   	}
   }
   718  
   // TestIssue5880 marshals a value of a named []byte type holding bytes that
   // are not valid UTF-8 on their own and requires the output to be valid
   // UTF-8.
   func TestIssue5880(t *testing.T) {
   	type T []byte

   	addr := T{192, 168, 0, 1}
   	data, err := Marshal(addr)
   	if err != nil {
   		t.Errorf("Marshal error: %v", err)
   	}
   	if valid := utf8.Valid(data); !valid {
   		t.Errorf("Marshal generated invalid UTF-8: %x", data)
   	}
   }