github.com/huandu/go@v0.0.0-20151114150818-04e615e41150/src/encoding/xml/xml_test.go (about)

     1  // Copyright 2009 The Go Authors.  All rights reserved.
     2  // Use of this source code is governed by a BSD-style
     3  // license that can be found in the LICENSE file.
     4  
     5  package xml
     6  
     7  import (
     8  	"bytes"
     9  	"fmt"
    10  	"io"
    11  	"reflect"
    12  	"strings"
    13  	"testing"
    14  	"unicode/utf8"
    15  )
    16  
// testInput is the XML document decoded by TestRawToken and TestToken.
// It contains an XML declaration, a DOCTYPE directive, default and prefixed
// namespace declarations (one of them re-declared on a nested element),
// predefined, numeric and custom entity references, self-closing elements,
// a CDATA section, and a trailing comment that is not followed by a newline.
const testInput = `
<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN"
  "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
<body xmlns:foo="ns1" xmlns="ns2" xmlns:tag="ns3" ` +
	"\r\n\t" + `  >
  <hello lang="en">World &lt;&gt;&apos;&quot; &#x767d;&#40300;翔</hello>
  <query>&何; &is-it;</query>
  <goodbye />
  <outer foo:attr="value" xmlns:tag="ns4">
    <inner/>
  </outer>
  <tag:name>
    <![CDATA[Some text here.]]>
  </tag:name>
</body><!-- missing final newline -->`
    33  
// testEntity maps the custom entity names referenced in testInput's <query>
// element to their replacement text. The tests assign it to Decoder.Entity.
var testEntity = map[string]string{"何": "What", "is-it": "is it?"}
    35  
// rawTokens is the token sequence TestRawToken expects RawToken to produce
// for testInput. Element and attribute names keep the literal prefixes that
// appear in the document (for example "xmlns", "foo", "tag") instead of the
// namespace values those prefixes are bound to.
var rawTokens = []Token{
	CharData("\n"),
	ProcInst{"xml", []byte(`version="1.0" encoding="UTF-8"`)},
	CharData("\n"),
	Directive(`DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN"
  "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd"`),
	CharData("\n"),
	StartElement{Name{"", "body"}, []Attr{{Name{"xmlns", "foo"}, "ns1"}, {Name{"", "xmlns"}, "ns2"}, {Name{"xmlns", "tag"}, "ns3"}}},
	CharData("\n  "),
	StartElement{Name{"", "hello"}, []Attr{{Name{"", "lang"}, "en"}}},
	CharData("World <>'\" 白鵬翔"),
	EndElement{Name{"", "hello"}},
	CharData("\n  "),
	StartElement{Name{"", "query"}, []Attr{}},
	CharData("What is it?"),
	EndElement{Name{"", "query"}},
	CharData("\n  "),
	StartElement{Name{"", "goodbye"}, []Attr{}},
	EndElement{Name{"", "goodbye"}},
	CharData("\n  "),
	StartElement{Name{"", "outer"}, []Attr{{Name{"foo", "attr"}, "value"}, {Name{"xmlns", "tag"}, "ns4"}}},
	CharData("\n    "),
	StartElement{Name{"", "inner"}, []Attr{}},
	EndElement{Name{"", "inner"}},
	CharData("\n  "),
	EndElement{Name{"", "outer"}},
	CharData("\n  "),
	StartElement{Name{"tag", "name"}, []Attr{}},
	CharData("\n    "),
	CharData("Some text here."),
	CharData("\n  "),
	EndElement{Name{"tag", "name"}},
	CharData("\n"),
	EndElement{Name{"", "body"}},
	Comment(" missing final newline "),
}
    72  
// cookedTokens is the token sequence TestToken expects Token to produce for
// testInput. It mirrors rawTokens, except that element names and the foo:attr
// attribute carry the namespace values declared in the document ("ns1",
// "ns2", "ns3") in place of the literal prefixes. The xmlns declarations
// themselves are still listed as attributes.
var cookedTokens = []Token{
	CharData("\n"),
	ProcInst{"xml", []byte(`version="1.0" encoding="UTF-8"`)},
	CharData("\n"),
	Directive(`DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN"
  "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd"`),
	CharData("\n"),
	StartElement{Name{"ns2", "body"}, []Attr{{Name{"xmlns", "foo"}, "ns1"}, {Name{"", "xmlns"}, "ns2"}, {Name{"xmlns", "tag"}, "ns3"}}},
	CharData("\n  "),
	StartElement{Name{"ns2", "hello"}, []Attr{{Name{"", "lang"}, "en"}}},
	CharData("World <>'\" 白鵬翔"),
	EndElement{Name{"ns2", "hello"}},
	CharData("\n  "),
	StartElement{Name{"ns2", "query"}, []Attr{}},
	CharData("What is it?"),
	EndElement{Name{"ns2", "query"}},
	CharData("\n  "),
	StartElement{Name{"ns2", "goodbye"}, []Attr{}},
	EndElement{Name{"ns2", "goodbye"}},
	CharData("\n  "),
	StartElement{Name{"ns2", "outer"}, []Attr{{Name{"ns1", "attr"}, "value"}, {Name{"xmlns", "tag"}, "ns4"}}},
	CharData("\n    "),
	StartElement{Name{"ns2", "inner"}, []Attr{}},
	EndElement{Name{"ns2", "inner"}},
	CharData("\n  "),
	EndElement{Name{"ns2", "outer"}},
	CharData("\n  "),
	StartElement{Name{"ns3", "name"}, []Attr{}},
	CharData("\n    "),
	CharData("Some text here."),
	CharData("\n  "),
	EndElement{Name{"ns3", "name"}},
	CharData("\n"),
	EndElement{Name{"ns2", "body"}},
	Comment(" missing final newline "),
}
   109  
// testInputAltEncoding is a small document whose XML declaration names the
// made-up encoding "x-testing-uppercase". The alt-encoding tests use it to
// exercise the decoder with and without a CharsetReader installed.
const testInputAltEncoding = `
<?xml version="1.0" encoding="x-testing-uppercase"?>
<TAG>VALUE</TAG>`
   113  
// rawTokensAltEncoding is the token sequence TestRawTokenAltEncoding expects
// for testInputAltEncoding once the downCaser reader is installed: the
// element name and its text are lower-cased, while the processing
// instruction is expected unchanged.
var rawTokensAltEncoding = []Token{
	CharData("\n"),
	ProcInst{"xml", []byte(`version="1.0" encoding="x-testing-uppercase"`)},
	CharData("\n"),
	StartElement{Name{"", "tag"}, []Attr{}},
	CharData("value"),
	EndElement{Name{"", "tag"}},
}
   122  
// xmlInput lists malformed documents. TestSyntax reads each one with
// Decoder.Token until an error occurs and requires that error to be a
// *SyntaxError.
var xmlInput = []string{
	// unexpected EOF cases
	"<",
	"<t",
	"<t ",
	"<t/",
	"<!",
	"<!-",
	"<!--",
	"<!--c-",
	"<!--c--",
	"<!d",
	"<t></",
	"<t></t",
	"<?",
	"<?p",
	"<t a",
	"<t a=",
	"<t a='",
	"<t a=''",
	"<t/><![",
	"<t/><![C",
	"<t/><![CDATA[d",
	"<t/><![CDATA[d]",
	"<t/><![CDATA[d]]",

	// other Syntax errors
	"<>",
	"<t/a",
	"<0 />",
	"<?0 >",
	//	"<!0 >",	// let the Token() caller handle
	"</0>",
	"<t 0=''>",
	"<t a='&'>",
	"<t a='<'>",
	"<t>&nbspc;</t>",
	"<t a>",
	"<t a=>",
	"<t a=v>",
	//	"<![CDATA[d]]>",	// let the Token() caller handle
	"<t></e>",
	"<t></>",
	"<t></t!",
	"<t>cdata]]></t>",
}
   169  
   170  func TestRawToken(t *testing.T) {
   171  	d := NewDecoder(strings.NewReader(testInput))
   172  	d.Entity = testEntity
   173  	testRawToken(t, d, testInput, rawTokens)
   174  }
   175  
// nonStrictInput is a document whose <tag> elements each contain an entity
// reference that is unterminated, unknown, or otherwise malformed.
// TestNonStrictRawToken decodes it with Decoder.Strict disabled.
const nonStrictInput = `
<tag>non&entity</tag>
<tag>&unknown;entity</tag>
<tag>&#123</tag>
<tag>&#zzz;</tag>
<tag>&なまえ3;</tag>
<tag>&lt-gt;</tag>
<tag>&;</tag>
<tag>&0a;</tag>
`
   186  
// nonStringEntity maps the names "" and "0a" — which appear as &; and &0a;
// in nonStrictInput — to the replacement text "oops!".
// NOTE(review): nothing in the visible part of this file reads this variable
// (TestNonStrictRawToken does not assign it to Decoder.Entity); confirm
// whether it was meant to be installed on the decoder there.
var nonStringEntity = map[string]string{"": "oops!", "0a": "oops!"}
   188  
// nonStrictTokens is the token sequence TestNonStrictRawToken expects for
// nonStrictInput: every malformed or unknown entity reference is expected to
// come through verbatim as character data.
var nonStrictTokens = []Token{
	CharData("\n"),
	StartElement{Name{"", "tag"}, []Attr{}},
	CharData("non&entity"),
	EndElement{Name{"", "tag"}},
	CharData("\n"),
	StartElement{Name{"", "tag"}, []Attr{}},
	CharData("&unknown;entity"),
	EndElement{Name{"", "tag"}},
	CharData("\n"),
	StartElement{Name{"", "tag"}, []Attr{}},
	CharData("&#123"),
	EndElement{Name{"", "tag"}},
	CharData("\n"),
	StartElement{Name{"", "tag"}, []Attr{}},
	CharData("&#zzz;"),
	EndElement{Name{"", "tag"}},
	CharData("\n"),
	StartElement{Name{"", "tag"}, []Attr{}},
	CharData("&なまえ3;"),
	EndElement{Name{"", "tag"}},
	CharData("\n"),
	StartElement{Name{"", "tag"}, []Attr{}},
	CharData("&lt-gt;"),
	EndElement{Name{"", "tag"}},
	CharData("\n"),
	StartElement{Name{"", "tag"}, []Attr{}},
	CharData("&;"),
	EndElement{Name{"", "tag"}},
	CharData("\n"),
	StartElement{Name{"", "tag"}, []Attr{}},
	CharData("&0a;"),
	EndElement{Name{"", "tag"}},
	CharData("\n"),
}
   224  
   225  func TestNonStrictRawToken(t *testing.T) {
   226  	d := NewDecoder(strings.NewReader(nonStrictInput))
   227  	d.Strict = false
   228  	testRawToken(t, d, nonStrictInput, nonStrictTokens)
   229  }
   230  
   231  type downCaser struct {
   232  	t *testing.T
   233  	r io.ByteReader
   234  }
   235  
   236  func (d *downCaser) ReadByte() (c byte, err error) {
   237  	c, err = d.r.ReadByte()
   238  	if c >= 'A' && c <= 'Z' {
   239  		c += 'a' - 'A'
   240  	}
   241  	return
   242  }
   243  
   244  func (d *downCaser) Read(p []byte) (int, error) {
   245  	d.t.Fatalf("unexpected Read call on downCaser reader")
   246  	panic("unreachable")
   247  }
   248  
   249  func TestRawTokenAltEncoding(t *testing.T) {
   250  	d := NewDecoder(strings.NewReader(testInputAltEncoding))
   251  	d.CharsetReader = func(charset string, input io.Reader) (io.Reader, error) {
   252  		if charset != "x-testing-uppercase" {
   253  			t.Fatalf("unexpected charset %q", charset)
   254  		}
   255  		return &downCaser{t, input.(io.ByteReader)}, nil
   256  	}
   257  	testRawToken(t, d, testInputAltEncoding, rawTokensAltEncoding)
   258  }
   259  
   260  func TestRawTokenAltEncodingNoConverter(t *testing.T) {
   261  	d := NewDecoder(strings.NewReader(testInputAltEncoding))
   262  	token, err := d.RawToken()
   263  	if token == nil {
   264  		t.Fatalf("expected a token on first RawToken call")
   265  	}
   266  	if err != nil {
   267  		t.Fatal(err)
   268  	}
   269  	token, err = d.RawToken()
   270  	if token != nil {
   271  		t.Errorf("expected a nil token; got %#v", token)
   272  	}
   273  	if err == nil {
   274  		t.Fatalf("expected an error on second RawToken call")
   275  	}
   276  	const encoding = "x-testing-uppercase"
   277  	if !strings.Contains(err.Error(), encoding) {
   278  		t.Errorf("expected error to contain %q; got error: %v",
   279  			encoding, err)
   280  	}
   281  }
   282  
   283  func testRawToken(t *testing.T, d *Decoder, raw string, rawTokens []Token) {
   284  	lastEnd := int64(0)
   285  	for i, want := range rawTokens {
   286  		start := d.InputOffset()
   287  		have, err := d.RawToken()
   288  		end := d.InputOffset()
   289  		if err != nil {
   290  			t.Fatalf("token %d: unexpected error: %s", i, err)
   291  		}
   292  		if !reflect.DeepEqual(have, want) {
   293  			var shave, swant string
   294  			if _, ok := have.(CharData); ok {
   295  				shave = fmt.Sprintf("CharData(%q)", have)
   296  			} else {
   297  				shave = fmt.Sprintf("%#v", have)
   298  			}
   299  			if _, ok := want.(CharData); ok {
   300  				swant = fmt.Sprintf("CharData(%q)", want)
   301  			} else {
   302  				swant = fmt.Sprintf("%#v", want)
   303  			}
   304  			t.Errorf("token %d = %s, want %s", i, shave, swant)
   305  		}
   306  
   307  		// Check that InputOffset returned actual token.
   308  		switch {
   309  		case start < lastEnd:
   310  			t.Errorf("token %d: position [%d,%d) for %T is before previous token", i, start, end, have)
   311  		case start >= end:
   312  			// Special case: EndElement can be synthesized.
   313  			if start == end && end == lastEnd {
   314  				break
   315  			}
   316  			t.Errorf("token %d: position [%d,%d) for %T is empty", i, start, end, have)
   317  		case end > int64(len(raw)):
   318  			t.Errorf("token %d: position [%d,%d) for %T extends beyond input", i, start, end, have)
   319  		default:
   320  			text := raw[start:end]
   321  			if strings.ContainsAny(text, "<>") && (!strings.HasPrefix(text, "<") || !strings.HasSuffix(text, ">")) {
   322  				t.Errorf("token %d: misaligned raw token %#q for %T", i, text, have)
   323  			}
   324  		}
   325  		lastEnd = end
   326  	}
   327  }
   328  
// Ensure that directives (specifically !DOCTYPE) include the complete
// text of any nested directives, noting that < and > do not change
// nesting depth if they are in single or double quotes.

// nestedDirectivesInput holds DOCTYPE directives that each wrap an ENTITY
// declaration whose quoted value contains '<', '>' or the other quote
// character. It is the input for TestNestedDirectives.
var nestedDirectivesInput = `
<!DOCTYPE [<!ENTITY rdf "http://www.w3.org/1999/02/22-rdf-syntax-ns#">]>
<!DOCTYPE [<!ENTITY xlt ">">]>
<!DOCTYPE [<!ENTITY xlt "<">]>
<!DOCTYPE [<!ENTITY xlt '>'>]>
<!DOCTYPE [<!ENTITY xlt '<'>]>
<!DOCTYPE [<!ENTITY xlt '">'>]>
<!DOCTYPE [<!ENTITY xlt "'<">]>
`
   342  
// nestedDirectivesTokens is the token sequence TestNestedDirectives expects
// for nestedDirectivesInput: one Directive per input line, holding the text
// between the outer "<!" and ">", separated by newline CharData.
var nestedDirectivesTokens = []Token{
	CharData("\n"),
	Directive(`DOCTYPE [<!ENTITY rdf "http://www.w3.org/1999/02/22-rdf-syntax-ns#">]`),
	CharData("\n"),
	Directive(`DOCTYPE [<!ENTITY xlt ">">]`),
	CharData("\n"),
	Directive(`DOCTYPE [<!ENTITY xlt "<">]`),
	CharData("\n"),
	Directive(`DOCTYPE [<!ENTITY xlt '>'>]`),
	CharData("\n"),
	Directive(`DOCTYPE [<!ENTITY xlt '<'>]`),
	CharData("\n"),
	Directive(`DOCTYPE [<!ENTITY xlt '">'>]`),
	CharData("\n"),
	Directive(`DOCTYPE [<!ENTITY xlt "'<">]`),
	CharData("\n"),
}
   360  
   361  func TestNestedDirectives(t *testing.T) {
   362  	d := NewDecoder(strings.NewReader(nestedDirectivesInput))
   363  
   364  	for i, want := range nestedDirectivesTokens {
   365  		have, err := d.Token()
   366  		if err != nil {
   367  			t.Fatalf("token %d: unexpected error: %s", i, err)
   368  		}
   369  		if !reflect.DeepEqual(have, want) {
   370  			t.Errorf("token %d = %#v want %#v", i, have, want)
   371  		}
   372  	}
   373  }
   374  
   375  func TestToken(t *testing.T) {
   376  	d := NewDecoder(strings.NewReader(testInput))
   377  	d.Entity = testEntity
   378  
   379  	for i, want := range cookedTokens {
   380  		have, err := d.Token()
   381  		if err != nil {
   382  			t.Fatalf("token %d: unexpected error: %s", i, err)
   383  		}
   384  		if !reflect.DeepEqual(have, want) {
   385  			t.Errorf("token %d = %#v want %#v", i, have, want)
   386  		}
   387  	}
   388  }
   389  
   390  func TestSyntax(t *testing.T) {
   391  	for i := range xmlInput {
   392  		d := NewDecoder(strings.NewReader(xmlInput[i]))
   393  		var err error
   394  		for _, err = d.Token(); err == nil; _, err = d.Token() {
   395  		}
   396  		if _, ok := err.(*SyntaxError); !ok {
   397  			t.Fatalf(`xmlInput "%s": expected SyntaxError not received`, xmlInput[i])
   398  		}
   399  	}
   400  }
   401  
   402  type allScalars struct {
   403  	True1     bool
   404  	True2     bool
   405  	False1    bool
   406  	False2    bool
   407  	Int       int
   408  	Int8      int8
   409  	Int16     int16
   410  	Int32     int32
   411  	Int64     int64
   412  	Uint      int
   413  	Uint8     uint8
   414  	Uint16    uint16
   415  	Uint32    uint32
   416  	Uint64    uint64
   417  	Uintptr   uintptr
   418  	Float32   float32
   419  	Float64   float64
   420  	String    string
   421  	PtrString *string
   422  }
   423  
// all is the value TestAllScalars expects after unmarshalling
// testScalarsInput into an allScalars.
var all = allScalars{
	True1:     true,
	True2:     true,
	False1:    false,
	False2:    false,
	Int:       1,
	Int8:      -2,
	Int16:     3,
	Int32:     -4,
	Int64:     5,
	Uint:      6,
	Uint8:     7,
	Uint16:    8,
	Uint32:    9,
	Uint64:    10,
	Uintptr:   11,
	Float32:   13.0,
	Float64:   14.0,
	String:    "15",
	PtrString: &sixteen,
}
   445  
// sixteen is the string that all.PtrString points to. It is a variable
// because the expected value needs its address.
var sixteen = "16"
   447  
// testScalarsInput is the document TestAllScalars unmarshals into an
// allScalars value. Its <Float> element has no field of that name in
// allScalars.
const testScalarsInput = `<allscalars>
	<True1>true</True1>
	<True2>1</True2>
	<False1>false</False1>
	<False2>0</False2>
	<Int>1</Int>
	<Int8>-2</Int8>
	<Int16>3</Int16>
	<Int32>-4</Int32>
	<Int64>5</Int64>
	<Uint>6</Uint>
	<Uint8>7</Uint8>
	<Uint16>8</Uint16>
	<Uint32>9</Uint32>
	<Uint64>10</Uint64>
	<Uintptr>11</Uintptr>
	<Float>12.0</Float>
	<Float32>13.0</Float32>
	<Float64>14.0</Float64>
	<String>15</String>
	<PtrString>16</PtrString>
</allscalars>`
   470  
   471  func TestAllScalars(t *testing.T) {
   472  	var a allScalars
   473  	err := Unmarshal([]byte(testScalarsInput), &a)
   474  
   475  	if err != nil {
   476  		t.Fatal(err)
   477  	}
   478  	if !reflect.DeepEqual(a, all) {
   479  		t.Errorf("have %+v want %+v", a, all)
   480  	}
   481  }
   482  
// item is the target type for TestIssue569. Its single field name contains
// an underscore, matching the <Field_a> element that test unmarshals.
type item struct {
	Field_a string
}
   486  
   487  func TestIssue569(t *testing.T) {
   488  	data := `<item><Field_a>abcd</Field_a></item>`
   489  	var i item
   490  	err := Unmarshal([]byte(data), &i)
   491  
   492  	if err != nil || i.Field_a != "abcd" {
   493  		t.Fatal("Expecting abcd")
   494  	}
   495  }
   496  
   497  func TestUnquotedAttrs(t *testing.T) {
   498  	data := "<tag attr=azAZ09:-_\t>"
   499  	d := NewDecoder(strings.NewReader(data))
   500  	d.Strict = false
   501  	token, err := d.Token()
   502  	if _, ok := err.(*SyntaxError); ok {
   503  		t.Errorf("Unexpected error: %v", err)
   504  	}
   505  	if token.(StartElement).Name.Local != "tag" {
   506  		t.Errorf("Unexpected tag name: %v", token.(StartElement).Name.Local)
   507  	}
   508  	attr := token.(StartElement).Attr[0]
   509  	if attr.Value != "azAZ09:-_" {
   510  		t.Errorf("Unexpected attribute value: %v", attr.Value)
   511  	}
   512  	if attr.Name.Local != "attr" {
   513  		t.Errorf("Unexpected attribute name: %v", attr.Name.Local)
   514  	}
   515  }
   516  
   517  func TestValuelessAttrs(t *testing.T) {
   518  	tests := [][3]string{
   519  		{"<p nowrap>", "p", "nowrap"},
   520  		{"<p nowrap >", "p", "nowrap"},
   521  		{"<input checked/>", "input", "checked"},
   522  		{"<input checked />", "input", "checked"},
   523  	}
   524  	for _, test := range tests {
   525  		d := NewDecoder(strings.NewReader(test[0]))
   526  		d.Strict = false
   527  		token, err := d.Token()
   528  		if _, ok := err.(*SyntaxError); ok {
   529  			t.Errorf("Unexpected error: %v", err)
   530  		}
   531  		if token.(StartElement).Name.Local != test[1] {
   532  			t.Errorf("Unexpected tag name: %v", token.(StartElement).Name.Local)
   533  		}
   534  		attr := token.(StartElement).Attr[0]
   535  		if attr.Value != test[2] {
   536  			t.Errorf("Unexpected attribute value: %v", attr.Value)
   537  		}
   538  		if attr.Name.Local != test[2] {
   539  			t.Errorf("Unexpected attribute name: %v", attr.Name.Local)
   540  		}
   541  	}
   542  }
   543  
   544  func TestCopyTokenCharData(t *testing.T) {
   545  	data := []byte("same data")
   546  	var tok1 Token = CharData(data)
   547  	tok2 := CopyToken(tok1)
   548  	if !reflect.DeepEqual(tok1, tok2) {
   549  		t.Error("CopyToken(CharData) != CharData")
   550  	}
   551  	data[1] = 'o'
   552  	if reflect.DeepEqual(tok1, tok2) {
   553  		t.Error("CopyToken(CharData) uses same buffer.")
   554  	}
   555  }
   556  
   557  func TestCopyTokenStartElement(t *testing.T) {
   558  	elt := StartElement{Name{"", "hello"}, []Attr{{Name{"", "lang"}, "en"}}}
   559  	var tok1 Token = elt
   560  	tok2 := CopyToken(tok1)
   561  	if tok1.(StartElement).Attr[0].Value != "en" {
   562  		t.Error("CopyToken overwrote Attr[0]")
   563  	}
   564  	if !reflect.DeepEqual(tok1, tok2) {
   565  		t.Error("CopyToken(StartElement) != StartElement")
   566  	}
   567  	tok1.(StartElement).Attr[0] = Attr{Name{"", "lang"}, "de"}
   568  	if reflect.DeepEqual(tok1, tok2) {
   569  		t.Error("CopyToken(CharData) uses same buffer.")
   570  	}
   571  }
   572  
   573  func TestSyntaxErrorLineNum(t *testing.T) {
   574  	testInput := "<P>Foo<P>\n\n<P>Bar</>\n"
   575  	d := NewDecoder(strings.NewReader(testInput))
   576  	var err error
   577  	for _, err = d.Token(); err == nil; _, err = d.Token() {
   578  	}
   579  	synerr, ok := err.(*SyntaxError)
   580  	if !ok {
   581  		t.Error("Expected SyntaxError.")
   582  	}
   583  	if synerr.Line != 3 {
   584  		t.Error("SyntaxError didn't have correct line number.")
   585  	}
   586  }
   587  
   588  func TestTrailingRawToken(t *testing.T) {
   589  	input := `<FOO></FOO>  `
   590  	d := NewDecoder(strings.NewReader(input))
   591  	var err error
   592  	for _, err = d.RawToken(); err == nil; _, err = d.RawToken() {
   593  	}
   594  	if err != io.EOF {
   595  		t.Fatalf("d.RawToken() = _, %v, want _, io.EOF", err)
   596  	}
   597  }
   598  
   599  func TestTrailingToken(t *testing.T) {
   600  	input := `<FOO></FOO>  `
   601  	d := NewDecoder(strings.NewReader(input))
   602  	var err error
   603  	for _, err = d.Token(); err == nil; _, err = d.Token() {
   604  	}
   605  	if err != io.EOF {
   606  		t.Fatalf("d.Token() = _, %v, want _, io.EOF", err)
   607  	}
   608  }
   609  
   610  func TestEntityInsideCDATA(t *testing.T) {
   611  	input := `<test><![CDATA[ &val=foo ]]></test>`
   612  	d := NewDecoder(strings.NewReader(input))
   613  	var err error
   614  	for _, err = d.Token(); err == nil; _, err = d.Token() {
   615  	}
   616  	if err != io.EOF {
   617  		t.Fatalf("d.Token() = _, %v, want _, io.EOF", err)
   618  	}
   619  }
   620  
// characterTests pairs inputs that contain a disallowed character or an
// invalid entity reference with the SyntaxError.Msg that
// TestDisallowedCharacters expects for them.
var characterTests = []struct {
	in  string // document to decode
	err string // expected SyntaxError.Msg
}{
	{"\x12<doc/>", "illegal character code U+0012"},
	{"<?xml version=\"1.0\"?>\x0b<doc/>", "illegal character code U+000B"},
	{"\xef\xbf\xbe<doc/>", "illegal character code U+FFFE"},
	{"<?xml version=\"1.0\"?><doc>\r\n<hiya/>\x07<toots/></doc>", "illegal character code U+0007"},
	{"<?xml version=\"1.0\"?><doc \x12='value'>what's up</doc>", "expected attribute name in element"},
	{"<doc>&abc\x01;</doc>", "invalid character entity &abc (no semicolon)"},
	{"<doc>&\x01;</doc>", "invalid character entity & (no semicolon)"},
	{"<doc>&\xef\xbf\xbe;</doc>", "invalid character entity &\uFFFE;"},
	{"<doc>&hello;</doc>", "invalid character entity &hello;"},
}
   635  
   636  func TestDisallowedCharacters(t *testing.T) {
   637  
   638  	for i, tt := range characterTests {
   639  		d := NewDecoder(strings.NewReader(tt.in))
   640  		var err error
   641  
   642  		for err == nil {
   643  			_, err = d.Token()
   644  		}
   645  		synerr, ok := err.(*SyntaxError)
   646  		if !ok {
   647  			t.Fatalf("input %d d.Token() = _, %v, want _, *SyntaxError", i, err)
   648  		}
   649  		if synerr.Msg != tt.err {
   650  			t.Fatalf("input %d synerr.Msg wrong: want %q, got %q", i, tt.err, synerr.Msg)
   651  		}
   652  	}
   653  }
   654  
// procInstEncodingTest pairs an expected string with the string obtained.
// NOTE(review): nothing in the visible part of this file uses this type;
// procInstTests declares its own anonymous struct. Confirm whether it is
// still needed.
type procInstEncodingTest struct {
	expect, got string
}
   658  
// procInstTests is the table for TestProcInstEncoding. For each
// processing-instruction body, expect[0] is the value wanted for "version"
// and expect[1] the value wanted for "encoding". An empty string is expected
// for a parameter that is absent or whose value is not quoted.
var procInstTests = []struct {
	input  string
	expect [2]string
}{
	{`version="1.0" encoding="utf-8"`, [2]string{"1.0", "utf-8"}},
	{`version="1.0" encoding='utf-8'`, [2]string{"1.0", "utf-8"}},
	{`version="1.0" encoding='utf-8' `, [2]string{"1.0", "utf-8"}},
	{`version="1.0" encoding=utf-8`, [2]string{"1.0", ""}},
	{`encoding="FOO" `, [2]string{"", "FOO"}},
}
   669  
   670  func TestProcInstEncoding(t *testing.T) {
   671  	for _, test := range procInstTests {
   672  		if got := procInst("version", test.input); got != test.expect[0] {
   673  			t.Errorf("procInst(version, %q) = %q; want %q", test.input, got, test.expect[0])
   674  		}
   675  		if got := procInst("encoding", test.input); got != test.expect[1] {
   676  			t.Errorf("procInst(encoding, %q) = %q; want %q", test.input, got, test.expect[1])
   677  		}
   678  	}
   679  }
   680  
// Ensure that directives with comments include the complete
// text of any nested directives.

// directivesWithCommentsInput holds DOCTYPE directives with XML comments
// embedded at various positions, including sequences such as "<!->" and
// "<!>" that resemble a comment opener but are not one. It is the input for
// TestDirectivesWithComments.
var directivesWithCommentsInput = `
<!DOCTYPE [<!-- a comment --><!ENTITY rdf "http://www.w3.org/1999/02/22-rdf-syntax-ns#">]>
<!DOCTYPE [<!ENTITY go "Golang"><!-- a comment-->]>
<!DOCTYPE <!-> <!> <!----> <!-->--> <!--->--> [<!ENTITY go "Golang"><!-- a comment-->]>
`
   689  
// directivesWithCommentsTokens is the token sequence
// TestDirectivesWithComments expects for directivesWithCommentsInput. The
// comments are absent from the expected Directive text. In the third
// directive the run of spaces between "<!>" and "[" is significant.
var directivesWithCommentsTokens = []Token{
	CharData("\n"),
	Directive(`DOCTYPE [<!ENTITY rdf "http://www.w3.org/1999/02/22-rdf-syntax-ns#">]`),
	CharData("\n"),
	Directive(`DOCTYPE [<!ENTITY go "Golang">]`),
	CharData("\n"),
	Directive(`DOCTYPE <!-> <!>    [<!ENTITY go "Golang">]`),
	CharData("\n"),
}
   699  
   700  func TestDirectivesWithComments(t *testing.T) {
   701  	d := NewDecoder(strings.NewReader(directivesWithCommentsInput))
   702  
   703  	for i, want := range directivesWithCommentsTokens {
   704  		have, err := d.Token()
   705  		if err != nil {
   706  			t.Fatalf("token %d: unexpected error: %s", i, err)
   707  		}
   708  		if !reflect.DeepEqual(have, want) {
   709  			t.Errorf("token %d = %#v want %#v", i, have, want)
   710  		}
   711  	}
   712  }
   713  
   714  // Writer whose Write method always returns an error.
   715  type errWriter struct{}
   716  
   717  func (errWriter) Write(p []byte) (n int, err error) { return 0, fmt.Errorf("unwritable") }
   718  
   719  func TestEscapeTextIOErrors(t *testing.T) {
   720  	expectErr := "unwritable"
   721  	err := EscapeText(errWriter{}, []byte{'A'})
   722  
   723  	if err == nil || err.Error() != expectErr {
   724  		t.Errorf("have %v, want %v", err, expectErr)
   725  	}
   726  }
   727  
   728  func TestEscapeTextInvalidChar(t *testing.T) {
   729  	input := []byte("A \x00 terminated string.")
   730  	expected := "A \uFFFD terminated string."
   731  
   732  	buff := new(bytes.Buffer)
   733  	if err := EscapeText(buff, input); err != nil {
   734  		t.Fatalf("have %v, want nil", err)
   735  	}
   736  	text := buff.String()
   737  
   738  	if text != expected {
   739  		t.Errorf("have %v, want %v", text, expected)
   740  	}
   741  }
   742  
   743  func TestIssue5880(t *testing.T) {
   744  	type T []byte
   745  	data, err := Marshal(T{192, 168, 0, 1})
   746  	if err != nil {
   747  		t.Errorf("Marshal error: %v", err)
   748  	}
   749  	if !utf8.Valid(data) {
   750  		t.Errorf("Marshal generated invalid UTF-8: %x", data)
   751  	}
   752  }