github.com/vmware/govmomi@v0.37.2/vim25/xml/xml_test.go (about)

     1  // Copyright 2009 The Go Authors. All rights reserved.
     2  // Use of this source code is governed by a BSD-style
     3  // license that can be found in the LICENSE file.
     4  
     5  package xml
     6  
     7  import (
     8  	"bytes"
     9  	"fmt"
    10  	"io"
    11  	"reflect"
    12  	"strings"
    13  	"testing"
    14  	"unicode/utf8"
    15  )
    16  
    17  type toks struct {
    18  	earlyEOF bool
    19  	t        []Token
    20  }
    21  
    22  func (t *toks) Token() (Token, error) {
    23  	if len(t.t) == 0 {
    24  		return nil, io.EOF
    25  	}
    26  	var tok Token
    27  	tok, t.t = t.t[0], t.t[1:]
    28  	if t.earlyEOF && len(t.t) == 0 {
    29  		return tok, io.EOF
    30  	}
    31  	return tok, nil
    32  }
    33  
    34  func TestDecodeEOF(t *testing.T) {
    35  	start := StartElement{Name: Name{Local: "test"}}
    36  	t.Run("EarlyEOF", func(t *testing.T) {
    37  		d := NewTokenDecoder(&toks{earlyEOF: true, t: []Token{
    38  			start,
    39  			start.End(),
    40  		}})
    41  		err := d.Decode(&struct {
    42  			XMLName Name `xml:"test"`
    43  		}{})
    44  		if err != nil {
    45  			t.Error(err)
    46  		}
    47  	})
    48  	t.Run("LateEOF", func(t *testing.T) {
    49  		d := NewTokenDecoder(&toks{t: []Token{
    50  			start,
    51  			start.End(),
    52  		}})
    53  		err := d.Decode(&struct {
    54  			XMLName Name `xml:"test"`
    55  		}{})
    56  		if err != nil {
    57  			t.Error(err)
    58  		}
    59  	})
    60  }
    61  
// testInput is the XML document tokenized by TestRawToken and TestToken.
// It contains a processing instruction, a DOCTYPE directive, default and
// prefixed namespaces, predefined, numeric and custom entities, a
// self-closing element, a CDATA section and a trailing comment, and it
// deliberately ends without a final newline.
const testInput = `
<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN"
  "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
<body xmlns:foo="ns1" xmlns="ns2" xmlns:tag="ns3" ` +
	"\r\n\t" + `  >
  <hello lang="en">World &lt;&gt;&apos;&quot; &#x767d;&#40300;翔</hello>
  <query>&何; &is-it;</query>
  <goodbye />
  <outer foo:attr="value" xmlns:tag="ns4">
    <inner/>
  </outer>
  <tag:name>
    <![CDATA[Some text here.]]>
  </tag:name>
</body><!-- missing final newline -->`
    78  
// testEntity maps the custom entity names referenced in testInput's <query>
// element to their replacement text; tests install it as Decoder.Entity.
var testEntity = map[string]string{"何": "What", "is-it": "is it?"}
    80  
// rawTokens is the token sequence TestRawToken expects RawToken to produce
// for testInput. Element and attribute names still carry their literal
// prefixes (for example "tag" and "foo") in the Space field.
var rawTokens = []Token{
	CharData("\n"),
	ProcInst{"xml", []byte(`version="1.0" encoding="UTF-8"`)},
	CharData("\n"),
	Directive(`DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN"
  "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd"`),
	CharData("\n"),
	StartElement{Name{"", "body"}, []Attr{{Name{"xmlns", "foo"}, "ns1"}, {Name{"", "xmlns"}, "ns2"}, {Name{"xmlns", "tag"}, "ns3"}}},
	CharData("\n  "),
	StartElement{Name{"", "hello"}, []Attr{{Name{"", "lang"}, "en"}}},
	CharData("World <>'\" 白鵬翔"),
	EndElement{Name{"", "hello"}},
	CharData("\n  "),
	StartElement{Name{"", "query"}, []Attr{}},
	CharData("What is it?"),
	EndElement{Name{"", "query"}},
	CharData("\n  "),
	StartElement{Name{"", "goodbye"}, []Attr{}},
	EndElement{Name{"", "goodbye"}},
	CharData("\n  "),
	StartElement{Name{"", "outer"}, []Attr{{Name{"foo", "attr"}, "value"}, {Name{"xmlns", "tag"}, "ns4"}}},
	CharData("\n    "),
	StartElement{Name{"", "inner"}, []Attr{}},
	EndElement{Name{"", "inner"}},
	CharData("\n  "),
	EndElement{Name{"", "outer"}},
	CharData("\n  "),
	StartElement{Name{"tag", "name"}, []Attr{}},
	CharData("\n    "),
	CharData("Some text here."),
	CharData("\n  "),
	EndElement{Name{"tag", "name"}},
	CharData("\n"),
	EndElement{Name{"", "body"}},
	Comment(" missing final newline "),
}
   117  
// cookedTokens is the token sequence TestToken expects Token to produce for
// testInput. It mirrors rawTokens, except that the Space field of element
// and attribute names holds the namespace value (ns1, ns2, ns3) rather than
// the literal prefix.
var cookedTokens = []Token{
	CharData("\n"),
	ProcInst{"xml", []byte(`version="1.0" encoding="UTF-8"`)},
	CharData("\n"),
	Directive(`DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN"
  "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd"`),
	CharData("\n"),
	StartElement{Name{"ns2", "body"}, []Attr{{Name{"xmlns", "foo"}, "ns1"}, {Name{"", "xmlns"}, "ns2"}, {Name{"xmlns", "tag"}, "ns3"}}},
	CharData("\n  "),
	StartElement{Name{"ns2", "hello"}, []Attr{{Name{"", "lang"}, "en"}}},
	CharData("World <>'\" 白鵬翔"),
	EndElement{Name{"ns2", "hello"}},
	CharData("\n  "),
	StartElement{Name{"ns2", "query"}, []Attr{}},
	CharData("What is it?"),
	EndElement{Name{"ns2", "query"}},
	CharData("\n  "),
	StartElement{Name{"ns2", "goodbye"}, []Attr{}},
	EndElement{Name{"ns2", "goodbye"}},
	CharData("\n  "),
	StartElement{Name{"ns2", "outer"}, []Attr{{Name{"ns1", "attr"}, "value"}, {Name{"xmlns", "tag"}, "ns4"}}},
	CharData("\n    "),
	StartElement{Name{"ns2", "inner"}, []Attr{}},
	EndElement{Name{"ns2", "inner"}},
	CharData("\n  "),
	EndElement{Name{"ns2", "outer"}},
	CharData("\n  "),
	StartElement{Name{"ns3", "name"}, []Attr{}},
	CharData("\n    "),
	CharData("Some text here."),
	CharData("\n  "),
	EndElement{Name{"ns3", "name"}},
	CharData("\n"),
	EndElement{Name{"ns2", "body"}},
	Comment(" missing final newline "),
}
   154  
// testInputAltEncoding is a small document whose XML declaration names the
// made-up encoding "x-testing-uppercase"; it is used by the alternate
// encoding tests.
const testInputAltEncoding = `
<?xml version="1.0" encoding="x-testing-uppercase"?>
<TAG>VALUE</TAG>`
   158  
// rawTokensAltEncoding is the token sequence TestRawTokenAltEncoding expects
// for testInputAltEncoding: the element name and text appear lower-cased
// because that test installs a CharsetReader that lower-cases every byte.
var rawTokensAltEncoding = []Token{
	CharData("\n"),
	ProcInst{"xml", []byte(`version="1.0" encoding="x-testing-uppercase"`)},
	CharData("\n"),
	StartElement{Name{"", "tag"}, []Attr{}},
	CharData("value"),
	EndElement{Name{"", "tag"}},
}
   167  
// xmlInput lists malformed documents. TestSyntax reads tokens from each one
// until an error occurs and requires that error to be a *SyntaxError.
var xmlInput = []string{
	// unexpected EOF cases
	"<",
	"<t",
	"<t ",
	"<t/",
	"<!",
	"<!-",
	"<!--",
	"<!--c-",
	"<!--c--",
	"<!d",
	"<t></",
	"<t></t",
	"<?",
	"<?p",
	"<t a",
	"<t a=",
	"<t a='",
	"<t a=''",
	"<t/><![",
	"<t/><![C",
	"<t/><![CDATA[d",
	"<t/><![CDATA[d]",
	"<t/><![CDATA[d]]",

	// other Syntax errors
	"<>",
	"<t/a",
	"<0 />",
	"<?0 >",
	//	"<!0 >",	// let the Token() caller handle
	"</0>",
	"<t 0=''>",
	"<t a='&'>",
	"<t a='<'>",
	"<t>&nbspc;</t>",
	"<t a>",
	"<t a=>",
	"<t a=v>",
	//	"<![CDATA[d]]>",	// let the Token() caller handle
	"<t></e>",
	"<t></>",
	"<t></t!",
	"<t>cdata]]></t>",
}
   214  
   215  func TestRawToken(t *testing.T) {
   216  	d := NewDecoder(strings.NewReader(testInput))
   217  	d.Entity = testEntity
   218  	testRawToken(t, d, testInput, rawTokens)
   219  }
   220  
// nonStrictInput holds elements whose text contains malformed or unknown
// entity references; TestNonStrictRawToken decodes it with Strict disabled.
const nonStrictInput = `
<tag>non&entity</tag>
<tag>&unknown;entity</tag>
<tag>&#123</tag>
<tag>&#zzz;</tag>
<tag>&なまえ3;</tag>
<tag>&lt-gt;</tag>
<tag>&;</tag>
<tag>&0a;</tag>
`
   231  
// nonStrictTokens is the token sequence TestNonStrictRawToken expects for
// nonStrictInput: each malformed entity reference is expected to come back
// verbatim as character data.
var nonStrictTokens = []Token{
	CharData("\n"),
	StartElement{Name{"", "tag"}, []Attr{}},
	CharData("non&entity"),
	EndElement{Name{"", "tag"}},
	CharData("\n"),
	StartElement{Name{"", "tag"}, []Attr{}},
	CharData("&unknown;entity"),
	EndElement{Name{"", "tag"}},
	CharData("\n"),
	StartElement{Name{"", "tag"}, []Attr{}},
	CharData("&#123"),
	EndElement{Name{"", "tag"}},
	CharData("\n"),
	StartElement{Name{"", "tag"}, []Attr{}},
	CharData("&#zzz;"),
	EndElement{Name{"", "tag"}},
	CharData("\n"),
	StartElement{Name{"", "tag"}, []Attr{}},
	CharData("&なまえ3;"),
	EndElement{Name{"", "tag"}},
	CharData("\n"),
	StartElement{Name{"", "tag"}, []Attr{}},
	CharData("&lt-gt;"),
	EndElement{Name{"", "tag"}},
	CharData("\n"),
	StartElement{Name{"", "tag"}, []Attr{}},
	CharData("&;"),
	EndElement{Name{"", "tag"}},
	CharData("\n"),
	StartElement{Name{"", "tag"}, []Attr{}},
	CharData("&0a;"),
	EndElement{Name{"", "tag"}},
	CharData("\n"),
}
   267  
   268  func TestNonStrictRawToken(t *testing.T) {
   269  	d := NewDecoder(strings.NewReader(nonStrictInput))
   270  	d.Strict = false
   271  	testRawToken(t, d, nonStrictInput, nonStrictTokens)
   272  }
   273  
   274  type downCaser struct {
   275  	t *testing.T
   276  	r io.ByteReader
   277  }
   278  
   279  func (d *downCaser) ReadByte() (c byte, err error) {
   280  	c, err = d.r.ReadByte()
   281  	if c >= 'A' && c <= 'Z' {
   282  		c += 'a' - 'A'
   283  	}
   284  	return
   285  }
   286  
   287  func (d *downCaser) Read(p []byte) (int, error) {
   288  	d.t.Fatalf("unexpected Read call on downCaser reader")
   289  	panic("unreachable")
   290  }
   291  
   292  func TestRawTokenAltEncoding(t *testing.T) {
   293  	d := NewDecoder(strings.NewReader(testInputAltEncoding))
   294  	d.CharsetReader = func(charset string, input io.Reader) (io.Reader, error) {
   295  		if charset != "x-testing-uppercase" {
   296  			t.Fatalf("unexpected charset %q", charset)
   297  		}
   298  		return &downCaser{t, input.(io.ByteReader)}, nil
   299  	}
   300  	testRawToken(t, d, testInputAltEncoding, rawTokensAltEncoding)
   301  }
   302  
   303  func TestRawTokenAltEncodingNoConverter(t *testing.T) {
   304  	d := NewDecoder(strings.NewReader(testInputAltEncoding))
   305  	token, err := d.RawToken()
   306  	if token == nil {
   307  		t.Fatalf("expected a token on first RawToken call")
   308  	}
   309  	if err != nil {
   310  		t.Fatal(err)
   311  	}
   312  	token, err = d.RawToken()
   313  	if token != nil {
   314  		t.Errorf("expected a nil token; got %#v", token)
   315  	}
   316  	if err == nil {
   317  		t.Fatalf("expected an error on second RawToken call")
   318  	}
   319  	const encoding = "x-testing-uppercase"
   320  	if !strings.Contains(err.Error(), encoding) {
   321  		t.Errorf("expected error to contain %q; got error: %v",
   322  			encoding, err)
   323  	}
   324  }
   325  
   326  func testRawToken(t *testing.T, d *Decoder, raw string, rawTokens []Token) {
   327  	lastEnd := int64(0)
   328  	for i, want := range rawTokens {
   329  		start := d.InputOffset()
   330  		have, err := d.RawToken()
   331  		end := d.InputOffset()
   332  		if err != nil {
   333  			t.Fatalf("token %d: unexpected error: %s", i, err)
   334  		}
   335  		if !reflect.DeepEqual(have, want) {
   336  			var shave, swant string
   337  			if _, ok := have.(CharData); ok {
   338  				shave = fmt.Sprintf("CharData(%q)", have)
   339  			} else {
   340  				shave = fmt.Sprintf("%#v", have)
   341  			}
   342  			if _, ok := want.(CharData); ok {
   343  				swant = fmt.Sprintf("CharData(%q)", want)
   344  			} else {
   345  				swant = fmt.Sprintf("%#v", want)
   346  			}
   347  			t.Errorf("token %d = %s, want %s", i, shave, swant)
   348  		}
   349  
   350  		// Check that InputOffset returned actual token.
   351  		switch {
   352  		case start < lastEnd:
   353  			t.Errorf("token %d: position [%d,%d) for %T is before previous token", i, start, end, have)
   354  		case start >= end:
   355  			// Special case: EndElement can be synthesized.
   356  			if start == end && end == lastEnd {
   357  				break
   358  			}
   359  			t.Errorf("token %d: position [%d,%d) for %T is empty", i, start, end, have)
   360  		case end > int64(len(raw)):
   361  			t.Errorf("token %d: position [%d,%d) for %T extends beyond input", i, start, end, have)
   362  		default:
   363  			text := raw[start:end]
   364  			if strings.ContainsAny(text, "<>") && (!strings.HasPrefix(text, "<") || !strings.HasSuffix(text, ">")) {
   365  				t.Errorf("token %d: misaligned raw token %#q for %T", i, text, have)
   366  			}
   367  		}
   368  		lastEnd = end
   369  	}
   370  }
   371  
// nestedDirectivesInput is the input for TestNestedDirectives.
//
// Ensure that directives (specifically !DOCTYPE) include the complete
// text of any nested directives, noting that < and > do not change
// nesting depth if they are in single or double quotes.
var nestedDirectivesInput = `
<!DOCTYPE [<!ENTITY rdf "http://www.w3.org/1999/02/22-rdf-syntax-ns#">]>
<!DOCTYPE [<!ENTITY xlt ">">]>
<!DOCTYPE [<!ENTITY xlt "<">]>
<!DOCTYPE [<!ENTITY xlt '>'>]>
<!DOCTYPE [<!ENTITY xlt '<'>]>
<!DOCTYPE [<!ENTITY xlt '">'>]>
<!DOCTYPE [<!ENTITY xlt "'<">]>
`
   385  
// nestedDirectivesTokens is the token sequence TestNestedDirectives expects
// for nestedDirectivesInput: one Directive per line, each holding the full
// DOCTYPE text including the nested ENTITY declaration.
var nestedDirectivesTokens = []Token{
	CharData("\n"),
	Directive(`DOCTYPE [<!ENTITY rdf "http://www.w3.org/1999/02/22-rdf-syntax-ns#">]`),
	CharData("\n"),
	Directive(`DOCTYPE [<!ENTITY xlt ">">]`),
	CharData("\n"),
	Directive(`DOCTYPE [<!ENTITY xlt "<">]`),
	CharData("\n"),
	Directive(`DOCTYPE [<!ENTITY xlt '>'>]`),
	CharData("\n"),
	Directive(`DOCTYPE [<!ENTITY xlt '<'>]`),
	CharData("\n"),
	Directive(`DOCTYPE [<!ENTITY xlt '">'>]`),
	CharData("\n"),
	Directive(`DOCTYPE [<!ENTITY xlt "'<">]`),
	CharData("\n"),
}
   403  
   404  func TestNestedDirectives(t *testing.T) {
   405  	d := NewDecoder(strings.NewReader(nestedDirectivesInput))
   406  
   407  	for i, want := range nestedDirectivesTokens {
   408  		have, err := d.Token()
   409  		if err != nil {
   410  			t.Fatalf("token %d: unexpected error: %s", i, err)
   411  		}
   412  		if !reflect.DeepEqual(have, want) {
   413  			t.Errorf("token %d = %#v want %#v", i, have, want)
   414  		}
   415  	}
   416  }
   417  
   418  func TestToken(t *testing.T) {
   419  	d := NewDecoder(strings.NewReader(testInput))
   420  	d.Entity = testEntity
   421  
   422  	for i, want := range cookedTokens {
   423  		have, err := d.Token()
   424  		if err != nil {
   425  			t.Fatalf("token %d: unexpected error: %s", i, err)
   426  		}
   427  		if !reflect.DeepEqual(have, want) {
   428  			t.Errorf("token %d = %#v want %#v", i, have, want)
   429  		}
   430  	}
   431  }
   432  
   433  func TestSyntax(t *testing.T) {
   434  	for i := range xmlInput {
   435  		d := NewDecoder(strings.NewReader(xmlInput[i]))
   436  		var err error
   437  		for _, err = d.Token(); err == nil; _, err = d.Token() {
   438  		}
   439  		if _, ok := err.(*SyntaxError); !ok {
   440  			t.Fatalf(`xmlInput "%s": expected SyntaxError not received`, xmlInput[i])
   441  		}
   442  	}
   443  }
   444  
   445  type allScalars struct {
   446  	True1     bool
   447  	True2     bool
   448  	False1    bool
   449  	False2    bool
   450  	Int       int
   451  	Int8      int8
   452  	Int16     int16
   453  	Int32     int32
   454  	Int64     int64
   455  	Uint      int
   456  	Uint8     uint8
   457  	Uint16    uint16
   458  	Uint32    uint32
   459  	Uint64    uint64
   460  	Uintptr   uintptr
   461  	Float32   float32
   462  	Float64   float64
   463  	String    string
   464  	PtrString *string
   465  }
   466  
// all is the value TestAllScalars expects to obtain from unmarshaling
// testScalarsInput.
var all = allScalars{
	True1:     true,
	True2:     true,
	False1:    false,
	False2:    false,
	Int:       1,
	Int8:      -2,
	Int16:     3,
	Int32:     -4,
	Int64:     5,
	Uint:      6,
	Uint8:     7,
	Uint16:    8,
	Uint32:    9,
	Uint64:    10,
	Uintptr:   11,
	Float32:   13.0,
	Float64:   14.0,
	String:    "15",
	PtrString: &sixteen,
}

// sixteen is the string that all.PtrString points at.
var sixteen = "16"
   490  
// testScalarsInput is the document TestAllScalars unmarshals into an
// allScalars value. It spells booleans both as words and as 0/1, and it
// includes a <Float> element for which allScalars declares no field.
const testScalarsInput = `<allscalars>
	<True1>true</True1>
	<True2>1</True2>
	<False1>false</False1>
	<False2>0</False2>
	<Int>1</Int>
	<Int8>-2</Int8>
	<Int16>3</Int16>
	<Int32>-4</Int32>
	<Int64>5</Int64>
	<Uint>6</Uint>
	<Uint8>7</Uint8>
	<Uint16>8</Uint16>
	<Uint32>9</Uint32>
	<Uint64>10</Uint64>
	<Uintptr>11</Uintptr>
	<Float>12.0</Float>
	<Float32>13.0</Float32>
	<Float64>14.0</Float64>
	<String>15</String>
	<PtrString>16</PtrString>
</allscalars>`
   513  
   514  func TestAllScalars(t *testing.T) {
   515  	var a allScalars
   516  	err := Unmarshal([]byte(testScalarsInput), &a)
   517  
   518  	if err != nil {
   519  		t.Fatal(err)
   520  	}
   521  	if !reflect.DeepEqual(a, all) {
   522  		t.Errorf("have %+v want %+v", a, all)
   523  	}
   524  }
   525  
   526  type item struct {
   527  	FieldA string
   528  }
   529  
   530  func TestIssue569(t *testing.T) {
   531  	data := `<item><FieldA>abcd</FieldA></item>`
   532  	var i item
   533  	err := Unmarshal([]byte(data), &i)
   534  
   535  	if err != nil || i.FieldA != "abcd" {
   536  		t.Fatal("Expecting abcd")
   537  	}
   538  }
   539  
   540  func TestUnquotedAttrs(t *testing.T) {
   541  	data := "<tag attr=azAZ09:-_\t>"
   542  	d := NewDecoder(strings.NewReader(data))
   543  	d.Strict = false
   544  	token, err := d.Token()
   545  	if _, ok := err.(*SyntaxError); ok {
   546  		t.Errorf("Unexpected error: %v", err)
   547  	}
   548  	if token.(StartElement).Name.Local != "tag" {
   549  		t.Errorf("Unexpected tag name: %v", token.(StartElement).Name.Local)
   550  	}
   551  	attr := token.(StartElement).Attr[0]
   552  	if attr.Value != "azAZ09:-_" {
   553  		t.Errorf("Unexpected attribute value: %v", attr.Value)
   554  	}
   555  	if attr.Name.Local != "attr" {
   556  		t.Errorf("Unexpected attribute name: %v", attr.Name.Local)
   557  	}
   558  }
   559  
   560  func TestValuelessAttrs(t *testing.T) {
   561  	tests := [][3]string{
   562  		{"<p nowrap>", "p", "nowrap"},
   563  		{"<p nowrap >", "p", "nowrap"},
   564  		{"<input checked/>", "input", "checked"},
   565  		{"<input checked />", "input", "checked"},
   566  	}
   567  	for _, test := range tests {
   568  		d := NewDecoder(strings.NewReader(test[0]))
   569  		d.Strict = false
   570  		token, err := d.Token()
   571  		if _, ok := err.(*SyntaxError); ok {
   572  			t.Errorf("Unexpected error: %v", err)
   573  		}
   574  		if token.(StartElement).Name.Local != test[1] {
   575  			t.Errorf("Unexpected tag name: %v", token.(StartElement).Name.Local)
   576  		}
   577  		attr := token.(StartElement).Attr[0]
   578  		if attr.Value != test[2] {
   579  			t.Errorf("Unexpected attribute value: %v", attr.Value)
   580  		}
   581  		if attr.Name.Local != test[2] {
   582  			t.Errorf("Unexpected attribute name: %v", attr.Name.Local)
   583  		}
   584  	}
   585  }
   586  
   587  func TestCopyTokenCharData(t *testing.T) {
   588  	data := []byte("same data")
   589  	var tok1 Token = CharData(data)
   590  	tok2 := CopyToken(tok1)
   591  	if !reflect.DeepEqual(tok1, tok2) {
   592  		t.Error("CopyToken(CharData) != CharData")
   593  	}
   594  	data[1] = 'o'
   595  	if reflect.DeepEqual(tok1, tok2) {
   596  		t.Error("CopyToken(CharData) uses same buffer.")
   597  	}
   598  }
   599  
   600  func TestCopyTokenStartElement(t *testing.T) {
   601  	elt := StartElement{Name{"", "hello"}, []Attr{{Name{"", "lang"}, "en"}}}
   602  	var tok1 Token = elt
   603  	tok2 := CopyToken(tok1)
   604  	if tok1.(StartElement).Attr[0].Value != "en" {
   605  		t.Error("CopyToken overwrote Attr[0]")
   606  	}
   607  	if !reflect.DeepEqual(tok1, tok2) {
   608  		t.Error("CopyToken(StartElement) != StartElement")
   609  	}
   610  	tok1.(StartElement).Attr[0] = Attr{Name{"", "lang"}, "de"}
   611  	if reflect.DeepEqual(tok1, tok2) {
   612  		t.Error("CopyToken(CharData) uses same buffer.")
   613  	}
   614  }
   615  
   616  func TestSyntaxErrorLineNum(t *testing.T) {
   617  	testInput := "<P>Foo<P>\n\n<P>Bar</>\n"
   618  	d := NewDecoder(strings.NewReader(testInput))
   619  	var err error
   620  	for _, err = d.Token(); err == nil; _, err = d.Token() {
   621  	}
   622  	synerr, ok := err.(*SyntaxError)
   623  	if !ok {
   624  		t.Error("Expected SyntaxError.")
   625  	}
   626  	if synerr.Line != 3 {
   627  		t.Error("SyntaxError didn't have correct line number.")
   628  	}
   629  }
   630  
   631  func TestTrailingRawToken(t *testing.T) {
   632  	input := `<FOO></FOO>  `
   633  	d := NewDecoder(strings.NewReader(input))
   634  	var err error
   635  	for _, err = d.RawToken(); err == nil; _, err = d.RawToken() {
   636  	}
   637  	if err != io.EOF {
   638  		t.Fatalf("d.RawToken() = _, %v, want _, io.EOF", err)
   639  	}
   640  }
   641  
   642  func TestTrailingToken(t *testing.T) {
   643  	input := `<FOO></FOO>  `
   644  	d := NewDecoder(strings.NewReader(input))
   645  	var err error
   646  	for _, err = d.Token(); err == nil; _, err = d.Token() {
   647  	}
   648  	if err != io.EOF {
   649  		t.Fatalf("d.Token() = _, %v, want _, io.EOF", err)
   650  	}
   651  }
   652  
   653  func TestEntityInsideCDATA(t *testing.T) {
   654  	input := `<test><![CDATA[ &val=foo ]]></test>`
   655  	d := NewDecoder(strings.NewReader(input))
   656  	var err error
   657  	for _, err = d.Token(); err == nil; _, err = d.Token() {
   658  	}
   659  	if err != io.EOF {
   660  		t.Fatalf("d.Token() = _, %v, want _, io.EOF", err)
   661  	}
   662  }
   663  
// characterTests pairs an input containing a disallowed character or an
// invalid entity reference (in) with the SyntaxError message expected when
// decoding it (err). It is consumed by TestDisallowedCharacters.
var characterTests = []struct {
	in  string
	err string
}{
	{"\x12<doc/>", "illegal character code U+0012"},
	{"<?xml version=\"1.0\"?>\x0b<doc/>", "illegal character code U+000B"},
	{"\xef\xbf\xbe<doc/>", "illegal character code U+FFFE"},
	{"<?xml version=\"1.0\"?><doc>\r\n<hiya/>\x07<toots/></doc>", "illegal character code U+0007"},
	{"<?xml version=\"1.0\"?><doc \x12='value'>what's up</doc>", "expected attribute name in element"},
	{"<doc>&abc\x01;</doc>", "invalid character entity &abc (no semicolon)"},
	{"<doc>&\x01;</doc>", "invalid character entity & (no semicolon)"},
	{"<doc>&\xef\xbf\xbe;</doc>", "invalid character entity &\uFFFE;"},
	{"<doc>&hello;</doc>", "invalid character entity &hello;"},
}
   678  
   679  func TestDisallowedCharacters(t *testing.T) {
   680  
   681  	for i, tt := range characterTests {
   682  		d := NewDecoder(strings.NewReader(tt.in))
   683  		var err error
   684  
   685  		for err == nil {
   686  			_, err = d.Token()
   687  		}
   688  		synerr, ok := err.(*SyntaxError)
   689  		if !ok {
   690  			t.Fatalf("input %d d.Token() = _, %v, want _, *SyntaxError", i, err)
   691  		}
   692  		if synerr.Msg != tt.err {
   693  			t.Fatalf("input %d synerr.Msg wrong: want %q, got %q", i, tt.err, synerr.Msg)
   694  		}
   695  	}
   696  }
   697  
   698  func TestIsInCharacterRange(t *testing.T) {
   699  	invalid := []rune{
   700  		utf8.MaxRune + 1,
   701  		0xD800, // surrogate min
   702  		0xDFFF, // surrogate max
   703  		-1,
   704  	}
   705  	for _, r := range invalid {
   706  		if isInCharacterRange(r) {
   707  			t.Errorf("rune %U considered valid", r)
   708  		}
   709  	}
   710  }
   711  
// procInstTests pairs the body of an XML declaration (input) with the values
// procInst is expected to extract from it: expect[0] for "version" and
// expect[1] for "encoding". An empty string means no value is expected.
var procInstTests = []struct {
	input  string
	expect [2]string
}{
	{`version="1.0" encoding="utf-8"`, [2]string{"1.0", "utf-8"}},
	{`version="1.0" encoding='utf-8'`, [2]string{"1.0", "utf-8"}},
	{`version="1.0" encoding='utf-8' `, [2]string{"1.0", "utf-8"}},
	{`version="1.0" encoding=utf-8`, [2]string{"1.0", ""}},
	{`encoding="FOO" `, [2]string{"", "FOO"}},
}
   722  
   723  func TestProcInstEncoding(t *testing.T) {
   724  	for _, test := range procInstTests {
   725  		if got := procInst("version", test.input); got != test.expect[0] {
   726  			t.Errorf("procInst(version, %q) = %q; want %q", test.input, got, test.expect[0])
   727  		}
   728  		if got := procInst("encoding", test.input); got != test.expect[1] {
   729  			t.Errorf("procInst(encoding, %q) = %q; want %q", test.input, got, test.expect[1])
   730  		}
   731  	}
   732  }
   733  
// directivesWithCommentsInput is the input for TestDirectivesWithComments.
//
// Ensure that directives with comments include the complete
// text of any nested directives.
var directivesWithCommentsInput = `
<!DOCTYPE [<!-- a comment --><!ENTITY rdf "http://www.w3.org/1999/02/22-rdf-syntax-ns#">]>
<!DOCTYPE [<!ENTITY go "Golang"><!-- a comment-->]>
<!DOCTYPE <!-> <!> <!----> <!-->--> <!--->--> [<!ENTITY go "Golang"><!-- a comment-->]>
`
   742  
// directivesWithCommentsTokens is the token sequence
// TestDirectivesWithComments expects for directivesWithCommentsInput. The
// expected directive text omits the comments that appear in the input, and
// in the last case the spaces that surrounded them remain.
var directivesWithCommentsTokens = []Token{
	CharData("\n"),
	Directive(`DOCTYPE [<!ENTITY rdf "http://www.w3.org/1999/02/22-rdf-syntax-ns#">]`),
	CharData("\n"),
	Directive(`DOCTYPE [<!ENTITY go "Golang">]`),
	CharData("\n"),
	Directive(`DOCTYPE <!-> <!>    [<!ENTITY go "Golang">]`),
	CharData("\n"),
}
   752  
   753  func TestDirectivesWithComments(t *testing.T) {
   754  	d := NewDecoder(strings.NewReader(directivesWithCommentsInput))
   755  
   756  	for i, want := range directivesWithCommentsTokens {
   757  		have, err := d.Token()
   758  		if err != nil {
   759  			t.Fatalf("token %d: unexpected error: %s", i, err)
   760  		}
   761  		if !reflect.DeepEqual(have, want) {
   762  			t.Errorf("token %d = %#v want %#v", i, have, want)
   763  		}
   764  	}
   765  }
   766  
   767  // Writer whose Write method always returns an error.
   768  type errWriter struct{}
   769  
   770  func (errWriter) Write(p []byte) (n int, err error) { return 0, fmt.Errorf("unwritable") }
   771  
   772  func TestEscapeTextIOErrors(t *testing.T) {
   773  	expectErr := "unwritable"
   774  	err := EscapeText(errWriter{}, []byte{'A'})
   775  
   776  	if err == nil || err.Error() != expectErr {
   777  		t.Errorf("have %v, want %v", err, expectErr)
   778  	}
   779  }
   780  
   781  func TestEscapeTextInvalidChar(t *testing.T) {
   782  	input := []byte("A \x00 terminated string.")
   783  	expected := "A \uFFFD terminated string."
   784  
   785  	buff := new(bytes.Buffer)
   786  	if err := EscapeText(buff, input); err != nil {
   787  		t.Fatalf("have %v, want nil", err)
   788  	}
   789  	text := buff.String()
   790  
   791  	if text != expected {
   792  		t.Errorf("have %v, want %v", text, expected)
   793  	}
   794  }
   795  
   796  func TestIssue5880(t *testing.T) {
   797  	type T []byte
   798  	data, err := Marshal(T{192, 168, 0, 1})
   799  	if err != nil {
   800  		t.Errorf("Marshal error: %v", err)
   801  	}
   802  	if !utf8.Valid(data) {
   803  		t.Errorf("Marshal generated invalid UTF-8: %x", data)
   804  	}
   805  }
   806  
   807  func TestIssue11405(t *testing.T) {
   808  	testCases := []string{
   809  		"<root>",
   810  		"<root><foo>",
   811  		"<root><foo></foo>",
   812  	}
   813  	for _, tc := range testCases {
   814  		d := NewDecoder(strings.NewReader(tc))
   815  		var err error
   816  		for {
   817  			_, err = d.Token()
   818  			if err != nil {
   819  				break
   820  			}
   821  		}
   822  		if _, ok := err.(*SyntaxError); !ok {
   823  			t.Errorf("%s: Token: Got error %v, want SyntaxError", tc, err)
   824  		}
   825  	}
   826  }
   827  
   828  func TestIssue12417(t *testing.T) {
   829  	testCases := []struct {
   830  		s  string
   831  		ok bool
   832  	}{
   833  		{`<?xml encoding="UtF-8" version="1.0"?><root/>`, true},
   834  		{`<?xml encoding="UTF-8" version="1.0"?><root/>`, true},
   835  		{`<?xml encoding="utf-8" version="1.0"?><root/>`, true},
   836  		{`<?xml encoding="uuu-9" version="1.0"?><root/>`, false},
   837  	}
   838  	for _, tc := range testCases {
   839  		d := NewDecoder(strings.NewReader(tc.s))
   840  		var err error
   841  		for {
   842  			_, err = d.Token()
   843  			if err != nil {
   844  				if err == io.EOF {
   845  					err = nil
   846  				}
   847  				break
   848  			}
   849  		}
   850  		if err != nil && tc.ok {
   851  			t.Errorf("%q: Encoding charset: expected no error, got %s", tc.s, err)
   852  			continue
   853  		}
   854  		if err == nil && !tc.ok {
   855  			t.Errorf("%q: Encoding charset: expected error, got nil", tc.s)
   856  		}
   857  	}
   858  }
   859  
   860  func tokenMap(mapping func(t Token) Token) func(TokenReader) TokenReader {
   861  	return func(src TokenReader) TokenReader {
   862  		return mapper{
   863  			t: src,
   864  			f: mapping,
   865  		}
   866  	}
   867  }
   868  
   869  type mapper struct {
   870  	t TokenReader
   871  	f func(Token) Token
   872  }
   873  
   874  func (m mapper) Token() (Token, error) {
   875  	tok, err := m.t.Token()
   876  	if err != nil {
   877  		return nil, err
   878  	}
   879  	return m.f(tok), nil
   880  }
   881  
   882  func TestNewTokenDecoderIdempotent(t *testing.T) {
   883  	d := NewDecoder(strings.NewReader(`<br/>`))
   884  	d2 := NewTokenDecoder(d)
   885  	if d != d2 {
   886  		t.Error("NewTokenDecoder did not detect underlying Decoder")
   887  	}
   888  }
   889  
   890  func TestWrapDecoder(t *testing.T) {
   891  	d := NewDecoder(strings.NewReader(`<quote>[Re-enter Clown with a letter, and FABIAN]</quote>`))
   892  	m := tokenMap(func(t Token) Token {
   893  		switch tok := t.(type) {
   894  		case StartElement:
   895  			if tok.Name.Local == "quote" {
   896  				tok.Name.Local = "blocking"
   897  				return tok
   898  			}
   899  		case EndElement:
   900  			if tok.Name.Local == "quote" {
   901  				tok.Name.Local = "blocking"
   902  				return tok
   903  			}
   904  		}
   905  		return t
   906  	})
   907  
   908  	d = NewTokenDecoder(m(d))
   909  
   910  	o := struct {
   911  		XMLName  Name   `xml:"blocking"`
   912  		Chardata string `xml:",chardata"`
   913  	}{}
   914  
   915  	if err := d.Decode(&o); err != nil {
   916  		t.Fatal("Got unexpected error while decoding:", err)
   917  	}
   918  
   919  	if o.Chardata != "[Re-enter Clown with a letter, and FABIAN]" {
   920  		t.Fatalf("Got unexpected chardata: `%s`\n", o.Chardata)
   921  	}
   922  }
   923  
   924  type tokReader struct{}
   925  
   926  func (tokReader) Token() (Token, error) {
   927  	return StartElement{}, nil
   928  }
   929  
   930  type Failure struct{}
   931  
   932  func (Failure) UnmarshalXML(*Decoder, StartElement) error {
   933  	return nil
   934  }
   935  
   936  func TestTokenUnmarshaler(t *testing.T) {
   937  	defer func() {
   938  		if r := recover(); r != nil {
   939  			t.Error("Unexpected panic using custom token unmarshaler")
   940  		}
   941  	}()
   942  
   943  	d := NewTokenDecoder(tokReader{})
   944  	d.Decode(&Failure{})
   945  }