github.com/guyezi/gofrontend@v0.0.0-20200228202240-7a62a49e62c0/libgo/go/encoding/xml/xml_test.go (about) 1 // Copyright 2009 The Go Authors. All rights reserved. 2 // Use of this source code is governed by a BSD-style 3 // license that can be found in the LICENSE file. 4 5 package xml 6 7 import ( 8 "bytes" 9 "fmt" 10 "io" 11 "reflect" 12 "strings" 13 "testing" 14 "unicode/utf8" 15 ) 16 17 type toks struct { 18 earlyEOF bool 19 t []Token 20 } 21 22 func (t *toks) Token() (Token, error) { 23 if len(t.t) == 0 { 24 return nil, io.EOF 25 } 26 var tok Token 27 tok, t.t = t.t[0], t.t[1:] 28 if t.earlyEOF && len(t.t) == 0 { 29 return tok, io.EOF 30 } 31 return tok, nil 32 } 33 34 func TestDecodeEOF(t *testing.T) { 35 start := StartElement{Name: Name{Local: "test"}} 36 t.Run("EarlyEOF", func(t *testing.T) { 37 d := NewTokenDecoder(&toks{earlyEOF: true, t: []Token{ 38 start, 39 start.End(), 40 }}) 41 err := d.Decode(&struct { 42 XMLName Name `xml:"test"` 43 }{}) 44 if err != nil { 45 t.Error(err) 46 } 47 }) 48 t.Run("LateEOF", func(t *testing.T) { 49 d := NewTokenDecoder(&toks{t: []Token{ 50 start, 51 start.End(), 52 }}) 53 err := d.Decode(&struct { 54 XMLName Name `xml:"test"` 55 }{}) 56 if err != nil { 57 t.Error(err) 58 } 59 }) 60 } 61 62 const testInput = ` 63 <?xml version="1.0" encoding="UTF-8"?> 64 <!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" 65 "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd"> 66 <body xmlns:foo="ns1" xmlns="ns2" xmlns:tag="ns3" ` + 67 "\r\n\t" + ` > 68 <hello lang="en">World <>'" 白鵬翔</hello> 69 <query>&何; &is-it;</query> 70 <goodbye /> 71 <outer foo:attr="value" xmlns:tag="ns4"> 72 <inner/> 73 </outer> 74 <tag:name> 75 <![CDATA[Some text here.]]> 76 </tag:name> 77 </body><!-- missing final newline -->` 78 79 var testEntity = map[string]string{"何": "What", "is-it": "is it?"} 80 81 var rawTokens = []Token{ 82 CharData("\n"), 83 ProcInst{"xml", []byte(`version="1.0" encoding="UTF-8"`)}, 84 CharData("\n"), 85 Directive(`DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" 86 "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd"`), 87 CharData("\n"), 88 StartElement{Name{"", "body"}, []Attr{{Name{"xmlns", "foo"}, "ns1"}, {Name{"", "xmlns"}, "ns2"}, {Name{"xmlns", "tag"}, "ns3"}}}, 89 CharData("\n "), 90 StartElement{Name{"", "hello"}, []Attr{{Name{"", "lang"}, "en"}}}, 91 CharData("World <>'\" 白鵬翔"), 92 EndElement{Name{"", "hello"}}, 93 CharData("\n "), 94 StartElement{Name{"", "query"}, []Attr{}}, 95 CharData("What is it?"), 96 EndElement{Name{"", "query"}}, 97 CharData("\n "), 98 StartElement{Name{"", "goodbye"}, []Attr{}}, 99 EndElement{Name{"", "goodbye"}}, 100 CharData("\n "), 101 StartElement{Name{"", "outer"}, []Attr{{Name{"foo", "attr"}, "value"}, {Name{"xmlns", "tag"}, "ns4"}}}, 102 CharData("\n "), 103 StartElement{Name{"", "inner"}, []Attr{}}, 104 EndElement{Name{"", "inner"}}, 105 CharData("\n "), 106 EndElement{Name{"", "outer"}}, 107 CharData("\n "), 108 StartElement{Name{"tag", "name"}, []Attr{}}, 109 CharData("\n "), 110 CharData("Some text here."), 111 CharData("\n "), 112 EndElement{Name{"tag", "name"}}, 113 CharData("\n"), 114 EndElement{Name{"", "body"}}, 115 Comment(" missing final newline "), 116 } 117 118 var cookedTokens = []Token{ 119 CharData("\n"), 120 ProcInst{"xml", []byte(`version="1.0" encoding="UTF-8"`)}, 121 CharData("\n"), 122 Directive(`DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" 123 "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd"`), 124 CharData("\n"), 125 StartElement{Name{"ns2", "body"}, []Attr{{Name{"xmlns", "foo"}, "ns1"}, {Name{"", "xmlns"}, "ns2"}, {Name{"xmlns", "tag"}, "ns3"}}}, 126 CharData("\n "), 127 StartElement{Name{"ns2", "hello"}, []Attr{{Name{"", "lang"}, "en"}}}, 128 CharData("World <>'\" 白鵬翔"), 129 EndElement{Name{"ns2", "hello"}}, 130 CharData("\n "), 131 StartElement{Name{"ns2", "query"}, []Attr{}}, 132 CharData("What is it?"), 133 EndElement{Name{"ns2", "query"}}, 134 CharData("\n "), 135 StartElement{Name{"ns2", "goodbye"}, []Attr{}}, 136 EndElement{Name{"ns2", "goodbye"}}, 137 CharData("\n "), 138 StartElement{Name{"ns2", "outer"}, []Attr{{Name{"ns1", "attr"}, "value"}, {Name{"xmlns", "tag"}, "ns4"}}}, 139 CharData("\n "), 140 StartElement{Name{"ns2", "inner"}, []Attr{}}, 141 EndElement{Name{"ns2", "inner"}}, 142 CharData("\n "), 143 EndElement{Name{"ns2", "outer"}}, 144 CharData("\n "), 145 StartElement{Name{"ns3", "name"}, []Attr{}}, 146 CharData("\n "), 147 CharData("Some text here."), 148 CharData("\n "), 149 EndElement{Name{"ns3", "name"}}, 150 CharData("\n"), 151 EndElement{Name{"ns2", "body"}}, 152 Comment(" missing final newline "), 153 } 154 155 const testInputAltEncoding = ` 156 <?xml version="1.0" encoding="x-testing-uppercase"?> 157 <TAG>VALUE</TAG>` 158 159 var rawTokensAltEncoding = []Token{ 160 CharData("\n"), 161 ProcInst{"xml", []byte(`version="1.0" encoding="x-testing-uppercase"`)}, 162 CharData("\n"), 163 StartElement{Name{"", "tag"}, []Attr{}}, 164 CharData("value"), 165 EndElement{Name{"", "tag"}}, 166 } 167 168 var xmlInput = []string{ 169 // unexpected EOF cases 170 "<", 171 "<t", 172 "<t ", 173 "<t/", 174 "<!", 175 "<!-", 176 "<!--", 177 "<!--c-", 178 "<!--c--", 179 "<!d", 180 "<t></", 181 "<t></t", 182 "<?", 183 "<?p", 184 "<t a", 185 "<t a=", 186 "<t a='", 187 "<t a=''", 188 "<t/><![", 189 "<t/><![C", 190 "<t/><![CDATA[d", 191 "<t/><![CDATA[d]", 192 "<t/><![CDATA[d]]", 193 194 // other Syntax errors 195 "<>", 196 "<t/a", 197 "<0 />", 198 "<?0 >", 199 // "<!0 >", // let the Token() caller handle 200 "</0>", 201 "<t 0=''>", 202 "<t a='&'>", 203 "<t a='<'>", 204 "<t> c;</t>", 205 "<t a>", 206 "<t a=>", 207 "<t a=v>", 208 // "<![CDATA[d]]>", // let the Token() caller handle 209 "<t></e>", 210 "<t></>", 211 "<t></t!", 212 "<t>cdata]]></t>", 213 } 214 215 func TestRawToken(t *testing.T) { 216 d := NewDecoder(strings.NewReader(testInput)) 217 d.Entity = testEntity 218 testRawToken(t, d, testInput, rawTokens) 219 } 220 221 const nonStrictInput = ` 222 <tag>non&entity</tag> 223 <tag>&unknown;entity</tag> 224 <tag>{</tag> 225 <tag>&#zzz;</tag> 226 <tag>&なまえ3;</tag> 227 <tag><-gt;</tag> 228 <tag>&;</tag> 229 <tag>&0a;</tag> 230 ` 231 232 var nonStrictTokens = []Token{ 233 CharData("\n"), 234 StartElement{Name{"", "tag"}, []Attr{}}, 235 CharData("non&entity"), 236 EndElement{Name{"", "tag"}}, 237 CharData("\n"), 238 StartElement{Name{"", "tag"}, []Attr{}}, 239 CharData("&unknown;entity"), 240 EndElement{Name{"", "tag"}}, 241 CharData("\n"), 242 StartElement{Name{"", "tag"}, []Attr{}}, 243 CharData("{"), 244 EndElement{Name{"", "tag"}}, 245 CharData("\n"), 246 StartElement{Name{"", "tag"}, []Attr{}}, 247 CharData("&#zzz;"), 248 EndElement{Name{"", "tag"}}, 249 CharData("\n"), 250 StartElement{Name{"", "tag"}, []Attr{}}, 251 CharData("&なまえ3;"), 252 EndElement{Name{"", "tag"}}, 253 CharData("\n"), 254 StartElement{Name{"", "tag"}, []Attr{}}, 255 CharData("<-gt;"), 256 EndElement{Name{"", "tag"}}, 257 CharData("\n"), 258 StartElement{Name{"", "tag"}, []Attr{}}, 259 CharData("&;"), 260 EndElement{Name{"", "tag"}}, 261 CharData("\n"), 262 StartElement{Name{"", "tag"}, []Attr{}}, 263 CharData("&0a;"), 264 EndElement{Name{"", "tag"}}, 265 CharData("\n"), 266 } 267 268 func TestNonStrictRawToken(t *testing.T) { 269 d := NewDecoder(strings.NewReader(nonStrictInput)) 270 d.Strict = false 271 testRawToken(t, d, nonStrictInput, nonStrictTokens) 272 } 273 274 type downCaser struct { 275 t *testing.T 276 r io.ByteReader 277 } 278 279 func (d *downCaser) ReadByte() (c byte, err error) { 280 c, err = d.r.ReadByte() 281 if c >= 'A' && c <= 'Z' { 282 c += 'a' - 'A' 283 } 284 return 285 } 286 287 func (d *downCaser) Read(p []byte) (int, error) { 288 d.t.Fatalf("unexpected Read call on downCaser reader") 289 panic("unreachable") 290 } 291 292 func TestRawTokenAltEncoding(t *testing.T) { 293 d := NewDecoder(strings.NewReader(testInputAltEncoding)) 294 d.CharsetReader = func(charset string, input io.Reader) (io.Reader, error) { 295 if charset != "x-testing-uppercase" { 296 t.Fatalf("unexpected charset %q", charset) 297 } 298 return &downCaser{t, input.(io.ByteReader)}, nil 299 } 300 testRawToken(t, d, testInputAltEncoding, rawTokensAltEncoding) 301 } 302 303 func TestRawTokenAltEncodingNoConverter(t *testing.T) { 304 d := NewDecoder(strings.NewReader(testInputAltEncoding)) 305 token, err := d.RawToken() 306 if token == nil { 307 t.Fatalf("expected a token on first RawToken call") 308 } 309 if err != nil { 310 t.Fatal(err) 311 } 312 token, err = d.RawToken() 313 if token != nil { 314 t.Errorf("expected a nil token; got %#v", token) 315 } 316 if err == nil { 317 t.Fatalf("expected an error on second RawToken call") 318 } 319 const encoding = "x-testing-uppercase" 320 if !strings.Contains(err.Error(), encoding) { 321 t.Errorf("expected error to contain %q; got error: %v", 322 encoding, err) 323 } 324 } 325 326 func testRawToken(t *testing.T, d *Decoder, raw string, rawTokens []Token) { 327 lastEnd := int64(0) 328 for i, want := range rawTokens { 329 start := d.InputOffset() 330 have, err := d.RawToken() 331 end := d.InputOffset() 332 if err != nil { 333 t.Fatalf("token %d: unexpected error: %s", i, err) 334 } 335 if !reflect.DeepEqual(have, want) { 336 var shave, swant string 337 if _, ok := have.(CharData); ok { 338 shave = fmt.Sprintf("CharData(%q)", have) 339 } else { 340 shave = fmt.Sprintf("%#v", have) 341 } 342 if _, ok := want.(CharData); ok { 343 swant = fmt.Sprintf("CharData(%q)", want) 344 } else { 345 swant = fmt.Sprintf("%#v", want) 346 } 347 t.Errorf("token %d = %s, want %s", i, shave, swant) 348 } 349 350 // Check that InputOffset returned actual token. 351 switch { 352 case start < lastEnd: 353 t.Errorf("token %d: position [%d,%d) for %T is before previous token", i, start, end, have) 354 case start >= end: 355 // Special case: EndElement can be synthesized. 356 if start == end && end == lastEnd { 357 break 358 } 359 t.Errorf("token %d: position [%d,%d) for %T is empty", i, start, end, have) 360 case end > int64(len(raw)): 361 t.Errorf("token %d: position [%d,%d) for %T extends beyond input", i, start, end, have) 362 default: 363 text := raw[start:end] 364 if strings.ContainsAny(text, "<>") && (!strings.HasPrefix(text, "<") || !strings.HasSuffix(text, ">")) { 365 t.Errorf("token %d: misaligned raw token %#q for %T", i, text, have) 366 } 367 } 368 lastEnd = end 369 } 370 } 371 372 // Ensure that directives (specifically !DOCTYPE) include the complete 373 // text of any nested directives, noting that < and > do not change 374 // nesting depth if they are in single or double quotes. 375 376 var nestedDirectivesInput = ` 377 <!DOCTYPE [<!ENTITY rdf "http://www.w3.org/1999/02/22-rdf-syntax-ns#">]> 378 <!DOCTYPE [<!ENTITY xlt ">">]> 379 <!DOCTYPE [<!ENTITY xlt "<">]> 380 <!DOCTYPE [<!ENTITY xlt '>'>]> 381 <!DOCTYPE [<!ENTITY xlt '<'>]> 382 <!DOCTYPE [<!ENTITY xlt '">'>]> 383 <!DOCTYPE [<!ENTITY xlt "'<">]> 384 ` 385 386 var nestedDirectivesTokens = []Token{ 387 CharData("\n"), 388 Directive(`DOCTYPE [<!ENTITY rdf "http://www.w3.org/1999/02/22-rdf-syntax-ns#">]`), 389 CharData("\n"), 390 Directive(`DOCTYPE [<!ENTITY xlt ">">]`), 391 CharData("\n"), 392 Directive(`DOCTYPE [<!ENTITY xlt "<">]`), 393 CharData("\n"), 394 Directive(`DOCTYPE [<!ENTITY xlt '>'>]`), 395 CharData("\n"), 396 Directive(`DOCTYPE [<!ENTITY xlt '<'>]`), 397 CharData("\n"), 398 Directive(`DOCTYPE [<!ENTITY xlt '">'>]`), 399 CharData("\n"), 400 Directive(`DOCTYPE [<!ENTITY xlt "'<">]`), 401 CharData("\n"), 402 } 403 404 func TestNestedDirectives(t *testing.T) { 405 d := NewDecoder(strings.NewReader(nestedDirectivesInput)) 406 407 for i, want := range nestedDirectivesTokens { 408 have, err := d.Token() 409 if err != nil { 410 t.Fatalf("token %d: unexpected error: %s", i, err) 411 } 412 if !reflect.DeepEqual(have, want) { 413 t.Errorf("token %d = %#v want %#v", i, have, want) 414 } 415 } 416 } 417 418 func TestToken(t *testing.T) { 419 d := NewDecoder(strings.NewReader(testInput)) 420 d.Entity = testEntity 421 422 for i, want := range cookedTokens { 423 have, err := d.Token() 424 if err != nil { 425 t.Fatalf("token %d: unexpected error: %s", i, err) 426 } 427 if !reflect.DeepEqual(have, want) { 428 t.Errorf("token %d = %#v want %#v", i, have, want) 429 } 430 } 431 } 432 433 func TestSyntax(t *testing.T) { 434 for i := range xmlInput { 435 d := NewDecoder(strings.NewReader(xmlInput[i])) 436 var err error 437 for _, err = d.Token(); err == nil; _, err = d.Token() { 438 } 439 if _, ok := err.(*SyntaxError); !ok { 440 t.Fatalf(`xmlInput "%s": expected SyntaxError not received`, xmlInput[i]) 441 } 442 } 443 } 444 445 type allScalars struct { 446 True1 bool 447 True2 bool 448 False1 bool 449 False2 bool 450 Int int 451 Int8 int8 452 Int16 int16 453 Int32 int32 454 Int64 int64 455 Uint int 456 Uint8 uint8 457 Uint16 uint16 458 Uint32 uint32 459 Uint64 uint64 460 Uintptr uintptr 461 Float32 float32 462 Float64 float64 463 String string 464 PtrString *string 465 } 466 467 var all = allScalars{ 468 True1: true, 469 True2: true, 470 False1: false, 471 False2: false, 472 Int: 1, 473 Int8: -2, 474 Int16: 3, 475 Int32: -4, 476 Int64: 5, 477 Uint: 6, 478 Uint8: 7, 479 Uint16: 8, 480 Uint32: 9, 481 Uint64: 10, 482 Uintptr: 11, 483 Float32: 13.0, 484 Float64: 14.0, 485 String: "15", 486 PtrString: &sixteen, 487 } 488 489 var sixteen = "16" 490 491 const testScalarsInput = `<allscalars> 492 <True1>true</True1> 493 <True2>1</True2> 494 <False1>false</False1> 495 <False2>0</False2> 496 <Int>1</Int> 497 <Int8>-2</Int8> 498 <Int16>3</Int16> 499 <Int32>-4</Int32> 500 <Int64>5</Int64> 501 <Uint>6</Uint> 502 <Uint8>7</Uint8> 503 <Uint16>8</Uint16> 504 <Uint32>9</Uint32> 505 <Uint64>10</Uint64> 506 <Uintptr>11</Uintptr> 507 <Float>12.0</Float> 508 <Float32>13.0</Float32> 509 <Float64>14.0</Float64> 510 <String>15</String> 511 <PtrString>16</PtrString> 512 </allscalars>` 513 514 func TestAllScalars(t *testing.T) { 515 var a allScalars 516 err := Unmarshal([]byte(testScalarsInput), &a) 517 518 if err != nil { 519 t.Fatal(err) 520 } 521 if !reflect.DeepEqual(a, all) { 522 t.Errorf("have %+v want %+v", a, all) 523 } 524 } 525 526 type item struct { 527 FieldA string 528 } 529 530 func TestIssue569(t *testing.T) { 531 data := `<item><FieldA>abcd</FieldA></item>` 532 var i item 533 err := Unmarshal([]byte(data), &i) 534 535 if err != nil || i.FieldA != "abcd" { 536 t.Fatal("Expecting abcd") 537 } 538 } 539 540 func TestUnquotedAttrs(t *testing.T) { 541 data := "<tag attr=azAZ09:-_\t>" 542 d := NewDecoder(strings.NewReader(data)) 543 d.Strict = false 544 token, err := d.Token() 545 if _, ok := err.(*SyntaxError); ok { 546 t.Errorf("Unexpected error: %v", err) 547 } 548 if token.(StartElement).Name.Local != "tag" { 549 t.Errorf("Unexpected tag name: %v", token.(StartElement).Name.Local) 550 } 551 attr := token.(StartElement).Attr[0] 552 if attr.Value != "azAZ09:-_" { 553 t.Errorf("Unexpected attribute value: %v", attr.Value) 554 } 555 if attr.Name.Local != "attr" { 556 t.Errorf("Unexpected attribute name: %v", attr.Name.Local) 557 } 558 } 559 560 func TestValuelessAttrs(t *testing.T) { 561 tests := [][3]string{ 562 {"<p nowrap>", "p", "nowrap"}, 563 {"<p nowrap >", "p", "nowrap"}, 564 {"<input checked/>", "input", "checked"}, 565 {"<input checked />", "input", "checked"}, 566 } 567 for _, test := range tests { 568 d := NewDecoder(strings.NewReader(test[0])) 569 d.Strict = false 570 token, err := d.Token() 571 if _, ok := err.(*SyntaxError); ok { 572 t.Errorf("Unexpected error: %v", err) 573 } 574 if token.(StartElement).Name.Local != test[1] { 575 t.Errorf("Unexpected tag name: %v", token.(StartElement).Name.Local) 576 } 577 attr := token.(StartElement).Attr[0] 578 if attr.Value != test[2] { 579 t.Errorf("Unexpected attribute value: %v", attr.Value) 580 } 581 if attr.Name.Local != test[2] { 582 t.Errorf("Unexpected attribute name: %v", attr.Name.Local) 583 } 584 } 585 } 586 587 func TestCopyTokenCharData(t *testing.T) { 588 data := []byte("same data") 589 var tok1 Token = CharData(data) 590 tok2 := CopyToken(tok1) 591 if !reflect.DeepEqual(tok1, tok2) { 592 t.Error("CopyToken(CharData) != CharData") 593 } 594 data[1] = 'o' 595 if reflect.DeepEqual(tok1, tok2) { 596 t.Error("CopyToken(CharData) uses same buffer.") 597 } 598 } 599 600 func TestCopyTokenStartElement(t *testing.T) { 601 elt := StartElement{Name{"", "hello"}, []Attr{{Name{"", "lang"}, "en"}}} 602 var tok1 Token = elt 603 tok2 := CopyToken(tok1) 604 if tok1.(StartElement).Attr[0].Value != "en" { 605 t.Error("CopyToken overwrote Attr[0]") 606 } 607 if !reflect.DeepEqual(tok1, tok2) { 608 t.Error("CopyToken(StartElement) != StartElement") 609 } 610 tok1.(StartElement).Attr[0] = Attr{Name{"", "lang"}, "de"} 611 if reflect.DeepEqual(tok1, tok2) { 612 t.Error("CopyToken(CharData) uses same buffer.") 613 } 614 } 615 616 func TestSyntaxErrorLineNum(t *testing.T) { 617 testInput := "<P>Foo<P>\n\n<P>Bar</>\n" 618 d := NewDecoder(strings.NewReader(testInput)) 619 var err error 620 for _, err = d.Token(); err == nil; _, err = d.Token() { 621 } 622 synerr, ok := err.(*SyntaxError) 623 if !ok { 624 t.Error("Expected SyntaxError.") 625 } 626 if synerr.Line != 3 { 627 t.Error("SyntaxError didn't have correct line number.") 628 } 629 } 630 631 func TestTrailingRawToken(t *testing.T) { 632 input := `<FOO></FOO> ` 633 d := NewDecoder(strings.NewReader(input)) 634 var err error 635 for _, err = d.RawToken(); err == nil; _, err = d.RawToken() { 636 } 637 if err != io.EOF { 638 t.Fatalf("d.RawToken() = _, %v, want _, io.EOF", err) 639 } 640 } 641 642 func TestTrailingToken(t *testing.T) { 643 input := `<FOO></FOO> ` 644 d := NewDecoder(strings.NewReader(input)) 645 var err error 646 for _, err = d.Token(); err == nil; _, err = d.Token() { 647 } 648 if err != io.EOF { 649 t.Fatalf("d.Token() = _, %v, want _, io.EOF", err) 650 } 651 } 652 653 func TestEntityInsideCDATA(t *testing.T) { 654 input := `<test><![CDATA[ &val=foo ]]></test>` 655 d := NewDecoder(strings.NewReader(input)) 656 var err error 657 for _, err = d.Token(); err == nil; _, err = d.Token() { 658 } 659 if err != io.EOF { 660 t.Fatalf("d.Token() = _, %v, want _, io.EOF", err) 661 } 662 } 663 664 var characterTests = []struct { 665 in string 666 err string 667 }{ 668 {"\x12<doc/>", "illegal character code U+0012"}, 669 {"<?xml version=\"1.0\"?>\x0b<doc/>", "illegal character code U+000B"}, 670 {"\xef\xbf\xbe<doc/>", "illegal character code U+FFFE"}, 671 {"<?xml version=\"1.0\"?><doc>\r\n<hiya/>\x07<toots/></doc>", "illegal character code U+0007"}, 672 {"<?xml version=\"1.0\"?><doc \x12='value'>what's up</doc>", "expected attribute name in element"}, 673 {"<doc>&abc\x01;</doc>", "invalid character entity &abc (no semicolon)"}, 674 {"<doc>&\x01;</doc>", "invalid character entity & (no semicolon)"}, 675 {"<doc>&\xef\xbf\xbe;</doc>", "invalid character entity &\uFFFE;"}, 676 {"<doc>&hello;</doc>", "invalid character entity &hello;"}, 677 } 678 679 func TestDisallowedCharacters(t *testing.T) { 680 681 for i, tt := range characterTests { 682 d := NewDecoder(strings.NewReader(tt.in)) 683 var err error 684 685 for err == nil { 686 _, err = d.Token() 687 } 688 synerr, ok := err.(*SyntaxError) 689 if !ok { 690 t.Fatalf("input %d d.Token() = _, %v, want _, *SyntaxError", i, err) 691 } 692 if synerr.Msg != tt.err { 693 t.Fatalf("input %d synerr.Msg wrong: want %q, got %q", i, tt.err, synerr.Msg) 694 } 695 } 696 } 697 698 func TestIsInCharacterRange(t *testing.T) { 699 invalid := []rune{ 700 utf8.MaxRune + 1, 701 0xD800, // surrogate min 702 0xDFFF, // surrogate max 703 -1, 704 } 705 for _, r := range invalid { 706 if isInCharacterRange(r) { 707 t.Errorf("rune %U considered valid", r) 708 } 709 } 710 } 711 712 var procInstTests = []struct { 713 input string 714 expect [2]string 715 }{ 716 {`version="1.0" encoding="utf-8"`, [2]string{"1.0", "utf-8"}}, 717 {`version="1.0" encoding='utf-8'`, [2]string{"1.0", "utf-8"}}, 718 {`version="1.0" encoding='utf-8' `, [2]string{"1.0", "utf-8"}}, 719 {`version="1.0" encoding=utf-8`, [2]string{"1.0", ""}}, 720 {`encoding="FOO" `, [2]string{"", "FOO"}}, 721 } 722 723 func TestProcInstEncoding(t *testing.T) { 724 for _, test := range procInstTests { 725 if got := procInst("version", test.input); got != test.expect[0] { 726 t.Errorf("procInst(version, %q) = %q; want %q", test.input, got, test.expect[0]) 727 } 728 if got := procInst("encoding", test.input); got != test.expect[1] { 729 t.Errorf("procInst(encoding, %q) = %q; want %q", test.input, got, test.expect[1]) 730 } 731 } 732 } 733 734 // Ensure that directives with comments include the complete 735 // text of any nested directives. 736 737 var directivesWithCommentsInput = ` 738 <!DOCTYPE [<!-- a comment --><!ENTITY rdf "http://www.w3.org/1999/02/22-rdf-syntax-ns#">]> 739 <!DOCTYPE [<!ENTITY go "Golang"><!-- a comment-->]> 740 <!DOCTYPE <!-> <!> <!----> <!-->--> <!--->--> [<!ENTITY go "Golang"><!-- a comment-->]> 741 ` 742 743 var directivesWithCommentsTokens = []Token{ 744 CharData("\n"), 745 Directive(`DOCTYPE [<!ENTITY rdf "http://www.w3.org/1999/02/22-rdf-syntax-ns#">]`), 746 CharData("\n"), 747 Directive(`DOCTYPE [<!ENTITY go "Golang">]`), 748 CharData("\n"), 749 Directive(`DOCTYPE <!-> <!> [<!ENTITY go "Golang">]`), 750 CharData("\n"), 751 } 752 753 func TestDirectivesWithComments(t *testing.T) { 754 d := NewDecoder(strings.NewReader(directivesWithCommentsInput)) 755 756 for i, want := range directivesWithCommentsTokens { 757 have, err := d.Token() 758 if err != nil { 759 t.Fatalf("token %d: unexpected error: %s", i, err) 760 } 761 if !reflect.DeepEqual(have, want) { 762 t.Errorf("token %d = %#v want %#v", i, have, want) 763 } 764 } 765 } 766 767 // Writer whose Write method always returns an error. 768 type errWriter struct{} 769 770 func (errWriter) Write(p []byte) (n int, err error) { return 0, fmt.Errorf("unwritable") } 771 772 func TestEscapeTextIOErrors(t *testing.T) { 773 expectErr := "unwritable" 774 err := EscapeText(errWriter{}, []byte{'A'}) 775 776 if err == nil || err.Error() != expectErr { 777 t.Errorf("have %v, want %v", err, expectErr) 778 } 779 } 780 781 func TestEscapeTextInvalidChar(t *testing.T) { 782 input := []byte("A \x00 terminated string.") 783 expected := "A \uFFFD terminated string." 784 785 buff := new(bytes.Buffer) 786 if err := EscapeText(buff, input); err != nil { 787 t.Fatalf("have %v, want nil", err) 788 } 789 text := buff.String() 790 791 if text != expected { 792 t.Errorf("have %v, want %v", text, expected) 793 } 794 } 795 796 func TestIssue5880(t *testing.T) { 797 type T []byte 798 data, err := Marshal(T{192, 168, 0, 1}) 799 if err != nil { 800 t.Errorf("Marshal error: %v", err) 801 } 802 if !utf8.Valid(data) { 803 t.Errorf("Marshal generated invalid UTF-8: %x", data) 804 } 805 } 806 807 func TestIssue11405(t *testing.T) { 808 testCases := []string{ 809 "<root>", 810 "<root><foo>", 811 "<root><foo></foo>", 812 } 813 for _, tc := range testCases { 814 d := NewDecoder(strings.NewReader(tc)) 815 var err error 816 for { 817 _, err = d.Token() 818 if err != nil { 819 break 820 } 821 } 822 if _, ok := err.(*SyntaxError); !ok { 823 t.Errorf("%s: Token: Got error %v, want SyntaxError", tc, err) 824 } 825 } 826 } 827 828 func TestIssue12417(t *testing.T) { 829 testCases := []struct { 830 s string 831 ok bool 832 }{ 833 {`<?xml encoding="UtF-8" version="1.0"?><root/>`, true}, 834 {`<?xml encoding="UTF-8" version="1.0"?><root/>`, true}, 835 {`<?xml encoding="utf-8" version="1.0"?><root/>`, true}, 836 {`<?xml encoding="uuu-9" version="1.0"?><root/>`, false}, 837 } 838 for _, tc := range testCases { 839 d := NewDecoder(strings.NewReader(tc.s)) 840 var err error 841 for { 842 _, err = d.Token() 843 if err != nil { 844 if err == io.EOF { 845 err = nil 846 } 847 break 848 } 849 } 850 if err != nil && tc.ok { 851 t.Errorf("%q: Encoding charset: expected no error, got %s", tc.s, err) 852 continue 853 } 854 if err == nil && !tc.ok { 855 t.Errorf("%q: Encoding charset: expected error, got nil", tc.s) 856 } 857 } 858 } 859 860 func tokenMap(mapping func(t Token) Token) func(TokenReader) TokenReader { 861 return func(src TokenReader) TokenReader { 862 return mapper{ 863 t: src, 864 f: mapping, 865 } 866 } 867 } 868 869 type mapper struct { 870 t TokenReader 871 f func(Token) Token 872 } 873 874 func (m mapper) Token() (Token, error) { 875 tok, err := m.t.Token() 876 if err != nil { 877 return nil, err 878 } 879 return m.f(tok), nil 880 } 881 882 func TestNewTokenDecoderIdempotent(t *testing.T) { 883 d := NewDecoder(strings.NewReader(`<br/>`)) 884 d2 := NewTokenDecoder(d) 885 if d != d2 { 886 t.Error("NewTokenDecoder did not detect underlying Decoder") 887 } 888 } 889 890 func TestWrapDecoder(t *testing.T) { 891 d := NewDecoder(strings.NewReader(`<quote>[Re-enter Clown with a letter, and FABIAN]</quote>`)) 892 m := tokenMap(func(t Token) Token { 893 switch tok := t.(type) { 894 case StartElement: 895 if tok.Name.Local == "quote" { 896 tok.Name.Local = "blocking" 897 return tok 898 } 899 case EndElement: 900 if tok.Name.Local == "quote" { 901 tok.Name.Local = "blocking" 902 return tok 903 } 904 } 905 return t 906 }) 907 908 d = NewTokenDecoder(m(d)) 909 910 o := struct { 911 XMLName Name `xml:"blocking"` 912 Chardata string `xml:",chardata"` 913 }{} 914 915 if err := d.Decode(&o); err != nil { 916 t.Fatal("Got unexpected error while decoding:", err) 917 } 918 919 if o.Chardata != "[Re-enter Clown with a letter, and FABIAN]" { 920 t.Fatalf("Got unexpected chardata: `%s`\n", o.Chardata) 921 } 922 } 923 924 type tokReader struct{} 925 926 func (tokReader) Token() (Token, error) { 927 return StartElement{}, nil 928 } 929 930 type Failure struct{} 931 932 func (Failure) UnmarshalXML(*Decoder, StartElement) error { 933 return nil 934 } 935 936 func TestTokenUnmarshaler(t *testing.T) { 937 defer func() { 938 if r := recover(); r != nil { 939 t.Error("Unexpected panic using custom token unmarshaler") 940 } 941 }() 942 943 d := NewTokenDecoder(tokReader{}) 944 d.Decode(&Failure{}) 945 }