github.com/ice-blockchain/go/src@v0.0.0-20240403114104-1564d284e521/encoding/xml/xml_test.go (about) 1 // Copyright 2009 The Go Authors. All rights reserved. 2 // Use of this source code is governed by a BSD-style 3 // license that can be found in the LICENSE file. 4 5 package xml 6 7 import ( 8 "bytes" 9 "fmt" 10 "io" 11 "reflect" 12 "strings" 13 "testing" 14 "unicode/utf8" 15 ) 16 17 type toks struct { 18 earlyEOF bool 19 t []Token 20 } 21 22 func (t *toks) Token() (Token, error) { 23 if len(t.t) == 0 { 24 return nil, io.EOF 25 } 26 var tok Token 27 tok, t.t = t.t[0], t.t[1:] 28 if t.earlyEOF && len(t.t) == 0 { 29 return tok, io.EOF 30 } 31 return tok, nil 32 } 33 34 func TestDecodeEOF(t *testing.T) { 35 start := StartElement{Name: Name{Local: "test"}} 36 tests := []struct { 37 name string 38 tokens []Token 39 ok bool 40 }{ 41 { 42 name: "OK", 43 tokens: []Token{ 44 start, 45 start.End(), 46 }, 47 ok: true, 48 }, 49 { 50 name: "Malformed", 51 tokens: []Token{ 52 start, 53 StartElement{Name: Name{Local: "bad"}}, 54 start.End(), 55 }, 56 ok: false, 57 }, 58 } 59 for _, tc := range tests { 60 for _, eof := range []bool{true, false} { 61 name := fmt.Sprintf("%s/earlyEOF=%v", tc.name, eof) 62 t.Run(name, func(t *testing.T) { 63 d := NewTokenDecoder(&toks{ 64 earlyEOF: eof, 65 t: tc.tokens, 66 }) 67 err := d.Decode(&struct { 68 XMLName Name `xml:"test"` 69 }{}) 70 if tc.ok && err != nil { 71 t.Fatalf("d.Decode: expected nil error, got %v", err) 72 } 73 if _, ok := err.(*SyntaxError); !tc.ok && !ok { 74 t.Errorf("d.Decode: expected syntax error, got %v", err) 75 } 76 }) 77 } 78 } 79 } 80 81 type toksNil struct { 82 returnEOF bool 83 t []Token 84 } 85 86 func (t *toksNil) Token() (Token, error) { 87 if len(t.t) == 0 { 88 if !t.returnEOF { 89 // Return nil, nil before returning an EOF. It's legal, but 90 // discouraged. 
91 t.returnEOF = true 92 return nil, nil 93 } 94 return nil, io.EOF 95 } 96 var tok Token 97 tok, t.t = t.t[0], t.t[1:] 98 return tok, nil 99 } 100 101 func TestDecodeNilToken(t *testing.T) { 102 for _, strict := range []bool{true, false} { 103 name := fmt.Sprintf("Strict=%v", strict) 104 t.Run(name, func(t *testing.T) { 105 start := StartElement{Name: Name{Local: "test"}} 106 bad := StartElement{Name: Name{Local: "bad"}} 107 d := NewTokenDecoder(&toksNil{ 108 // Malformed 109 t: []Token{start, bad, start.End()}, 110 }) 111 d.Strict = strict 112 err := d.Decode(&struct { 113 XMLName Name `xml:"test"` 114 }{}) 115 if _, ok := err.(*SyntaxError); !ok { 116 t.Errorf("d.Decode: expected syntax error, got %v", err) 117 } 118 }) 119 } 120 } 121 122 const testInput = ` 123 <?xml version="1.0" encoding="UTF-8"?> 124 <!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" 125 "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd"> 126 <body xmlns:foo="ns1" xmlns="ns2" xmlns:tag="ns3" ` + 127 "\r\n\t" + ` > 128 <hello lang="en">World <>'" 白鵬翔</hello> 129 <query>&何; &is-it;</query> 130 <goodbye /> 131 <outer foo:attr="value" xmlns:tag="ns4"> 132 <inner/> 133 </outer> 134 <tag:name> 135 <![CDATA[Some text here.]]> 136 </tag:name> 137 </body><!-- missing final newline -->` 138 139 var testEntity = map[string]string{"何": "What", "is-it": "is it?"} 140 141 var rawTokens = []Token{ 142 CharData("\n"), 143 ProcInst{"xml", []byte(`version="1.0" encoding="UTF-8"`)}, 144 CharData("\n"), 145 Directive(`DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" 146 "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd"`), 147 CharData("\n"), 148 StartElement{Name{"", "body"}, []Attr{{Name{"xmlns", "foo"}, "ns1"}, {Name{"", "xmlns"}, "ns2"}, {Name{"xmlns", "tag"}, "ns3"}}}, 149 CharData("\n "), 150 StartElement{Name{"", "hello"}, []Attr{{Name{"", "lang"}, "en"}}}, 151 CharData("World <>'\" 白鵬翔"), 152 EndElement{Name{"", "hello"}}, 153 CharData("\n "), 154 
StartElement{Name{"", "query"}, []Attr{}}, 155 CharData("What is it?"), 156 EndElement{Name{"", "query"}}, 157 CharData("\n "), 158 StartElement{Name{"", "goodbye"}, []Attr{}}, 159 EndElement{Name{"", "goodbye"}}, 160 CharData("\n "), 161 StartElement{Name{"", "outer"}, []Attr{{Name{"foo", "attr"}, "value"}, {Name{"xmlns", "tag"}, "ns4"}}}, 162 CharData("\n "), 163 StartElement{Name{"", "inner"}, []Attr{}}, 164 EndElement{Name{"", "inner"}}, 165 CharData("\n "), 166 EndElement{Name{"", "outer"}}, 167 CharData("\n "), 168 StartElement{Name{"tag", "name"}, []Attr{}}, 169 CharData("\n "), 170 CharData("Some text here."), 171 CharData("\n "), 172 EndElement{Name{"tag", "name"}}, 173 CharData("\n"), 174 EndElement{Name{"", "body"}}, 175 Comment(" missing final newline "), 176 } 177 178 var cookedTokens = []Token{ 179 CharData("\n"), 180 ProcInst{"xml", []byte(`version="1.0" encoding="UTF-8"`)}, 181 CharData("\n"), 182 Directive(`DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" 183 "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd"`), 184 CharData("\n"), 185 StartElement{Name{"ns2", "body"}, []Attr{{Name{"xmlns", "foo"}, "ns1"}, {Name{"", "xmlns"}, "ns2"}, {Name{"xmlns", "tag"}, "ns3"}}}, 186 CharData("\n "), 187 StartElement{Name{"ns2", "hello"}, []Attr{{Name{"", "lang"}, "en"}}}, 188 CharData("World <>'\" 白鵬翔"), 189 EndElement{Name{"ns2", "hello"}}, 190 CharData("\n "), 191 StartElement{Name{"ns2", "query"}, []Attr{}}, 192 CharData("What is it?"), 193 EndElement{Name{"ns2", "query"}}, 194 CharData("\n "), 195 StartElement{Name{"ns2", "goodbye"}, []Attr{}}, 196 EndElement{Name{"ns2", "goodbye"}}, 197 CharData("\n "), 198 StartElement{Name{"ns2", "outer"}, []Attr{{Name{"ns1", "attr"}, "value"}, {Name{"xmlns", "tag"}, "ns4"}}}, 199 CharData("\n "), 200 StartElement{Name{"ns2", "inner"}, []Attr{}}, 201 EndElement{Name{"ns2", "inner"}}, 202 CharData("\n "), 203 EndElement{Name{"ns2", "outer"}}, 204 CharData("\n "), 205 StartElement{Name{"ns3", "name"}, 
[]Attr{}}, 206 CharData("\n "), 207 CharData("Some text here."), 208 CharData("\n "), 209 EndElement{Name{"ns3", "name"}}, 210 CharData("\n"), 211 EndElement{Name{"ns2", "body"}}, 212 Comment(" missing final newline "), 213 } 214 215 const testInputAltEncoding = ` 216 <?xml version="1.0" encoding="x-testing-uppercase"?> 217 <TAG>VALUE</TAG>` 218 219 var rawTokensAltEncoding = []Token{ 220 CharData("\n"), 221 ProcInst{"xml", []byte(`version="1.0" encoding="x-testing-uppercase"`)}, 222 CharData("\n"), 223 StartElement{Name{"", "tag"}, []Attr{}}, 224 CharData("value"), 225 EndElement{Name{"", "tag"}}, 226 } 227 228 var xmlInput = []string{ 229 // unexpected EOF cases 230 "<", 231 "<t", 232 "<t ", 233 "<t/", 234 "<!", 235 "<!-", 236 "<!--", 237 "<!--c-", 238 "<!--c--", 239 "<!d", 240 "<t></", 241 "<t></t", 242 "<?", 243 "<?p", 244 "<t a", 245 "<t a=", 246 "<t a='", 247 "<t a=''", 248 "<t/><![", 249 "<t/><![C", 250 "<t/><![CDATA[d", 251 "<t/><![CDATA[d]", 252 "<t/><![CDATA[d]]", 253 254 // other Syntax errors 255 "<>", 256 "<t/a", 257 "<0 />", 258 "<?0 >", 259 // "<!0 >", // let the Token() caller handle 260 "</0>", 261 "<t 0=''>", 262 "<t a='&'>", 263 "<t a='<'>", 264 "<t> c;</t>", 265 "<t a>", 266 "<t a=>", 267 "<t a=v>", 268 // "<![CDATA[d]]>", // let the Token() caller handle 269 "<t></e>", 270 "<t></>", 271 "<t></t!", 272 "<t>cdata]]></t>", 273 } 274 275 func TestRawToken(t *testing.T) { 276 d := NewDecoder(strings.NewReader(testInput)) 277 d.Entity = testEntity 278 testRawToken(t, d, testInput, rawTokens) 279 } 280 281 const nonStrictInput = ` 282 <tag>non&entity</tag> 283 <tag>&unknown;entity</tag> 284 <tag>{</tag> 285 <tag>&#zzz;</tag> 286 <tag>&なまえ3;</tag> 287 <tag><-gt;</tag> 288 <tag>&;</tag> 289 <tag>&0a;</tag> 290 ` 291 292 var nonStrictTokens = []Token{ 293 CharData("\n"), 294 StartElement{Name{"", "tag"}, []Attr{}}, 295 CharData("non&entity"), 296 EndElement{Name{"", "tag"}}, 297 CharData("\n"), 298 StartElement{Name{"", "tag"}, []Attr{}}, 299 
CharData("&unknown;entity"), 300 EndElement{Name{"", "tag"}}, 301 CharData("\n"), 302 StartElement{Name{"", "tag"}, []Attr{}}, 303 CharData("{"), 304 EndElement{Name{"", "tag"}}, 305 CharData("\n"), 306 StartElement{Name{"", "tag"}, []Attr{}}, 307 CharData("&#zzz;"), 308 EndElement{Name{"", "tag"}}, 309 CharData("\n"), 310 StartElement{Name{"", "tag"}, []Attr{}}, 311 CharData("&なまえ3;"), 312 EndElement{Name{"", "tag"}}, 313 CharData("\n"), 314 StartElement{Name{"", "tag"}, []Attr{}}, 315 CharData("<-gt;"), 316 EndElement{Name{"", "tag"}}, 317 CharData("\n"), 318 StartElement{Name{"", "tag"}, []Attr{}}, 319 CharData("&;"), 320 EndElement{Name{"", "tag"}}, 321 CharData("\n"), 322 StartElement{Name{"", "tag"}, []Attr{}}, 323 CharData("&0a;"), 324 EndElement{Name{"", "tag"}}, 325 CharData("\n"), 326 } 327 328 func TestNonStrictRawToken(t *testing.T) { 329 d := NewDecoder(strings.NewReader(nonStrictInput)) 330 d.Strict = false 331 testRawToken(t, d, nonStrictInput, nonStrictTokens) 332 } 333 334 type downCaser struct { 335 t *testing.T 336 r io.ByteReader 337 } 338 339 func (d *downCaser) ReadByte() (c byte, err error) { 340 c, err = d.r.ReadByte() 341 if c >= 'A' && c <= 'Z' { 342 c += 'a' - 'A' 343 } 344 return 345 } 346 347 func (d *downCaser) Read(p []byte) (int, error) { 348 d.t.Fatalf("unexpected Read call on downCaser reader") 349 panic("unreachable") 350 } 351 352 func TestRawTokenAltEncoding(t *testing.T) { 353 d := NewDecoder(strings.NewReader(testInputAltEncoding)) 354 d.CharsetReader = func(charset string, input io.Reader) (io.Reader, error) { 355 if charset != "x-testing-uppercase" { 356 t.Fatalf("unexpected charset %q", charset) 357 } 358 return &downCaser{t, input.(io.ByteReader)}, nil 359 } 360 testRawToken(t, d, testInputAltEncoding, rawTokensAltEncoding) 361 } 362 363 func TestRawTokenAltEncodingNoConverter(t *testing.T) { 364 d := NewDecoder(strings.NewReader(testInputAltEncoding)) 365 token, err := d.RawToken() 366 if token == nil { 367 
t.Fatalf("expected a token on first RawToken call") 368 } 369 if err != nil { 370 t.Fatal(err) 371 } 372 token, err = d.RawToken() 373 if token != nil { 374 t.Errorf("expected a nil token; got %#v", token) 375 } 376 if err == nil { 377 t.Fatalf("expected an error on second RawToken call") 378 } 379 const encoding = "x-testing-uppercase" 380 if !strings.Contains(err.Error(), encoding) { 381 t.Errorf("expected error to contain %q; got error: %v", 382 encoding, err) 383 } 384 } 385 386 func testRawToken(t *testing.T, d *Decoder, raw string, rawTokens []Token) { 387 lastEnd := int64(0) 388 for i, want := range rawTokens { 389 start := d.InputOffset() 390 have, err := d.RawToken() 391 end := d.InputOffset() 392 if err != nil { 393 t.Fatalf("token %d: unexpected error: %s", i, err) 394 } 395 if !reflect.DeepEqual(have, want) { 396 var shave, swant string 397 if _, ok := have.(CharData); ok { 398 shave = fmt.Sprintf("CharData(%q)", have) 399 } else { 400 shave = fmt.Sprintf("%#v", have) 401 } 402 if _, ok := want.(CharData); ok { 403 swant = fmt.Sprintf("CharData(%q)", want) 404 } else { 405 swant = fmt.Sprintf("%#v", want) 406 } 407 t.Errorf("token %d = %s, want %s", i, shave, swant) 408 } 409 410 // Check that InputOffset returned actual token. 411 switch { 412 case start < lastEnd: 413 t.Errorf("token %d: position [%d,%d) for %T is before previous token", i, start, end, have) 414 case start >= end: 415 // Special case: EndElement can be synthesized. 
416 if start == end && end == lastEnd { 417 break 418 } 419 t.Errorf("token %d: position [%d,%d) for %T is empty", i, start, end, have) 420 case end > int64(len(raw)): 421 t.Errorf("token %d: position [%d,%d) for %T extends beyond input", i, start, end, have) 422 default: 423 text := raw[start:end] 424 if strings.ContainsAny(text, "<>") && (!strings.HasPrefix(text, "<") || !strings.HasSuffix(text, ">")) { 425 t.Errorf("token %d: misaligned raw token %#q for %T", i, text, have) 426 } 427 } 428 lastEnd = end 429 } 430 } 431 432 // Ensure that directives (specifically !DOCTYPE) include the complete 433 // text of any nested directives, noting that < and > do not change 434 // nesting depth if they are in single or double quotes. 435 436 var nestedDirectivesInput = ` 437 <!DOCTYPE [<!ENTITY rdf "http://www.w3.org/1999/02/22-rdf-syntax-ns#">]> 438 <!DOCTYPE [<!ENTITY xlt ">">]> 439 <!DOCTYPE [<!ENTITY xlt "<">]> 440 <!DOCTYPE [<!ENTITY xlt '>'>]> 441 <!DOCTYPE [<!ENTITY xlt '<'>]> 442 <!DOCTYPE [<!ENTITY xlt '">'>]> 443 <!DOCTYPE [<!ENTITY xlt "'<">]> 444 ` 445 446 var nestedDirectivesTokens = []Token{ 447 CharData("\n"), 448 Directive(`DOCTYPE [<!ENTITY rdf "http://www.w3.org/1999/02/22-rdf-syntax-ns#">]`), 449 CharData("\n"), 450 Directive(`DOCTYPE [<!ENTITY xlt ">">]`), 451 CharData("\n"), 452 Directive(`DOCTYPE [<!ENTITY xlt "<">]`), 453 CharData("\n"), 454 Directive(`DOCTYPE [<!ENTITY xlt '>'>]`), 455 CharData("\n"), 456 Directive(`DOCTYPE [<!ENTITY xlt '<'>]`), 457 CharData("\n"), 458 Directive(`DOCTYPE [<!ENTITY xlt '">'>]`), 459 CharData("\n"), 460 Directive(`DOCTYPE [<!ENTITY xlt "'<">]`), 461 CharData("\n"), 462 } 463 464 func TestNestedDirectives(t *testing.T) { 465 d := NewDecoder(strings.NewReader(nestedDirectivesInput)) 466 467 for i, want := range nestedDirectivesTokens { 468 have, err := d.Token() 469 if err != nil { 470 t.Fatalf("token %d: unexpected error: %s", i, err) 471 } 472 if !reflect.DeepEqual(have, want) { 473 t.Errorf("token %d = %#v want 
%#v", i, have, want) 474 } 475 } 476 } 477 478 func TestToken(t *testing.T) { 479 d := NewDecoder(strings.NewReader(testInput)) 480 d.Entity = testEntity 481 482 for i, want := range cookedTokens { 483 have, err := d.Token() 484 if err != nil { 485 t.Fatalf("token %d: unexpected error: %s", i, err) 486 } 487 if !reflect.DeepEqual(have, want) { 488 t.Errorf("token %d = %#v want %#v", i, have, want) 489 } 490 } 491 } 492 493 func TestSyntax(t *testing.T) { 494 for i := range xmlInput { 495 d := NewDecoder(strings.NewReader(xmlInput[i])) 496 var err error 497 for _, err = d.Token(); err == nil; _, err = d.Token() { 498 } 499 if _, ok := err.(*SyntaxError); !ok { 500 t.Fatalf(`xmlInput "%s": expected SyntaxError not received`, xmlInput[i]) 501 } 502 } 503 } 504 505 func TestInputLinePos(t *testing.T) { 506 testInput := `<root> 507 <?pi 508 ?> <elt 509 att 510 = 511 "val"> 512 <![CDATA[ 513 ]]><!-- 514 515 --></elt> 516 </root>` 517 linePos := [][]int{ 518 {1, 7}, 519 {2, 1}, 520 {3, 4}, 521 {3, 6}, 522 {6, 7}, 523 {7, 1}, 524 {8, 4}, 525 {10, 4}, 526 {10, 10}, 527 {11, 1}, 528 {11, 8}, 529 } 530 dec := NewDecoder(strings.NewReader(testInput)) 531 for _, want := range linePos { 532 if _, err := dec.Token(); err != nil { 533 t.Errorf("Unexpected error: %v", err) 534 continue 535 } 536 537 gotLine, gotCol := dec.InputPos() 538 if gotLine != want[0] || gotCol != want[1] { 539 t.Errorf("dec.InputPos() = %d,%d, want %d,%d", gotLine, gotCol, want[0], want[1]) 540 } 541 } 542 } 543 544 type allScalars struct { 545 True1 bool 546 True2 bool 547 False1 bool 548 False2 bool 549 Int int 550 Int8 int8 551 Int16 int16 552 Int32 int32 553 Int64 int64 554 Uint int 555 Uint8 uint8 556 Uint16 uint16 557 Uint32 uint32 558 Uint64 uint64 559 Uintptr uintptr 560 Float32 float32 561 Float64 float64 562 String string 563 PtrString *string 564 } 565 566 var all = allScalars{ 567 True1: true, 568 True2: true, 569 False1: false, 570 False2: false, 571 Int: 1, 572 Int8: -2, 573 Int16: 3, 574 
Int32: -4, 575 Int64: 5, 576 Uint: 6, 577 Uint8: 7, 578 Uint16: 8, 579 Uint32: 9, 580 Uint64: 10, 581 Uintptr: 11, 582 Float32: 13.0, 583 Float64: 14.0, 584 String: "15", 585 PtrString: &sixteen, 586 } 587 588 var sixteen = "16" 589 590 const testScalarsInput = `<allscalars> 591 <True1>true</True1> 592 <True2>1</True2> 593 <False1>false</False1> 594 <False2>0</False2> 595 <Int>1</Int> 596 <Int8>-2</Int8> 597 <Int16>3</Int16> 598 <Int32>-4</Int32> 599 <Int64>5</Int64> 600 <Uint>6</Uint> 601 <Uint8>7</Uint8> 602 <Uint16>8</Uint16> 603 <Uint32>9</Uint32> 604 <Uint64>10</Uint64> 605 <Uintptr>11</Uintptr> 606 <Float>12.0</Float> 607 <Float32>13.0</Float32> 608 <Float64>14.0</Float64> 609 <String>15</String> 610 <PtrString>16</PtrString> 611 </allscalars>` 612 613 func TestAllScalars(t *testing.T) { 614 var a allScalars 615 err := Unmarshal([]byte(testScalarsInput), &a) 616 617 if err != nil { 618 t.Fatal(err) 619 } 620 if !reflect.DeepEqual(a, all) { 621 t.Errorf("have %+v want %+v", a, all) 622 } 623 } 624 625 type item struct { 626 FieldA string 627 } 628 629 func TestIssue569(t *testing.T) { 630 data := `<item><FieldA>abcd</FieldA></item>` 631 var i item 632 err := Unmarshal([]byte(data), &i) 633 634 if err != nil || i.FieldA != "abcd" { 635 t.Fatal("Expecting abcd") 636 } 637 } 638 639 func TestUnquotedAttrs(t *testing.T) { 640 data := "<tag attr=azAZ09:-_\t>" 641 d := NewDecoder(strings.NewReader(data)) 642 d.Strict = false 643 token, err := d.Token() 644 if _, ok := err.(*SyntaxError); ok { 645 t.Errorf("Unexpected error: %v", err) 646 } 647 if token.(StartElement).Name.Local != "tag" { 648 t.Errorf("Unexpected tag name: %v", token.(StartElement).Name.Local) 649 } 650 attr := token.(StartElement).Attr[0] 651 if attr.Value != "azAZ09:-_" { 652 t.Errorf("Unexpected attribute value: %v", attr.Value) 653 } 654 if attr.Name.Local != "attr" { 655 t.Errorf("Unexpected attribute name: %v", attr.Name.Local) 656 } 657 } 658 659 func TestValuelessAttrs(t *testing.T) { 660 
tests := [][3]string{ 661 {"<p nowrap>", "p", "nowrap"}, 662 {"<p nowrap >", "p", "nowrap"}, 663 {"<input checked/>", "input", "checked"}, 664 {"<input checked />", "input", "checked"}, 665 } 666 for _, test := range tests { 667 d := NewDecoder(strings.NewReader(test[0])) 668 d.Strict = false 669 token, err := d.Token() 670 if _, ok := err.(*SyntaxError); ok { 671 t.Errorf("Unexpected error: %v", err) 672 } 673 if token.(StartElement).Name.Local != test[1] { 674 t.Errorf("Unexpected tag name: %v", token.(StartElement).Name.Local) 675 } 676 attr := token.(StartElement).Attr[0] 677 if attr.Value != test[2] { 678 t.Errorf("Unexpected attribute value: %v", attr.Value) 679 } 680 if attr.Name.Local != test[2] { 681 t.Errorf("Unexpected attribute name: %v", attr.Name.Local) 682 } 683 } 684 } 685 686 func TestCopyTokenCharData(t *testing.T) { 687 data := []byte("same data") 688 var tok1 Token = CharData(data) 689 tok2 := CopyToken(tok1) 690 if !reflect.DeepEqual(tok1, tok2) { 691 t.Error("CopyToken(CharData) != CharData") 692 } 693 data[1] = 'o' 694 if reflect.DeepEqual(tok1, tok2) { 695 t.Error("CopyToken(CharData) uses same buffer.") 696 } 697 } 698 699 func TestCopyTokenStartElement(t *testing.T) { 700 elt := StartElement{Name{"", "hello"}, []Attr{{Name{"", "lang"}, "en"}}} 701 var tok1 Token = elt 702 tok2 := CopyToken(tok1) 703 if tok1.(StartElement).Attr[0].Value != "en" { 704 t.Error("CopyToken overwrote Attr[0]") 705 } 706 if !reflect.DeepEqual(tok1, tok2) { 707 t.Error("CopyToken(StartElement) != StartElement") 708 } 709 tok1.(StartElement).Attr[0] = Attr{Name{"", "lang"}, "de"} 710 if reflect.DeepEqual(tok1, tok2) { 711 t.Error("CopyToken(CharData) uses same buffer.") 712 } 713 } 714 715 func TestCopyTokenComment(t *testing.T) { 716 data := []byte("<!-- some comment -->") 717 var tok1 Token = Comment(data) 718 tok2 := CopyToken(tok1) 719 if !reflect.DeepEqual(tok1, tok2) { 720 t.Error("CopyToken(Comment) != Comment") 721 } 722 data[1] = 'o' 723 if 
reflect.DeepEqual(tok1, tok2) { 724 t.Error("CopyToken(Comment) uses same buffer.") 725 } 726 } 727 728 func TestSyntaxErrorLineNum(t *testing.T) { 729 testInput := "<P>Foo<P>\n\n<P>Bar</>\n" 730 d := NewDecoder(strings.NewReader(testInput)) 731 var err error 732 for _, err = d.Token(); err == nil; _, err = d.Token() { 733 } 734 synerr, ok := err.(*SyntaxError) 735 if !ok { 736 t.Error("Expected SyntaxError.") 737 } 738 if synerr.Line != 3 { 739 t.Error("SyntaxError didn't have correct line number.") 740 } 741 } 742 743 func TestTrailingRawToken(t *testing.T) { 744 input := `<FOO></FOO> ` 745 d := NewDecoder(strings.NewReader(input)) 746 var err error 747 for _, err = d.RawToken(); err == nil; _, err = d.RawToken() { 748 } 749 if err != io.EOF { 750 t.Fatalf("d.RawToken() = _, %v, want _, io.EOF", err) 751 } 752 } 753 754 func TestTrailingToken(t *testing.T) { 755 input := `<FOO></FOO> ` 756 d := NewDecoder(strings.NewReader(input)) 757 var err error 758 for _, err = d.Token(); err == nil; _, err = d.Token() { 759 } 760 if err != io.EOF { 761 t.Fatalf("d.Token() = _, %v, want _, io.EOF", err) 762 } 763 } 764 765 func TestEntityInsideCDATA(t *testing.T) { 766 input := `<test><![CDATA[ &val=foo ]]></test>` 767 d := NewDecoder(strings.NewReader(input)) 768 var err error 769 for _, err = d.Token(); err == nil; _, err = d.Token() { 770 } 771 if err != io.EOF { 772 t.Fatalf("d.Token() = _, %v, want _, io.EOF", err) 773 } 774 } 775 776 var characterTests = []struct { 777 in string 778 err string 779 }{ 780 {"\x12<doc/>", "illegal character code U+0012"}, 781 {"<?xml version=\"1.0\"?>\x0b<doc/>", "illegal character code U+000B"}, 782 {"\xef\xbf\xbe<doc/>", "illegal character code U+FFFE"}, 783 {"<?xml version=\"1.0\"?><doc>\r\n<hiya/>\x07<toots/></doc>", "illegal character code U+0007"}, 784 {"<?xml version=\"1.0\"?><doc \x12='value'>what's up</doc>", "expected attribute name in element"}, 785 {"<doc>&abc\x01;</doc>", "invalid character entity &abc (no semicolon)"}, 786 
{"<doc>&\x01;</doc>", "invalid character entity & (no semicolon)"}, 787 {"<doc>&\xef\xbf\xbe;</doc>", "invalid character entity &\uFFFE;"}, 788 {"<doc>&hello;</doc>", "invalid character entity &hello;"}, 789 } 790 791 func TestDisallowedCharacters(t *testing.T) { 792 793 for i, tt := range characterTests { 794 d := NewDecoder(strings.NewReader(tt.in)) 795 var err error 796 797 for err == nil { 798 _, err = d.Token() 799 } 800 synerr, ok := err.(*SyntaxError) 801 if !ok { 802 t.Fatalf("input %d d.Token() = _, %v, want _, *SyntaxError", i, err) 803 } 804 if synerr.Msg != tt.err { 805 t.Fatalf("input %d synerr.Msg wrong: want %q, got %q", i, tt.err, synerr.Msg) 806 } 807 } 808 } 809 810 func TestIsInCharacterRange(t *testing.T) { 811 invalid := []rune{ 812 utf8.MaxRune + 1, 813 0xD800, // surrogate min 814 0xDFFF, // surrogate max 815 -1, 816 } 817 for _, r := range invalid { 818 if isInCharacterRange(r) { 819 t.Errorf("rune %U considered valid", r) 820 } 821 } 822 } 823 824 var procInstTests = []struct { 825 input string 826 expect [2]string 827 }{ 828 {`version="1.0" encoding="utf-8"`, [2]string{"1.0", "utf-8"}}, 829 {`version="1.0" encoding='utf-8'`, [2]string{"1.0", "utf-8"}}, 830 {`version="1.0" encoding='utf-8' `, [2]string{"1.0", "utf-8"}}, 831 {`version="1.0" encoding=utf-8`, [2]string{"1.0", ""}}, 832 {`encoding="FOO" `, [2]string{"", "FOO"}}, 833 {`version=2.0 version="1.0" encoding=utf-7 encoding='utf-8'`, [2]string{"1.0", "utf-8"}}, 834 {`version= encoding=`, [2]string{"", ""}}, 835 {`encoding="version=1.0"`, [2]string{"", "version=1.0"}}, 836 {``, [2]string{"", ""}}, 837 // TODO: what's the right approach to handle these nested cases? 
838 {`encoding="version='1.0'"`, [2]string{"1.0", "version='1.0'"}}, 839 {`version="encoding='utf-8'"`, [2]string{"encoding='utf-8'", "utf-8"}}, 840 } 841 842 func TestProcInstEncoding(t *testing.T) { 843 for _, test := range procInstTests { 844 if got := procInst("version", test.input); got != test.expect[0] { 845 t.Errorf("procInst(version, %q) = %q; want %q", test.input, got, test.expect[0]) 846 } 847 if got := procInst("encoding", test.input); got != test.expect[1] { 848 t.Errorf("procInst(encoding, %q) = %q; want %q", test.input, got, test.expect[1]) 849 } 850 } 851 } 852 853 // Ensure that directives with comments include the complete 854 // text of any nested directives. 855 856 var directivesWithCommentsInput = ` 857 <!DOCTYPE [<!-- a comment --><!ENTITY rdf "http://www.w3.org/1999/02/22-rdf-syntax-ns#">]> 858 <!DOCTYPE [<!ENTITY go "Golang"><!-- a comment-->]> 859 <!DOCTYPE <!-> <!> <!----> <!-->--> <!--->--> [<!ENTITY go "Golang"><!-- a comment-->]> 860 ` 861 862 var directivesWithCommentsTokens = []Token{ 863 CharData("\n"), 864 Directive(`DOCTYPE [ <!ENTITY rdf "http://www.w3.org/1999/02/22-rdf-syntax-ns#">]`), 865 CharData("\n"), 866 Directive(`DOCTYPE [<!ENTITY go "Golang"> ]`), 867 CharData("\n"), 868 Directive(`DOCTYPE <!-> <!> [<!ENTITY go "Golang"> ]`), 869 CharData("\n"), 870 } 871 872 func TestDirectivesWithComments(t *testing.T) { 873 d := NewDecoder(strings.NewReader(directivesWithCommentsInput)) 874 875 for i, want := range directivesWithCommentsTokens { 876 have, err := d.Token() 877 if err != nil { 878 t.Fatalf("token %d: unexpected error: %s", i, err) 879 } 880 if !reflect.DeepEqual(have, want) { 881 t.Errorf("token %d = %#v want %#v", i, have, want) 882 } 883 } 884 } 885 886 // Writer whose Write method always returns an error. 
887 type errWriter struct{} 888 889 func (errWriter) Write(p []byte) (n int, err error) { return 0, fmt.Errorf("unwritable") } 890 891 func TestEscapeTextIOErrors(t *testing.T) { 892 expectErr := "unwritable" 893 err := EscapeText(errWriter{}, []byte{'A'}) 894 895 if err == nil || err.Error() != expectErr { 896 t.Errorf("have %v, want %v", err, expectErr) 897 } 898 } 899 900 func TestEscapeTextInvalidChar(t *testing.T) { 901 input := []byte("A \x00 terminated string.") 902 expected := "A \uFFFD terminated string." 903 904 buff := new(strings.Builder) 905 if err := EscapeText(buff, input); err != nil { 906 t.Fatalf("have %v, want nil", err) 907 } 908 text := buff.String() 909 910 if text != expected { 911 t.Errorf("have %v, want %v", text, expected) 912 } 913 } 914 915 func TestIssue5880(t *testing.T) { 916 type T []byte 917 data, err := Marshal(T{192, 168, 0, 1}) 918 if err != nil { 919 t.Errorf("Marshal error: %v", err) 920 } 921 if !utf8.Valid(data) { 922 t.Errorf("Marshal generated invalid UTF-8: %x", data) 923 } 924 } 925 926 func TestIssue8535(t *testing.T) { 927 928 type ExampleConflict struct { 929 XMLName Name `xml:"example"` 930 Link string `xml:"link"` 931 AtomLink string `xml:"http://www.w3.org/2005/Atom link"` // Same name in a different name space 932 } 933 testCase := `<example> 934 <title>Example</title> 935 <link>http://example.com/default</link> <!-- not assigned --> 936 <link>http://example.com/home</link> <!-- not assigned --> 937 <ns:link xmlns:ns="http://www.w3.org/2005/Atom">http://example.com/ns</ns:link> 938 </example>` 939 940 var dest ExampleConflict 941 d := NewDecoder(strings.NewReader(testCase)) 942 if err := d.Decode(&dest); err != nil { 943 t.Fatal(err) 944 } 945 } 946 947 func TestEncodeXMLNS(t *testing.T) { 948 testCases := []struct { 949 f func() ([]byte, error) 950 want string 951 ok bool 952 }{ 953 {encodeXMLNS1, `<Test xmlns="http://example.com/ns"><Body>hello world</Body></Test>`, true}, 954 {encodeXMLNS2, `<Test><body 
xmlns="http://example.com/ns">hello world</body></Test>`, true}, 955 {encodeXMLNS3, `<Test xmlns="http://example.com/ns"><Body>hello world</Body></Test>`, true}, 956 {encodeXMLNS4, `<Test xmlns="http://example.com/ns"><Body>hello world</Body></Test>`, false}, 957 } 958 959 for i, tc := range testCases { 960 if b, err := tc.f(); err == nil { 961 if got, want := string(b), tc.want; got != want { 962 t.Errorf("%d: got %s, want %s \n", i, got, want) 963 } 964 } else { 965 t.Errorf("%d: marshal failed with %s", i, err) 966 } 967 } 968 } 969 970 func encodeXMLNS1() ([]byte, error) { 971 972 type T struct { 973 XMLName Name `xml:"Test"` 974 Ns string `xml:"xmlns,attr"` 975 Body string 976 } 977 978 s := &T{Ns: "http://example.com/ns", Body: "hello world"} 979 return Marshal(s) 980 } 981 982 func encodeXMLNS2() ([]byte, error) { 983 984 type Test struct { 985 Body string `xml:"http://example.com/ns body"` 986 } 987 988 s := &Test{Body: "hello world"} 989 return Marshal(s) 990 } 991 992 func encodeXMLNS3() ([]byte, error) { 993 994 type Test struct { 995 XMLName Name `xml:"http://example.com/ns Test"` 996 Body string 997 } 998 999 //s := &Test{XMLName: Name{"http://example.com/ns",""}, Body: "hello world"} is unusable as the "-" is missing 1000 // as documentation states 1001 s := &Test{Body: "hello world"} 1002 return Marshal(s) 1003 } 1004 1005 func encodeXMLNS4() ([]byte, error) { 1006 1007 type Test struct { 1008 Ns string `xml:"xmlns,attr"` 1009 Body string 1010 } 1011 1012 s := &Test{Ns: "http://example.com/ns", Body: "hello world"} 1013 return Marshal(s) 1014 } 1015 1016 func TestIssue11405(t *testing.T) { 1017 testCases := []string{ 1018 "<root>", 1019 "<root><foo>", 1020 "<root><foo></foo>", 1021 } 1022 for _, tc := range testCases { 1023 d := NewDecoder(strings.NewReader(tc)) 1024 var err error 1025 for { 1026 _, err = d.Token() 1027 if err != nil { 1028 break 1029 } 1030 } 1031 if _, ok := err.(*SyntaxError); !ok { 1032 t.Errorf("%s: Token: Got error %v, want 
SyntaxError", tc, err) 1033 } 1034 } 1035 } 1036 1037 func TestIssue12417(t *testing.T) { 1038 testCases := []struct { 1039 s string 1040 ok bool 1041 }{ 1042 {`<?xml encoding="UtF-8" version="1.0"?><root/>`, true}, 1043 {`<?xml encoding="UTF-8" version="1.0"?><root/>`, true}, 1044 {`<?xml encoding="utf-8" version="1.0"?><root/>`, true}, 1045 {`<?xml encoding="uuu-9" version="1.0"?><root/>`, false}, 1046 } 1047 for _, tc := range testCases { 1048 d := NewDecoder(strings.NewReader(tc.s)) 1049 var err error 1050 for { 1051 _, err = d.Token() 1052 if err != nil { 1053 if err == io.EOF { 1054 err = nil 1055 } 1056 break 1057 } 1058 } 1059 if err != nil && tc.ok { 1060 t.Errorf("%q: Encoding charset: expected no error, got %s", tc.s, err) 1061 continue 1062 } 1063 if err == nil && !tc.ok { 1064 t.Errorf("%q: Encoding charset: expected error, got nil", tc.s) 1065 } 1066 } 1067 } 1068 1069 func TestIssue7113(t *testing.T) { 1070 type C struct { 1071 XMLName Name `xml:""` // Sets empty namespace 1072 } 1073 1074 type D struct { 1075 XMLName Name `xml:"d"` 1076 } 1077 1078 type A struct { 1079 XMLName Name `xml:""` 1080 C C `xml:""` 1081 D D 1082 } 1083 1084 var a A 1085 structSpace := "b" 1086 xmlTest := `<A xmlns="` + structSpace + `"><C xmlns=""></C><d></d></A>` 1087 t.Log(xmlTest) 1088 err := Unmarshal([]byte(xmlTest), &a) 1089 if err != nil { 1090 t.Fatal(err) 1091 } 1092 1093 if a.XMLName.Space != structSpace { 1094 t.Errorf("overidding with empty namespace: unmarshalling, got %s, want %s\n", a.XMLName.Space, structSpace) 1095 } 1096 if len(a.C.XMLName.Space) != 0 { 1097 t.Fatalf("overidding with empty namespace: unmarshalling, got %s, want empty\n", a.C.XMLName.Space) 1098 } 1099 1100 var b []byte 1101 b, err = Marshal(&a) 1102 if err != nil { 1103 t.Fatal(err) 1104 } 1105 if len(a.C.XMLName.Space) != 0 { 1106 t.Errorf("overidding with empty namespace: marshaling, got %s in C tag which should be empty\n", a.C.XMLName.Space) 1107 } 1108 if string(b) != xmlTest { 1109 
t.Fatalf("overidding with empty namespace: marshalling, got %s, want %s\n", b, xmlTest)
	}
	var c A
	err = Unmarshal(b, &c)
	if err != nil {
		t.Fatalf("second Unmarshal failed: %s", err)
	}
	if c.XMLName.Space != "b" {
		t.Errorf("overidding with empty namespace: after marshaling & unmarshaling, XML name space: got %s, want %s\n", a.XMLName.Space, structSpace)
	}
	if len(c.C.XMLName.Space) != 0 {
		t.Errorf("overidding with empty namespace: after marshaling & unmarshaling, got %s, want empty\n", a.C.XMLName.Space)
	}
}

// TestIssue20396 checks the error returned by Unmarshal for element names
// that contain an extra ':' , '=' or '&' after the namespace prefix, and
// checks that a well-formed prefixed name unmarshals without error. Both the
// self-closing and the open/close forms of each element are exercised.
func TestIssue20396(t *testing.T) {

	var attrError = UnmarshalError("XML syntax error on line 1: expected attribute name in element")

	testCases := []struct {
		s       string // XML document handed to Unmarshal
		wantErr error  // expected error; nil means success is expected
	}{
		{`<a:te:st xmlns:a="abcd"/>`, // Issue 20396
			UnmarshalError("XML syntax error on line 1: expected element name after <")},
		{`<a:te=st xmlns:a="abcd"/>`, attrError},
		{`<a:te&st xmlns:a="abcd"/>`, attrError},
		{`<a:test xmlns:a="abcd"/>`, nil},
		{`<a:te:st xmlns:a="abcd">1</a:te:st>`,
			UnmarshalError("XML syntax error on line 1: expected element name after <")},
		{`<a:te=st xmlns:a="abcd">1</a:te=st>`, attrError},
		{`<a:te&st xmlns:a="abcd">1</a:te&st>`, attrError},
		{`<a:test xmlns:a="abcd">1</a:test>`, nil},
	}

	var dest string
	for _, tc := range testCases {
		// The interface comparison is only a fast path; when it differs the
		// errors are compared by message so that distinct error values with
		// the same text still count as a match.
		if got, want := Unmarshal([]byte(tc.s), &dest), tc.wantErr; got != want {
			if got == nil {
				t.Errorf("%s: Unexpected success, want %v", tc.s, want)
			} else if want == nil {
				t.Errorf("%s: Unexpected error, got %v", tc.s, got)
			} else if got.Error() != want.Error() {
				t.Errorf("%s: got %v, want %v", tc.s, got, want)
			}
		}
	}
}

// TestIssue20685 tokenizes documents whose closing tag does or does not match
// the opening tag (unclosed child, different prefix, different local name)
// and checks that an error is reported exactly for the mismatching ones.
func TestIssue20685(t *testing.T) {
	testCases := []struct {
		s  string // XML document to tokenize
		ok bool   // whether tokenizing to EOF must succeed
	}{
		{`<x:book xmlns:x="abcd" xmlns:y="abcd"><unclosetag>one</x:book>`, false},
		{`<x:book xmlns:x="abcd" xmlns:y="abcd">one</x:book>`, true},
		{`<x:book xmlns:x="abcd" xmlns:y="abcd">one</y:book>`, false},
		{`<x:book xmlns:y="abcd" xmlns:x="abcd">one</y:book>`, false},
		{`<x:book xmlns:x="abcd">one</y:book>`, false},
		{`<x:book>one</y:book>`, false},
		{`<xbook>one</ybook>`, false},
	}
	for _, tc := range testCases {
		d := NewDecoder(strings.NewReader(tc.s))
		var err error
		// Drain the decoder; io.EOF marks a clean end and is not a failure.
		for {
			_, err = d.Token()
			if err != nil {
				if err == io.EOF {
					err = nil
				}
				break
			}
		}
		if err != nil && tc.ok {
			t.Errorf("%q: Closing tag with namespace : expected no error, got %s", tc.s, err)
			continue
		}
		if err == nil && !tc.ok {
			t.Errorf("%q: Closing tag with namespace : expected error, got nil", tc.s)
		}
	}
}

// tokenMap returns a decorator that wraps a TokenReader so that every token
// it yields is first passed through mapping.
func tokenMap(mapping func(t Token) Token) func(TokenReader) TokenReader {
	return func(src TokenReader) TokenReader {
		return mapper{
			t: src,
			f: mapping,
		}
	}
}

// mapper is a TokenReader that applies f to each token read from t.
type mapper struct {
	t TokenReader       // underlying token source
	f func(Token) Token // transformation applied to each token
}

// Token reads the next token from the underlying reader and returns it after
// applying the mapping function. If the underlying reader reports an error,
// the token is dropped and (nil, err) is returned without calling f.
func (m mapper) Token() (Token, error) {
	tok, err := m.t.Token()
	if err != nil {
		return nil, err
	}
	return m.f(tok), nil
}

// TestNewTokenDecoderIdempotent checks that NewTokenDecoder returns the very
// same *Decoder when it is handed one instead of wrapping it again.
func TestNewTokenDecoderIdempotent(t *testing.T) {
	d := NewDecoder(strings.NewReader(`<br>`))
	d2 := NewTokenDecoder(d)
	if d != d2 {
		t.Error("NewTokenDecoder did not detect underlying Decoder")
	}
}

// TestWrapDecoder decodes through a mapper that renames <quote> start and end
// elements to <blocking>, and checks that a struct expecting a "blocking"
// element receives the original character data.
func TestWrapDecoder(t *testing.T) {
	d := NewDecoder(strings.NewReader(`<quote>[Re-enter Clown with a letter, and FABIAN]</quote>`))
	m := tokenMap(func(t Token) Token {
		switch tok := t.(type) {
		case StartElement:
			if tok.Name.Local == "quote" {
				tok.Name.Local = "blocking"
				return tok
			}
		case EndElement:
			if tok.Name.Local == "quote" {
				tok.Name.Local = "blocking"
				return tok
			}
		}
		return t
	})

	d = NewTokenDecoder(m(d))

	o := struct {
		XMLName  Name   `xml:"blocking"`
		Chardata string `xml:",chardata"`
	}{}

	if err := d.Decode(&o); err != nil {
		t.Fatal("Got unexpected error while decoding:", err)
	}

	if o.Chardata != "[Re-enter Clown with a letter, and FABIAN]" {
		t.Fatalf("Got unexpected chardata: `%s`\n", o.Chardata)
	}
}

// tokReader is a TokenReader whose Token method always returns an empty
// StartElement and a nil error.
type tokReader struct{}

// Token returns a zero-valued StartElement and no error on every call.
func (tokReader) Token() (Token, error) {
	return StartElement{}, nil
}

// Failure is a type with a custom UnmarshalXML method that consumes nothing.
type Failure struct{}

// UnmarshalXML ignores its arguments and reports success.
func (Failure) UnmarshalXML(*Decoder, StartElement) error {
	return nil
}

// TestTokenUnmarshaler checks that decoding into a value with a custom
// UnmarshalXML method through a token-based decoder does not panic.
func TestTokenUnmarshaler(t *testing.T) {
	defer func() {
		if r := recover(); r != nil {
			t.Error("Unexpected panic using custom token unmarshaler")
		}
	}()

	d := NewTokenDecoder(tokReader{})
	// The returned error is deliberately ignored: this test only cares that
	// Decode returns instead of panicking (see the deferred recover above).
	d.Decode(&Failure{})
}

// testRoundTrip tokenizes input, re-encodes every token with an Encoder, then
// tokenizes the encoder's output and requires that it yields exactly the same
// token sequence. Any decode/encode error or token difference is fatal.
func testRoundTrip(t *testing.T, input string) {
	// Attribute failures to the calling (sub)test rather than to this helper.
	t.Helper()

	d := NewDecoder(strings.NewReader(input))
	var tokens []Token
	var buf bytes.Buffer
	e := NewEncoder(&buf)
	for {
		tok, err := d.Token()
		if err == io.EOF {
			break
		}
		if err != nil {
			t.Fatalf("invalid input: %v", err)
		}
		if err := e.EncodeToken(tok); err != nil {
			t.Fatalf("failed to re-encode input: %v", err)
		}
		// Keep a copy of each token for the comparison pass below.
		tokens = append(tokens, CopyToken(tok))
	}
	if err := e.Flush(); err != nil {
		t.Fatal(err)
	}

	// Second pass: decode what was encoded and match it token by token
	// against the recorded sequence, consuming it from the front.
	d = NewDecoder(&buf)
	for {
		tok, err := d.Token()
		if err == io.EOF {
			break
		}
		if err != nil {
			t.Fatalf("failed to decode output: %v", err)
		}
		if len(tokens) == 0 {
			t.Fatalf("unexpected token: %#v", tok)
		}
		a, b := tokens[0], tok
		if !reflect.DeepEqual(a, b) {
			t.Fatalf("token mismatch: %#v vs %#v", a, b)
		}
		tokens = tokens[1:]
	}
	if len(tokens) > 0 {
		t.Fatalf("lost tokens: %#v", tokens)
	}
}

// TestRoundTrip runs testRoundTrip as a named subtest for each sample input.
func TestRoundTrip(t *testing.T) {
	tests := map[string]string{
		"trailing colon":         `<foo abc:="x"></foo>`,
		"comments in directives": `<!ENTITY x<!<!-- c1 [ " -->--x --> > <e></e> <!DOCTYPE xxx [ x<!-- c2 " -->--x ]>`,
	}
	for name, input := range tests {
		t.Run(name, func(t *testing.T) { testRoundTrip(t, input) })
	}
}

// TestParseErrors tokenizes each source to completion and checks that the
// terminating error contains the expected substring, or, for cases with an
// empty expectation, that tokenizing ends with io.EOF.
func TestParseErrors(t *testing.T) {
	withDefaultHeader := func(s string) string {
		return `<?xml version="1.0" encoding="UTF-8"?>` + s
	}
	tests := []struct {
		src string // document to tokenize
		err string // required substring of the error; "" means no error
	}{
		{withDefaultHeader(`</foo>`), `unexpected end element </foo>`},
		{withDefaultHeader(`<x:foo></y:foo>`), `element <foo> in space x closed by </foo> in space y`},
		{withDefaultHeader(`<? not ok ?>`), `expected target name after <?`},
		{withDefaultHeader(`<!- not ok -->`), `invalid sequence <!- not part of <!--`},
		{withDefaultHeader(`<!-? not ok -->`), `invalid sequence <!- not part of <!--`},
		{withDefaultHeader(`<![not ok]>`), `invalid <![ sequence`},
		{withDefaultHeader(`<zzz:foo xmlns:zzz="http://example.com"><bar>baz</bar></foo>`),
			`element <foo> in space zzz closed by </foo> in space ""`},
		{withDefaultHeader("\xf1"), `invalid UTF-8`},

		// Header-related errors.
		{`<?xml version="1.1" encoding="UTF-8"?>`, `unsupported version "1.1"; only version 1.0 is supported`},

		// Cases below are for "no errors".
		{withDefaultHeader(`<?ok?>`), ``},
		{withDefaultHeader(`<?ok version="ok"?>`), ``},
	}

	for _, test := range tests {
		d := NewDecoder(strings.NewReader(test.src))
		var err error
		// Read tokens until the first error; io.EOF counts as an error here
		// and is classified below.
		for {
			_, err = d.Token()
			if err != nil {
				break
			}
		}
		if test.err == "" {
			if err != io.EOF {
				t.Errorf("parse %s: have %q error, expected none", test.src, err)
			}
			continue
		}
		// Inv: err != nil
		if err == io.EOF {
			t.Errorf("parse %s: unexpected EOF", test.src)
			continue
		}
		if !strings.Contains(err.Error(), test.err) {
			t.Errorf("parse %s: can't find %q error substring\nerror: %q", test.src, test.err, err)
			continue
		}
	}
}

// testInputHTMLAutoClose is the document shared by BenchmarkHTMLAutoClose and
// TestHTMLAutoClose: <br> elements in several spellings and closing styles,
// plus one <span> with an attribute.
const testInputHTMLAutoClose = `<?xml version="1.0" encoding="UTF-8"?>
<br>
<br/><br/>
<br><br>
<br></br>
<BR>
<BR/><BR/>
<Br></Br>
<BR><span id="test">abc</span><br/><br/>`

// BenchmarkHTMLAutoClose measures tokenizing testInputHTMLAutoClose with a
// non-strict decoder configured with HTMLAutoClose and HTMLEntity, running
// iterations in parallel via b.RunParallel.
func BenchmarkHTMLAutoClose(b *testing.B) {
	b.RunParallel(func(p *testing.PB) {
		for p.Next() {
			d := NewDecoder(strings.NewReader(testInputHTMLAutoClose))
			d.Strict = false
			d.AutoClose = HTMLAutoClose
			d.Entity = HTMLEntity
			for {
				_, err := d.Token()
				if err == io.EOF {
					break
				}
				if err != nil {
					// This body runs on goroutines created by RunParallel,
					// where Fatal/FailNow must not be called. Record the
					// failure with Errorf and stop this worker instead.
					b.Errorf("unexpected error: %v", err)
					return
				}
			}
		}
	})
}

// TestHTMLAutoClose tokenizes testInputHTMLAutoClose with a non-strict
// decoder using HTMLAutoClose and HTMLEntity and compares the resulting
// tokens with the expected sequence, in which every <br> spelling is followed
// by a matching end element.
func TestHTMLAutoClose(t *testing.T) {
	// Attr slices are written as empty (non-nil) literals on purpose: the
	// comparison below uses reflect.DeepEqual, which distinguishes nil from
	// empty slices.
	wantTokens := []Token{
		ProcInst{"xml", []byte(`version="1.0" encoding="UTF-8"`)},
		CharData("\n"),
		StartElement{Name{"", "br"}, []Attr{}},
		EndElement{Name{"", "br"}},
		CharData("\n"),
		StartElement{Name{"", "br"}, []Attr{}},
		EndElement{Name{"", "br"}},
		StartElement{Name{"", "br"}, []Attr{}},
		EndElement{Name{"", "br"}},
		CharData("\n"),
		StartElement{Name{"", "br"}, []Attr{}},
		EndElement{Name{"", "br"}},
		StartElement{Name{"", "br"}, []Attr{}},
		EndElement{Name{"", "br"}},
		CharData("\n"),
		StartElement{Name{"", "br"}, []Attr{}},
		EndElement{Name{"", "br"}},
		CharData("\n"),
		StartElement{Name{"", "BR"}, []Attr{}},
		EndElement{Name{"", "BR"}},
		CharData("\n"),
		StartElement{Name{"", "BR"}, []Attr{}},
		EndElement{Name{"", "BR"}},
		StartElement{Name{"", "BR"}, []Attr{}},
		EndElement{Name{"", "BR"}},
		CharData("\n"),
		StartElement{Name{"", "Br"}, []Attr{}},
		EndElement{Name{"", "Br"}},
		CharData("\n"),
		StartElement{Name{"", "BR"}, []Attr{}},
		EndElement{Name{"", "BR"}},
		StartElement{Name{"", "span"}, []Attr{{Name: Name{"", "id"}, Value: "test"}}},
		CharData("abc"),
		EndElement{Name{"", "span"}},
		StartElement{Name{"", "br"}, []Attr{}},
		EndElement{Name{"", "br"}},
		StartElement{Name{"", "br"}, []Attr{}},
		EndElement{Name{"", "br"}},
	}

	d := NewDecoder(strings.NewReader(testInputHTMLAutoClose))
	d.Strict = false
	d.AutoClose = HTMLAutoClose
	d.Entity = HTMLEntity
	var haveTokens []Token
	for {
		tok, err := d.Token()
		if err != nil {
			if err == io.EOF {
				break
			}
			t.Fatalf("unexpected error: %v", err)
		}
		// Store a copy of each token before requesting the next one.
		haveTokens = append(haveTokens, CopyToken(tok))
	}
	if len(haveTokens) != len(wantTokens) {
		t.Errorf("tokens count mismatch: have %d, want %d", len(haveTokens), len(wantTokens))
	}
	// Report every position that differs rather than stopping at the first.
	for i, want := range wantTokens {
		if i >= len(haveTokens) {
			t.Errorf("token[%d] expected %#v, have no token", i, want)
		} else {
			have := haveTokens[i]
			if !reflect.DeepEqual(have, want) {
				t.Errorf("token[%d] mismatch:\nhave: %#v\nwant: %#v", i, have, want)
			}
		}
	}
}