github.com/twelsh-aw/go/src@v0.0.0-20230516233729-a56fe86a7c81/encoding/xml/xml_test.go (about) 1 // Copyright 2009 The Go Authors. All rights reserved. 2 // Use of this source code is governed by a BSD-style 3 // license that can be found in the LICENSE file. 4 5 package xml 6 7 import ( 8 "bytes" 9 "fmt" 10 "io" 11 "reflect" 12 "strings" 13 "testing" 14 "unicode/utf8" 15 ) 16 17 type toks struct { 18 earlyEOF bool 19 t []Token 20 } 21 22 func (t *toks) Token() (Token, error) { 23 if len(t.t) == 0 { 24 return nil, io.EOF 25 } 26 var tok Token 27 tok, t.t = t.t[0], t.t[1:] 28 if t.earlyEOF && len(t.t) == 0 { 29 return tok, io.EOF 30 } 31 return tok, nil 32 } 33 34 func TestDecodeEOF(t *testing.T) { 35 start := StartElement{Name: Name{Local: "test"}} 36 tests := []struct { 37 name string 38 tokens []Token 39 ok bool 40 }{ 41 { 42 name: "OK", 43 tokens: []Token{ 44 start, 45 start.End(), 46 }, 47 ok: true, 48 }, 49 { 50 name: "Malformed", 51 tokens: []Token{ 52 start, 53 StartElement{Name: Name{Local: "bad"}}, 54 start.End(), 55 }, 56 ok: false, 57 }, 58 } 59 for _, tc := range tests { 60 for _, eof := range []bool{true, false} { 61 name := fmt.Sprintf("%s/earlyEOF=%v", tc.name, eof) 62 t.Run(name, func(t *testing.T) { 63 d := NewTokenDecoder(&toks{ 64 earlyEOF: eof, 65 t: tc.tokens, 66 }) 67 err := d.Decode(&struct { 68 XMLName Name `xml:"test"` 69 }{}) 70 if tc.ok && err != nil { 71 t.Fatalf("d.Decode: expected nil error, got %v", err) 72 } 73 if _, ok := err.(*SyntaxError); !tc.ok && !ok { 74 t.Errorf("d.Decode: expected syntax error, got %v", err) 75 } 76 }) 77 } 78 } 79 } 80 81 type toksNil struct { 82 returnEOF bool 83 t []Token 84 } 85 86 func (t *toksNil) Token() (Token, error) { 87 if len(t.t) == 0 { 88 if !t.returnEOF { 89 // Return nil, nil before returning an EOF. It's legal, but 90 // discouraged. 
91 t.returnEOF = true 92 return nil, nil 93 } 94 return nil, io.EOF 95 } 96 var tok Token 97 tok, t.t = t.t[0], t.t[1:] 98 return tok, nil 99 } 100 101 func TestDecodeNilToken(t *testing.T) { 102 for _, strict := range []bool{true, false} { 103 name := fmt.Sprintf("Strict=%v", strict) 104 t.Run(name, func(t *testing.T) { 105 start := StartElement{Name: Name{Local: "test"}} 106 bad := StartElement{Name: Name{Local: "bad"}} 107 d := NewTokenDecoder(&toksNil{ 108 // Malformed 109 t: []Token{start, bad, start.End()}, 110 }) 111 d.Strict = strict 112 err := d.Decode(&struct { 113 XMLName Name `xml:"test"` 114 }{}) 115 if _, ok := err.(*SyntaxError); !ok { 116 t.Errorf("d.Decode: expected syntax error, got %v", err) 117 } 118 }) 119 } 120 } 121 122 const testInput = ` 123 <?xml version="1.0" encoding="UTF-8"?> 124 <!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" 125 "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd"> 126 <body xmlns:foo="ns1" xmlns="ns2" xmlns:tag="ns3" ` + 127 "\r\n\t" + ` > 128 <hello lang="en">World <>'" 白鵬翔</hello> 129 <query>&何; &is-it;</query> 130 <goodbye /> 131 <outer foo:attr="value" xmlns:tag="ns4"> 132 <inner/> 133 </outer> 134 <tag:name> 135 <![CDATA[Some text here.]]> 136 </tag:name> 137 </body><!-- missing final newline -->` 138 139 var testEntity = map[string]string{"何": "What", "is-it": "is it?"} 140 141 var rawTokens = []Token{ 142 CharData("\n"), 143 ProcInst{"xml", []byte(`version="1.0" encoding="UTF-8"`)}, 144 CharData("\n"), 145 Directive(`DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" 146 "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd"`), 147 CharData("\n"), 148 StartElement{Name{"", "body"}, []Attr{{Name{"xmlns", "foo"}, "ns1"}, {Name{"", "xmlns"}, "ns2"}, {Name{"xmlns", "tag"}, "ns3"}}}, 149 CharData("\n "), 150 StartElement{Name{"", "hello"}, []Attr{{Name{"", "lang"}, "en"}}}, 151 CharData("World <>'\" 白鵬翔"), 152 EndElement{Name{"", "hello"}}, 153 CharData("\n "), 154 
StartElement{Name{"", "query"}, []Attr{}}, 155 CharData("What is it?"), 156 EndElement{Name{"", "query"}}, 157 CharData("\n "), 158 StartElement{Name{"", "goodbye"}, []Attr{}}, 159 EndElement{Name{"", "goodbye"}}, 160 CharData("\n "), 161 StartElement{Name{"", "outer"}, []Attr{{Name{"foo", "attr"}, "value"}, {Name{"xmlns", "tag"}, "ns4"}}}, 162 CharData("\n "), 163 StartElement{Name{"", "inner"}, []Attr{}}, 164 EndElement{Name{"", "inner"}}, 165 CharData("\n "), 166 EndElement{Name{"", "outer"}}, 167 CharData("\n "), 168 StartElement{Name{"tag", "name"}, []Attr{}}, 169 CharData("\n "), 170 CharData("Some text here."), 171 CharData("\n "), 172 EndElement{Name{"tag", "name"}}, 173 CharData("\n"), 174 EndElement{Name{"", "body"}}, 175 Comment(" missing final newline "), 176 } 177 178 var cookedTokens = []Token{ 179 CharData("\n"), 180 ProcInst{"xml", []byte(`version="1.0" encoding="UTF-8"`)}, 181 CharData("\n"), 182 Directive(`DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" 183 "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd"`), 184 CharData("\n"), 185 StartElement{Name{"ns2", "body"}, []Attr{{Name{"xmlns", "foo"}, "ns1"}, {Name{"", "xmlns"}, "ns2"}, {Name{"xmlns", "tag"}, "ns3"}}}, 186 CharData("\n "), 187 StartElement{Name{"ns2", "hello"}, []Attr{{Name{"", "lang"}, "en"}}}, 188 CharData("World <>'\" 白鵬翔"), 189 EndElement{Name{"ns2", "hello"}}, 190 CharData("\n "), 191 StartElement{Name{"ns2", "query"}, []Attr{}}, 192 CharData("What is it?"), 193 EndElement{Name{"ns2", "query"}}, 194 CharData("\n "), 195 StartElement{Name{"ns2", "goodbye"}, []Attr{}}, 196 EndElement{Name{"ns2", "goodbye"}}, 197 CharData("\n "), 198 StartElement{Name{"ns2", "outer"}, []Attr{{Name{"ns1", "attr"}, "value"}, {Name{"xmlns", "tag"}, "ns4"}}}, 199 CharData("\n "), 200 StartElement{Name{"ns2", "inner"}, []Attr{}}, 201 EndElement{Name{"ns2", "inner"}}, 202 CharData("\n "), 203 EndElement{Name{"ns2", "outer"}}, 204 CharData("\n "), 205 StartElement{Name{"ns3", "name"}, 
[]Attr{}}, 206 CharData("\n "), 207 CharData("Some text here."), 208 CharData("\n "), 209 EndElement{Name{"ns3", "name"}}, 210 CharData("\n"), 211 EndElement{Name{"ns2", "body"}}, 212 Comment(" missing final newline "), 213 } 214 215 const testInputAltEncoding = ` 216 <?xml version="1.0" encoding="x-testing-uppercase"?> 217 <TAG>VALUE</TAG>` 218 219 var rawTokensAltEncoding = []Token{ 220 CharData("\n"), 221 ProcInst{"xml", []byte(`version="1.0" encoding="x-testing-uppercase"`)}, 222 CharData("\n"), 223 StartElement{Name{"", "tag"}, []Attr{}}, 224 CharData("value"), 225 EndElement{Name{"", "tag"}}, 226 } 227 228 var xmlInput = []string{ 229 // unexpected EOF cases 230 "<", 231 "<t", 232 "<t ", 233 "<t/", 234 "<!", 235 "<!-", 236 "<!--", 237 "<!--c-", 238 "<!--c--", 239 "<!d", 240 "<t></", 241 "<t></t", 242 "<?", 243 "<?p", 244 "<t a", 245 "<t a=", 246 "<t a='", 247 "<t a=''", 248 "<t/><![", 249 "<t/><![C", 250 "<t/><![CDATA[d", 251 "<t/><![CDATA[d]", 252 "<t/><![CDATA[d]]", 253 254 // other Syntax errors 255 "<>", 256 "<t/a", 257 "<0 />", 258 "<?0 >", 259 // "<!0 >", // let the Token() caller handle 260 "</0>", 261 "<t 0=''>", 262 "<t a='&'>", 263 "<t a='<'>", 264 "<t> c;</t>", 265 "<t a>", 266 "<t a=>", 267 "<t a=v>", 268 // "<![CDATA[d]]>", // let the Token() caller handle 269 "<t></e>", 270 "<t></>", 271 "<t></t!", 272 "<t>cdata]]></t>", 273 } 274 275 func TestRawToken(t *testing.T) { 276 d := NewDecoder(strings.NewReader(testInput)) 277 d.Entity = testEntity 278 testRawToken(t, d, testInput, rawTokens) 279 } 280 281 const nonStrictInput = ` 282 <tag>non&entity</tag> 283 <tag>&unknown;entity</tag> 284 <tag>{</tag> 285 <tag>&#zzz;</tag> 286 <tag>&なまえ3;</tag> 287 <tag><-gt;</tag> 288 <tag>&;</tag> 289 <tag>&0a;</tag> 290 ` 291 292 var nonStrictTokens = []Token{ 293 CharData("\n"), 294 StartElement{Name{"", "tag"}, []Attr{}}, 295 CharData("non&entity"), 296 EndElement{Name{"", "tag"}}, 297 CharData("\n"), 298 StartElement{Name{"", "tag"}, []Attr{}}, 299 
CharData("&unknown;entity"), 300 EndElement{Name{"", "tag"}}, 301 CharData("\n"), 302 StartElement{Name{"", "tag"}, []Attr{}}, 303 CharData("{"), 304 EndElement{Name{"", "tag"}}, 305 CharData("\n"), 306 StartElement{Name{"", "tag"}, []Attr{}}, 307 CharData("&#zzz;"), 308 EndElement{Name{"", "tag"}}, 309 CharData("\n"), 310 StartElement{Name{"", "tag"}, []Attr{}}, 311 CharData("&なまえ3;"), 312 EndElement{Name{"", "tag"}}, 313 CharData("\n"), 314 StartElement{Name{"", "tag"}, []Attr{}}, 315 CharData("<-gt;"), 316 EndElement{Name{"", "tag"}}, 317 CharData("\n"), 318 StartElement{Name{"", "tag"}, []Attr{}}, 319 CharData("&;"), 320 EndElement{Name{"", "tag"}}, 321 CharData("\n"), 322 StartElement{Name{"", "tag"}, []Attr{}}, 323 CharData("&0a;"), 324 EndElement{Name{"", "tag"}}, 325 CharData("\n"), 326 } 327 328 func TestNonStrictRawToken(t *testing.T) { 329 d := NewDecoder(strings.NewReader(nonStrictInput)) 330 d.Strict = false 331 testRawToken(t, d, nonStrictInput, nonStrictTokens) 332 } 333 334 type downCaser struct { 335 t *testing.T 336 r io.ByteReader 337 } 338 339 func (d *downCaser) ReadByte() (c byte, err error) { 340 c, err = d.r.ReadByte() 341 if c >= 'A' && c <= 'Z' { 342 c += 'a' - 'A' 343 } 344 return 345 } 346 347 func (d *downCaser) Read(p []byte) (int, error) { 348 d.t.Fatalf("unexpected Read call on downCaser reader") 349 panic("unreachable") 350 } 351 352 func TestRawTokenAltEncoding(t *testing.T) { 353 d := NewDecoder(strings.NewReader(testInputAltEncoding)) 354 d.CharsetReader = func(charset string, input io.Reader) (io.Reader, error) { 355 if charset != "x-testing-uppercase" { 356 t.Fatalf("unexpected charset %q", charset) 357 } 358 return &downCaser{t, input.(io.ByteReader)}, nil 359 } 360 testRawToken(t, d, testInputAltEncoding, rawTokensAltEncoding) 361 } 362 363 func TestRawTokenAltEncodingNoConverter(t *testing.T) { 364 d := NewDecoder(strings.NewReader(testInputAltEncoding)) 365 token, err := d.RawToken() 366 if token == nil { 367 
t.Fatalf("expected a token on first RawToken call") 368 } 369 if err != nil { 370 t.Fatal(err) 371 } 372 token, err = d.RawToken() 373 if token != nil { 374 t.Errorf("expected a nil token; got %#v", token) 375 } 376 if err == nil { 377 t.Fatalf("expected an error on second RawToken call") 378 } 379 const encoding = "x-testing-uppercase" 380 if !strings.Contains(err.Error(), encoding) { 381 t.Errorf("expected error to contain %q; got error: %v", 382 encoding, err) 383 } 384 } 385 386 func testRawToken(t *testing.T, d *Decoder, raw string, rawTokens []Token) { 387 lastEnd := int64(0) 388 for i, want := range rawTokens { 389 start := d.InputOffset() 390 have, err := d.RawToken() 391 end := d.InputOffset() 392 if err != nil { 393 t.Fatalf("token %d: unexpected error: %s", i, err) 394 } 395 if !reflect.DeepEqual(have, want) { 396 var shave, swant string 397 if _, ok := have.(CharData); ok { 398 shave = fmt.Sprintf("CharData(%q)", have) 399 } else { 400 shave = fmt.Sprintf("%#v", have) 401 } 402 if _, ok := want.(CharData); ok { 403 swant = fmt.Sprintf("CharData(%q)", want) 404 } else { 405 swant = fmt.Sprintf("%#v", want) 406 } 407 t.Errorf("token %d = %s, want %s", i, shave, swant) 408 } 409 410 // Check that InputOffset returned actual token. 411 switch { 412 case start < lastEnd: 413 t.Errorf("token %d: position [%d,%d) for %T is before previous token", i, start, end, have) 414 case start >= end: 415 // Special case: EndElement can be synthesized. 
416 if start == end && end == lastEnd { 417 break 418 } 419 t.Errorf("token %d: position [%d,%d) for %T is empty", i, start, end, have) 420 case end > int64(len(raw)): 421 t.Errorf("token %d: position [%d,%d) for %T extends beyond input", i, start, end, have) 422 default: 423 text := raw[start:end] 424 if strings.ContainsAny(text, "<>") && (!strings.HasPrefix(text, "<") || !strings.HasSuffix(text, ">")) { 425 t.Errorf("token %d: misaligned raw token %#q for %T", i, text, have) 426 } 427 } 428 lastEnd = end 429 } 430 } 431 432 // Ensure that directives (specifically !DOCTYPE) include the complete 433 // text of any nested directives, noting that < and > do not change 434 // nesting depth if they are in single or double quotes. 435 436 var nestedDirectivesInput = ` 437 <!DOCTYPE [<!ENTITY rdf "http://www.w3.org/1999/02/22-rdf-syntax-ns#">]> 438 <!DOCTYPE [<!ENTITY xlt ">">]> 439 <!DOCTYPE [<!ENTITY xlt "<">]> 440 <!DOCTYPE [<!ENTITY xlt '>'>]> 441 <!DOCTYPE [<!ENTITY xlt '<'>]> 442 <!DOCTYPE [<!ENTITY xlt '">'>]> 443 <!DOCTYPE [<!ENTITY xlt "'<">]> 444 ` 445 446 var nestedDirectivesTokens = []Token{ 447 CharData("\n"), 448 Directive(`DOCTYPE [<!ENTITY rdf "http://www.w3.org/1999/02/22-rdf-syntax-ns#">]`), 449 CharData("\n"), 450 Directive(`DOCTYPE [<!ENTITY xlt ">">]`), 451 CharData("\n"), 452 Directive(`DOCTYPE [<!ENTITY xlt "<">]`), 453 CharData("\n"), 454 Directive(`DOCTYPE [<!ENTITY xlt '>'>]`), 455 CharData("\n"), 456 Directive(`DOCTYPE [<!ENTITY xlt '<'>]`), 457 CharData("\n"), 458 Directive(`DOCTYPE [<!ENTITY xlt '">'>]`), 459 CharData("\n"), 460 Directive(`DOCTYPE [<!ENTITY xlt "'<">]`), 461 CharData("\n"), 462 } 463 464 func TestNestedDirectives(t *testing.T) { 465 d := NewDecoder(strings.NewReader(nestedDirectivesInput)) 466 467 for i, want := range nestedDirectivesTokens { 468 have, err := d.Token() 469 if err != nil { 470 t.Fatalf("token %d: unexpected error: %s", i, err) 471 } 472 if !reflect.DeepEqual(have, want) { 473 t.Errorf("token %d = %#v want 
%#v", i, have, want) 474 } 475 } 476 } 477 478 func TestToken(t *testing.T) { 479 d := NewDecoder(strings.NewReader(testInput)) 480 d.Entity = testEntity 481 482 for i, want := range cookedTokens { 483 have, err := d.Token() 484 if err != nil { 485 t.Fatalf("token %d: unexpected error: %s", i, err) 486 } 487 if !reflect.DeepEqual(have, want) { 488 t.Errorf("token %d = %#v want %#v", i, have, want) 489 } 490 } 491 } 492 493 func TestSyntax(t *testing.T) { 494 for i := range xmlInput { 495 d := NewDecoder(strings.NewReader(xmlInput[i])) 496 var err error 497 for _, err = d.Token(); err == nil; _, err = d.Token() { 498 } 499 if _, ok := err.(*SyntaxError); !ok { 500 t.Fatalf(`xmlInput "%s": expected SyntaxError not received`, xmlInput[i]) 501 } 502 } 503 } 504 505 func TestInputLinePos(t *testing.T) { 506 testInput := `<root> 507 <?pi 508 ?> <elt 509 att 510 = 511 "val"> 512 <![CDATA[ 513 ]]><!-- 514 515 --></elt> 516 </root>` 517 linePos := [][]int{ 518 {1, 7}, 519 {2, 1}, 520 {3, 4}, 521 {3, 6}, 522 {6, 7}, 523 {7, 1}, 524 {8, 4}, 525 {10, 4}, 526 {10, 10}, 527 {11, 1}, 528 {11, 8}, 529 } 530 dec := NewDecoder(strings.NewReader(testInput)) 531 for _, want := range linePos { 532 if _, err := dec.Token(); err != nil { 533 t.Errorf("Unexpected error: %v", err) 534 continue 535 } 536 537 gotLine, gotCol := dec.InputPos() 538 if gotLine != want[0] || gotCol != want[1] { 539 t.Errorf("dec.InputPos() = %d,%d, want %d,%d", gotLine, gotCol, want[0], want[1]) 540 } 541 } 542 } 543 544 type allScalars struct { 545 True1 bool 546 True2 bool 547 False1 bool 548 False2 bool 549 Int int 550 Int8 int8 551 Int16 int16 552 Int32 int32 553 Int64 int64 554 Uint int 555 Uint8 uint8 556 Uint16 uint16 557 Uint32 uint32 558 Uint64 uint64 559 Uintptr uintptr 560 Float32 float32 561 Float64 float64 562 String string 563 PtrString *string 564 } 565 566 var all = allScalars{ 567 True1: true, 568 True2: true, 569 False1: false, 570 False2: false, 571 Int: 1, 572 Int8: -2, 573 Int16: 3, 574 
Int32: -4, 575 Int64: 5, 576 Uint: 6, 577 Uint8: 7, 578 Uint16: 8, 579 Uint32: 9, 580 Uint64: 10, 581 Uintptr: 11, 582 Float32: 13.0, 583 Float64: 14.0, 584 String: "15", 585 PtrString: &sixteen, 586 } 587 588 var sixteen = "16" 589 590 const testScalarsInput = `<allscalars> 591 <True1>true</True1> 592 <True2>1</True2> 593 <False1>false</False1> 594 <False2>0</False2> 595 <Int>1</Int> 596 <Int8>-2</Int8> 597 <Int16>3</Int16> 598 <Int32>-4</Int32> 599 <Int64>5</Int64> 600 <Uint>6</Uint> 601 <Uint8>7</Uint8> 602 <Uint16>8</Uint16> 603 <Uint32>9</Uint32> 604 <Uint64>10</Uint64> 605 <Uintptr>11</Uintptr> 606 <Float>12.0</Float> 607 <Float32>13.0</Float32> 608 <Float64>14.0</Float64> 609 <String>15</String> 610 <PtrString>16</PtrString> 611 </allscalars>` 612 613 func TestAllScalars(t *testing.T) { 614 var a allScalars 615 err := Unmarshal([]byte(testScalarsInput), &a) 616 617 if err != nil { 618 t.Fatal(err) 619 } 620 if !reflect.DeepEqual(a, all) { 621 t.Errorf("have %+v want %+v", a, all) 622 } 623 } 624 625 type item struct { 626 FieldA string 627 } 628 629 func TestIssue569(t *testing.T) { 630 data := `<item><FieldA>abcd</FieldA></item>` 631 var i item 632 err := Unmarshal([]byte(data), &i) 633 634 if err != nil || i.FieldA != "abcd" { 635 t.Fatal("Expecting abcd") 636 } 637 } 638 639 func TestUnquotedAttrs(t *testing.T) { 640 data := "<tag attr=azAZ09:-_\t>" 641 d := NewDecoder(strings.NewReader(data)) 642 d.Strict = false 643 token, err := d.Token() 644 if _, ok := err.(*SyntaxError); ok { 645 t.Errorf("Unexpected error: %v", err) 646 } 647 if token.(StartElement).Name.Local != "tag" { 648 t.Errorf("Unexpected tag name: %v", token.(StartElement).Name.Local) 649 } 650 attr := token.(StartElement).Attr[0] 651 if attr.Value != "azAZ09:-_" { 652 t.Errorf("Unexpected attribute value: %v", attr.Value) 653 } 654 if attr.Name.Local != "attr" { 655 t.Errorf("Unexpected attribute name: %v", attr.Name.Local) 656 } 657 } 658 659 func TestValuelessAttrs(t *testing.T) { 660 
tests := [][3]string{ 661 {"<p nowrap>", "p", "nowrap"}, 662 {"<p nowrap >", "p", "nowrap"}, 663 {"<input checked/>", "input", "checked"}, 664 {"<input checked />", "input", "checked"}, 665 } 666 for _, test := range tests { 667 d := NewDecoder(strings.NewReader(test[0])) 668 d.Strict = false 669 token, err := d.Token() 670 if _, ok := err.(*SyntaxError); ok { 671 t.Errorf("Unexpected error: %v", err) 672 } 673 if token.(StartElement).Name.Local != test[1] { 674 t.Errorf("Unexpected tag name: %v", token.(StartElement).Name.Local) 675 } 676 attr := token.(StartElement).Attr[0] 677 if attr.Value != test[2] { 678 t.Errorf("Unexpected attribute value: %v", attr.Value) 679 } 680 if attr.Name.Local != test[2] { 681 t.Errorf("Unexpected attribute name: %v", attr.Name.Local) 682 } 683 } 684 } 685 686 func TestCopyTokenCharData(t *testing.T) { 687 data := []byte("same data") 688 var tok1 Token = CharData(data) 689 tok2 := CopyToken(tok1) 690 if !reflect.DeepEqual(tok1, tok2) { 691 t.Error("CopyToken(CharData) != CharData") 692 } 693 data[1] = 'o' 694 if reflect.DeepEqual(tok1, tok2) { 695 t.Error("CopyToken(CharData) uses same buffer.") 696 } 697 } 698 699 func TestCopyTokenStartElement(t *testing.T) { 700 elt := StartElement{Name{"", "hello"}, []Attr{{Name{"", "lang"}, "en"}}} 701 var tok1 Token = elt 702 tok2 := CopyToken(tok1) 703 if tok1.(StartElement).Attr[0].Value != "en" { 704 t.Error("CopyToken overwrote Attr[0]") 705 } 706 if !reflect.DeepEqual(tok1, tok2) { 707 t.Error("CopyToken(StartElement) != StartElement") 708 } 709 tok1.(StartElement).Attr[0] = Attr{Name{"", "lang"}, "de"} 710 if reflect.DeepEqual(tok1, tok2) { 711 t.Error("CopyToken(CharData) uses same buffer.") 712 } 713 } 714 715 func TestCopyTokenComment(t *testing.T) { 716 data := []byte("<!-- some comment -->") 717 var tok1 Token = Comment(data) 718 tok2 := CopyToken(tok1) 719 if !reflect.DeepEqual(tok1, tok2) { 720 t.Error("CopyToken(Comment) != Comment") 721 } 722 data[1] = 'o' 723 if 
reflect.DeepEqual(tok1, tok2) { 724 t.Error("CopyToken(Comment) uses same buffer.") 725 } 726 } 727 728 func TestSyntaxErrorLineNum(t *testing.T) { 729 testInput := "<P>Foo<P>\n\n<P>Bar</>\n" 730 d := NewDecoder(strings.NewReader(testInput)) 731 var err error 732 for _, err = d.Token(); err == nil; _, err = d.Token() { 733 } 734 synerr, ok := err.(*SyntaxError) 735 if !ok { 736 t.Error("Expected SyntaxError.") 737 } 738 if synerr.Line != 3 { 739 t.Error("SyntaxError didn't have correct line number.") 740 } 741 } 742 743 func TestTrailingRawToken(t *testing.T) { 744 input := `<FOO></FOO> ` 745 d := NewDecoder(strings.NewReader(input)) 746 var err error 747 for _, err = d.RawToken(); err == nil; _, err = d.RawToken() { 748 } 749 if err != io.EOF { 750 t.Fatalf("d.RawToken() = _, %v, want _, io.EOF", err) 751 } 752 } 753 754 func TestTrailingToken(t *testing.T) { 755 input := `<FOO></FOO> ` 756 d := NewDecoder(strings.NewReader(input)) 757 var err error 758 for _, err = d.Token(); err == nil; _, err = d.Token() { 759 } 760 if err != io.EOF { 761 t.Fatalf("d.Token() = _, %v, want _, io.EOF", err) 762 } 763 } 764 765 func TestEntityInsideCDATA(t *testing.T) { 766 input := `<test><![CDATA[ &val=foo ]]></test>` 767 d := NewDecoder(strings.NewReader(input)) 768 var err error 769 for _, err = d.Token(); err == nil; _, err = d.Token() { 770 } 771 if err != io.EOF { 772 t.Fatalf("d.Token() = _, %v, want _, io.EOF", err) 773 } 774 } 775 776 var characterTests = []struct { 777 in string 778 err string 779 }{ 780 {"\x12<doc/>", "illegal character code U+0012"}, 781 {"<?xml version=\"1.0\"?>\x0b<doc/>", "illegal character code U+000B"}, 782 {"\xef\xbf\xbe<doc/>", "illegal character code U+FFFE"}, 783 {"<?xml version=\"1.0\"?><doc>\r\n<hiya/>\x07<toots/></doc>", "illegal character code U+0007"}, 784 {"<?xml version=\"1.0\"?><doc \x12='value'>what's up</doc>", "expected attribute name in element"}, 785 {"<doc>&abc\x01;</doc>", "invalid character entity &abc (no semicolon)"}, 786 
{"<doc>&\x01;</doc>", "invalid character entity & (no semicolon)"}, 787 {"<doc>&\xef\xbf\xbe;</doc>", "invalid character entity &\uFFFE;"}, 788 {"<doc>&hello;</doc>", "invalid character entity &hello;"}, 789 } 790 791 func TestDisallowedCharacters(t *testing.T) { 792 793 for i, tt := range characterTests { 794 d := NewDecoder(strings.NewReader(tt.in)) 795 var err error 796 797 for err == nil { 798 _, err = d.Token() 799 } 800 synerr, ok := err.(*SyntaxError) 801 if !ok { 802 t.Fatalf("input %d d.Token() = _, %v, want _, *SyntaxError", i, err) 803 } 804 if synerr.Msg != tt.err { 805 t.Fatalf("input %d synerr.Msg wrong: want %q, got %q", i, tt.err, synerr.Msg) 806 } 807 } 808 } 809 810 func TestIsInCharacterRange(t *testing.T) { 811 invalid := []rune{ 812 utf8.MaxRune + 1, 813 0xD800, // surrogate min 814 0xDFFF, // surrogate max 815 -1, 816 } 817 for _, r := range invalid { 818 if isInCharacterRange(r) { 819 t.Errorf("rune %U considered valid", r) 820 } 821 } 822 } 823 824 var procInstTests = []struct { 825 input string 826 expect [2]string 827 }{ 828 {`version="1.0" encoding="utf-8"`, [2]string{"1.0", "utf-8"}}, 829 {`version="1.0" encoding='utf-8'`, [2]string{"1.0", "utf-8"}}, 830 {`version="1.0" encoding='utf-8' `, [2]string{"1.0", "utf-8"}}, 831 {`version="1.0" encoding=utf-8`, [2]string{"1.0", ""}}, 832 {`encoding="FOO" `, [2]string{"", "FOO"}}, 833 } 834 835 func TestProcInstEncoding(t *testing.T) { 836 for _, test := range procInstTests { 837 if got := procInst("version", test.input); got != test.expect[0] { 838 t.Errorf("procInst(version, %q) = %q; want %q", test.input, got, test.expect[0]) 839 } 840 if got := procInst("encoding", test.input); got != test.expect[1] { 841 t.Errorf("procInst(encoding, %q) = %q; want %q", test.input, got, test.expect[1]) 842 } 843 } 844 } 845 846 // Ensure that directives with comments include the complete 847 // text of any nested directives. 
848 849 var directivesWithCommentsInput = ` 850 <!DOCTYPE [<!-- a comment --><!ENTITY rdf "http://www.w3.org/1999/02/22-rdf-syntax-ns#">]> 851 <!DOCTYPE [<!ENTITY go "Golang"><!-- a comment-->]> 852 <!DOCTYPE <!-> <!> <!----> <!-->--> <!--->--> [<!ENTITY go "Golang"><!-- a comment-->]> 853 ` 854 855 var directivesWithCommentsTokens = []Token{ 856 CharData("\n"), 857 Directive(`DOCTYPE [ <!ENTITY rdf "http://www.w3.org/1999/02/22-rdf-syntax-ns#">]`), 858 CharData("\n"), 859 Directive(`DOCTYPE [<!ENTITY go "Golang"> ]`), 860 CharData("\n"), 861 Directive(`DOCTYPE <!-> <!> [<!ENTITY go "Golang"> ]`), 862 CharData("\n"), 863 } 864 865 func TestDirectivesWithComments(t *testing.T) { 866 d := NewDecoder(strings.NewReader(directivesWithCommentsInput)) 867 868 for i, want := range directivesWithCommentsTokens { 869 have, err := d.Token() 870 if err != nil { 871 t.Fatalf("token %d: unexpected error: %s", i, err) 872 } 873 if !reflect.DeepEqual(have, want) { 874 t.Errorf("token %d = %#v want %#v", i, have, want) 875 } 876 } 877 } 878 879 // Writer whose Write method always returns an error. 880 type errWriter struct{} 881 882 func (errWriter) Write(p []byte) (n int, err error) { return 0, fmt.Errorf("unwritable") } 883 884 func TestEscapeTextIOErrors(t *testing.T) { 885 expectErr := "unwritable" 886 err := EscapeText(errWriter{}, []byte{'A'}) 887 888 if err == nil || err.Error() != expectErr { 889 t.Errorf("have %v, want %v", err, expectErr) 890 } 891 } 892 893 func TestEscapeTextInvalidChar(t *testing.T) { 894 input := []byte("A \x00 terminated string.") 895 expected := "A \uFFFD terminated string." 
896 897 buff := new(strings.Builder) 898 if err := EscapeText(buff, input); err != nil { 899 t.Fatalf("have %v, want nil", err) 900 } 901 text := buff.String() 902 903 if text != expected { 904 t.Errorf("have %v, want %v", text, expected) 905 } 906 } 907 908 func TestIssue5880(t *testing.T) { 909 type T []byte 910 data, err := Marshal(T{192, 168, 0, 1}) 911 if err != nil { 912 t.Errorf("Marshal error: %v", err) 913 } 914 if !utf8.Valid(data) { 915 t.Errorf("Marshal generated invalid UTF-8: %x", data) 916 } 917 } 918 919 func TestIssue8535(t *testing.T) { 920 921 type ExampleConflict struct { 922 XMLName Name `xml:"example"` 923 Link string `xml:"link"` 924 AtomLink string `xml:"http://www.w3.org/2005/Atom link"` // Same name in a different name space 925 } 926 testCase := `<example> 927 <title>Example</title> 928 <link>http://example.com/default</link> <!-- not assigned --> 929 <link>http://example.com/home</link> <!-- not assigned --> 930 <ns:link xmlns:ns="http://www.w3.org/2005/Atom">http://example.com/ns</ns:link> 931 </example>` 932 933 var dest ExampleConflict 934 d := NewDecoder(strings.NewReader(testCase)) 935 if err := d.Decode(&dest); err != nil { 936 t.Fatal(err) 937 } 938 } 939 940 func TestEncodeXMLNS(t *testing.T) { 941 testCases := []struct { 942 f func() ([]byte, error) 943 want string 944 ok bool 945 }{ 946 {encodeXMLNS1, `<Test xmlns="http://example.com/ns"><Body>hello world</Body></Test>`, true}, 947 {encodeXMLNS2, `<Test><body xmlns="http://example.com/ns">hello world</body></Test>`, true}, 948 {encodeXMLNS3, `<Test xmlns="http://example.com/ns"><Body>hello world</Body></Test>`, true}, 949 {encodeXMLNS4, `<Test xmlns="http://example.com/ns"><Body>hello world</Body></Test>`, false}, 950 } 951 952 for i, tc := range testCases { 953 if b, err := tc.f(); err == nil { 954 if got, want := string(b), tc.want; got != want { 955 t.Errorf("%d: got %s, want %s \n", i, got, want) 956 } 957 } else { 958 t.Errorf("%d: marshal failed with %s", i, err) 959 } 
960 } 961 } 962 963 func encodeXMLNS1() ([]byte, error) { 964 965 type T struct { 966 XMLName Name `xml:"Test"` 967 Ns string `xml:"xmlns,attr"` 968 Body string 969 } 970 971 s := &T{Ns: "http://example.com/ns", Body: "hello world"} 972 return Marshal(s) 973 } 974 975 func encodeXMLNS2() ([]byte, error) { 976 977 type Test struct { 978 Body string `xml:"http://example.com/ns body"` 979 } 980 981 s := &Test{Body: "hello world"} 982 return Marshal(s) 983 } 984 985 func encodeXMLNS3() ([]byte, error) { 986 987 type Test struct { 988 XMLName Name `xml:"http://example.com/ns Test"` 989 Body string 990 } 991 992 //s := &Test{XMLName: Name{"http://example.com/ns",""}, Body: "hello world"} is unusable as the "-" is missing 993 // as documentation states 994 s := &Test{Body: "hello world"} 995 return Marshal(s) 996 } 997 998 func encodeXMLNS4() ([]byte, error) { 999 1000 type Test struct { 1001 Ns string `xml:"xmlns,attr"` 1002 Body string 1003 } 1004 1005 s := &Test{Ns: "http://example.com/ns", Body: "hello world"} 1006 return Marshal(s) 1007 } 1008 1009 func TestIssue11405(t *testing.T) { 1010 testCases := []string{ 1011 "<root>", 1012 "<root><foo>", 1013 "<root><foo></foo>", 1014 } 1015 for _, tc := range testCases { 1016 d := NewDecoder(strings.NewReader(tc)) 1017 var err error 1018 for { 1019 _, err = d.Token() 1020 if err != nil { 1021 break 1022 } 1023 } 1024 if _, ok := err.(*SyntaxError); !ok { 1025 t.Errorf("%s: Token: Got error %v, want SyntaxError", tc, err) 1026 } 1027 } 1028 } 1029 1030 func TestIssue12417(t *testing.T) { 1031 testCases := []struct { 1032 s string 1033 ok bool 1034 }{ 1035 {`<?xml encoding="UtF-8" version="1.0"?><root/>`, true}, 1036 {`<?xml encoding="UTF-8" version="1.0"?><root/>`, true}, 1037 {`<?xml encoding="utf-8" version="1.0"?><root/>`, true}, 1038 {`<?xml encoding="uuu-9" version="1.0"?><root/>`, false}, 1039 } 1040 for _, tc := range testCases { 1041 d := NewDecoder(strings.NewReader(tc.s)) 1042 var err error 1043 for { 1044 _, err = 
d.Token()
			if err != nil {
				if err == io.EOF {
					err = nil
				}
				break
			}
		}
		if err != nil && tc.ok {
			t.Errorf("%q: Encoding charset: expected no error, got %s", tc.s, err)
			continue
		}
		if err == nil && !tc.ok {
			t.Errorf("%q: Encoding charset: expected error, got nil", tc.s)
		}
	}
}

// TestIssue7113 round-trips a document whose outer element sets a default
// namespace and whose inner element resets it to the empty namespace. It
// unmarshals the document, marshals the result, checks that the output is
// identical to the input, and unmarshals it a second time, verifying the
// namespaces after every step.
func TestIssue7113(t *testing.T) {
	type C struct {
		XMLName Name `xml:""` // Sets empty namespace
	}

	type A struct {
		XMLName Name `xml:""`
		C       C    `xml:""`
	}

	var a A
	structSpace := "b"
	xmlTest := `<A xmlns="` + structSpace + `"><C xmlns=""></C></A>`
	t.Log(xmlTest)
	err := Unmarshal([]byte(xmlTest), &a)
	if err != nil {
		t.Fatal(err)
	}

	if a.XMLName.Space != structSpace {
		t.Errorf("overriding with empty namespace: unmarshalling, got %s, want %s\n", a.XMLName.Space, structSpace)
	}
	if len(a.C.XMLName.Space) != 0 {
		t.Fatalf("overriding with empty namespace: unmarshalling, got %s, want empty\n", a.C.XMLName.Space)
	}

	var b []byte
	b, err = Marshal(&a)
	if err != nil {
		t.Fatal(err)
	}
	// Marshal must not have modified the value it was given.
	if len(a.C.XMLName.Space) != 0 {
		t.Errorf("overriding with empty namespace: marshaling, got %s in C tag which should be empty\n", a.C.XMLName.Space)
	}
	if string(b) != xmlTest {
		t.Fatalf("overriding with empty namespace: marshalling, got %s, want %s\n", b, xmlTest)
	}

	var c A
	err = Unmarshal(b, &c)
	if err != nil {
		t.Fatalf("second Unmarshal failed: %s", err)
	}
	// The checks below inspect c, so the failure messages report c's fields
	// (not a's) to show the value that actually failed the comparison.
	if c.XMLName.Space != structSpace {
		t.Errorf("overriding with empty namespace: after marshaling & unmarshaling, XML name space: got %s, want %s\n", c.XMLName.Space, structSpace)
	}
	if len(c.C.XMLName.Space) != 0 {
		t.Errorf("overriding with empty namespace: after marshaling & unmarshaling, got %s, want empty\n", c.C.XMLName.Space)
	}
}

// TestIssue20396 checks how Unmarshal treats element names that contain an
// extra ':', a '=', or a '&' after the namespace prefix. Each malformed name
// must yield the listed syntax error; the well-formed prefixed names must
// unmarshal without error.
func TestIssue20396(t *testing.T) {

	var attrError = UnmarshalError("XML syntax error on line 1: expected attribute name in element")

	testCases := []struct {
		s       string
		wantErr error
	}{
		{`<a:te:st xmlns:a="abcd"/>`, // Issue 20396
			UnmarshalError("XML syntax error on line 1: expected element name after <")},
		{`<a:te=st xmlns:a="abcd"/>`, attrError},
		{`<a:te&st xmlns:a="abcd"/>`, attrError},
		{`<a:test xmlns:a="abcd"/>`, nil},
		{`<a:te:st xmlns:a="abcd">1</a:te:st>`,
			UnmarshalError("XML syntax error on line 1: expected element name after <")},
		{`<a:te=st xmlns:a="abcd">1</a:te=st>`, attrError},
		{`<a:te&st xmlns:a="abcd">1</a:te&st>`, attrError},
		{`<a:test xmlns:a="abcd">1</a:test>`, nil},
	}

	var dest string
	for _, tc := range testCases {
		// The returned error and the expected one may have different dynamic
		// types, so when both are non-nil they are compared by message.
		if got, want := Unmarshal([]byte(tc.s), &dest), tc.wantErr; got != want {
			if got == nil {
				t.Errorf("%s: Unexpected success, want %v", tc.s, want)
			} else if want == nil {
				t.Errorf("%s: Unexpected error, got %v", tc.s, got)
			} else if got.Error() != want.Error() {
				t.Errorf("%s: got %v, want %v", tc.s, got, want)
			}
		}
	}
}

// TestIssue20685 tokenizes documents whose closing tag may differ from the
// opening tag in prefix or local name, or that contain an unclosed element.
// Only the document whose closing tag matches its opening tag is expected to
// tokenize without error.
func TestIssue20685(t *testing.T) {
	testCases := []struct {
		s  string
		ok bool
	}{
		{`<x:book xmlns:x="abcd" xmlns:y="abcd"><unclosetag>one</x:book>`, false},
		{`<x:book xmlns:x="abcd" xmlns:y="abcd">one</x:book>`, true},
		{`<x:book xmlns:x="abcd" xmlns:y="abcd">one</y:book>`, false},
		{`<x:book xmlns:y="abcd" xmlns:x="abcd">one</y:book>`, false},
		{`<x:book xmlns:x="abcd">one</y:book>`, false},
		{`<x:book>one</y:book>`, false},
		{`<xbook>one</ybook>`, false},
	}
	for _, tc := range testCases {
		d := NewDecoder(strings.NewReader(tc.s))
		var err error
		// Read tokens until the first error; io.EOF means the whole input
		// was consumed and is therefore treated as success.
		for {
			_, err = d.Token()
			if err != nil {
				if err == io.EOF {
					err = nil
				}
				break
			}
		}
		if err != nil && tc.ok {
			t.Errorf("%q: Closing tag with namespace : expected no error, got %s", tc.s, err)
			continue
		}
		if err == nil && !tc.ok {
			t.Errorf("%q: Closing tag with namespace : expected error, got nil", tc.s)
		}
	}
}

// tokenMap returns a function that wraps a TokenReader so that every token
// read from it is passed through mapping before being returned.
func tokenMap(mapping func(t Token) Token) func(TokenReader) TokenReader {
	return func(src TokenReader) TokenReader {
		return mapper{
			t: src,
			f: mapping,
		}
	}
}

// mapper is a TokenReader that applies f to each token read from t.
type mapper struct {
	t TokenReader
	f func(Token) Token
}

// Token reads the next token from the wrapped reader and returns the result
// of applying m.f to it. If the wrapped reader reports an error, Token
// returns a nil token together with that error and does not call m.f.
func (m mapper) Token() (Token, error) {
	tok, err := m.t.Token()
	if err != nil {
		return nil, err
	}
	return m.f(tok), nil
}

// TestNewTokenDecoderIdempotent checks that NewTokenDecoder returns the very
// same *Decoder when the TokenReader it is given already is a *Decoder.
func TestNewTokenDecoderIdempotent(t *testing.T) {
	d := NewDecoder(strings.NewReader(`<br>`))
	d2 := NewTokenDecoder(d)
	if d != d2 {
		t.Error("NewTokenDecoder did not detect underlying Decoder")
	}
}

// TestWrapDecoder decodes through a token-rewriting TokenReader: the <quote>
// start and end elements are renamed to "blocking" on the fly, and the result
// is decoded into a struct that expects a <blocking> element.
func TestWrapDecoder(t *testing.T) {
	d := NewDecoder(strings.NewReader(`<quote>[Re-enter Clown with a letter, and FABIAN]</quote>`))
	m := tokenMap(func(t Token) Token {
		switch tok := t.(type) {
		case StartElement:
			if tok.Name.Local == "quote" {
				tok.Name.Local = "blocking"
				return tok
			}
		case EndElement:
			if tok.Name.Local == "quote" {
				tok.Name.Local = "blocking"
				return tok
			}
		}
		return t
	})

	d = NewTokenDecoder(m(d))

	o := struct {
		XMLName  Name   `xml:"blocking"`
		Chardata string `xml:",chardata"`
	}{}

	if err := d.Decode(&o); err != nil {
		t.Fatal("Got unexpected error while decoding:", err)
	}

	if o.Chardata != "[Re-enter Clown with a letter, and FABIAN]" {
		t.Fatalf("Got unexpected chardata: `%s`\n", o.Chardata)
	}
}

// tokReader is a TokenReader that returns an empty StartElement and a nil
// error on every call.
type tokReader struct{}

// Token always returns an empty StartElement and a nil error.
func (tokReader) Token() (Token, error) {
	return StartElement{}, nil
}

// Failure has an UnmarshalXML method that returns nil without reading
// anything from the decoder it is handed.
type Failure struct{}

// UnmarshalXML ignores both arguments and reports success.
func (Failure) UnmarshalXML(*Decoder, StartElement) error {
	return nil
}

// TestTokenUnmarshaler checks that decoding from a custom TokenReader into a
// value with its own UnmarshalXML method does not panic.
func TestTokenUnmarshaler(t *testing.T) {
	defer func() {
		if r := recover(); r != nil {
			t.Error("Unexpected panic using custom token unmarshaler")
		}
	}()

	d := NewTokenDecoder(tokReader{})
	// The returned error is deliberately discarded: this test only cares
	// that Decode does not panic.
	_ = d.Decode(&Failure{})
}

// testRoundTrip decodes input into tokens, re-encodes every token, then
// decodes the encoder's output and requires it to produce exactly the same
// token sequence. Any decoding or encoding error fails the test.
func testRoundTrip(t *testing.T, input string) {
	d := NewDecoder(strings.NewReader(input))
	var tokens []Token
	var buf bytes.Buffer
	e := NewEncoder(&buf)
	for {
		tok, err := d.Token()
		if err == io.EOF {
			break
		}
		if err != nil {
			t.Fatalf("invalid input: %v", err)
		}
		if err := e.EncodeToken(tok); err != nil {
			t.Fatalf("failed to re-encode input: %v", err)
		}
		// Store a copy made with CopyToken rather than the token itself.
		tokens = append(tokens, CopyToken(tok))
	}
	if err := e.Flush(); err != nil {
		t.Fatal(err)
	}

	d = NewDecoder(&buf)
	for {
		tok, err := d.Token()
		if err == io.EOF {
			break
		}
		if err != nil {
			t.Fatalf("failed to decode output: %v", err)
		}
		if len(tokens) == 0 {
			t.Fatalf("unexpected token: %#v", tok)
		}
		a, b := tokens[0], tok
		if !reflect.DeepEqual(a, b) {
			t.Fatalf("token mismatch: %#v vs %#v", a, b)
		}
		tokens = tokens[1:]
	}
	// Anything left over was present in the input but missing from the
	// re-encoded output.
	if len(tokens) > 0 {
		t.Fatalf("lost tokens: %#v", tokens)
	}
}

// TestRoundTrip runs testRoundTrip as a named subtest for each input in the
// table.
func TestRoundTrip(t *testing.T) {
	tests := map[string]string{
		"trailing colon":         `<foo abc:="x"></foo>`,
		"comments in directives": `<!ENTITY x<!<!-- c1 [ " -->--x --> > <e></e> <!DOCTYPE xxx [ x<!-- c2 " -->--x ]>`,
	}
	for name, input := range tests {
		t.Run(name, func(t *testing.T) { testRoundTrip(t, input) })
	}
}

// TestParseErrors tokenizes each source until the first error and checks that
// the error text contains the expected substring. An empty expected string
// means the input must tokenize cleanly, i.e. end with io.EOF.
func TestParseErrors(t *testing.T) {
	withDefaultHeader := func(s string) string {
		return `<?xml version="1.0" encoding="UTF-8"?>` + s
	}
	tests := []struct {
		src string
		err string
	}{
		{withDefaultHeader(`</foo>`), `unexpected end element </foo>`},
		{withDefaultHeader(`<x:foo></y:foo>`), `element <foo> in space x closed by </foo> in space y`},
		{withDefaultHeader(`<? not ok ?>`), `expected target name after <?`},
		{withDefaultHeader(`<!- not ok -->`), `invalid sequence <!- not part of <!--`},
		{withDefaultHeader(`<!-? not ok -->`), `invalid sequence <!- not part of <!--`},
		{withDefaultHeader(`<![not ok]>`), `invalid <![ sequence`},
		{withDefaultHeader("\xf1"), `invalid UTF-8`},

		// Header-related errors.
		{`<?xml version="1.1" encoding="UTF-8"?>`, `unsupported version "1.1"; only version 1.0 is supported`},

		// Cases below are for "no errors".
		{withDefaultHeader(`<?ok?>`), ``},
		{withDefaultHeader(`<?ok version="ok"?>`), ``},
	}

	for _, test := range tests {
		d := NewDecoder(strings.NewReader(test.src))
		var err error
		for {
			_, err = d.Token()
			if err != nil {
				break
			}
		}
		if test.err == "" {
			if err != io.EOF {
				t.Errorf("parse %s: have %q error, expected none", test.src, err)
			}
			continue
		}
		// Inv: err != nil
		if err == io.EOF {
			t.Errorf("parse %s: unexpected EOF", test.src)
			continue
		}
		if !strings.Contains(err.Error(), test.err) {
			t.Errorf("parse %s: can't find %q error substring\nerror: %q", test.src, test.err, err)
		}
	}
}

// testInputHTMLAutoClose is the input shared by BenchmarkHTMLAutoClose and
// TestHTMLAutoClose. It contains <br> elements in several spellings (with and
// without an explicit close, in different letter cases) and one <span>.
const testInputHTMLAutoClose = `<?xml version="1.0" encoding="UTF-8"?>
<br>
<br/><br/>
<br><br>
<br></br>
<BR>
<BR/><BR/>
<Br></Br>
<BR><span id="test">abc</span><br/><br/>`

// BenchmarkHTMLAutoClose measures tokenizing testInputHTMLAutoClose with a
// non-strict decoder configured with HTMLAutoClose and HTMLEntity.
func BenchmarkHTMLAutoClose(b *testing.B) {
	b.RunParallel(func(p *testing.PB) {
		for p.Next() {
			d := NewDecoder(strings.NewReader(testInputHTMLAutoClose))
			d.Strict = false
			d.AutoClose = HTMLAutoClose
			d.Entity = HTMLEntity
			for {
				_, err := d.Token()
				if err != nil {
					if err == io.EOF {
						break
					}
					// This closure runs on RunParallel's worker goroutines.
					// FailNow (and therefore Fatalf) may only be called from
					// the goroutine running the benchmark function, so the
					// failure is recorded with Errorf and the worker returns.
					b.Errorf("unexpected error: %v", err)
					return
				}
			}
		}
	})
}

// TestHTMLAutoClose tokenizes testInputHTMLAutoClose with a non-strict
// decoder configured with HTMLAutoClose and HTMLEntity, and compares the
// tokens against the expected sequence, in which every <br> start element,
// whatever its letter case, is immediately followed by its end element.
func TestHTMLAutoClose(t *testing.T) {
	wantTokens := []Token{
		ProcInst{"xml", []byte(`version="1.0" encoding="UTF-8"`)},
		CharData("\n"),
		StartElement{Name{"", "br"}, []Attr{}},
		EndElement{Name{"", "br"}},
		CharData("\n"),
		StartElement{Name{"", "br"}, []Attr{}},
		EndElement{Name{"", "br"}},
		StartElement{Name{"", "br"}, []Attr{}},
		EndElement{Name{"", "br"}},
		CharData("\n"),
		StartElement{Name{"", "br"}, []Attr{}},
		EndElement{Name{"", "br"}},
		StartElement{Name{"", "br"}, []Attr{}},
		EndElement{Name{"", "br"}},
		CharData("\n"),
		StartElement{Name{"", "br"}, []Attr{}},
		EndElement{Name{"", "br"}},
		CharData("\n"),
		StartElement{Name{"", "BR"}, []Attr{}},
		EndElement{Name{"", "BR"}},
		CharData("\n"),
		StartElement{Name{"", "BR"}, []Attr{}},
		EndElement{Name{"", "BR"}},
		StartElement{Name{"", "BR"}, []Attr{}},
		EndElement{Name{"", "BR"}},
		CharData("\n"),
		StartElement{Name{"", "Br"}, []Attr{}},
		EndElement{Name{"", "Br"}},
		CharData("\n"),
		StartElement{Name{"", "BR"}, []Attr{}},
		EndElement{Name{"", "BR"}},
		StartElement{Name{"", "span"}, []Attr{{Name: Name{"", "id"}, Value: "test"}}},
		CharData("abc"),
		EndElement{Name{"", "span"}},
		StartElement{Name{"", "br"}, []Attr{}},
		EndElement{Name{"", "br"}},
		StartElement{Name{"", "br"}, []Attr{}},
		EndElement{Name{"", "br"}},
	}

	d := NewDecoder(strings.NewReader(testInputHTMLAutoClose))
	d.Strict = false
	d.AutoClose = HTMLAutoClose
	d.Entity = HTMLEntity
	var haveTokens []Token
	for {
		tok, err := d.Token()
		if err != nil {
			if err == io.EOF {
				break
			}
			t.Fatalf("unexpected error: %v", err)
		}
		haveTokens = append(haveTokens, CopyToken(tok))
	}
	if len(haveTokens) != len(wantTokens) {
		t.Errorf("tokens count mismatch: have %d, want %d", len(haveTokens), len(wantTokens))
	}
	// Compare position by position even when the counts differ, so that the
	// first diverging token is reported as well.
	for i, want := range wantTokens {
		if i >= len(haveTokens) {
			t.Errorf("token[%d] expected %#v, have no token", i, want)
		} else {
			have := haveTokens[i]
			if !reflect.DeepEqual(have, want) {
				t.Errorf("token[%d] mismatch:\nhave: %#v\nwant: %#v", i, have, want)
			}
		}
	}
}