github.com/vmware/govmomi@v0.51.0/vim25/xml/xml_test.go (about) 1 // Copyright 2009 The Go Authors. All rights reserved. 2 // Use of this source code is governed by a BSD-style 3 // license that can be found in the LICENSE file. 4 5 package xml 6 7 import ( 8 "bytes" 9 "fmt" 10 "io" 11 "reflect" 12 "strings" 13 "testing" 14 "unicode/utf8" 15 ) 16 17 type toks struct { 18 earlyEOF bool 19 t []Token 20 } 21 22 func (t *toks) Token() (Token, error) { 23 if len(t.t) == 0 { 24 return nil, io.EOF 25 } 26 var tok Token 27 tok, t.t = t.t[0], t.t[1:] 28 if t.earlyEOF && len(t.t) == 0 { 29 return tok, io.EOF 30 } 31 return tok, nil 32 } 33 34 func TestDecodeEOF(t *testing.T) { 35 start := StartElement{Name: Name{Local: "test"}} 36 tests := []struct { 37 name string 38 tokens []Token 39 ok bool 40 }{ 41 { 42 name: "OK", 43 tokens: []Token{ 44 start, 45 start.End(), 46 }, 47 ok: true, 48 }, 49 { 50 name: "Malformed", 51 tokens: []Token{ 52 start, 53 StartElement{Name: Name{Local: "bad"}}, 54 start.End(), 55 }, 56 ok: false, 57 }, 58 } 59 for _, tc := range tests { 60 for _, eof := range []bool{true, false} { 61 name := fmt.Sprintf("%s/earlyEOF=%v", tc.name, eof) 62 t.Run(name, func(t *testing.T) { 63 d := NewTokenDecoder(&toks{ 64 earlyEOF: eof, 65 t: tc.tokens, 66 }) 67 err := d.Decode(&struct { 68 XMLName Name `xml:"test"` 69 }{}) 70 if tc.ok && err != nil { 71 t.Fatalf("d.Decode: expected nil error, got %v", err) 72 } 73 if _, ok := err.(*SyntaxError); !tc.ok && !ok { 74 t.Errorf("d.Decode: expected syntax error, got %v", err) 75 } 76 }) 77 } 78 } 79 } 80 81 type toksNil struct { 82 returnEOF bool 83 t []Token 84 } 85 86 func (t *toksNil) Token() (Token, error) { 87 if len(t.t) == 0 { 88 if !t.returnEOF { 89 // Return nil, nil before returning an EOF. It's legal, but 90 // discouraged. 
91 t.returnEOF = true 92 return nil, nil 93 } 94 return nil, io.EOF 95 } 96 var tok Token 97 tok, t.t = t.t[0], t.t[1:] 98 return tok, nil 99 } 100 101 func TestDecodeNilToken(t *testing.T) { 102 for _, strict := range []bool{true, false} { 103 name := fmt.Sprintf("Strict=%v", strict) 104 t.Run(name, func(t *testing.T) { 105 start := StartElement{Name: Name{Local: "test"}} 106 bad := StartElement{Name: Name{Local: "bad"}} 107 d := NewTokenDecoder(&toksNil{ 108 // Malformed 109 t: []Token{start, bad, start.End()}, 110 }) 111 d.Strict = strict 112 err := d.Decode(&struct { 113 XMLName Name `xml:"test"` 114 }{}) 115 if _, ok := err.(*SyntaxError); !ok { 116 t.Errorf("d.Decode: expected syntax error, got %v", err) 117 } 118 }) 119 } 120 } 121 122 const testInput = ` 123 <?xml version="1.0" encoding="UTF-8"?> 124 <!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" 125 "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd"> 126 <body xmlns:foo="ns1" xmlns="ns2" xmlns:tag="ns3" ` + 127 "\r\n\t" + ` > 128 <hello lang="en">World <>'" 白鵬翔</hello> 129 <query>&何; &is-it;</query> 130 <goodbye /> 131 <outer foo:attr="value" xmlns:tag="ns4"> 132 <inner/> 133 </outer> 134 <tag:name> 135 <![CDATA[Some text here.]]> 136 </tag:name> 137 </body><!-- missing final newline -->` 138 139 var testEntity = map[string]string{"何": "What", "is-it": "is it?"} 140 141 var rawTokens = []Token{ 142 CharData("\n"), 143 ProcInst{"xml", []byte(`version="1.0" encoding="UTF-8"`)}, 144 CharData("\n"), 145 Directive(`DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" 146 "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd"`), 147 CharData("\n"), 148 StartElement{Name{"", "body"}, []Attr{{Name{"xmlns", "foo"}, "ns1"}, {Name{"", "xmlns"}, "ns2"}, {Name{"xmlns", "tag"}, "ns3"}}}, 149 CharData("\n "), 150 StartElement{Name{"", "hello"}, []Attr{{Name{"", "lang"}, "en"}}}, 151 CharData("World <>'\" 白鵬翔"), 152 EndElement{Name{"", "hello"}}, 153 CharData("\n "), 154 
StartElement{Name{"", "query"}, []Attr{}}, 155 CharData("What is it?"), 156 EndElement{Name{"", "query"}}, 157 CharData("\n "), 158 StartElement{Name{"", "goodbye"}, []Attr{}}, 159 EndElement{Name{"", "goodbye"}}, 160 CharData("\n "), 161 StartElement{Name{"", "outer"}, []Attr{{Name{"foo", "attr"}, "value"}, {Name{"xmlns", "tag"}, "ns4"}}}, 162 CharData("\n "), 163 StartElement{Name{"", "inner"}, []Attr{}}, 164 EndElement{Name{"", "inner"}}, 165 CharData("\n "), 166 EndElement{Name{"", "outer"}}, 167 CharData("\n "), 168 StartElement{Name{"tag", "name"}, []Attr{}}, 169 CharData("\n "), 170 CharData("Some text here."), 171 CharData("\n "), 172 EndElement{Name{"tag", "name"}}, 173 CharData("\n"), 174 EndElement{Name{"", "body"}}, 175 Comment(" missing final newline "), 176 } 177 178 var cookedTokens = []Token{ 179 CharData("\n"), 180 ProcInst{"xml", []byte(`version="1.0" encoding="UTF-8"`)}, 181 CharData("\n"), 182 Directive(`DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" 183 "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd"`), 184 CharData("\n"), 185 StartElement{Name{"ns2", "body"}, []Attr{{Name{"xmlns", "foo"}, "ns1"}, {Name{"", "xmlns"}, "ns2"}, {Name{"xmlns", "tag"}, "ns3"}}}, 186 CharData("\n "), 187 StartElement{Name{"ns2", "hello"}, []Attr{{Name{"", "lang"}, "en"}}}, 188 CharData("World <>'\" 白鵬翔"), 189 EndElement{Name{"ns2", "hello"}}, 190 CharData("\n "), 191 StartElement{Name{"ns2", "query"}, []Attr{}}, 192 CharData("What is it?"), 193 EndElement{Name{"ns2", "query"}}, 194 CharData("\n "), 195 StartElement{Name{"ns2", "goodbye"}, []Attr{}}, 196 EndElement{Name{"ns2", "goodbye"}}, 197 CharData("\n "), 198 StartElement{Name{"ns2", "outer"}, []Attr{{Name{"ns1", "attr"}, "value"}, {Name{"xmlns", "tag"}, "ns4"}}}, 199 CharData("\n "), 200 StartElement{Name{"ns2", "inner"}, []Attr{}}, 201 EndElement{Name{"ns2", "inner"}}, 202 CharData("\n "), 203 EndElement{Name{"ns2", "outer"}}, 204 CharData("\n "), 205 StartElement{Name{"ns3", "name"}, 
[]Attr{}}, 206 CharData("\n "), 207 CharData("Some text here."), 208 CharData("\n "), 209 EndElement{Name{"ns3", "name"}}, 210 CharData("\n"), 211 EndElement{Name{"ns2", "body"}}, 212 Comment(" missing final newline "), 213 } 214 215 const testInputAltEncoding = ` 216 <?xml version="1.0" encoding="x-testing-uppercase"?> 217 <TAG>VALUE</TAG>` 218 219 var rawTokensAltEncoding = []Token{ 220 CharData("\n"), 221 ProcInst{"xml", []byte(`version="1.0" encoding="x-testing-uppercase"`)}, 222 CharData("\n"), 223 StartElement{Name{"", "tag"}, []Attr{}}, 224 CharData("value"), 225 EndElement{Name{"", "tag"}}, 226 } 227 228 var xmlInput = []string{ 229 // unexpected EOF cases 230 "<", 231 "<t", 232 "<t ", 233 "<t/", 234 "<!", 235 "<!-", 236 "<!--", 237 "<!--c-", 238 "<!--c--", 239 "<!d", 240 "<t></", 241 "<t></t", 242 "<?", 243 "<?p", 244 "<t a", 245 "<t a=", 246 "<t a='", 247 "<t a=''", 248 "<t/><![", 249 "<t/><![C", 250 "<t/><![CDATA[d", 251 "<t/><![CDATA[d]", 252 "<t/><![CDATA[d]]", 253 254 // other Syntax errors 255 "<>", 256 "<t/a", 257 "<0 />", 258 "<?0 >", 259 // "<!0 >", // let the Token() caller handle 260 "</0>", 261 "<t 0=''>", 262 "<t a='&'>", 263 "<t a='<'>", 264 "<t> c;</t>", 265 "<t a>", 266 "<t a=>", 267 "<t a=v>", 268 // "<![CDATA[d]]>", // let the Token() caller handle 269 "<t></e>", 270 "<t></>", 271 "<t></t!", 272 "<t>cdata]]></t>", 273 } 274 275 func TestRawToken(t *testing.T) { 276 d := NewDecoder(strings.NewReader(testInput)) 277 d.Entity = testEntity 278 testRawToken(t, d, testInput, rawTokens) 279 } 280 281 const nonStrictInput = ` 282 <tag>non&entity</tag> 283 <tag>&unknown;entity</tag> 284 <tag>{</tag> 285 <tag>&#zzz;</tag> 286 <tag>&なまえ3;</tag> 287 <tag><-gt;</tag> 288 <tag>&;</tag> 289 <tag>&0a;</tag> 290 ` 291 292 var nonStrictTokens = []Token{ 293 CharData("\n"), 294 StartElement{Name{"", "tag"}, []Attr{}}, 295 CharData("non&entity"), 296 EndElement{Name{"", "tag"}}, 297 CharData("\n"), 298 StartElement{Name{"", "tag"}, []Attr{}}, 299 
CharData("&unknown;entity"), 300 EndElement{Name{"", "tag"}}, 301 CharData("\n"), 302 StartElement{Name{"", "tag"}, []Attr{}}, 303 CharData("{"), 304 EndElement{Name{"", "tag"}}, 305 CharData("\n"), 306 StartElement{Name{"", "tag"}, []Attr{}}, 307 CharData("&#zzz;"), 308 EndElement{Name{"", "tag"}}, 309 CharData("\n"), 310 StartElement{Name{"", "tag"}, []Attr{}}, 311 CharData("&なまえ3;"), 312 EndElement{Name{"", "tag"}}, 313 CharData("\n"), 314 StartElement{Name{"", "tag"}, []Attr{}}, 315 CharData("<-gt;"), 316 EndElement{Name{"", "tag"}}, 317 CharData("\n"), 318 StartElement{Name{"", "tag"}, []Attr{}}, 319 CharData("&;"), 320 EndElement{Name{"", "tag"}}, 321 CharData("\n"), 322 StartElement{Name{"", "tag"}, []Attr{}}, 323 CharData("&0a;"), 324 EndElement{Name{"", "tag"}}, 325 CharData("\n"), 326 } 327 328 func TestNonStrictRawToken(t *testing.T) { 329 d := NewDecoder(strings.NewReader(nonStrictInput)) 330 d.Strict = false 331 testRawToken(t, d, nonStrictInput, nonStrictTokens) 332 } 333 334 type downCaser struct { 335 t *testing.T 336 r io.ByteReader 337 } 338 339 func (d *downCaser) ReadByte() (c byte, err error) { 340 c, err = d.r.ReadByte() 341 if c >= 'A' && c <= 'Z' { 342 c += 'a' - 'A' 343 } 344 return 345 } 346 347 func (d *downCaser) Read(p []byte) (int, error) { 348 d.t.Fatalf("unexpected Read call on downCaser reader") 349 panic("unreachable") 350 } 351 352 func TestRawTokenAltEncoding(t *testing.T) { 353 d := NewDecoder(strings.NewReader(testInputAltEncoding)) 354 d.CharsetReader = func(charset string, input io.Reader) (io.Reader, error) { 355 if charset != "x-testing-uppercase" { 356 t.Fatalf("unexpected charset %q", charset) 357 } 358 return &downCaser{t, input.(io.ByteReader)}, nil 359 } 360 testRawToken(t, d, testInputAltEncoding, rawTokensAltEncoding) 361 } 362 363 func TestRawTokenAltEncodingNoConverter(t *testing.T) { 364 d := NewDecoder(strings.NewReader(testInputAltEncoding)) 365 token, err := d.RawToken() 366 if token == nil { 367 
t.Fatalf("expected a token on first RawToken call") 368 } 369 if err != nil { 370 t.Fatal(err) 371 } 372 token, err = d.RawToken() 373 if token != nil { 374 t.Errorf("expected a nil token; got %#v", token) 375 } 376 if err == nil { 377 t.Fatalf("expected an error on second RawToken call") 378 } 379 const encoding = "x-testing-uppercase" 380 if !strings.Contains(err.Error(), encoding) { 381 t.Errorf("expected error to contain %q; got error: %v", 382 encoding, err) 383 } 384 } 385 386 func testRawToken(t *testing.T, d *Decoder, raw string, rawTokens []Token) { 387 lastEnd := int64(0) 388 for i, want := range rawTokens { 389 start := d.InputOffset() 390 have, err := d.RawToken() 391 end := d.InputOffset() 392 if err != nil { 393 t.Fatalf("token %d: unexpected error: %s", i, err) 394 } 395 if !reflect.DeepEqual(have, want) { 396 var shave, swant string 397 if _, ok := have.(CharData); ok { 398 shave = fmt.Sprintf("CharData(%q)", have) 399 } else { 400 shave = fmt.Sprintf("%#v", have) 401 } 402 if _, ok := want.(CharData); ok { 403 swant = fmt.Sprintf("CharData(%q)", want) 404 } else { 405 swant = fmt.Sprintf("%#v", want) 406 } 407 t.Errorf("token %d = %s, want %s", i, shave, swant) 408 } 409 410 // Check that InputOffset returned actual token. 411 switch { 412 case start < lastEnd: 413 t.Errorf("token %d: position [%d,%d) for %T is before previous token", i, start, end, have) 414 case start >= end: 415 // Special case: EndElement can be synthesized. 
416 if start == end && end == lastEnd { 417 break 418 } 419 t.Errorf("token %d: position [%d,%d) for %T is empty", i, start, end, have) 420 case end > int64(len(raw)): 421 t.Errorf("token %d: position [%d,%d) for %T extends beyond input", i, start, end, have) 422 default: 423 text := raw[start:end] 424 if strings.ContainsAny(text, "<>") && (!strings.HasPrefix(text, "<") || !strings.HasSuffix(text, ">")) { 425 t.Errorf("token %d: misaligned raw token %#q for %T", i, text, have) 426 } 427 } 428 lastEnd = end 429 } 430 } 431 432 // Ensure that directives (specifically !DOCTYPE) include the complete 433 // text of any nested directives, noting that < and > do not change 434 // nesting depth if they are in single or double quotes. 435 436 var nestedDirectivesInput = ` 437 <!DOCTYPE [<!ENTITY rdf "http://www.w3.org/1999/02/22-rdf-syntax-ns#">]> 438 <!DOCTYPE [<!ENTITY xlt ">">]> 439 <!DOCTYPE [<!ENTITY xlt "<">]> 440 <!DOCTYPE [<!ENTITY xlt '>'>]> 441 <!DOCTYPE [<!ENTITY xlt '<'>]> 442 <!DOCTYPE [<!ENTITY xlt '">'>]> 443 <!DOCTYPE [<!ENTITY xlt "'<">]> 444 ` 445 446 var nestedDirectivesTokens = []Token{ 447 CharData("\n"), 448 Directive(`DOCTYPE [<!ENTITY rdf "http://www.w3.org/1999/02/22-rdf-syntax-ns#">]`), 449 CharData("\n"), 450 Directive(`DOCTYPE [<!ENTITY xlt ">">]`), 451 CharData("\n"), 452 Directive(`DOCTYPE [<!ENTITY xlt "<">]`), 453 CharData("\n"), 454 Directive(`DOCTYPE [<!ENTITY xlt '>'>]`), 455 CharData("\n"), 456 Directive(`DOCTYPE [<!ENTITY xlt '<'>]`), 457 CharData("\n"), 458 Directive(`DOCTYPE [<!ENTITY xlt '">'>]`), 459 CharData("\n"), 460 Directive(`DOCTYPE [<!ENTITY xlt "'<">]`), 461 CharData("\n"), 462 } 463 464 func TestNestedDirectives(t *testing.T) { 465 d := NewDecoder(strings.NewReader(nestedDirectivesInput)) 466 467 for i, want := range nestedDirectivesTokens { 468 have, err := d.Token() 469 if err != nil { 470 t.Fatalf("token %d: unexpected error: %s", i, err) 471 } 472 if !reflect.DeepEqual(have, want) { 473 t.Errorf("token %d = %#v want 
%#v", i, have, want) 474 } 475 } 476 } 477 478 func TestToken(t *testing.T) { 479 d := NewDecoder(strings.NewReader(testInput)) 480 d.Entity = testEntity 481 482 for i, want := range cookedTokens { 483 have, err := d.Token() 484 if err != nil { 485 t.Fatalf("token %d: unexpected error: %s", i, err) 486 } 487 if !reflect.DeepEqual(have, want) { 488 t.Errorf("token %d = %#v want %#v", i, have, want) 489 } 490 } 491 } 492 493 func TestSyntax(t *testing.T) { 494 for i := range xmlInput { 495 d := NewDecoder(strings.NewReader(xmlInput[i])) 496 var err error 497 for _, err = d.Token(); err == nil; _, err = d.Token() { 498 } 499 if _, ok := err.(*SyntaxError); !ok { 500 t.Fatalf(`xmlInput "%s": expected SyntaxError not received`, xmlInput[i]) 501 } 502 } 503 } 504 505 func TestInputLinePos(t *testing.T) { 506 testInput := `<root> 507 <?pi 508 ?> <elt 509 att 510 = 511 "val"> 512 <![CDATA[ 513 ]]><!-- 514 515 --></elt> 516 </root>` 517 linePos := [][]int{ 518 {1, 7}, 519 {2, 1}, 520 {3, 4}, 521 {3, 6}, 522 {6, 7}, 523 {7, 1}, 524 {8, 4}, 525 {10, 4}, 526 {10, 10}, 527 {11, 1}, 528 {11, 8}, 529 } 530 dec := NewDecoder(strings.NewReader(testInput)) 531 for _, want := range linePos { 532 if _, err := dec.Token(); err != nil { 533 t.Errorf("Unexpected error: %v", err) 534 continue 535 } 536 537 gotLine, gotCol := dec.InputPos() 538 if gotLine != want[0] || gotCol != want[1] { 539 t.Errorf("dec.InputPos() = %d,%d, want %d,%d", gotLine, gotCol, want[0], want[1]) 540 } 541 } 542 } 543 544 type allScalars struct { 545 True1 bool 546 True2 bool 547 False1 bool 548 False2 bool 549 Int int 550 Int8 int8 551 Int16 int16 552 Int32 int32 553 Int64 int64 554 Uint int 555 Uint8 uint8 556 Uint16 uint16 557 Uint32 uint32 558 Uint64 uint64 559 Uintptr uintptr 560 Float32 float32 561 Float64 float64 562 String string 563 PtrString *string 564 } 565 566 var all = allScalars{ 567 True1: true, 568 True2: true, 569 False1: false, 570 False2: false, 571 Int: 1, 572 Int8: -2, 573 Int16: 3, 574 
Int32: -4, 575 Int64: 5, 576 Uint: 6, 577 Uint8: 7, 578 Uint16: 8, 579 Uint32: 9, 580 Uint64: 10, 581 Uintptr: 11, 582 Float32: 13.0, 583 Float64: 14.0, 584 String: "15", 585 PtrString: &sixteen, 586 } 587 588 var sixteen = "16" 589 590 const testScalarsInput = `<allscalars> 591 <True1>true</True1> 592 <True2>1</True2> 593 <False1>false</False1> 594 <False2>0</False2> 595 <Int>1</Int> 596 <Int8>-2</Int8> 597 <Int16>3</Int16> 598 <Int32>-4</Int32> 599 <Int64>5</Int64> 600 <Uint>6</Uint> 601 <Uint8>7</Uint8> 602 <Uint16>8</Uint16> 603 <Uint32>9</Uint32> 604 <Uint64>10</Uint64> 605 <Uintptr>11</Uintptr> 606 <Float>12.0</Float> 607 <Float32>13.0</Float32> 608 <Float64>14.0</Float64> 609 <String>15</String> 610 <PtrString>16</PtrString> 611 </allscalars>` 612 613 func TestAllScalars(t *testing.T) { 614 var a allScalars 615 err := Unmarshal([]byte(testScalarsInput), &a) 616 617 if err != nil { 618 t.Fatal(err) 619 } 620 if !reflect.DeepEqual(a, all) { 621 t.Errorf("have %+v want %+v", a, all) 622 } 623 } 624 625 type item struct { 626 FieldA string 627 } 628 629 func TestIssue68387(t *testing.T) { 630 data := `<item b=']]>'/>` 631 dec := NewDecoder(strings.NewReader(data)) 632 var tok1, tok2, tok3 Token 633 var err error 634 if tok1, err = dec.RawToken(); err != nil { 635 t.Fatalf("RawToken() failed: %v", err) 636 } 637 if tok2, err = dec.RawToken(); err != nil { 638 t.Fatalf("RawToken() failed: %v", err) 639 } 640 if tok3, err = dec.RawToken(); err != io.EOF || tok3 != nil { 641 t.Fatalf("Missed EOF") 642 } 643 s := StartElement{Name{"", "item"}, []Attr{Attr{Name{"","b"}, "]]>"}}} 644 if !reflect.DeepEqual(tok1.(StartElement), s) { 645 t.Error("Wrong start element") 646 } 647 e := EndElement{Name{"","item"}} 648 if tok2.(EndElement) != e { 649 t.Error("Wrong end element") 650 } 651 } 652 653 func TestIssue569(t *testing.T) { 654 data := `<item><FieldA>abcd</FieldA></item>` 655 var i item 656 err := Unmarshal([]byte(data), &i) 657 658 if err != nil || i.FieldA != "abcd" 
{ 659 t.Fatal("Expecting abcd") 660 } 661 } 662 663 func TestUnquotedAttrs(t *testing.T) { 664 data := "<tag attr=azAZ09:-_\t>" 665 d := NewDecoder(strings.NewReader(data)) 666 d.Strict = false 667 token, err := d.Token() 668 if _, ok := err.(*SyntaxError); ok { 669 t.Errorf("Unexpected error: %v", err) 670 } 671 if token.(StartElement).Name.Local != "tag" { 672 t.Errorf("Unexpected tag name: %v", token.(StartElement).Name.Local) 673 } 674 attr := token.(StartElement).Attr[0] 675 if attr.Value != "azAZ09:-_" { 676 t.Errorf("Unexpected attribute value: %v", attr.Value) 677 } 678 if attr.Name.Local != "attr" { 679 t.Errorf("Unexpected attribute name: %v", attr.Name.Local) 680 } 681 } 682 683 func TestValuelessAttrs(t *testing.T) { 684 tests := [][3]string{ 685 {"<p nowrap>", "p", "nowrap"}, 686 {"<p nowrap >", "p", "nowrap"}, 687 {"<input checked/>", "input", "checked"}, 688 {"<input checked />", "input", "checked"}, 689 } 690 for _, test := range tests { 691 d := NewDecoder(strings.NewReader(test[0])) 692 d.Strict = false 693 token, err := d.Token() 694 if _, ok := err.(*SyntaxError); ok { 695 t.Errorf("Unexpected error: %v", err) 696 } 697 if token.(StartElement).Name.Local != test[1] { 698 t.Errorf("Unexpected tag name: %v", token.(StartElement).Name.Local) 699 } 700 attr := token.(StartElement).Attr[0] 701 if attr.Value != test[2] { 702 t.Errorf("Unexpected attribute value: %v", attr.Value) 703 } 704 if attr.Name.Local != test[2] { 705 t.Errorf("Unexpected attribute name: %v", attr.Name.Local) 706 } 707 } 708 } 709 710 func TestCopyTokenCharData(t *testing.T) { 711 data := []byte("same data") 712 var tok1 Token = CharData(data) 713 tok2 := CopyToken(tok1) 714 if !reflect.DeepEqual(tok1, tok2) { 715 t.Error("CopyToken(CharData) != CharData") 716 } 717 data[1] = 'o' 718 if reflect.DeepEqual(tok1, tok2) { 719 t.Error("CopyToken(CharData) uses same buffer.") 720 } 721 } 722 723 func TestCopyTokenStartElement(t *testing.T) { 724 elt := StartElement{Name{"", "hello"}, 
[]Attr{{Name{"", "lang"}, "en"}}} 725 var tok1 Token = elt 726 tok2 := CopyToken(tok1) 727 if tok1.(StartElement).Attr[0].Value != "en" { 728 t.Error("CopyToken overwrote Attr[0]") 729 } 730 if !reflect.DeepEqual(tok1, tok2) { 731 t.Error("CopyToken(StartElement) != StartElement") 732 } 733 tok1.(StartElement).Attr[0] = Attr{Name{"", "lang"}, "de"} 734 if reflect.DeepEqual(tok1, tok2) { 735 t.Error("CopyToken(CharData) uses same buffer.") 736 } 737 } 738 739 func TestCopyTokenComment(t *testing.T) { 740 data := []byte("<!-- some comment -->") 741 var tok1 Token = Comment(data) 742 tok2 := CopyToken(tok1) 743 if !reflect.DeepEqual(tok1, tok2) { 744 t.Error("CopyToken(Comment) != Comment") 745 } 746 data[1] = 'o' 747 if reflect.DeepEqual(tok1, tok2) { 748 t.Error("CopyToken(Comment) uses same buffer.") 749 } 750 } 751 752 func TestSyntaxErrorLineNum(t *testing.T) { 753 testInput := "<P>Foo<P>\n\n<P>Bar</>\n" 754 d := NewDecoder(strings.NewReader(testInput)) 755 var err error 756 for _, err = d.Token(); err == nil; _, err = d.Token() { 757 } 758 synerr, ok := err.(*SyntaxError) 759 if !ok { 760 t.Error("Expected SyntaxError.") 761 } 762 if synerr.Line != 3 { 763 t.Error("SyntaxError didn't have correct line number.") 764 } 765 } 766 767 func TestTrailingRawToken(t *testing.T) { 768 input := `<FOO></FOO> ` 769 d := NewDecoder(strings.NewReader(input)) 770 var err error 771 for _, err = d.RawToken(); err == nil; _, err = d.RawToken() { 772 } 773 if err != io.EOF { 774 t.Fatalf("d.RawToken() = _, %v, want _, io.EOF", err) 775 } 776 } 777 778 func TestTrailingToken(t *testing.T) { 779 input := `<FOO></FOO> ` 780 d := NewDecoder(strings.NewReader(input)) 781 var err error 782 for _, err = d.Token(); err == nil; _, err = d.Token() { 783 } 784 if err != io.EOF { 785 t.Fatalf("d.Token() = _, %v, want _, io.EOF", err) 786 } 787 } 788 789 func TestEntityInsideCDATA(t *testing.T) { 790 input := `<test><![CDATA[ &val=foo ]]></test>` 791 d := NewDecoder(strings.NewReader(input)) 
792 var err error 793 for _, err = d.Token(); err == nil; _, err = d.Token() { 794 } 795 if err != io.EOF { 796 t.Fatalf("d.Token() = _, %v, want _, io.EOF", err) 797 } 798 } 799 800 var characterTests = []struct { 801 in string 802 err string 803 }{ 804 {"\x12<doc/>", "illegal character code U+0012"}, 805 {"<?xml version=\"1.0\"?>\x0b<doc/>", "illegal character code U+000B"}, 806 {"\xef\xbf\xbe<doc/>", "illegal character code U+FFFE"}, 807 {"<?xml version=\"1.0\"?><doc>\r\n<hiya/>\x07<toots/></doc>", "illegal character code U+0007"}, 808 {"<?xml version=\"1.0\"?><doc \x12='value'>what's up</doc>", "expected attribute name in element"}, 809 {"<doc>&abc\x01;</doc>", "invalid character entity &abc (no semicolon)"}, 810 {"<doc>&\x01;</doc>", "invalid character entity & (no semicolon)"}, 811 {"<doc>&\xef\xbf\xbe;</doc>", "invalid character entity &\uFFFE;"}, 812 {"<doc>&hello;</doc>", "invalid character entity &hello;"}, 813 } 814 815 func TestDisallowedCharacters(t *testing.T) { 816 817 for i, tt := range characterTests { 818 d := NewDecoder(strings.NewReader(tt.in)) 819 var err error 820 821 for err == nil { 822 _, err = d.Token() 823 } 824 synerr, ok := err.(*SyntaxError) 825 if !ok { 826 t.Fatalf("input %d d.Token() = _, %v, want _, *SyntaxError", i, err) 827 } 828 if synerr.Msg != tt.err { 829 t.Fatalf("input %d synerr.Msg wrong: want %q, got %q", i, tt.err, synerr.Msg) 830 } 831 } 832 } 833 834 func TestIsInCharacterRange(t *testing.T) { 835 invalid := []rune{ 836 utf8.MaxRune + 1, 837 0xD800, // surrogate min 838 0xDFFF, // surrogate max 839 -1, 840 } 841 for _, r := range invalid { 842 if isInCharacterRange(r) { 843 t.Errorf("rune %U considered valid", r) 844 } 845 } 846 } 847 848 var procInstTests = []struct { 849 input string 850 expect [2]string 851 }{ 852 {`version="1.0" encoding="utf-8"`, [2]string{"1.0", "utf-8"}}, 853 {`version="1.0" encoding='utf-8'`, [2]string{"1.0", "utf-8"}}, 854 {`version="1.0" encoding='utf-8' `, [2]string{"1.0", "utf-8"}}, 855 
{`version="1.0" encoding=utf-8`, [2]string{"1.0", ""}}, 856 {`encoding="FOO" `, [2]string{"", "FOO"}}, 857 {`version=2.0 version="1.0" encoding=utf-7 encoding='utf-8'`, [2]string{"1.0", "utf-8"}}, 858 {`version= encoding=`, [2]string{"", ""}}, 859 {`encoding="version=1.0"`, [2]string{"", "version=1.0"}}, 860 {``, [2]string{"", ""}}, 861 // TODO: what's the right approach to handle these nested cases? 862 {`encoding="version='1.0'"`, [2]string{"1.0", "version='1.0'"}}, 863 {`version="encoding='utf-8'"`, [2]string{"encoding='utf-8'", "utf-8"}}, 864 } 865 866 func TestProcInstEncoding(t *testing.T) { 867 for _, test := range procInstTests { 868 if got := procInst("version", test.input); got != test.expect[0] { 869 t.Errorf("procInst(version, %q) = %q; want %q", test.input, got, test.expect[0]) 870 } 871 if got := procInst("encoding", test.input); got != test.expect[1] { 872 t.Errorf("procInst(encoding, %q) = %q; want %q", test.input, got, test.expect[1]) 873 } 874 } 875 } 876 877 // Ensure that directives with comments include the complete 878 // text of any nested directives. 
879 880 var directivesWithCommentsInput = ` 881 <!DOCTYPE [<!-- a comment --><!ENTITY rdf "http://www.w3.org/1999/02/22-rdf-syntax-ns#">]> 882 <!DOCTYPE [<!ENTITY go "Golang"><!-- a comment-->]> 883 <!DOCTYPE <!-> <!> <!----> <!-->--> <!--->--> [<!ENTITY go "Golang"><!-- a comment-->]> 884 ` 885 886 var directivesWithCommentsTokens = []Token{ 887 CharData("\n"), 888 Directive(`DOCTYPE [ <!ENTITY rdf "http://www.w3.org/1999/02/22-rdf-syntax-ns#">]`), 889 CharData("\n"), 890 Directive(`DOCTYPE [<!ENTITY go "Golang"> ]`), 891 CharData("\n"), 892 Directive(`DOCTYPE <!-> <!> [<!ENTITY go "Golang"> ]`), 893 CharData("\n"), 894 } 895 896 func TestDirectivesWithComments(t *testing.T) { 897 d := NewDecoder(strings.NewReader(directivesWithCommentsInput)) 898 899 for i, want := range directivesWithCommentsTokens { 900 have, err := d.Token() 901 if err != nil { 902 t.Fatalf("token %d: unexpected error: %s", i, err) 903 } 904 if !reflect.DeepEqual(have, want) { 905 t.Errorf("token %d = %#v want %#v", i, have, want) 906 } 907 } 908 } 909 910 // Writer whose Write method always returns an error. 911 type errWriter struct{} 912 913 func (errWriter) Write(p []byte) (n int, err error) { return 0, fmt.Errorf("unwritable") } 914 915 func TestEscapeTextIOErrors(t *testing.T) { 916 expectErr := "unwritable" 917 err := EscapeText(errWriter{}, []byte{'A'}) 918 919 if err == nil || err.Error() != expectErr { 920 t.Errorf("have %v, want %v", err, expectErr) 921 } 922 } 923 924 func TestEscapeTextInvalidChar(t *testing.T) { 925 input := []byte("A \x00 terminated string.") 926 expected := "A \uFFFD terminated string." 
927 928 buff := new(strings.Builder) 929 if err := EscapeText(buff, input); err != nil { 930 t.Fatalf("have %v, want nil", err) 931 } 932 text := buff.String() 933 934 if text != expected { 935 t.Errorf("have %v, want %v", text, expected) 936 } 937 } 938 939 func TestIssue5880(t *testing.T) { 940 type T []byte 941 data, err := Marshal(T{192, 168, 0, 1}) 942 if err != nil { 943 t.Errorf("Marshal error: %v", err) 944 } 945 if !utf8.Valid(data) { 946 t.Errorf("Marshal generated invalid UTF-8: %x", data) 947 } 948 } 949 950 func TestIssue8535(t *testing.T) { 951 952 type ExampleConflict struct { 953 XMLName Name `xml:"example"` 954 Link string `xml:"link"` 955 AtomLink string `xml:"http://www.w3.org/2005/Atom link"` // Same name in a different name space 956 } 957 testCase := `<example> 958 <title>Example</title> 959 <link>http://example.com/default</link> <!-- not assigned --> 960 <link>http://example.com/home</link> <!-- not assigned --> 961 <ns:link xmlns:ns="http://www.w3.org/2005/Atom">http://example.com/ns</ns:link> 962 </example>` 963 964 var dest ExampleConflict 965 d := NewDecoder(strings.NewReader(testCase)) 966 if err := d.Decode(&dest); err != nil { 967 t.Fatal(err) 968 } 969 } 970 971 func TestEncodeXMLNS(t *testing.T) { 972 testCases := []struct { 973 f func() ([]byte, error) 974 want string 975 ok bool 976 }{ 977 {encodeXMLNS1, `<Test xmlns="http://example.com/ns"><Body>hello world</Body></Test>`, true}, 978 {encodeXMLNS2, `<Test><body xmlns="http://example.com/ns">hello world</body></Test>`, true}, 979 {encodeXMLNS3, `<Test xmlns="http://example.com/ns"><Body>hello world</Body></Test>`, true}, 980 {encodeXMLNS4, `<Test xmlns="http://example.com/ns"><Body>hello world</Body></Test>`, false}, 981 } 982 983 for i, tc := range testCases { 984 if b, err := tc.f(); err == nil { 985 if got, want := string(b), tc.want; got != want { 986 t.Errorf("%d: got %s, want %s \n", i, got, want) 987 } 988 } else { 989 t.Errorf("%d: marshal failed with %s", i, err) 990 } 
991 } 992 } 993 994 func encodeXMLNS1() ([]byte, error) { 995 996 type T struct { 997 XMLName Name `xml:"Test"` 998 Ns string `xml:"xmlns,attr"` 999 Body string 1000 } 1001 1002 s := &T{Ns: "http://example.com/ns", Body: "hello world"} 1003 return Marshal(s) 1004 } 1005 1006 func encodeXMLNS2() ([]byte, error) { 1007 1008 type Test struct { 1009 Body string `xml:"http://example.com/ns body"` 1010 } 1011 1012 s := &Test{Body: "hello world"} 1013 return Marshal(s) 1014 } 1015 1016 func encodeXMLNS3() ([]byte, error) { 1017 1018 type Test struct { 1019 XMLName Name `xml:"http://example.com/ns Test"` 1020 Body string 1021 } 1022 1023 //s := &Test{XMLName: Name{"http://example.com/ns",""}, Body: "hello world"} is unusable as the "-" is missing 1024 // as documentation states 1025 s := &Test{Body: "hello world"} 1026 return Marshal(s) 1027 } 1028 1029 func encodeXMLNS4() ([]byte, error) { 1030 1031 type Test struct { 1032 Ns string `xml:"xmlns,attr"` 1033 Body string 1034 } 1035 1036 s := &Test{Ns: "http://example.com/ns", Body: "hello world"} 1037 return Marshal(s) 1038 } 1039 1040 func TestIssue11405(t *testing.T) { 1041 testCases := []string{ 1042 "<root>", 1043 "<root><foo>", 1044 "<root><foo></foo>", 1045 } 1046 for _, tc := range testCases { 1047 d := NewDecoder(strings.NewReader(tc)) 1048 var err error 1049 for { 1050 _, err = d.Token() 1051 if err != nil { 1052 break 1053 } 1054 } 1055 if _, ok := err.(*SyntaxError); !ok { 1056 t.Errorf("%s: Token: Got error %v, want SyntaxError", tc, err) 1057 } 1058 } 1059 } 1060 1061 func TestIssue12417(t *testing.T) { 1062 testCases := []struct { 1063 s string 1064 ok bool 1065 }{ 1066 {`<?xml encoding="UtF-8" version="1.0"?><root/>`, true}, 1067 {`<?xml encoding="UTF-8" version="1.0"?><root/>`, true}, 1068 {`<?xml encoding="utf-8" version="1.0"?><root/>`, true}, 1069 {`<?xml encoding="uuu-9" version="1.0"?><root/>`, false}, 1070 } 1071 for _, tc := range testCases { 1072 d := NewDecoder(strings.NewReader(tc.s)) 1073 var err 
error
		for {
			_, err = d.Token()
			if err != nil {
				if err == io.EOF {
					err = nil
				}
				break
			}
		}
		if err != nil && tc.ok {
			t.Errorf("%q: Encoding charset: expected no error, got %s", tc.s, err)
			continue
		}
		if err == nil && !tc.ok {
			t.Errorf("%q: Encoding charset: expected error, got nil", tc.s)
		}
	}
}

// TestIssue7113 checks that a child element which resets the default
// namespace to the empty string keeps that empty namespace through an
// Unmarshal, Marshal, Unmarshal round trip, while the enclosing element keeps
// its own namespace and Marshal reproduces the input document exactly.
func TestIssue7113(t *testing.T) {
	type C struct {
		XMLName Name `xml:""` // Sets empty namespace
	}

	type D struct {
		XMLName Name `xml:"d"`
	}

	type A struct {
		XMLName Name `xml:""`
		C       C    `xml:""`
		D       D
	}

	var a A
	structSpace := "b"
	xmlTest := `<A xmlns="` + structSpace + `"><C xmlns=""></C><d></d></A>`
	t.Log(xmlTest)
	err := Unmarshal([]byte(xmlTest), &a)
	if err != nil {
		t.Fatal(err)
	}

	if a.XMLName.Space != structSpace {
		t.Errorf("overidding with empty namespace: unmarshaling, got %s, want %s\n", a.XMLName.Space, structSpace)
	}
	if len(a.C.XMLName.Space) != 0 {
		t.Fatalf("overidding with empty namespace: unmarshaling, got %s, want empty\n", a.C.XMLName.Space)
	}

	var b []byte
	b, err = Marshal(&a)
	if err != nil {
		t.Fatal(err)
	}
	// Re-check a after Marshal: marshaling must not have filled in C's namespace.
	if len(a.C.XMLName.Space) != 0 {
		t.Errorf("overidding with empty namespace: marshaling, got %s in C tag which should be empty\n", a.C.XMLName.Space)
	}
	if string(b) != xmlTest {
		t.Fatalf("overidding with empty namespace: marshaling, got %s, want %s\n", b, xmlTest)
	}

	// Decode the marshaled bytes into a fresh value and report what was
	// actually decoded (c), not the value from the first pass (a).
	var c A
	err = Unmarshal(b, &c)
	if err != nil {
		t.Fatalf("second Unmarshal failed: %s", err)
	}
	if c.XMLName.Space != structSpace {
		t.Errorf("overidding with empty namespace: after marshaling & unmarshaling, XML name space: got %s, want %s\n", c.XMLName.Space, structSpace)
	}
	if len(c.C.XMLName.Space) != 0 {
		t.Errorf("overidding with empty namespace: after marshaling & unmarshaling, got %s, want empty\n", c.C.XMLName.Space)
	}
}

// TestIssue20396 checks the error returned by Unmarshal for element names
// that contain an extra ':', '=' or '&' after the namespace prefix, and that
// well-formed prefixed names decode without error.
func TestIssue20396(t *testing.T) {

	var attrError = UnmarshalError("XML syntax error on line 1: expected attribute name in element")

	testCases := []struct {
		s       string
		wantErr error
	}{
		{`<a:te:st xmlns:a="abcd"/>`, // Issue 20396
			UnmarshalError("XML syntax error on line 1: expected element name after <")},
		{`<a:te=st xmlns:a="abcd"/>`, attrError},
		{`<a:te&st xmlns:a="abcd"/>`, attrError},
		{`<a:test xmlns:a="abcd"/>`, nil},
		{`<a:te:st xmlns:a="abcd">1</a:te:st>`,
			UnmarshalError("XML syntax error on line 1: expected element name after <")},
		{`<a:te=st xmlns:a="abcd">1</a:te=st>`, attrError},
		{`<a:te&st xmlns:a="abcd">1</a:te&st>`, attrError},
		{`<a:test xmlns:a="abcd">1</a:test>`, nil},
	}

	var dest string
	for _, tc := range testCases {
		got, want := Unmarshal([]byte(tc.s), &dest), tc.wantErr
		if got == want {
			continue
		}
		// The error values differ; they still match if both are non-nil and
		// carry the same message.
		switch {
		case got == nil:
			t.Errorf("%s: Unexpected success, want %v", tc.s, want)
		case want == nil:
			t.Errorf("%s: Unexpected error, got %v", tc.s, got)
		case got.Error() != want.Error():
			t.Errorf("%s: got %v, want %v", tc.s, got, want)
		}
	}
}

// TestIssue20685 checks that the decoder rejects a closing tag whose name or
// namespace prefix differs from the opening tag, and accepts a matching one.
func TestIssue20685(t *testing.T) {
	testCases := []struct {
		s  string
		ok bool
	}{
		{`<x:book xmlns:x="abcd" xmlns:y="abcd"><unclosetag>one</x:book>`, false},
		{`<x:book xmlns:x="abcd" xmlns:y="abcd">one</x:book>`, true},
		{`<x:book xmlns:x="abcd" xmlns:y="abcd">one</y:book>`, false},
		{`<x:book xmlns:y="abcd" xmlns:x="abcd">one</y:book>`, false},
		{`<x:book xmlns:x="abcd">one</y:book>`, false},
		{`<x:book>one</y:book>`, false},
		{`<xbook>one</ybook>`, false},
	}
	for _, tc := range testCases {
		d := NewDecoder(strings.NewReader(tc.s))
		var err error
		// Drain the decoder; a clean io.EOF counts as success.
		for {
			_, err = d.Token()
			if err != nil {
				if err == io.EOF {
					err = nil
				}
				break
			}
		}
		if err != nil && tc.ok {
			t.Errorf("%q: Closing tag with namespace : expected no error, got %s", tc.s, err)
			continue
		}
		if err == nil && !tc.ok {
			t.Errorf("%q: Closing tag with namespace : expected error, got nil", tc.s)
		}
	}
}

// tokenMap returns a function that wraps a TokenReader so that every token it
// yields is first passed through mapping.
func tokenMap(mapping func(t Token) Token) func(TokenReader) TokenReader {
	return func(src TokenReader) TokenReader {
		return mapper{
			t: src,
			f: mapping,
		}
	}
}

// mapper is a TokenReader that applies f to each token read from t.
type mapper struct {
	t TokenReader
	f func(Token) Token
}

// Token reads the next token from the wrapped reader and returns it after
// applying the mapping function. If the wrapped reader reports an error, the
// error is returned with a nil token and the mapping function is not called.
func (m mapper) Token() (Token, error) {
	tok, err := m.t.Token()
	if err != nil {
		return nil, err
	}
	return m.f(tok), nil
}

// TestNewTokenDecoderIdempotent checks that NewTokenDecoder returns the very
// same *Decoder when the TokenReader it is given is already a *Decoder.
func TestNewTokenDecoderIdempotent(t *testing.T) {
	d := NewDecoder(strings.NewReader(`<br>`))
	d2 := NewTokenDecoder(d)
	if d != d2 {
		t.Error("NewTokenDecoder did not detect underlying Decoder")
	}
}

// TestWrapDecoder checks that a Decoder built on top of a token-rewriting
// TokenReader decodes the rewritten stream: <quote> elements are renamed to
// <blocking> before Decode sees them.
func TestWrapDecoder(t *testing.T) {
	d := NewDecoder(strings.NewReader(`<quote>[Re-enter Clown with a letter, and FABIAN]</quote>`))
	m := tokenMap(func(t Token) Token {
		switch tok := t.(type) {
		case StartElement:
			if tok.Name.Local == "quote" {
				tok.Name.Local = "blocking"
				return tok
			}
		case EndElement:
			if tok.Name.Local == "quote" {
				tok.Name.Local = "blocking"
				return tok
			}
		}
		return t
	})

	d = NewTokenDecoder(m(d))

	o := struct {
		XMLName  Name   `xml:"blocking"`
		Chardata string `xml:",chardata"`
	}{}

	if err := d.Decode(&o); err != nil {
		t.Fatal("Got unexpected error while decoding:", err)
	}

	if o.Chardata != "[Re-enter Clown with a letter, and FABIAN]" {
		t.Fatalf("Got unexpected chardata: `%s`\n", o.Chardata)
	}
}

// tokReader is a TokenReader that never ends: every call yields an empty
// StartElement.
type tokReader struct{}

// Token always returns a zero-valued StartElement and a nil error.
func (tokReader) Token() (Token, error) {
	return StartElement{}, nil
}

// Failure is an unmarshaler whose UnmarshalXML consumes nothing.
type Failure struct{}

// UnmarshalXML returns nil without reading any tokens from the decoder.
func (Failure) UnmarshalXML(*Decoder, StartElement) error {
	return nil
}

// TestTokenUnmarshaler checks that decoding into a custom unmarshaler through
// a Decoder created by NewTokenDecoder does not panic.
func TestTokenUnmarshaler(t *testing.T) {
	defer func() {
		if r := recover(); r != nil {
			t.Error("Unexpected panic using custom token unmarshaler")
		}
	}()

	d := NewTokenDecoder(tokReader{})
	// The returned error is deliberately ignored: this test fails only if
	// Decode panics.
	_ = d.Decode(&Failure{})
}

// testRoundTrip decodes input into tokens, re-encodes every token with an
// Encoder, decodes the encoder's output again and fails the test unless the
// second token sequence is deeply equal to the first.
func testRoundTrip(t *testing.T, input string) {
	d := NewDecoder(strings.NewReader(input))
	var tokens []Token
	var buf bytes.Buffer
	e := NewEncoder(&buf)
	for {
		tok, err := d.Token()
		if err == io.EOF {
			break
		}
		if err != nil {
			t.Fatalf("invalid input: %v", err)
		}
		if err := e.EncodeToken(tok); err != nil {
			t.Fatalf("failed to re-encode input: %v", err)
		}
		// Keep a copy of each token for the later comparison.
		tokens = append(tokens, CopyToken(tok))
	}
	if err := e.Flush(); err != nil {
		t.Fatal(err)
	}

	d = NewDecoder(&buf)
	for {
		tok, err := d.Token()
		if err == io.EOF {
			break
		}
		if err != nil {
			t.Fatalf("failed to decode output: %v", err)
		}
		if len(tokens) == 0 {
			t.Fatalf("unexpected token: %#v", tok)
		}
		a, b := tokens[0], tok
		if !reflect.DeepEqual(a, b) {
			t.Fatalf("token mismatch: %#v vs %#v", a, b)
		}
		tokens = tokens[1:]
	}
	if len(tokens) > 0 {
		t.Fatalf("lost tokens: %#v", tokens)
	}
}

// TestRoundTrip runs testRoundTrip over a set of named inputs, one subtest
// per input.
func TestRoundTrip(t *testing.T) {
	tests := map[string]string{
		"trailing colon":         `<foo abc:="x"></foo>`,
		"comments in directives": `<!ENTITY x<!<!-- c1 [ " -->--x --> > <e></e> <!DOCTYPE xxx [ x<!-- c2 " -->--x ]>`,
	}
	for name, input := range tests {
		t.Run(name, func(t *testing.T) { testRoundTrip(t, input) })
	}
}

// TestParseErrors tokenizes each source document to completion and checks
// that the terminating error contains the expected substring. An empty
// expected string means the document must end with a plain io.EOF.
func TestParseErrors(t *testing.T) {
	withDefaultHeader := func(s string) string {
		return `<?xml version="1.0" encoding="UTF-8"?>` + s
	}
	tests := []struct {
		src string
		err string
	}{
		{withDefaultHeader(`</foo>`), `unexpected end element </foo>`},
		{withDefaultHeader(`<x:foo></y:foo>`), `element <foo> in space x closed by </foo> in space y`},
		{withDefaultHeader(`<? not ok ?>`), `expected target name after <?`},
		{withDefaultHeader(`<!- not ok -->`), `invalid sequence <!- not part of <!--`},
		{withDefaultHeader(`<!-? not ok -->`), `invalid sequence <!- not part of <!--`},
		{withDefaultHeader(`<![not ok]>`), `invalid <![ sequence`},
		{withDefaultHeader(`<zzz:foo xmlns:zzz="http://example.com"><bar>baz</bar></foo>`),
			`element <foo> in space zzz closed by </foo> in space ""`},
		{withDefaultHeader("\xf1"), `invalid UTF-8`},

		// Header-related errors.
		{`<?xml version="1.1" encoding="UTF-8"?>`, `unsupported version "1.1"; only version 1.0 is supported`},

		// Cases below are for "no errors".
		{withDefaultHeader(`<?ok?>`), ``},
		{withDefaultHeader(`<?ok version="ok"?>`), ``},
	}

	for _, test := range tests {
		d := NewDecoder(strings.NewReader(test.src))
		var err error
		// Read tokens until the decoder stops, either at EOF or on an error.
		for {
			_, err = d.Token()
			if err != nil {
				break
			}
		}
		if test.err == "" {
			if err != io.EOF {
				t.Errorf("parse %s: have %q error, expected none", test.src, err)
			}
			continue
		}
		// Inv: err != nil
		if err == io.EOF {
			t.Errorf("parse %s: unexpected EOF", test.src)
			continue
		}
		if !strings.Contains(err.Error(), test.err) {
			t.Errorf("parse %s: can't find %q error substring\nerror: %q", test.src, test.err, err)
		}
	}
}

// testInputHTMLAutoClose is the document shared by BenchmarkHTMLAutoClose and
// TestHTMLAutoClose. It mixes <br> elements in several letter cases, written
// unclosed, self-closed and explicitly closed.
const testInputHTMLAutoClose = `<?xml version="1.0" encoding="UTF-8"?>
<br>
<br/><br/>
<br><br>
<br></br>
<BR>
<BR/><BR/>
<Br></Br>
<BR><span id="test">abc</span><br/><br/>`

// BenchmarkHTMLAutoClose measures tokenizing testInputHTMLAutoClose with a
// non-strict decoder configured with HTMLAutoClose and HTMLEntity.
func BenchmarkHTMLAutoClose(b *testing.B) {
	b.RunParallel(func(p *testing.PB) {
		for p.Next() {
			d := NewDecoder(strings.NewReader(testInputHTMLAutoClose))
			d.Strict = false
			d.AutoClose = HTMLAutoClose
			d.Entity = HTMLEntity
			for {
				_, err := d.Token()
				if err != nil {
					if err == io.EOF {
						break
					}
					// RunParallel runs this body on worker goroutines, and
					// Fatalf (FailNow) may only be called from the goroutine
					// running the benchmark function. Record the failure with
					// Errorf and stop this worker instead.
					b.Errorf("unexpected error: %v", err)
					return
				}
			}
		}
	})
}

// TestHTMLAutoClose checks the exact token stream produced for
// testInputHTMLAutoClose by a non-strict decoder using HTMLAutoClose: every
// <br> variant must yield a StartElement immediately followed by its
// EndElement, with the original letter case preserved.
func TestHTMLAutoClose(t *testing.T) {
	wantTokens := []Token{
		ProcInst{"xml", []byte(`version="1.0" encoding="UTF-8"`)},
		CharData("\n"),
		StartElement{Name{"", "br"}, []Attr{}},
		EndElement{Name{"", "br"}},
		CharData("\n"),
		StartElement{Name{"", "br"}, []Attr{}},
		EndElement{Name{"", "br"}},
		StartElement{Name{"", "br"}, []Attr{}},
		EndElement{Name{"", "br"}},
		CharData("\n"),
		StartElement{Name{"", "br"}, []Attr{}},
		EndElement{Name{"", "br"}},
		StartElement{Name{"", "br"}, []Attr{}},
		EndElement{Name{"", "br"}},
		CharData("\n"),
		StartElement{Name{"", "br"}, []Attr{}},
		EndElement{Name{"", "br"}},
		CharData("\n"),
		StartElement{Name{"", "BR"}, []Attr{}},
		EndElement{Name{"", "BR"}},
		CharData("\n"),
		StartElement{Name{"", "BR"}, []Attr{}},
		EndElement{Name{"", "BR"}},
		StartElement{Name{"", "BR"}, []Attr{}},
		EndElement{Name{"", "BR"}},
		CharData("\n"),
		StartElement{Name{"", "Br"}, []Attr{}},
		EndElement{Name{"", "Br"}},
		CharData("\n"),
		StartElement{Name{"", "BR"}, []Attr{}},
		EndElement{Name{"", "BR"}},
		StartElement{Name{"", "span"}, []Attr{{Name: Name{"", "id"}, Value: "test"}}},
		CharData("abc"),
		EndElement{Name{"", "span"}},
		StartElement{Name{"", "br"}, []Attr{}},
		EndElement{Name{"", "br"}},
		StartElement{Name{"", "br"}, []Attr{}},
		EndElement{Name{"", "br"}},
	}

	d := NewDecoder(strings.NewReader(testInputHTMLAutoClose))
	d.Strict = false
	d.AutoClose = HTMLAutoClose
	d.Entity = HTMLEntity
	var haveTokens []Token
	for {
		tok, err := d.Token()
		if err != nil {
			if err == io.EOF {
				break
			}
			t.Fatalf("unexpected error: %v", err)
		}
		// Copy each token before storing it for the comparison below.
		haveTokens = append(haveTokens, CopyToken(tok))
	}
	if len(haveTokens) != len(wantTokens) {
		t.Errorf("tokens count mismatch: have %d, want %d", len(haveTokens), len(wantTokens))
	}
	// Compare position by position so that every mismatch is reported, not
	// just the first.
	for i, want := range wantTokens {
		if i >= len(haveTokens) {
			t.Errorf("token[%d] expected %#v, have no token", i, want)
			continue
		}
		have := haveTokens[i]
		if !reflect.DeepEqual(have, want) {
			t.Errorf("token[%d] mismatch:\nhave: %#v\nwant: %#v", i, have, want)
		}
	}
}