github.com/mtsmfm/go/src@v0.0.0-20221020090648-44bdcb9f8fde/encoding/xml/xml_test.go (about) 1 // Copyright 2009 The Go Authors. All rights reserved. 2 // Use of this source code is governed by a BSD-style 3 // license that can be found in the LICENSE file. 4 5 package xml 6 7 import ( 8 "bytes" 9 "fmt" 10 "io" 11 "reflect" 12 "strings" 13 "testing" 14 "unicode/utf8" 15 ) 16 17 type toks struct { 18 earlyEOF bool 19 t []Token 20 } 21 22 func (t *toks) Token() (Token, error) { 23 if len(t.t) == 0 { 24 return nil, io.EOF 25 } 26 var tok Token 27 tok, t.t = t.t[0], t.t[1:] 28 if t.earlyEOF && len(t.t) == 0 { 29 return tok, io.EOF 30 } 31 return tok, nil 32 } 33 34 func TestDecodeEOF(t *testing.T) { 35 start := StartElement{Name: Name{Local: "test"}} 36 tests := []struct { 37 name string 38 tokens []Token 39 ok bool 40 }{ 41 { 42 name: "OK", 43 tokens: []Token{ 44 start, 45 start.End(), 46 }, 47 ok: true, 48 }, 49 { 50 name: "Malformed", 51 tokens: []Token{ 52 start, 53 StartElement{Name: Name{Local: "bad"}}, 54 start.End(), 55 }, 56 ok: false, 57 }, 58 } 59 for _, tc := range tests { 60 for _, eof := range []bool{true, false} { 61 name := fmt.Sprintf("%s/earlyEOF=%v", tc.name, eof) 62 t.Run(name, func(t *testing.T) { 63 d := NewTokenDecoder(&toks{ 64 earlyEOF: eof, 65 t: tc.tokens, 66 }) 67 err := d.Decode(&struct { 68 XMLName Name `xml:"test"` 69 }{}) 70 if tc.ok && err != nil { 71 t.Fatalf("d.Decode: expected nil error, got %v", err) 72 } 73 if _, ok := err.(*SyntaxError); !tc.ok && !ok { 74 t.Errorf("d.Decode: expected syntax error, got %v", err) 75 } 76 }) 77 } 78 } 79 } 80 81 type toksNil struct { 82 returnEOF bool 83 t []Token 84 } 85 86 func (t *toksNil) Token() (Token, error) { 87 if len(t.t) == 0 { 88 if !t.returnEOF { 89 // Return nil, nil before returning an EOF. It's legal, but 90 // discouraged. 91 t.returnEOF = true 92 return nil, nil 93 } 94 return nil, io.EOF 95 } 96 var tok Token 97 tok, t.t = t.t[0], t.t[1:] 98 return tok, nil 99 } 100 101 func TestDecodeNilToken(t *testing.T) { 102 for _, strict := range []bool{true, false} { 103 name := fmt.Sprintf("Strict=%v", strict) 104 t.Run(name, func(t *testing.T) { 105 start := StartElement{Name: Name{Local: "test"}} 106 bad := StartElement{Name: Name{Local: "bad"}} 107 d := NewTokenDecoder(&toksNil{ 108 // Malformed 109 t: []Token{start, bad, start.End()}, 110 }) 111 d.Strict = strict 112 err := d.Decode(&struct { 113 XMLName Name `xml:"test"` 114 }{}) 115 if _, ok := err.(*SyntaxError); !ok { 116 t.Errorf("d.Decode: expected syntax error, got %v", err) 117 } 118 }) 119 } 120 } 121 122 const testInput = ` 123 <?xml version="1.0" encoding="UTF-8"?> 124 <!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" 125 "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd"> 126 <body xmlns:foo="ns1" xmlns="ns2" xmlns:tag="ns3" ` + 127 "\r\n\t" + ` > 128 <hello lang="en">World <>'" 白鵬翔</hello> 129 <query>&何; &is-it;</query> 130 <goodbye /> 131 <outer foo:attr="value" xmlns:tag="ns4"> 132 <inner/> 133 </outer> 134 <tag:name> 135 <![CDATA[Some text here.]]> 136 </tag:name> 137 </body><!-- missing final newline -->` 138 139 var testEntity = map[string]string{"何": "What", "is-it": "is it?"} 140 141 var rawTokens = []Token{ 142 CharData("\n"), 143 ProcInst{"xml", []byte(`version="1.0" encoding="UTF-8"`)}, 144 CharData("\n"), 145 Directive(`DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" 146 "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd"`), 147 CharData("\n"), 148 StartElement{Name{"", "body"}, []Attr{{Name{"xmlns", "foo"}, "ns1"}, {Name{"", "xmlns"}, "ns2"}, {Name{"xmlns", "tag"}, "ns3"}}}, 149 CharData("\n "), 150 StartElement{Name{"", "hello"}, []Attr{{Name{"", "lang"}, "en"}}}, 151 CharData("World <>'\" 白鵬翔"), 152 EndElement{Name{"", "hello"}}, 153 CharData("\n "), 154 StartElement{Name{"", "query"}, []Attr{}}, 155 CharData("What is it?"), 156 EndElement{Name{"", "query"}}, 157 CharData("\n "), 158 StartElement{Name{"", "goodbye"}, []Attr{}}, 159 EndElement{Name{"", "goodbye"}}, 160 CharData("\n "), 161 StartElement{Name{"", "outer"}, []Attr{{Name{"foo", "attr"}, "value"}, {Name{"xmlns", "tag"}, "ns4"}}}, 162 CharData("\n "), 163 StartElement{Name{"", "inner"}, []Attr{}}, 164 EndElement{Name{"", "inner"}}, 165 CharData("\n "), 166 EndElement{Name{"", "outer"}}, 167 CharData("\n "), 168 StartElement{Name{"tag", "name"}, []Attr{}}, 169 CharData("\n "), 170 CharData("Some text here."), 171 CharData("\n "), 172 EndElement{Name{"tag", "name"}}, 173 CharData("\n"), 174 EndElement{Name{"", "body"}}, 175 Comment(" missing final newline "), 176 } 177 178 var cookedTokens = []Token{ 179 CharData("\n"), 180 ProcInst{"xml", []byte(`version="1.0" encoding="UTF-8"`)}, 181 CharData("\n"), 182 Directive(`DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" 183 "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd"`), 184 CharData("\n"), 185 StartElement{Name{"ns2", "body"}, []Attr{{Name{"xmlns", "foo"}, "ns1"}, {Name{"", "xmlns"}, "ns2"}, {Name{"xmlns", "tag"}, "ns3"}}}, 186 CharData("\n "), 187 StartElement{Name{"ns2", "hello"}, []Attr{{Name{"", "lang"}, "en"}}}, 188 CharData("World <>'\" 白鵬翔"), 189 EndElement{Name{"ns2", "hello"}}, 190 CharData("\n "), 191 StartElement{Name{"ns2", "query"}, []Attr{}}, 192 CharData("What is it?"), 193 EndElement{Name{"ns2", "query"}}, 194 CharData("\n "), 195 StartElement{Name{"ns2", "goodbye"}, []Attr{}}, 196 EndElement{Name{"ns2", "goodbye"}}, 197 CharData("\n "), 198 StartElement{Name{"ns2", "outer"}, []Attr{{Name{"ns1", "attr"}, "value"}, {Name{"xmlns", "tag"}, "ns4"}}}, 199 CharData("\n "), 200 StartElement{Name{"ns2", "inner"}, []Attr{}}, 201 EndElement{Name{"ns2", "inner"}}, 202 CharData("\n "), 203 EndElement{Name{"ns2", "outer"}}, 204 CharData("\n "), 205 StartElement{Name{"ns3", "name"}, []Attr{}}, 206 CharData("\n "), 207 CharData("Some text here."), 208 CharData("\n "), 209 EndElement{Name{"ns3", "name"}}, 210 CharData("\n"), 211 EndElement{Name{"ns2", "body"}}, 212 Comment(" missing final newline "), 213 } 214 215 const testInputAltEncoding = ` 216 <?xml version="1.0" encoding="x-testing-uppercase"?> 217 <TAG>VALUE</TAG>` 218 219 var rawTokensAltEncoding = []Token{ 220 CharData("\n"), 221 ProcInst{"xml", []byte(`version="1.0" encoding="x-testing-uppercase"`)}, 222 CharData("\n"), 223 StartElement{Name{"", "tag"}, []Attr{}}, 224 CharData("value"), 225 EndElement{Name{"", "tag"}}, 226 } 227 228 var xmlInput = []string{ 229 // unexpected EOF cases 230 "<", 231 "<t", 232 "<t ", 233 "<t/", 234 "<!", 235 "<!-", 236 "<!--", 237 "<!--c-", 238 "<!--c--", 239 "<!d", 240 "<t></", 241 "<t></t", 242 "<?", 243 "<?p", 244 "<t a", 245 "<t a=", 246 "<t a='", 247 "<t a=''", 248 "<t/><![", 249 "<t/><![C", 250 "<t/><![CDATA[d", 251 "<t/><![CDATA[d]", 252 "<t/><![CDATA[d]]", 253 254 // other Syntax errors 255 "<>", 256 "<t/a", 257 "<0 />", 258 "<?0 >", 259 // "<!0 >", // let the Token() caller handle 260 "</0>", 261 "<t 0=''>", 262 "<t a='&'>", 263 "<t a='<'>", 264 "<t> c;</t>", 265 "<t a>", 266 "<t a=>", 267 "<t a=v>", 268 // "<![CDATA[d]]>", // let the Token() caller handle 269 "<t></e>", 270 "<t></>", 271 "<t></t!", 272 "<t>cdata]]></t>", 273 } 274 275 func TestRawToken(t *testing.T) { 276 d := NewDecoder(strings.NewReader(testInput)) 277 d.Entity = testEntity 278 testRawToken(t, d, testInput, rawTokens) 279 } 280 281 const nonStrictInput = ` 282 <tag>non&entity</tag> 283 <tag>&unknown;entity</tag> 284 <tag>{</tag> 285 <tag>&#zzz;</tag> 286 <tag>&なまえ3;</tag> 287 <tag><-gt;</tag> 288 <tag>&;</tag> 289 <tag>&0a;</tag> 290 ` 291 292 var nonStrictTokens = []Token{ 293 CharData("\n"), 294 StartElement{Name{"", "tag"}, []Attr{}}, 295 CharData("non&entity"), 296 EndElement{Name{"", "tag"}}, 297 CharData("\n"), 298 StartElement{Name{"", "tag"}, []Attr{}}, 299 CharData("&unknown;entity"), 300 EndElement{Name{"", "tag"}}, 301 CharData("\n"), 302 StartElement{Name{"", "tag"}, []Attr{}}, 303 CharData("{"), 304 EndElement{Name{"", "tag"}}, 305 CharData("\n"), 306 StartElement{Name{"", "tag"}, []Attr{}}, 307 CharData("&#zzz;"), 308 EndElement{Name{"", "tag"}}, 309 CharData("\n"), 310 StartElement{Name{"", "tag"}, []Attr{}}, 311 CharData("&なまえ3;"), 312 EndElement{Name{"", "tag"}}, 313 CharData("\n"), 314 StartElement{Name{"", "tag"}, []Attr{}}, 315 CharData("<-gt;"), 316 EndElement{Name{"", "tag"}}, 317 CharData("\n"), 318 StartElement{Name{"", "tag"}, []Attr{}}, 319 CharData("&;"), 320 EndElement{Name{"", "tag"}}, 321 CharData("\n"), 322 StartElement{Name{"", "tag"}, []Attr{}}, 323 CharData("&0a;"), 324 EndElement{Name{"", "tag"}}, 325 CharData("\n"), 326 } 327 328 func TestNonStrictRawToken(t *testing.T) { 329 d := NewDecoder(strings.NewReader(nonStrictInput)) 330 d.Strict = false 331 testRawToken(t, d, nonStrictInput, nonStrictTokens) 332 } 333 334 type downCaser struct { 335 t *testing.T 336 r io.ByteReader 337 } 338 339 func (d *downCaser) ReadByte() (c byte, err error) { 340 c, err = d.r.ReadByte() 341 if c >= 'A' && c <= 'Z' { 342 c += 'a' - 'A' 343 } 344 return 345 } 346 347 func (d *downCaser) Read(p []byte) (int, error) { 348 d.t.Fatalf("unexpected Read call on downCaser reader") 349 panic("unreachable") 350 } 351 352 func TestRawTokenAltEncoding(t *testing.T) { 353 d := NewDecoder(strings.NewReader(testInputAltEncoding)) 354 d.CharsetReader = func(charset string, input io.Reader) (io.Reader, error) { 355 if charset != "x-testing-uppercase" { 356 t.Fatalf("unexpected charset %q", charset) 357 } 358 return &downCaser{t, input.(io.ByteReader)}, nil 359 } 360 testRawToken(t, d, testInputAltEncoding, rawTokensAltEncoding) 361 } 362 363 func TestRawTokenAltEncodingNoConverter(t *testing.T) { 364 d := NewDecoder(strings.NewReader(testInputAltEncoding)) 365 token, err := d.RawToken() 366 if token == nil { 367 t.Fatalf("expected a token on first RawToken call") 368 } 369 if err != nil { 370 t.Fatal(err) 371 } 372 token, err = d.RawToken() 373 if token != nil { 374 t.Errorf("expected a nil token; got %#v", token) 375 } 376 if err == nil { 377 t.Fatalf("expected an error on second RawToken call") 378 } 379 const encoding = "x-testing-uppercase" 380 if !strings.Contains(err.Error(), encoding) { 381 t.Errorf("expected error to contain %q; got error: %v", 382 encoding, err) 383 } 384 } 385 386 func testRawToken(t *testing.T, d *Decoder, raw string, rawTokens []Token) { 387 lastEnd := int64(0) 388 for i, want := range rawTokens { 389 start := d.InputOffset() 390 have, err := d.RawToken() 391 end := d.InputOffset() 392 if err != nil { 393 t.Fatalf("token %d: unexpected error: %s", i, err) 394 } 395 if !reflect.DeepEqual(have, want) { 396 var shave, swant string 397 if _, ok := have.(CharData); ok { 398 shave = fmt.Sprintf("CharData(%q)", have) 399 } else { 400 shave = fmt.Sprintf("%#v", have) 401 } 402 if _, ok := want.(CharData); ok { 403 swant = fmt.Sprintf("CharData(%q)", want) 404 } else { 405 swant = fmt.Sprintf("%#v", want) 406 } 407 t.Errorf("token %d = %s, want %s", i, shave, swant) 408 } 409 410 // Check that InputOffset returned actual token. 411 switch { 412 case start < lastEnd: 413 t.Errorf("token %d: position [%d,%d) for %T is before previous token", i, start, end, have) 414 case start >= end: 415 // Special case: EndElement can be synthesized. 416 if start == end && end == lastEnd { 417 break 418 } 419 t.Errorf("token %d: position [%d,%d) for %T is empty", i, start, end, have) 420 case end > int64(len(raw)): 421 t.Errorf("token %d: position [%d,%d) for %T extends beyond input", i, start, end, have) 422 default: 423 text := raw[start:end] 424 if strings.ContainsAny(text, "<>") && (!strings.HasPrefix(text, "<") || !strings.HasSuffix(text, ">")) { 425 t.Errorf("token %d: misaligned raw token %#q for %T", i, text, have) 426 } 427 } 428 lastEnd = end 429 } 430 } 431 432 // Ensure that directives (specifically !DOCTYPE) include the complete 433 // text of any nested directives, noting that < and > do not change 434 // nesting depth if they are in single or double quotes. 435 436 var nestedDirectivesInput = ` 437 <!DOCTYPE [<!ENTITY rdf "http://www.w3.org/1999/02/22-rdf-syntax-ns#">]> 438 <!DOCTYPE [<!ENTITY xlt ">">]> 439 <!DOCTYPE [<!ENTITY xlt "<">]> 440 <!DOCTYPE [<!ENTITY xlt '>'>]> 441 <!DOCTYPE [<!ENTITY xlt '<'>]> 442 <!DOCTYPE [<!ENTITY xlt '">'>]> 443 <!DOCTYPE [<!ENTITY xlt "'<">]> 444 ` 445 446 var nestedDirectivesTokens = []Token{ 447 CharData("\n"), 448 Directive(`DOCTYPE [<!ENTITY rdf "http://www.w3.org/1999/02/22-rdf-syntax-ns#">]`), 449 CharData("\n"), 450 Directive(`DOCTYPE [<!ENTITY xlt ">">]`), 451 CharData("\n"), 452 Directive(`DOCTYPE [<!ENTITY xlt "<">]`), 453 CharData("\n"), 454 Directive(`DOCTYPE [<!ENTITY xlt '>'>]`), 455 CharData("\n"), 456 Directive(`DOCTYPE [<!ENTITY xlt '<'>]`), 457 CharData("\n"), 458 Directive(`DOCTYPE [<!ENTITY xlt '">'>]`), 459 CharData("\n"), 460 Directive(`DOCTYPE [<!ENTITY xlt "'<">]`), 461 CharData("\n"), 462 } 463 464 func TestNestedDirectives(t *testing.T) { 465 d := NewDecoder(strings.NewReader(nestedDirectivesInput)) 466 467 for i, want := range nestedDirectivesTokens { 468 have, err := d.Token() 469 if err != nil { 470 t.Fatalf("token %d: unexpected error: %s", i, err) 471 } 472 if !reflect.DeepEqual(have, want) { 473 t.Errorf("token %d = %#v want %#v", i, have, want) 474 } 475 } 476 } 477 478 func TestToken(t *testing.T) { 479 d := NewDecoder(strings.NewReader(testInput)) 480 d.Entity = testEntity 481 482 for i, want := range cookedTokens { 483 have, err := d.Token() 484 if err != nil { 485 t.Fatalf("token %d: unexpected error: %s", i, err) 486 } 487 if !reflect.DeepEqual(have, want) { 488 t.Errorf("token %d = %#v want %#v", i, have, want) 489 } 490 } 491 } 492 493 func TestSyntax(t *testing.T) { 494 for i := range xmlInput { 495 d := NewDecoder(strings.NewReader(xmlInput[i])) 496 var err error 497 for _, err = d.Token(); err == nil; _, err = d.Token() { 498 } 499 if _, ok := err.(*SyntaxError); !ok { 500 t.Fatalf(`xmlInput "%s": expected SyntaxError not received`, xmlInput[i]) 501 } 502 } 503 } 504 505 func TestInputLinePos(t *testing.T) { 506 testInput := `<root> 507 <?pi 508 ?> <elt 509 att 510 = 511 "val"> 512 <![CDATA[ 513 ]]><!-- 514 515 --></elt> 516 </root>` 517 linePos := [][]int{ 518 {1, 7}, 519 {2, 1}, 520 {3, 4}, 521 {3, 6}, 522 {6, 7}, 523 {7, 1}, 524 {8, 4}, 525 {10, 4}, 526 {10, 10}, 527 {11, 1}, 528 {11, 8}, 529 } 530 dec := NewDecoder(strings.NewReader(testInput)) 531 for _, want := range linePos { 532 if _, err := dec.Token(); err != nil { 533 t.Errorf("Unexpected error: %v", err) 534 continue 535 } 536 537 gotLine, gotCol := dec.InputPos() 538 if gotLine != want[0] || gotCol != want[1] { 539 t.Errorf("dec.InputPos() = %d,%d, want %d,%d", gotLine, gotCol, want[0], want[1]) 540 } 541 } 542 } 543 544 type allScalars struct { 545 True1 bool 546 True2 bool 547 False1 bool 548 False2 bool 549 Int int 550 Int8 int8 551 Int16 int16 552 Int32 int32 553 Int64 int64 554 Uint int 555 Uint8 uint8 556 Uint16 uint16 557 Uint32 uint32 558 Uint64 uint64 559 Uintptr uintptr 560 Float32 float32 561 Float64 float64 562 String string 563 PtrString *string 564 } 565 566 var all = allScalars{ 567 True1: true, 568 True2: true, 569 False1: false, 570 False2: false, 571 Int: 1, 572 Int8: -2, 573 Int16: 3, 574 Int32: -4, 575 Int64: 5, 576 Uint: 6, 577 Uint8: 7, 578 Uint16: 8, 579 Uint32: 9, 580 Uint64: 10, 581 Uintptr: 11, 582 Float32: 13.0, 583 Float64: 14.0, 584 String: "15", 585 PtrString: &sixteen, 586 } 587 588 var sixteen = "16" 589 590 const testScalarsInput = `<allscalars> 591 <True1>true</True1> 592 <True2>1</True2> 593 <False1>false</False1> 594 <False2>0</False2> 595 <Int>1</Int> 596 <Int8>-2</Int8> 597 <Int16>3</Int16> 598 <Int32>-4</Int32> 599 <Int64>5</Int64> 600 <Uint>6</Uint> 601 <Uint8>7</Uint8> 602 <Uint16>8</Uint16> 603 <Uint32>9</Uint32> 604 <Uint64>10</Uint64> 605 <Uintptr>11</Uintptr> 606 <Float>12.0</Float> 607 <Float32>13.0</Float32> 608 <Float64>14.0</Float64> 609 <String>15</String> 610 <PtrString>16</PtrString> 611 </allscalars>` 612 613 func TestAllScalars(t *testing.T) { 614 var a allScalars 615 err := Unmarshal([]byte(testScalarsInput), &a) 616 617 if err != nil { 618 t.Fatal(err) 619 } 620 if !reflect.DeepEqual(a, all) { 621 t.Errorf("have %+v want %+v", a, all) 622 } 623 } 624 625 type item struct { 626 FieldA string 627 } 628 629 func TestIssue569(t *testing.T) { 630 data := `<item><FieldA>abcd</FieldA></item>` 631 var i item 632 err := Unmarshal([]byte(data), &i) 633 634 if err != nil || i.FieldA != "abcd" { 635 t.Fatal("Expecting abcd") 636 } 637 } 638 639 func TestUnquotedAttrs(t *testing.T) { 640 data := "<tag attr=azAZ09:-_\t>" 641 d := NewDecoder(strings.NewReader(data)) 642 d.Strict = false 643 token, err := d.Token() 644 if _, ok := err.(*SyntaxError); ok { 645 t.Errorf("Unexpected error: %v", err) 646 } 647 if token.(StartElement).Name.Local != "tag" { 648 t.Errorf("Unexpected tag name: %v", token.(StartElement).Name.Local) 649 } 650 attr := token.(StartElement).Attr[0] 651 if attr.Value != "azAZ09:-_" { 652 t.Errorf("Unexpected attribute value: %v", attr.Value) 653 } 654 if attr.Name.Local != "attr" { 655 t.Errorf("Unexpected attribute name: %v", attr.Name.Local) 656 } 657 } 658 659 func TestValuelessAttrs(t *testing.T) { 660 tests := [][3]string{ 661 {"<p nowrap>", "p", "nowrap"}, 662 {"<p nowrap >", "p", "nowrap"}, 663 {"<input checked/>", "input", "checked"}, 664 {"<input checked />", "input", "checked"}, 665 } 666 for _, test := range tests { 667 d := NewDecoder(strings.NewReader(test[0])) 668 d.Strict = false 669 token, err := d.Token() 670 if _, ok := err.(*SyntaxError); ok { 671 t.Errorf("Unexpected error: %v", err) 672 } 673 if token.(StartElement).Name.Local != test[1] { 674 t.Errorf("Unexpected tag name: %v", token.(StartElement).Name.Local) 675 } 676 attr := token.(StartElement).Attr[0] 677 if attr.Value != test[2] { 678 t.Errorf("Unexpected attribute value: %v", attr.Value) 679 } 680 if attr.Name.Local != test[2] { 681 t.Errorf("Unexpected attribute name: %v", attr.Name.Local) 682 } 683 } 684 } 685 686 func TestCopyTokenCharData(t *testing.T) { 687 data := []byte("same data") 688 var tok1 Token = CharData(data) 689 tok2 := CopyToken(tok1) 690 if !reflect.DeepEqual(tok1, tok2) { 691 t.Error("CopyToken(CharData) != CharData") 692 } 693 data[1] = 'o' 694 if reflect.DeepEqual(tok1, tok2) { 695 t.Error("CopyToken(CharData) uses same buffer.") 696 } 697 } 698 699 func TestCopyTokenStartElement(t *testing.T) { 700 elt := StartElement{Name{"", "hello"}, []Attr{{Name{"", "lang"}, "en"}}} 701 var tok1 Token = elt 702 tok2 := CopyToken(tok1) 703 if tok1.(StartElement).Attr[0].Value != "en" { 704 t.Error("CopyToken overwrote Attr[0]") 705 } 706 if !reflect.DeepEqual(tok1, tok2) { 707 t.Error("CopyToken(StartElement) != StartElement") 708 } 709 tok1.(StartElement).Attr[0] = Attr{Name{"", "lang"}, "de"} 710 if reflect.DeepEqual(tok1, tok2) { 711 t.Error("CopyToken(CharData) uses same buffer.") 712 } 713 } 714 715 func TestCopyTokenComment(t *testing.T) { 716 data := []byte("<!-- some comment -->") 717 var tok1 Token = Comment(data) 718 tok2 := CopyToken(tok1) 719 if !reflect.DeepEqual(tok1, tok2) { 720 t.Error("CopyToken(Comment) != Comment") 721 } 722 data[1] = 'o' 723 if reflect.DeepEqual(tok1, tok2) { 724 t.Error("CopyToken(Comment) uses same buffer.") 725 } 726 } 727 728 func TestSyntaxErrorLineNum(t *testing.T) { 729 testInput := "<P>Foo<P>\n\n<P>Bar</>\n" 730 d := NewDecoder(strings.NewReader(testInput)) 731 var err error 732 for _, err = d.Token(); err == nil; _, err = d.Token() { 733 } 734 synerr, ok := err.(*SyntaxError) 735 if !ok { 736 t.Error("Expected SyntaxError.") 737 } 738 if synerr.Line != 3 { 739 t.Error("SyntaxError didn't have correct line number.") 740 } 741 } 742 743 func TestTrailingRawToken(t *testing.T) { 744 input := `<FOO></FOO> ` 745 d := NewDecoder(strings.NewReader(input)) 746 var err error 747 for _, err = d.RawToken(); err == nil; _, err = d.RawToken() { 748 } 749 if err != io.EOF { 750 t.Fatalf("d.RawToken() = _, %v, want _, io.EOF", err) 751 } 752 } 753 754 func TestTrailingToken(t *testing.T) { 755 input := `<FOO></FOO> ` 756 d := NewDecoder(strings.NewReader(input)) 757 var err error 758 for _, err = d.Token(); err == nil; _, err = d.Token() { 759 } 760 if err != io.EOF { 761 t.Fatalf("d.Token() = _, %v, want _, io.EOF", err) 762 } 763 } 764 765 func TestEntityInsideCDATA(t *testing.T) { 766 input := `<test><![CDATA[ &val=foo ]]></test>` 767 d := NewDecoder(strings.NewReader(input)) 768 var err error 769 for _, err = d.Token(); err == nil; _, err = d.Token() { 770 } 771 if err != io.EOF { 772 t.Fatalf("d.Token() = _, %v, want _, io.EOF", err) 773 } 774 } 775 776 var characterTests = []struct { 777 in string 778 err string 779 }{ 780 {"\x12<doc/>", "illegal character code U+0012"}, 781 {"<?xml version=\"1.0\"?>\x0b<doc/>", "illegal character code U+000B"}, 782 {"\xef\xbf\xbe<doc/>", "illegal character code U+FFFE"}, 783 {"<?xml version=\"1.0\"?><doc>\r\n<hiya/>\x07<toots/></doc>", "illegal character code U+0007"}, 784 {"<?xml version=\"1.0\"?><doc \x12='value'>what's up</doc>", "expected attribute name in element"}, 785 {"<doc>&abc\x01;</doc>", "invalid character entity &abc (no semicolon)"}, 786 {"<doc>&\x01;</doc>", "invalid character entity & (no semicolon)"}, 787 {"<doc>&\xef\xbf\xbe;</doc>", "invalid character entity &\uFFFE;"}, 788 {"<doc>&hello;</doc>", "invalid character entity &hello;"}, 789 } 790 791 func TestDisallowedCharacters(t *testing.T) { 792 793 for i, tt := range characterTests { 794 d := NewDecoder(strings.NewReader(tt.in)) 795 var err error 796 797 for err == nil { 798 _, err = d.Token() 799 } 800 synerr, ok := err.(*SyntaxError) 801 if !ok { 802 t.Fatalf("input %d d.Token() = _, %v, want _, *SyntaxError", i, err) 803 } 804 if synerr.Msg != tt.err { 805 t.Fatalf("input %d synerr.Msg wrong: want %q, got %q", i, tt.err, synerr.Msg) 806 } 807 } 808 } 809 810 func TestIsInCharacterRange(t *testing.T) { 811 invalid := []rune{ 812 utf8.MaxRune + 1, 813 0xD800, // surrogate min 814 0xDFFF, // surrogate max 815 -1, 816 } 817 for _, r := range invalid { 818 if isInCharacterRange(r) { 819 t.Errorf("rune %U considered valid", r) 820 } 821 } 822 } 823 824 var procInstTests = []struct { 825 input string 826 expect [2]string 827 }{ 828 {`version="1.0" encoding="utf-8"`, [2]string{"1.0", "utf-8"}}, 829 {`version="1.0" encoding='utf-8'`, [2]string{"1.0", "utf-8"}}, 830 {`version="1.0" encoding='utf-8' `, [2]string{"1.0", "utf-8"}}, 831 {`version="1.0" encoding=utf-8`, [2]string{"1.0", ""}}, 832 {`encoding="FOO" `, [2]string{"", "FOO"}}, 833 } 834 835 func TestProcInstEncoding(t *testing.T) { 836 for _, test := range procInstTests { 837 if got := procInst("version", test.input); got != test.expect[0] { 838 t.Errorf("procInst(version, %q) = %q; want %q", test.input, got, test.expect[0]) 839 } 840 if got := procInst("encoding", test.input); got != test.expect[1] { 841 t.Errorf("procInst(encoding, %q) = %q; want %q", test.input, got, test.expect[1]) 842 } 843 } 844 } 845 846 // Ensure that directives with comments include the complete 847 // text of any nested directives. 848 849 var directivesWithCommentsInput = ` 850 <!DOCTYPE [<!-- a comment --><!ENTITY rdf "http://www.w3.org/1999/02/22-rdf-syntax-ns#">]> 851 <!DOCTYPE [<!ENTITY go "Golang"><!-- a comment-->]> 852 <!DOCTYPE <!-> <!> <!----> <!-->--> <!--->--> [<!ENTITY go "Golang"><!-- a comment-->]> 853 ` 854 855 var directivesWithCommentsTokens = []Token{ 856 CharData("\n"), 857 Directive(`DOCTYPE [ <!ENTITY rdf "http://www.w3.org/1999/02/22-rdf-syntax-ns#">]`), 858 CharData("\n"), 859 Directive(`DOCTYPE [<!ENTITY go "Golang"> ]`), 860 CharData("\n"), 861 Directive(`DOCTYPE <!-> <!> [<!ENTITY go "Golang"> ]`), 862 CharData("\n"), 863 } 864 865 func TestDirectivesWithComments(t *testing.T) { 866 d := NewDecoder(strings.NewReader(directivesWithCommentsInput)) 867 868 for i, want := range directivesWithCommentsTokens { 869 have, err := d.Token() 870 if err != nil { 871 t.Fatalf("token %d: unexpected error: %s", i, err) 872 } 873 if !reflect.DeepEqual(have, want) { 874 t.Errorf("token %d = %#v want %#v", i, have, want) 875 } 876 } 877 } 878 879 // Writer whose Write method always returns an error. 880 type errWriter struct{} 881 882 func (errWriter) Write(p []byte) (n int, err error) { return 0, fmt.Errorf("unwritable") } 883 884 func TestEscapeTextIOErrors(t *testing.T) { 885 expectErr := "unwritable" 886 err := EscapeText(errWriter{}, []byte{'A'}) 887 888 if err == nil || err.Error() != expectErr { 889 t.Errorf("have %v, want %v", err, expectErr) 890 } 891 } 892 893 func TestEscapeTextInvalidChar(t *testing.T) { 894 input := []byte("A \x00 terminated string.") 895 expected := "A \uFFFD terminated string." 896 897 buff := new(strings.Builder) 898 if err := EscapeText(buff, input); err != nil { 899 t.Fatalf("have %v, want nil", err) 900 } 901 text := buff.String() 902 903 if text != expected { 904 t.Errorf("have %v, want %v", text, expected) 905 } 906 } 907 908 func TestIssue5880(t *testing.T) { 909 type T []byte 910 data, err := Marshal(T{192, 168, 0, 1}) 911 if err != nil { 912 t.Errorf("Marshal error: %v", err) 913 } 914 if !utf8.Valid(data) { 915 t.Errorf("Marshal generated invalid UTF-8: %x", data) 916 } 917 } 918 919 func TestIssue11405(t *testing.T) { 920 testCases := []string{ 921 "<root>", 922 "<root><foo>", 923 "<root><foo></foo>", 924 } 925 for _, tc := range testCases { 926 d := NewDecoder(strings.NewReader(tc)) 927 var err error 928 for { 929 _, err = d.Token() 930 if err != nil { 931 break 932 } 933 } 934 if _, ok := err.(*SyntaxError); !ok { 935 t.Errorf("%s: Token: Got error %v, want SyntaxError", tc, err) 936 } 937 } 938 } 939 940 func TestIssue12417(t *testing.T) { 941 testCases := []struct { 942 s string 943 ok bool 944 }{ 945 {`<?xml encoding="UtF-8" version="1.0"?><root/>`, true}, 946 {`<?xml encoding="UTF-8" version="1.0"?><root/>`, true}, 947 {`<?xml encoding="utf-8" version="1.0"?><root/>`, true}, 948 {`<?xml encoding="uuu-9" version="1.0"?><root/>`, false}, 949 } 950 for _, tc := range testCases { 951 d := NewDecoder(strings.NewReader(tc.s)) 952 var err error 953 for { 954 _, err = d.Token() 955 if err != nil { 956 if err == io.EOF { 957 err = nil 958 } 959 break 960 } 961 } 962 if err != nil && tc.ok { 963 t.Errorf("%q: Encoding charset: expected no error, got %s", tc.s, err) 964 continue 965 } 966 if err == nil && !tc.ok { 967 t.Errorf("%q: Encoding charset: expected error, got nil", tc.s) 968 } 969 } 970 } 971 972 func tokenMap(mapping func(t Token) Token) func(TokenReader) TokenReader { 973 return func(src TokenReader) TokenReader { 974 return mapper{ 975 t: src, 976 f: mapping, 977 } 978 } 979 } 980 981 type mapper struct { 982 t TokenReader 983 f func(Token) Token 984 } 985 986 func (m mapper) Token() (Token, error) { 987 tok, err := m.t.Token() 988 if err != nil { 989 return nil, err 990 } 991 return m.f(tok), nil 992 } 993 994 func TestNewTokenDecoderIdempotent(t *testing.T) { 995 d := NewDecoder(strings.NewReader(`<br>`)) 996 d2 := NewTokenDecoder(d) 997 if d != d2 { 998 t.Error("NewTokenDecoder did not detect underlying Decoder") 999 } 1000 } 1001 1002 func TestWrapDecoder(t *testing.T) { 1003 d := NewDecoder(strings.NewReader(`<quote>[Re-enter Clown with a letter, and FABIAN]</quote>`)) 1004 m := tokenMap(func(t Token) Token { 1005 switch tok := t.(type) { 1006 case StartElement: 1007 if tok.Name.Local == "quote" { 1008 tok.Name.Local = "blocking" 1009 return tok 1010 } 1011 case EndElement: 1012 if tok.Name.Local == "quote" { 1013 tok.Name.Local = "blocking" 1014 return tok 1015 } 1016 } 1017 return t 1018 }) 1019 1020 d = NewTokenDecoder(m(d)) 1021 1022 o := struct { 1023 XMLName Name `xml:"blocking"` 1024 Chardata string `xml:",chardata"` 1025 }{} 1026 1027 if err := d.Decode(&o); err != nil { 1028 t.Fatal("Got unexpected error while decoding:", err) 1029 } 1030 1031 if o.Chardata != "[Re-enter Clown with a letter, and FABIAN]" { 1032 t.Fatalf("Got unexpected chardata: `%s`\n", o.Chardata) 1033 } 1034 } 1035 1036 type tokReader struct{} 1037 1038 func (tokReader) Token() (Token, error) { 1039 return StartElement{}, nil 1040 } 1041 1042 type Failure struct{} 1043 1044 func (Failure) UnmarshalXML(*Decoder, StartElement) error { 1045 return nil 1046 } 1047 1048 func TestTokenUnmarshaler(t *testing.T) { 1049 defer func() { 1050 if r := recover(); r != nil { 1051 t.Error("Unexpected panic using custom token unmarshaler") 1052 } 1053 }() 1054 1055 d := NewTokenDecoder(tokReader{}) 1056 d.Decode(&Failure{}) 1057 } 1058 1059 func testRoundTrip(t *testing.T, input string) { 1060 d := NewDecoder(strings.NewReader(input)) 1061 var tokens []Token 1062 var buf bytes.Buffer 1063 e := NewEncoder(&buf) 1064 for { 1065 tok, err := d.Token() 1066 if err == io.EOF { 1067 break 1068 } 1069 if err != nil { 1070 t.Fatalf("invalid input: %v", err) 1071 } 1072 if err := e.EncodeToken(tok); err != nil { 1073 t.Fatalf("failed to re-encode input: %v", err) 1074 } 1075 tokens = append(tokens, CopyToken(tok)) 1076 } 1077 if err := e.Flush(); err != nil { 1078 t.Fatal(err) 1079 } 1080 1081 d = NewDecoder(&buf) 1082 for { 1083 tok, err := d.Token() 1084 if err == io.EOF { 1085 break 1086 } 1087 if err != nil { 1088 t.Fatalf("failed to decode output: %v", err) 1089 } 1090 if len(tokens) == 0 { 1091 t.Fatalf("unexpected token: %#v", tok) 1092 } 1093 a, b := tokens[0], tok 1094 if !reflect.DeepEqual(a, b) { 1095 t.Fatalf("token mismatch: %#v vs %#v", a, b) 1096 } 1097 tokens = tokens[1:] 1098 } 1099 if len(tokens) > 0 { 1100 t.Fatalf("lost tokens: %#v", tokens) 1101 } 1102 } 1103 1104 func TestRoundTrip(t *testing.T) { 1105 tests := map[string]string{ 1106 "leading colon": `<::Test ::foo="bar"><:::Hello></:::Hello><Hello></Hello></::Test>`, 1107 "trailing colon": `<foo abc:="x"></foo>`, 1108 "double colon": `<x:y:foo></x:y:foo>`, 1109 "comments in directives": `<!ENTITY x<!<!-- c1 [ " -->--x --> > <e></e> <!DOCTYPE xxx [ x<!-- c2 " -->--x ]>`, 1110 } 1111 for name, input := range tests { 1112 t.Run(name, func(t *testing.T) { testRoundTrip(t, input) }) 1113 } 1114 } 1115 1116 func TestParseErrors(t *testing.T) { 1117 withDefaultHeader := func(s string) string { 1118 return `<?xml version="1.0" encoding="UTF-8"?>` + s 1119 } 1120 tests := []struct { 1121 src string 1122 err string 1123 }{ 1124 {withDefaultHeader(`</foo>`), `unexpected end element </foo>`}, 1125 {withDefaultHeader(`<x:foo></y:foo>`), `element <foo> in space x closed by </foo> in space y`}, 1126 {withDefaultHeader(`<? not ok ?>`), `expected target name after <?`}, 1127 {withDefaultHeader(`<!- not ok -->`), `invalid sequence <!- not part of <!--`}, 1128 {withDefaultHeader(`<!-? not ok -->`), `invalid sequence <!- not part of <!--`}, 1129 {withDefaultHeader(`<![not ok]>`), `invalid <![ sequence`}, 1130 {withDefaultHeader("\xf1"), `invalid UTF-8`}, 1131 1132 // Header-related errors. 1133 {`<?xml version="1.1" encoding="UTF-8"?>`, `unsupported version "1.1"; only version 1.0 is supported`}, 1134 1135 // Cases below are for "no errors". 1136 {withDefaultHeader(`<?ok?>`), ``}, 1137 {withDefaultHeader(`<?ok version="ok"?>`), ``}, 1138 } 1139 1140 for _, test := range tests { 1141 d := NewDecoder(strings.NewReader(test.src)) 1142 var err error 1143 for { 1144 _, err = d.Token() 1145 if err != nil { 1146 break 1147 } 1148 } 1149 if test.err == "" { 1150 if err != io.EOF { 1151 t.Errorf("parse %s: have %q error, expected none", test.src, err) 1152 } 1153 continue 1154 } 1155 if err == nil || err == io.EOF { 1156 t.Errorf("parse %s: have no error, expected a non-nil error", test.src) 1157 continue 1158 } 1159 if !strings.Contains(err.Error(), test.err) { 1160 t.Errorf("parse %s: can't find %q error sudbstring\nerror: %q", test.src, test.err, err) 1161 continue 1162 } 1163 } 1164 } 1165 1166 const testInputHTMLAutoClose = `<?xml version="1.0" encoding="UTF-8"?> 1167 <br> 1168 <br/><br/> 1169 <br><br> 1170 <br></br> 1171 <BR> 1172 <BR/><BR/> 1173 <Br></Br> 1174 <BR><span id="test">abc</span><br/><br/>` 1175 1176 func BenchmarkHTMLAutoClose(b *testing.B) { 1177 b.RunParallel(func(p *testing.PB) { 1178 for p.Next() { 1179 d := NewDecoder(strings.NewReader(testInputHTMLAutoClose)) 1180 d.Strict = false 1181 d.AutoClose = HTMLAutoClose 1182 d.Entity = HTMLEntity 1183 for { 1184 _, err := d.Token() 1185 if err != nil { 1186 if err == io.EOF { 1187 break 1188 } 1189 b.Fatalf("unexpected error: %v", err) 1190 } 1191 } 1192 } 1193 }) 1194 } 1195 1196 func TestHTMLAutoClose(t *testing.T) { 1197 wantTokens := []Token{ 1198 ProcInst{"xml", []byte(`version="1.0" encoding="UTF-8"`)}, 1199 CharData("\n"), 1200 StartElement{Name{"", "br"}, []Attr{}}, 1201 EndElement{Name{"", "br"}}, 1202 CharData("\n"), 1203 StartElement{Name{"", "br"}, []Attr{}}, 1204 EndElement{Name{"", "br"}}, 1205 StartElement{Name{"", "br"}, []Attr{}}, 1206 EndElement{Name{"", "br"}}, 1207 CharData("\n"), 1208 StartElement{Name{"", "br"}, []Attr{}}, 1209 EndElement{Name{"", "br"}}, 1210 StartElement{Name{"", "br"}, []Attr{}}, 1211 EndElement{Name{"", "br"}}, 1212 CharData("\n"), 1213 StartElement{Name{"", "br"}, []Attr{}}, 1214 EndElement{Name{"", "br"}}, 1215 CharData("\n"), 1216 StartElement{Name{"", "BR"}, []Attr{}}, 1217 EndElement{Name{"", "BR"}}, 1218 CharData("\n"), 1219 StartElement{Name{"", "BR"}, []Attr{}}, 1220 EndElement{Name{"", "BR"}}, 1221 StartElement{Name{"", "BR"}, []Attr{}}, 1222 EndElement{Name{"", "BR"}}, 1223 CharData("\n"), 1224 StartElement{Name{"", "Br"}, []Attr{}}, 1225 EndElement{Name{"", "Br"}}, 1226 CharData("\n"), 1227 StartElement{Name{"", "BR"}, []Attr{}}, 1228 EndElement{Name{"", "BR"}}, 1229 StartElement{Name{"", "span"}, []Attr{{Name: Name{"", "id"}, Value: "test"}}}, 1230 CharData("abc"), 1231 EndElement{Name{"", "span"}}, 1232 StartElement{Name{"", "br"}, []Attr{}}, 1233 EndElement{Name{"", "br"}}, 1234 StartElement{Name{"", "br"}, []Attr{}}, 1235 EndElement{Name{"", "br"}}, 1236 } 1237 1238 d := NewDecoder(strings.NewReader(testInputHTMLAutoClose)) 1239 d.Strict = false 1240 d.AutoClose = HTMLAutoClose 1241 d.Entity = HTMLEntity 1242 var haveTokens []Token 1243 for { 1244 tok, err := d.Token() 1245 if err != nil { 1246 if err == io.EOF { 1247 break 1248 } 1249 t.Fatalf("unexpected error: %v", err) 1250 } 1251 haveTokens = append(haveTokens, CopyToken(tok)) 1252 } 1253 if len(haveTokens) != len(wantTokens) { 1254 t.Errorf("tokens count mismatch: have %d, want %d", len(haveTokens), len(wantTokens)) 1255 } 1256 for i, want := range wantTokens { 1257 if i >= len(haveTokens) { 1258 t.Errorf("token[%d] expected %#v, have no token", i, want) 1259 } else { 1260 have := haveTokens[i] 1261 if !reflect.DeepEqual(have, want) { 1262 t.Errorf("token[%d] mismatch:\nhave: %#v\nwant: %#v", i, have, want) 1263 } 1264 } 1265 } 1266 }