github.com/code-reading/golang@v0.0.0-20220303082512-ba5bc0e589a3/go/src/encoding/xml/xml_test.go (about) 1 // Copyright 2009 The Go Authors. All rights reserved. 2 // Use of this source code is governed by a BSD-style 3 // license that can be found in the LICENSE file. 4 5 package xml 6 7 import ( 8 "bytes" 9 "fmt" 10 "io" 11 "reflect" 12 "strings" 13 "testing" 14 "unicode/utf8" 15 ) 16 17 type toks struct { 18 earlyEOF bool 19 t []Token 20 } 21 22 func (t *toks) Token() (Token, error) { 23 if len(t.t) == 0 { 24 return nil, io.EOF 25 } 26 var tok Token 27 tok, t.t = t.t[0], t.t[1:] 28 if t.earlyEOF && len(t.t) == 0 { 29 return tok, io.EOF 30 } 31 return tok, nil 32 } 33 34 func TestDecodeEOF(t *testing.T) { 35 start := StartElement{Name: Name{Local: "test"}} 36 tests := []struct { 37 name string 38 tokens []Token 39 ok bool 40 }{ 41 { 42 name: "OK", 43 tokens: []Token{ 44 start, 45 start.End(), 46 }, 47 ok: true, 48 }, 49 { 50 name: "Malformed", 51 tokens: []Token{ 52 start, 53 StartElement{Name: Name{Local: "bad"}}, 54 start.End(), 55 }, 56 ok: false, 57 }, 58 } 59 for _, tc := range tests { 60 for _, eof := range []bool{true, false} { 61 name := fmt.Sprintf("%s/earlyEOF=%v", tc.name, eof) 62 t.Run(name, func(t *testing.T) { 63 d := NewTokenDecoder(&toks{ 64 earlyEOF: eof, 65 t: tc.tokens, 66 }) 67 err := d.Decode(&struct { 68 XMLName Name `xml:"test"` 69 }{}) 70 if tc.ok && err != nil { 71 t.Fatalf("d.Decode: expected nil error, got %v", err) 72 } 73 if _, ok := err.(*SyntaxError); !tc.ok && !ok { 74 t.Errorf("d.Decode: expected syntax error, got %v", err) 75 } 76 }) 77 } 78 } 79 } 80 81 type toksNil struct { 82 returnEOF bool 83 t []Token 84 } 85 86 func (t *toksNil) Token() (Token, error) { 87 if len(t.t) == 0 { 88 if !t.returnEOF { 89 // Return nil, nil before returning an EOF. It's legal, but 90 // discouraged. 91 t.returnEOF = true 92 return nil, nil 93 } 94 return nil, io.EOF 95 } 96 var tok Token 97 tok, t.t = t.t[0], t.t[1:] 98 return tok, nil 99 } 100 101 func TestDecodeNilToken(t *testing.T) { 102 for _, strict := range []bool{true, false} { 103 name := fmt.Sprintf("Strict=%v", strict) 104 t.Run(name, func(t *testing.T) { 105 start := StartElement{Name: Name{Local: "test"}} 106 bad := StartElement{Name: Name{Local: "bad"}} 107 d := NewTokenDecoder(&toksNil{ 108 // Malformed 109 t: []Token{start, bad, start.End()}, 110 }) 111 d.Strict = strict 112 err := d.Decode(&struct { 113 XMLName Name `xml:"test"` 114 }{}) 115 if _, ok := err.(*SyntaxError); !ok { 116 t.Errorf("d.Decode: expected syntax error, got %v", err) 117 } 118 }) 119 } 120 } 121 122 const testInput = ` 123 <?xml version="1.0" encoding="UTF-8"?> 124 <!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" 125 "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd"> 126 <body xmlns:foo="ns1" xmlns="ns2" xmlns:tag="ns3" ` + 127 "\r\n\t" + ` > 128 <hello lang="en">World <>'" 白鵬翔</hello> 129 <query>&何; &is-it;</query> 130 <goodbye /> 131 <outer foo:attr="value" xmlns:tag="ns4"> 132 <inner/> 133 </outer> 134 <tag:name> 135 <![CDATA[Some text here.]]> 136 </tag:name> 137 </body><!-- missing final newline -->` 138 139 var testEntity = map[string]string{"何": "What", "is-it": "is it?"} 140 141 var rawTokens = []Token{ 142 CharData("\n"), 143 ProcInst{"xml", []byte(`version="1.0" encoding="UTF-8"`)}, 144 CharData("\n"), 145 Directive(`DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" 146 "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd"`), 147 CharData("\n"), 148 StartElement{Name{"", "body"}, []Attr{{Name{"xmlns", "foo"}, "ns1"}, {Name{"", "xmlns"}, "ns2"}, {Name{"xmlns", "tag"}, "ns3"}}}, 149 CharData("\n "), 150 StartElement{Name{"", "hello"}, []Attr{{Name{"", "lang"}, "en"}}}, 151 CharData("World <>'\" 白鵬翔"), 152 EndElement{Name{"", "hello"}}, 153 CharData("\n "), 154 StartElement{Name{"", "query"}, []Attr{}}, 155 CharData("What is it?"), 156 EndElement{Name{"", "query"}}, 157 CharData("\n "), 158 StartElement{Name{"", "goodbye"}, []Attr{}}, 159 EndElement{Name{"", "goodbye"}}, 160 CharData("\n "), 161 StartElement{Name{"", "outer"}, []Attr{{Name{"foo", "attr"}, "value"}, {Name{"xmlns", "tag"}, "ns4"}}}, 162 CharData("\n "), 163 StartElement{Name{"", "inner"}, []Attr{}}, 164 EndElement{Name{"", "inner"}}, 165 CharData("\n "), 166 EndElement{Name{"", "outer"}}, 167 CharData("\n "), 168 StartElement{Name{"tag", "name"}, []Attr{}}, 169 CharData("\n "), 170 CharData("Some text here."), 171 CharData("\n "), 172 EndElement{Name{"tag", "name"}}, 173 CharData("\n"), 174 EndElement{Name{"", "body"}}, 175 Comment(" missing final newline "), 176 } 177 178 var cookedTokens = []Token{ 179 CharData("\n"), 180 ProcInst{"xml", []byte(`version="1.0" encoding="UTF-8"`)}, 181 CharData("\n"), 182 Directive(`DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" 183 "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd"`), 184 CharData("\n"), 185 StartElement{Name{"ns2", "body"}, []Attr{{Name{"xmlns", "foo"}, "ns1"}, {Name{"", "xmlns"}, "ns2"}, {Name{"xmlns", "tag"}, "ns3"}}}, 186 CharData("\n "), 187 StartElement{Name{"ns2", "hello"}, []Attr{{Name{"", "lang"}, "en"}}}, 188 CharData("World <>'\" 白鵬翔"), 189 EndElement{Name{"ns2", "hello"}}, 190 CharData("\n "), 191 StartElement{Name{"ns2", "query"}, []Attr{}}, 192 CharData("What is it?"), 193 EndElement{Name{"ns2", "query"}}, 194 CharData("\n "), 195 StartElement{Name{"ns2", "goodbye"}, []Attr{}}, 196 EndElement{Name{"ns2", "goodbye"}}, 197 CharData("\n "), 198 StartElement{Name{"ns2", "outer"}, []Attr{{Name{"ns1", "attr"}, "value"}, {Name{"xmlns", "tag"}, "ns4"}}}, 199 CharData("\n "), 200 StartElement{Name{"ns2", "inner"}, []Attr{}}, 201 EndElement{Name{"ns2", "inner"}}, 202 CharData("\n "), 203 EndElement{Name{"ns2", "outer"}}, 204 CharData("\n "), 205 StartElement{Name{"ns3", "name"}, []Attr{}}, 206 CharData("\n "), 207 CharData("Some text here."), 208 CharData("\n "), 209 EndElement{Name{"ns3", "name"}}, 210 CharData("\n"), 211 EndElement{Name{"ns2", "body"}}, 212 Comment(" missing final newline "), 213 } 214 215 const testInputAltEncoding = ` 216 <?xml version="1.0" encoding="x-testing-uppercase"?> 217 <TAG>VALUE</TAG>` 218 219 var rawTokensAltEncoding = []Token{ 220 CharData("\n"), 221 ProcInst{"xml", []byte(`version="1.0" encoding="x-testing-uppercase"`)}, 222 CharData("\n"), 223 StartElement{Name{"", "tag"}, []Attr{}}, 224 CharData("value"), 225 EndElement{Name{"", "tag"}}, 226 } 227 228 var xmlInput = []string{ 229 // unexpected EOF cases 230 "<", 231 "<t", 232 "<t ", 233 "<t/", 234 "<!", 235 "<!-", 236 "<!--", 237 "<!--c-", 238 "<!--c--", 239 "<!d", 240 "<t></", 241 "<t></t", 242 "<?", 243 "<?p", 244 "<t a", 245 "<t a=", 246 "<t a='", 247 "<t a=''", 248 "<t/><![", 249 "<t/><![C", 250 "<t/><![CDATA[d", 251 "<t/><![CDATA[d]", 252 "<t/><![CDATA[d]]", 253 254 // other Syntax errors 255 "<>", 256 "<t/a", 257 "<0 />", 258 "<?0 >", 259 // "<!0 >", // let the Token() caller handle 260 "</0>", 261 "<t 0=''>", 262 "<t a='&'>", 263 "<t a='<'>", 264 "<t> c;</t>", 265 "<t a>", 266 "<t a=>", 267 "<t a=v>", 268 // "<![CDATA[d]]>", // let the Token() caller handle 269 "<t></e>", 270 "<t></>", 271 "<t></t!", 272 "<t>cdata]]></t>", 273 } 274 275 func TestRawToken(t *testing.T) { 276 d := NewDecoder(strings.NewReader(testInput)) 277 d.Entity = testEntity 278 testRawToken(t, d, testInput, rawTokens) 279 } 280 281 const nonStrictInput = ` 282 <tag>non&entity</tag> 283 <tag>&unknown;entity</tag> 284 <tag>{</tag> 285 <tag>&#zzz;</tag> 286 <tag>&なまえ3;</tag> 287 <tag><-gt;</tag> 288 <tag>&;</tag> 289 <tag>&0a;</tag> 290 ` 291 292 var nonStrictTokens = []Token{ 293 CharData("\n"), 294 StartElement{Name{"", "tag"}, []Attr{}}, 295 CharData("non&entity"), 296 EndElement{Name{"", "tag"}}, 297 CharData("\n"), 298 StartElement{Name{"", "tag"}, []Attr{}}, 299 CharData("&unknown;entity"), 300 EndElement{Name{"", "tag"}}, 301 CharData("\n"), 302 StartElement{Name{"", "tag"}, []Attr{}}, 303 CharData("{"), 304 EndElement{Name{"", "tag"}}, 305 CharData("\n"), 306 StartElement{Name{"", "tag"}, []Attr{}}, 307 CharData("&#zzz;"), 308 EndElement{Name{"", "tag"}}, 309 CharData("\n"), 310 StartElement{Name{"", "tag"}, []Attr{}}, 311 CharData("&なまえ3;"), 312 EndElement{Name{"", "tag"}}, 313 CharData("\n"), 314 StartElement{Name{"", "tag"}, []Attr{}}, 315 CharData("<-gt;"), 316 EndElement{Name{"", "tag"}}, 317 CharData("\n"), 318 StartElement{Name{"", "tag"}, []Attr{}}, 319 CharData("&;"), 320 EndElement{Name{"", "tag"}}, 321 CharData("\n"), 322 StartElement{Name{"", "tag"}, []Attr{}}, 323 CharData("&0a;"), 324 EndElement{Name{"", "tag"}}, 325 CharData("\n"), 326 } 327 328 func TestNonStrictRawToken(t *testing.T) { 329 d := NewDecoder(strings.NewReader(nonStrictInput)) 330 d.Strict = false 331 testRawToken(t, d, nonStrictInput, nonStrictTokens) 332 } 333 334 type downCaser struct { 335 t *testing.T 336 r io.ByteReader 337 } 338 339 func (d *downCaser) ReadByte() (c byte, err error) { 340 c, err = d.r.ReadByte() 341 if c >= 'A' && c <= 'Z' { 342 c += 'a' - 'A' 343 } 344 return 345 } 346 347 func (d *downCaser) Read(p []byte) (int, error) { 348 d.t.Fatalf("unexpected Read call on downCaser reader") 349 panic("unreachable") 350 } 351 352 func TestRawTokenAltEncoding(t *testing.T) { 353 d := NewDecoder(strings.NewReader(testInputAltEncoding)) 354 d.CharsetReader = func(charset string, input io.Reader) (io.Reader, error) { 355 if charset != "x-testing-uppercase" { 356 t.Fatalf("unexpected charset %q", charset) 357 } 358 return &downCaser{t, input.(io.ByteReader)}, nil 359 } 360 testRawToken(t, d, testInputAltEncoding, rawTokensAltEncoding) 361 } 362 363 func TestRawTokenAltEncodingNoConverter(t *testing.T) { 364 d := NewDecoder(strings.NewReader(testInputAltEncoding)) 365 token, err := d.RawToken() 366 if token == nil { 367 t.Fatalf("expected a token on first RawToken call") 368 } 369 if err != nil { 370 t.Fatal(err) 371 } 372 token, err = d.RawToken() 373 if token != nil { 374 t.Errorf("expected a nil token; got %#v", token) 375 } 376 if err == nil { 377 t.Fatalf("expected an error on second RawToken call") 378 } 379 const encoding = "x-testing-uppercase" 380 if !strings.Contains(err.Error(), encoding) { 381 t.Errorf("expected error to contain %q; got error: %v", 382 encoding, err) 383 } 384 } 385 386 func testRawToken(t *testing.T, d *Decoder, raw string, rawTokens []Token) { 387 lastEnd := int64(0) 388 for i, want := range rawTokens { 389 start := d.InputOffset() 390 have, err := d.RawToken() 391 end := d.InputOffset() 392 if err != nil { 393 t.Fatalf("token %d: unexpected error: %s", i, err) 394 } 395 if !reflect.DeepEqual(have, want) { 396 var shave, swant string 397 if _, ok := have.(CharData); ok { 398 shave = fmt.Sprintf("CharData(%q)", have) 399 } else { 400 shave = fmt.Sprintf("%#v", have) 401 } 402 if _, ok := want.(CharData); ok { 403 swant = fmt.Sprintf("CharData(%q)", want) 404 } else { 405 swant = fmt.Sprintf("%#v", want) 406 } 407 t.Errorf("token %d = %s, want %s", i, shave, swant) 408 } 409 410 // Check that InputOffset returned actual token. 411 switch { 412 case start < lastEnd: 413 t.Errorf("token %d: position [%d,%d) for %T is before previous token", i, start, end, have) 414 case start >= end: 415 // Special case: EndElement can be synthesized. 416 if start == end && end == lastEnd { 417 break 418 } 419 t.Errorf("token %d: position [%d,%d) for %T is empty", i, start, end, have) 420 case end > int64(len(raw)): 421 t.Errorf("token %d: position [%d,%d) for %T extends beyond input", i, start, end, have) 422 default: 423 text := raw[start:end] 424 if strings.ContainsAny(text, "<>") && (!strings.HasPrefix(text, "<") || !strings.HasSuffix(text, ">")) { 425 t.Errorf("token %d: misaligned raw token %#q for %T", i, text, have) 426 } 427 } 428 lastEnd = end 429 } 430 } 431 432 // Ensure that directives (specifically !DOCTYPE) include the complete 433 // text of any nested directives, noting that < and > do not change 434 // nesting depth if they are in single or double quotes. 435 436 var nestedDirectivesInput = ` 437 <!DOCTYPE [<!ENTITY rdf "http://www.w3.org/1999/02/22-rdf-syntax-ns#">]> 438 <!DOCTYPE [<!ENTITY xlt ">">]> 439 <!DOCTYPE [<!ENTITY xlt "<">]> 440 <!DOCTYPE [<!ENTITY xlt '>'>]> 441 <!DOCTYPE [<!ENTITY xlt '<'>]> 442 <!DOCTYPE [<!ENTITY xlt '">'>]> 443 <!DOCTYPE [<!ENTITY xlt "'<">]> 444 ` 445 446 var nestedDirectivesTokens = []Token{ 447 CharData("\n"), 448 Directive(`DOCTYPE [<!ENTITY rdf "http://www.w3.org/1999/02/22-rdf-syntax-ns#">]`), 449 CharData("\n"), 450 Directive(`DOCTYPE [<!ENTITY xlt ">">]`), 451 CharData("\n"), 452 Directive(`DOCTYPE [<!ENTITY xlt "<">]`), 453 CharData("\n"), 454 Directive(`DOCTYPE [<!ENTITY xlt '>'>]`), 455 CharData("\n"), 456 Directive(`DOCTYPE [<!ENTITY xlt '<'>]`), 457 CharData("\n"), 458 Directive(`DOCTYPE [<!ENTITY xlt '">'>]`), 459 CharData("\n"), 460 Directive(`DOCTYPE [<!ENTITY xlt "'<">]`), 461 CharData("\n"), 462 } 463 464 func TestNestedDirectives(t *testing.T) { 465 d := NewDecoder(strings.NewReader(nestedDirectivesInput)) 466 467 for i, want := range nestedDirectivesTokens { 468 have, err := d.Token() 469 if err != nil { 470 t.Fatalf("token %d: unexpected error: %s", i, err) 471 } 472 if !reflect.DeepEqual(have, want) { 473 t.Errorf("token %d = %#v want %#v", i, have, want) 474 } 475 } 476 } 477 478 func TestToken(t *testing.T) { 479 d := NewDecoder(strings.NewReader(testInput)) 480 d.Entity = testEntity 481 482 for i, want := range cookedTokens { 483 have, err := d.Token() 484 if err != nil { 485 t.Fatalf("token %d: unexpected error: %s", i, err) 486 } 487 if !reflect.DeepEqual(have, want) { 488 t.Errorf("token %d = %#v want %#v", i, have, want) 489 } 490 } 491 } 492 493 func TestSyntax(t *testing.T) { 494 for i := range xmlInput { 495 d := NewDecoder(strings.NewReader(xmlInput[i])) 496 var err error 497 for _, err = d.Token(); err == nil; _, err = d.Token() { 498 } 499 if _, ok := err.(*SyntaxError); !ok { 500 t.Fatalf(`xmlInput "%s": expected SyntaxError not received`, xmlInput[i]) 501 } 502 } 503 } 504 505 type allScalars struct { 506 True1 bool 507 True2 bool 508 False1 bool 509 False2 bool 510 Int int 511 Int8 int8 512 Int16 int16 513 Int32 int32 514 Int64 int64 515 Uint int 516 Uint8 uint8 517 Uint16 uint16 518 Uint32 uint32 519 Uint64 uint64 520 Uintptr uintptr 521 Float32 float32 522 Float64 float64 523 String string 524 PtrString *string 525 } 526 527 var all = allScalars{ 528 True1: true, 529 True2: true, 530 False1: false, 531 False2: false, 532 Int: 1, 533 Int8: -2, 534 Int16: 3, 535 Int32: -4, 536 Int64: 5, 537 Uint: 6, 538 Uint8: 7, 539 Uint16: 8, 540 Uint32: 9, 541 Uint64: 10, 542 Uintptr: 11, 543 Float32: 13.0, 544 Float64: 14.0, 545 String: "15", 546 PtrString: &sixteen, 547 } 548 549 var sixteen = "16" 550 551 const testScalarsInput = `<allscalars> 552 <True1>true</True1> 553 <True2>1</True2> 554 <False1>false</False1> 555 <False2>0</False2> 556 <Int>1</Int> 557 <Int8>-2</Int8> 558 <Int16>3</Int16> 559 <Int32>-4</Int32> 560 <Int64>5</Int64> 561 <Uint>6</Uint> 562 <Uint8>7</Uint8> 563 <Uint16>8</Uint16> 564 <Uint32>9</Uint32> 565 <Uint64>10</Uint64> 566 <Uintptr>11</Uintptr> 567 <Float>12.0</Float> 568 <Float32>13.0</Float32> 569 <Float64>14.0</Float64> 570 <String>15</String> 571 <PtrString>16</PtrString> 572 </allscalars>` 573 574 func TestAllScalars(t *testing.T) { 575 var a allScalars 576 err := Unmarshal([]byte(testScalarsInput), &a) 577 578 if err != nil { 579 t.Fatal(err) 580 } 581 if !reflect.DeepEqual(a, all) { 582 t.Errorf("have %+v want %+v", a, all) 583 } 584 } 585 586 type item struct { 587 FieldA string 588 } 589 590 func TestIssue569(t *testing.T) { 591 data := `<item><FieldA>abcd</FieldA></item>` 592 var i item 593 err := Unmarshal([]byte(data), &i) 594 595 if err != nil || i.FieldA != "abcd" { 596 t.Fatal("Expecting abcd") 597 } 598 } 599 600 func TestUnquotedAttrs(t *testing.T) { 601 data := "<tag attr=azAZ09:-_\t>" 602 d := NewDecoder(strings.NewReader(data)) 603 d.Strict = false 604 token, err := d.Token() 605 if _, ok := err.(*SyntaxError); ok { 606 t.Errorf("Unexpected error: %v", err) 607 } 608 if token.(StartElement).Name.Local != "tag" { 609 t.Errorf("Unexpected tag name: %v", token.(StartElement).Name.Local) 610 } 611 attr := token.(StartElement).Attr[0] 612 if attr.Value != "azAZ09:-_" { 613 t.Errorf("Unexpected attribute value: %v", attr.Value) 614 } 615 if attr.Name.Local != "attr" { 616 t.Errorf("Unexpected attribute name: %v", attr.Name.Local) 617 } 618 } 619 620 func TestValuelessAttrs(t *testing.T) { 621 tests := [][3]string{ 622 {"<p nowrap>", "p", "nowrap"}, 623 {"<p nowrap >", "p", "nowrap"}, 624 {"<input checked/>", "input", "checked"}, 625 {"<input checked />", "input", "checked"}, 626 } 627 for _, test := range tests { 628 d := NewDecoder(strings.NewReader(test[0])) 629 d.Strict = false 630 token, err := d.Token() 631 if _, ok := err.(*SyntaxError); ok { 632 t.Errorf("Unexpected error: %v", err) 633 } 634 if token.(StartElement).Name.Local != test[1] { 635 t.Errorf("Unexpected tag name: %v", token.(StartElement).Name.Local) 636 } 637 attr := token.(StartElement).Attr[0] 638 if attr.Value != test[2] { 639 t.Errorf("Unexpected attribute value: %v", attr.Value) 640 } 641 if attr.Name.Local != test[2] { 642 t.Errorf("Unexpected attribute name: %v", attr.Name.Local) 643 } 644 } 645 } 646 647 func TestCopyTokenCharData(t *testing.T) { 648 data := []byte("same data") 649 var tok1 Token = CharData(data) 650 tok2 := CopyToken(tok1) 651 if !reflect.DeepEqual(tok1, tok2) { 652 t.Error("CopyToken(CharData) != CharData") 653 } 654 data[1] = 'o' 655 if reflect.DeepEqual(tok1, tok2) { 656 t.Error("CopyToken(CharData) uses same buffer.") 657 } 658 } 659 660 func TestCopyTokenStartElement(t *testing.T) { 661 elt := StartElement{Name{"", "hello"}, []Attr{{Name{"", "lang"}, "en"}}} 662 var tok1 Token = elt 663 tok2 := CopyToken(tok1) 664 if tok1.(StartElement).Attr[0].Value != "en" { 665 t.Error("CopyToken overwrote Attr[0]") 666 } 667 if !reflect.DeepEqual(tok1, tok2) { 668 t.Error("CopyToken(StartElement) != StartElement") 669 } 670 tok1.(StartElement).Attr[0] = Attr{Name{"", "lang"}, "de"} 671 if reflect.DeepEqual(tok1, tok2) { 672 t.Error("CopyToken(CharData) uses same buffer.") 673 } 674 } 675 676 func TestSyntaxErrorLineNum(t *testing.T) { 677 testInput := "<P>Foo<P>\n\n<P>Bar</>\n" 678 d := NewDecoder(strings.NewReader(testInput)) 679 var err error 680 for _, err = d.Token(); err == nil; _, err = d.Token() { 681 } 682 synerr, ok := err.(*SyntaxError) 683 if !ok { 684 t.Error("Expected SyntaxError.") 685 } 686 if synerr.Line != 3 { 687 t.Error("SyntaxError didn't have correct line number.") 688 } 689 } 690 691 func TestTrailingRawToken(t *testing.T) { 692 input := `<FOO></FOO> ` 693 d := NewDecoder(strings.NewReader(input)) 694 var err error 695 for _, err = d.RawToken(); err == nil; _, err = d.RawToken() { 696 } 697 if err != io.EOF { 698 t.Fatalf("d.RawToken() = _, %v, want _, io.EOF", err) 699 } 700 } 701 702 func TestTrailingToken(t *testing.T) { 703 input := `<FOO></FOO> ` 704 d := NewDecoder(strings.NewReader(input)) 705 var err error 706 for _, err = d.Token(); err == nil; _, err = d.Token() { 707 } 708 if err != io.EOF { 709 t.Fatalf("d.Token() = _, %v, want _, io.EOF", err) 710 } 711 } 712 713 func TestEntityInsideCDATA(t *testing.T) { 714 input := `<test><![CDATA[ &val=foo ]]></test>` 715 d := NewDecoder(strings.NewReader(input)) 716 var err error 717 for _, err = d.Token(); err == nil; _, err = d.Token() { 718 } 719 if err != io.EOF { 720 t.Fatalf("d.Token() = _, %v, want _, io.EOF", err) 721 } 722 } 723 724 var characterTests = []struct { 725 in string 726 err string 727 }{ 728 {"\x12<doc/>", "illegal character code U+0012"}, 729 {"<?xml version=\"1.0\"?>\x0b<doc/>", "illegal character code U+000B"}, 730 {"\xef\xbf\xbe<doc/>", "illegal character code U+FFFE"}, 731 {"<?xml version=\"1.0\"?><doc>\r\n<hiya/>\x07<toots/></doc>", "illegal character code U+0007"}, 732 {"<?xml version=\"1.0\"?><doc \x12='value'>what's up</doc>", "expected attribute name in element"}, 733 {"<doc>&abc\x01;</doc>", "invalid character entity &abc (no semicolon)"}, 734 {"<doc>&\x01;</doc>", "invalid character entity & (no semicolon)"}, 735 {"<doc>&\xef\xbf\xbe;</doc>", "invalid character entity &\uFFFE;"}, 736 {"<doc>&hello;</doc>", "invalid character entity &hello;"}, 737 } 738 739 func TestDisallowedCharacters(t *testing.T) { 740 741 for i, tt := range characterTests { 742 d := NewDecoder(strings.NewReader(tt.in)) 743 var err error 744 745 for err == nil { 746 _, err = d.Token() 747 } 748 synerr, ok := err.(*SyntaxError) 749 if !ok { 750 t.Fatalf("input %d d.Token() = _, %v, want _, *SyntaxError", i, err) 751 } 752 if synerr.Msg != tt.err { 753 t.Fatalf("input %d synerr.Msg wrong: want %q, got %q", i, tt.err, synerr.Msg) 754 } 755 } 756 } 757 758 func TestIsInCharacterRange(t *testing.T) { 759 invalid := []rune{ 760 utf8.MaxRune + 1, 761 0xD800, // surrogate min 762 0xDFFF, // surrogate max 763 -1, 764 } 765 for _, r := range invalid { 766 if isInCharacterRange(r) { 767 t.Errorf("rune %U considered valid", r) 768 } 769 } 770 } 771 772 var procInstTests = []struct { 773 input string 774 expect [2]string 775 }{ 776 {`version="1.0" encoding="utf-8"`, [2]string{"1.0", "utf-8"}}, 777 {`version="1.0" encoding='utf-8'`, [2]string{"1.0", "utf-8"}}, 778 {`version="1.0" encoding='utf-8' `, [2]string{"1.0", "utf-8"}}, 779 {`version="1.0" encoding=utf-8`, [2]string{"1.0", ""}}, 780 {`encoding="FOO" `, [2]string{"", "FOO"}}, 781 } 782 783 func TestProcInstEncoding(t *testing.T) { 784 for _, test := range procInstTests { 785 if got := procInst("version", test.input); got != test.expect[0] { 786 t.Errorf("procInst(version, %q) = %q; want %q", test.input, got, test.expect[0]) 787 } 788 if got := procInst("encoding", test.input); got != test.expect[1] { 789 t.Errorf("procInst(encoding, %q) = %q; want %q", test.input, got, test.expect[1]) 790 } 791 } 792 } 793 794 // Ensure that directives with comments include the complete 795 // text of any nested directives. 796 797 var directivesWithCommentsInput = ` 798 <!DOCTYPE [<!-- a comment --><!ENTITY rdf "http://www.w3.org/1999/02/22-rdf-syntax-ns#">]> 799 <!DOCTYPE [<!ENTITY go "Golang"><!-- a comment-->]> 800 <!DOCTYPE <!-> <!> <!----> <!-->--> <!--->--> [<!ENTITY go "Golang"><!-- a comment-->]> 801 ` 802 803 var directivesWithCommentsTokens = []Token{ 804 CharData("\n"), 805 Directive(`DOCTYPE [ <!ENTITY rdf "http://www.w3.org/1999/02/22-rdf-syntax-ns#">]`), 806 CharData("\n"), 807 Directive(`DOCTYPE [<!ENTITY go "Golang"> ]`), 808 CharData("\n"), 809 Directive(`DOCTYPE <!-> <!> [<!ENTITY go "Golang"> ]`), 810 CharData("\n"), 811 } 812 813 func TestDirectivesWithComments(t *testing.T) { 814 d := NewDecoder(strings.NewReader(directivesWithCommentsInput)) 815 816 for i, want := range directivesWithCommentsTokens { 817 have, err := d.Token() 818 if err != nil { 819 t.Fatalf("token %d: unexpected error: %s", i, err) 820 } 821 if !reflect.DeepEqual(have, want) { 822 t.Errorf("token %d = %#v want %#v", i, have, want) 823 } 824 } 825 } 826 827 // Writer whose Write method always returns an error. 828 type errWriter struct{} 829 830 func (errWriter) Write(p []byte) (n int, err error) { return 0, fmt.Errorf("unwritable") } 831 832 func TestEscapeTextIOErrors(t *testing.T) { 833 expectErr := "unwritable" 834 err := EscapeText(errWriter{}, []byte{'A'}) 835 836 if err == nil || err.Error() != expectErr { 837 t.Errorf("have %v, want %v", err, expectErr) 838 } 839 } 840 841 func TestEscapeTextInvalidChar(t *testing.T) { 842 input := []byte("A \x00 terminated string.") 843 expected := "A \uFFFD terminated string." 844 845 buff := new(bytes.Buffer) 846 if err := EscapeText(buff, input); err != nil { 847 t.Fatalf("have %v, want nil", err) 848 } 849 text := buff.String() 850 851 if text != expected { 852 t.Errorf("have %v, want %v", text, expected) 853 } 854 } 855 856 func TestIssue5880(t *testing.T) { 857 type T []byte 858 data, err := Marshal(T{192, 168, 0, 1}) 859 if err != nil { 860 t.Errorf("Marshal error: %v", err) 861 } 862 if !utf8.Valid(data) { 863 t.Errorf("Marshal generated invalid UTF-8: %x", data) 864 } 865 } 866 867 func TestIssue11405(t *testing.T) { 868 testCases := []string{ 869 "<root>", 870 "<root><foo>", 871 "<root><foo></foo>", 872 } 873 for _, tc := range testCases { 874 d := NewDecoder(strings.NewReader(tc)) 875 var err error 876 for { 877 _, err = d.Token() 878 if err != nil { 879 break 880 } 881 } 882 if _, ok := err.(*SyntaxError); !ok { 883 t.Errorf("%s: Token: Got error %v, want SyntaxError", tc, err) 884 } 885 } 886 } 887 888 func TestIssue12417(t *testing.T) { 889 testCases := []struct { 890 s string 891 ok bool 892 }{ 893 {`<?xml encoding="UtF-8" version="1.0"?><root/>`, true}, 894 {`<?xml encoding="UTF-8" version="1.0"?><root/>`, true}, 895 {`<?xml encoding="utf-8" version="1.0"?><root/>`, true}, 896 {`<?xml encoding="uuu-9" version="1.0"?><root/>`, false}, 897 } 898 for _, tc := range testCases { 899 d := NewDecoder(strings.NewReader(tc.s)) 900 var err error 901 for { 902 _, err = d.Token() 903 if err != nil { 904 if err == io.EOF { 905 err = nil 906 } 907 break 908 } 909 } 910 if err != nil && tc.ok { 911 t.Errorf("%q: Encoding charset: expected no error, got %s", tc.s, err) 912 continue 913 } 914 if err == nil && !tc.ok { 915 t.Errorf("%q: Encoding charset: expected error, got nil", tc.s) 916 } 917 } 918 } 919 920 func tokenMap(mapping func(t Token) Token) func(TokenReader) TokenReader { 921 return func(src TokenReader) TokenReader { 922 return mapper{ 923 t: src, 924 f: mapping, 925 } 926 } 927 } 928 929 type mapper struct { 930 t TokenReader 931 f func(Token) Token 932 } 933 934 func (m mapper) Token() (Token, error) { 935 tok, err := m.t.Token() 936 if err != nil { 937 return nil, err 938 } 939 return m.f(tok), nil 940 } 941 942 func TestNewTokenDecoderIdempotent(t *testing.T) { 943 d := NewDecoder(strings.NewReader(`<br>`)) 944 d2 := NewTokenDecoder(d) 945 if d != d2 { 946 t.Error("NewTokenDecoder did not detect underlying Decoder") 947 } 948 } 949 950 func TestWrapDecoder(t *testing.T) { 951 d := NewDecoder(strings.NewReader(`<quote>[Re-enter Clown with a letter, and FABIAN]</quote>`)) 952 m := tokenMap(func(t Token) Token { 953 switch tok := t.(type) { 954 case StartElement: 955 if tok.Name.Local == "quote" { 956 tok.Name.Local = "blocking" 957 return tok 958 } 959 case EndElement: 960 if tok.Name.Local == "quote" { 961 tok.Name.Local = "blocking" 962 return tok 963 } 964 } 965 return t 966 }) 967 968 d = NewTokenDecoder(m(d)) 969 970 o := struct { 971 XMLName Name `xml:"blocking"` 972 Chardata string `xml:",chardata"` 973 }{} 974 975 if err := d.Decode(&o); err != nil { 976 t.Fatal("Got unexpected error while decoding:", err) 977 } 978 979 if o.Chardata != "[Re-enter Clown with a letter, and FABIAN]" { 980 t.Fatalf("Got unexpected chardata: `%s`\n", o.Chardata) 981 } 982 } 983 984 type tokReader struct{} 985 986 func (tokReader) Token() (Token, error) { 987 return StartElement{}, nil 988 } 989 990 type Failure struct{} 991 992 func (Failure) UnmarshalXML(*Decoder, StartElement) error { 993 return nil 994 } 995 996 func TestTokenUnmarshaler(t *testing.T) { 997 defer func() { 998 if r := recover(); r != nil { 999 t.Error("Unexpected panic using custom token unmarshaler") 1000 } 1001 }() 1002 1003 d := NewTokenDecoder(tokReader{}) 1004 d.Decode(&Failure{}) 1005 } 1006 1007 func testRoundTrip(t *testing.T, input string) { 1008 d := NewDecoder(strings.NewReader(input)) 1009 var tokens []Token 1010 var buf bytes.Buffer 1011 e := NewEncoder(&buf) 1012 for { 1013 tok, err := d.Token() 1014 if err == io.EOF { 1015 break 1016 } 1017 if err != nil { 1018 t.Fatalf("invalid input: %v", err) 1019 } 1020 if err := e.EncodeToken(tok); err != nil { 1021 t.Fatalf("failed to re-encode input: %v", err) 1022 } 1023 tokens = append(tokens, CopyToken(tok)) 1024 } 1025 if err := e.Flush(); err != nil { 1026 t.Fatal(err) 1027 } 1028 1029 d = NewDecoder(&buf) 1030 for { 1031 tok, err := d.Token() 1032 if err == io.EOF { 1033 break 1034 } 1035 if err != nil { 1036 t.Fatalf("failed to decode output: %v", err) 1037 } 1038 if len(tokens) == 0 { 1039 t.Fatalf("unexpected token: %#v", tok) 1040 } 1041 a, b := tokens[0], tok 1042 if !reflect.DeepEqual(a, b) { 1043 t.Fatalf("token mismatch: %#v vs %#v", a, b) 1044 } 1045 tokens = tokens[1:] 1046 } 1047 if len(tokens) > 0 { 1048 t.Fatalf("lost tokens: %#v", tokens) 1049 } 1050 } 1051 1052 func TestRoundTrip(t *testing.T) { 1053 tests := map[string]string{ 1054 "leading colon": `<::Test ::foo="bar"><:::Hello></:::Hello><Hello></Hello></::Test>`, 1055 "trailing colon": `<foo abc:="x"></foo>`, 1056 "double colon": `<x:y:foo></x:y:foo>`, 1057 "comments in directives": `<!ENTITY x<!<!-- c1 [ " -->--x --> > <e></e> <!DOCTYPE xxx [ x<!-- c2 " -->--x ]>`, 1058 } 1059 for name, input := range tests { 1060 t.Run(name, func(t *testing.T) { testRoundTrip(t, input) }) 1061 } 1062 }