github.com/megatontech/mynoteforgo@v0.0.0-20200507084910-5d0c6ea6e890/源码/encoding/xml/xml_test.go (about) 1 // Copyright 2009 The Go Authors. All rights reserved. 2 // Use of this source code is governed by a BSD-style 3 // license that can be found in the LICENSE file. 4 5 package xml 6 7 import ( 8 "bytes" 9 "fmt" 10 "io" 11 "reflect" 12 "strings" 13 "testing" 14 "unicode/utf8" 15 ) 16 17 const testInput = ` 18 <?xml version="1.0" encoding="UTF-8"?> 19 <!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" 20 "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd"> 21 <body xmlns:foo="ns1" xmlns="ns2" xmlns:tag="ns3" ` + 22 "\r\n\t" + ` > 23 <hello lang="en">World <>'" 白鵬翔</hello> 24 <query>&何; &is-it;</query> 25 <goodbye /> 26 <outer foo:attr="value" xmlns:tag="ns4"> 27 <inner/> 28 </outer> 29 <tag:name> 30 <![CDATA[Some text here.]]> 31 </tag:name> 32 </body><!-- missing final newline -->` 33 34 var testEntity = map[string]string{"何": "What", "is-it": "is it?"} 35 36 var rawTokens = []Token{ 37 CharData("\n"), 38 ProcInst{"xml", []byte(`version="1.0" encoding="UTF-8"`)}, 39 CharData("\n"), 40 Directive(`DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" 41 "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd"`), 42 CharData("\n"), 43 StartElement{Name{"", "body"}, []Attr{{Name{"xmlns", "foo"}, "ns1"}, {Name{"", "xmlns"}, "ns2"}, {Name{"xmlns", "tag"}, "ns3"}}}, 44 CharData("\n "), 45 StartElement{Name{"", "hello"}, []Attr{{Name{"", "lang"}, "en"}}}, 46 CharData("World <>'\" 白鵬翔"), 47 EndElement{Name{"", "hello"}}, 48 CharData("\n "), 49 StartElement{Name{"", "query"}, []Attr{}}, 50 CharData("What is it?"), 51 EndElement{Name{"", "query"}}, 52 CharData("\n "), 53 StartElement{Name{"", "goodbye"}, []Attr{}}, 54 EndElement{Name{"", "goodbye"}}, 55 CharData("\n "), 56 StartElement{Name{"", "outer"}, []Attr{{Name{"foo", "attr"}, "value"}, {Name{"xmlns", "tag"}, "ns4"}}}, 57 CharData("\n "), 58 StartElement{Name{"", "inner"}, []Attr{}}, 59 EndElement{Name{"", "inner"}}, 60 CharData("\n "), 61 EndElement{Name{"", "outer"}}, 62 CharData("\n "), 63 StartElement{Name{"tag", "name"}, []Attr{}}, 64 CharData("\n "), 65 CharData("Some text here."), 66 CharData("\n "), 67 EndElement{Name{"tag", "name"}}, 68 CharData("\n"), 69 EndElement{Name{"", "body"}}, 70 Comment(" missing final newline "), 71 } 72 73 var cookedTokens = []Token{ 74 CharData("\n"), 75 ProcInst{"xml", []byte(`version="1.0" encoding="UTF-8"`)}, 76 CharData("\n"), 77 Directive(`DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" 78 "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd"`), 79 CharData("\n"), 80 StartElement{Name{"ns2", "body"}, []Attr{{Name{"xmlns", "foo"}, "ns1"}, {Name{"", "xmlns"}, "ns2"}, {Name{"xmlns", "tag"}, "ns3"}}}, 81 CharData("\n "), 82 StartElement{Name{"ns2", "hello"}, []Attr{{Name{"", "lang"}, "en"}}}, 83 CharData("World <>'\" 白鵬翔"), 84 EndElement{Name{"ns2", "hello"}}, 85 CharData("\n "), 86 StartElement{Name{"ns2", "query"}, []Attr{}}, 87 CharData("What is it?"), 88 EndElement{Name{"ns2", "query"}}, 89 CharData("\n "), 90 StartElement{Name{"ns2", "goodbye"}, []Attr{}}, 91 EndElement{Name{"ns2", "goodbye"}}, 92 CharData("\n "), 93 StartElement{Name{"ns2", "outer"}, []Attr{{Name{"ns1", "attr"}, "value"}, {Name{"xmlns", "tag"}, "ns4"}}}, 94 CharData("\n "), 95 StartElement{Name{"ns2", "inner"}, []Attr{}}, 96 EndElement{Name{"ns2", "inner"}}, 97 CharData("\n "), 98 EndElement{Name{"ns2", "outer"}}, 99 CharData("\n "), 100 StartElement{Name{"ns3", "name"}, []Attr{}}, 101 CharData("\n "), 102 CharData("Some text here."), 103 CharData("\n "), 104 EndElement{Name{"ns3", "name"}}, 105 CharData("\n"), 106 EndElement{Name{"ns2", "body"}}, 107 Comment(" missing final newline "), 108 } 109 110 const testInputAltEncoding = ` 111 <?xml version="1.0" encoding="x-testing-uppercase"?> 112 <TAG>VALUE</TAG>` 113 114 var rawTokensAltEncoding = []Token{ 115 CharData("\n"), 116 ProcInst{"xml", []byte(`version="1.0" encoding="x-testing-uppercase"`)}, 117 CharData("\n"), 118 StartElement{Name{"", "tag"}, []Attr{}}, 119 CharData("value"), 120 EndElement{Name{"", "tag"}}, 121 } 122 123 var xmlInput = []string{ 124 // unexpected EOF cases 125 "<", 126 "<t", 127 "<t ", 128 "<t/", 129 "<!", 130 "<!-", 131 "<!--", 132 "<!--c-", 133 "<!--c--", 134 "<!d", 135 "<t></", 136 "<t></t", 137 "<?", 138 "<?p", 139 "<t a", 140 "<t a=", 141 "<t a='", 142 "<t a=''", 143 "<t/><![", 144 "<t/><![C", 145 "<t/><![CDATA[d", 146 "<t/><![CDATA[d]", 147 "<t/><![CDATA[d]]", 148 149 // other Syntax errors 150 "<>", 151 "<t/a", 152 "<0 />", 153 "<?0 >", 154 // "<!0 >", // let the Token() caller handle 155 "</0>", 156 "<t 0=''>", 157 "<t a='&'>", 158 "<t a='<'>", 159 "<t> c;</t>", 160 "<t a>", 161 "<t a=>", 162 "<t a=v>", 163 // "<![CDATA[d]]>", // let the Token() caller handle 164 "<t></e>", 165 "<t></>", 166 "<t></t!", 167 "<t>cdata]]></t>", 168 } 169 170 func TestRawToken(t *testing.T) { 171 d := NewDecoder(strings.NewReader(testInput)) 172 d.Entity = testEntity 173 testRawToken(t, d, testInput, rawTokens) 174 } 175 176 const nonStrictInput = ` 177 <tag>non&entity</tag> 178 <tag>&unknown;entity</tag> 179 <tag>{</tag> 180 <tag>&#zzz;</tag> 181 <tag>&なまえ3;</tag> 182 <tag><-gt;</tag> 183 <tag>&;</tag> 184 <tag>&0a;</tag> 185 ` 186 187 var nonStrictTokens = []Token{ 188 CharData("\n"), 189 StartElement{Name{"", "tag"}, []Attr{}}, 190 CharData("non&entity"), 191 EndElement{Name{"", "tag"}}, 192 CharData("\n"), 193 StartElement{Name{"", "tag"}, []Attr{}}, 194 CharData("&unknown;entity"), 195 EndElement{Name{"", "tag"}}, 196 CharData("\n"), 197 StartElement{Name{"", "tag"}, []Attr{}}, 198 CharData("{"), 199 EndElement{Name{"", "tag"}}, 200 CharData("\n"), 201 StartElement{Name{"", "tag"}, []Attr{}}, 202 CharData("&#zzz;"), 203 EndElement{Name{"", "tag"}}, 204 CharData("\n"), 205 StartElement{Name{"", "tag"}, []Attr{}}, 206 CharData("&なまえ3;"), 207 EndElement{Name{"", "tag"}}, 208 CharData("\n"), 209 StartElement{Name{"", "tag"}, []Attr{}}, 210 CharData("<-gt;"), 211 EndElement{Name{"", "tag"}}, 212 CharData("\n"), 213 StartElement{Name{"", "tag"}, []Attr{}}, 214 CharData("&;"), 215 EndElement{Name{"", "tag"}}, 216 CharData("\n"), 217 StartElement{Name{"", "tag"}, []Attr{}}, 218 CharData("&0a;"), 219 EndElement{Name{"", "tag"}}, 220 CharData("\n"), 221 } 222 223 func TestNonStrictRawToken(t *testing.T) { 224 d := NewDecoder(strings.NewReader(nonStrictInput)) 225 d.Strict = false 226 testRawToken(t, d, nonStrictInput, nonStrictTokens) 227 } 228 229 type downCaser struct { 230 t *testing.T 231 r io.ByteReader 232 } 233 234 func (d *downCaser) ReadByte() (c byte, err error) { 235 c, err = d.r.ReadByte() 236 if c >= 'A' && c <= 'Z' { 237 c += 'a' - 'A' 238 } 239 return 240 } 241 242 func (d *downCaser) Read(p []byte) (int, error) { 243 d.t.Fatalf("unexpected Read call on downCaser reader") 244 panic("unreachable") 245 } 246 247 func TestRawTokenAltEncoding(t *testing.T) { 248 d := NewDecoder(strings.NewReader(testInputAltEncoding)) 249 d.CharsetReader = func(charset string, input io.Reader) (io.Reader, error) { 250 if charset != "x-testing-uppercase" { 251 t.Fatalf("unexpected charset %q", charset) 252 } 253 return &downCaser{t, input.(io.ByteReader)}, nil 254 } 255 testRawToken(t, d, testInputAltEncoding, rawTokensAltEncoding) 256 } 257 258 func TestRawTokenAltEncodingNoConverter(t *testing.T) { 259 d := NewDecoder(strings.NewReader(testInputAltEncoding)) 260 token, err := d.RawToken() 261 if token == nil { 262 t.Fatalf("expected a token on first RawToken call") 263 } 264 if err != nil { 265 t.Fatal(err) 266 } 267 token, err = d.RawToken() 268 if token != nil { 269 t.Errorf("expected a nil token; got %#v", token) 270 } 271 if err == nil { 272 t.Fatalf("expected an error on second RawToken call") 273 } 274 const encoding = "x-testing-uppercase" 275 if !strings.Contains(err.Error(), encoding) { 276 t.Errorf("expected error to contain %q; got error: %v", 277 encoding, err) 278 } 279 } 280 281 func testRawToken(t *testing.T, d *Decoder, raw string, rawTokens []Token) { 282 lastEnd := int64(0) 283 for i, want := range rawTokens { 284 start := d.InputOffset() 285 have, err := d.RawToken() 286 end := d.InputOffset() 287 if err != nil { 288 t.Fatalf("token %d: unexpected error: %s", i, err) 289 } 290 if !reflect.DeepEqual(have, want) { 291 var shave, swant string 292 if _, ok := have.(CharData); ok { 293 shave = fmt.Sprintf("CharData(%q)", have) 294 } else { 295 shave = fmt.Sprintf("%#v", have) 296 } 297 if _, ok := want.(CharData); ok { 298 swant = fmt.Sprintf("CharData(%q)", want) 299 } else { 300 swant = fmt.Sprintf("%#v", want) 301 } 302 t.Errorf("token %d = %s, want %s", i, shave, swant) 303 } 304 305 // Check that InputOffset returned actual token. 306 switch { 307 case start < lastEnd: 308 t.Errorf("token %d: position [%d,%d) for %T is before previous token", i, start, end, have) 309 case start >= end: 310 // Special case: EndElement can be synthesized. 311 if start == end && end == lastEnd { 312 break 313 } 314 t.Errorf("token %d: position [%d,%d) for %T is empty", i, start, end, have) 315 case end > int64(len(raw)): 316 t.Errorf("token %d: position [%d,%d) for %T extends beyond input", i, start, end, have) 317 default: 318 text := raw[start:end] 319 if strings.ContainsAny(text, "<>") && (!strings.HasPrefix(text, "<") || !strings.HasSuffix(text, ">")) { 320 t.Errorf("token %d: misaligned raw token %#q for %T", i, text, have) 321 } 322 } 323 lastEnd = end 324 } 325 } 326 327 // Ensure that directives (specifically !DOCTYPE) include the complete 328 // text of any nested directives, noting that < and > do not change 329 // nesting depth if they are in single or double quotes. 330 331 var nestedDirectivesInput = ` 332 <!DOCTYPE [<!ENTITY rdf "http://www.w3.org/1999/02/22-rdf-syntax-ns#">]> 333 <!DOCTYPE [<!ENTITY xlt ">">]> 334 <!DOCTYPE [<!ENTITY xlt "<">]> 335 <!DOCTYPE [<!ENTITY xlt '>'>]> 336 <!DOCTYPE [<!ENTITY xlt '<'>]> 337 <!DOCTYPE [<!ENTITY xlt '">'>]> 338 <!DOCTYPE [<!ENTITY xlt "'<">]> 339 ` 340 341 var nestedDirectivesTokens = []Token{ 342 CharData("\n"), 343 Directive(`DOCTYPE [<!ENTITY rdf "http://www.w3.org/1999/02/22-rdf-syntax-ns#">]`), 344 CharData("\n"), 345 Directive(`DOCTYPE [<!ENTITY xlt ">">]`), 346 CharData("\n"), 347 Directive(`DOCTYPE [<!ENTITY xlt "<">]`), 348 CharData("\n"), 349 Directive(`DOCTYPE [<!ENTITY xlt '>'>]`), 350 CharData("\n"), 351 Directive(`DOCTYPE [<!ENTITY xlt '<'>]`), 352 CharData("\n"), 353 Directive(`DOCTYPE [<!ENTITY xlt '">'>]`), 354 CharData("\n"), 355 Directive(`DOCTYPE [<!ENTITY xlt "'<">]`), 356 CharData("\n"), 357 } 358 359 func TestNestedDirectives(t *testing.T) { 360 d := NewDecoder(strings.NewReader(nestedDirectivesInput)) 361 362 for i, want := range nestedDirectivesTokens { 363 have, err := d.Token() 364 if err != nil { 365 t.Fatalf("token %d: unexpected error: %s", i, err) 366 } 367 if !reflect.DeepEqual(have, want) { 368 t.Errorf("token %d = %#v want %#v", i, have, want) 369 } 370 } 371 } 372 373 func TestToken(t *testing.T) { 374 d := NewDecoder(strings.NewReader(testInput)) 375 d.Entity = testEntity 376 377 for i, want := range cookedTokens { 378 have, err := d.Token() 379 if err != nil { 380 t.Fatalf("token %d: unexpected error: %s", i, err) 381 } 382 if !reflect.DeepEqual(have, want) { 383 t.Errorf("token %d = %#v want %#v", i, have, want) 384 } 385 } 386 } 387 388 func TestSyntax(t *testing.T) { 389 for i := range xmlInput { 390 d := NewDecoder(strings.NewReader(xmlInput[i])) 391 var err error 392 for _, err = d.Token(); err == nil; _, err = d.Token() { 393 } 394 if _, ok := err.(*SyntaxError); !ok { 395 t.Fatalf(`xmlInput "%s": expected SyntaxError not received`, xmlInput[i]) 396 } 397 } 398 } 399 400 type allScalars struct { 401 True1 bool 402 True2 bool 403 False1 bool 404 False2 bool 405 Int int 406 Int8 int8 407 Int16 int16 408 Int32 int32 409 Int64 int64 410 Uint int 411 Uint8 uint8 412 Uint16 uint16 413 Uint32 uint32 414 Uint64 uint64 415 Uintptr uintptr 416 Float32 float32 417 Float64 float64 418 String string 419 PtrString *string 420 } 421 422 var all = allScalars{ 423 True1: true, 424 True2: true, 425 False1: false, 426 False2: false, 427 Int: 1, 428 Int8: -2, 429 Int16: 3, 430 Int32: -4, 431 Int64: 5, 432 Uint: 6, 433 Uint8: 7, 434 Uint16: 8, 435 Uint32: 9, 436 Uint64: 10, 437 Uintptr: 11, 438 Float32: 13.0, 439 Float64: 14.0, 440 String: "15", 441 PtrString: &sixteen, 442 } 443 444 var sixteen = "16" 445 446 const testScalarsInput = `<allscalars> 447 <True1>true</True1> 448 <True2>1</True2> 449 <False1>false</False1> 450 <False2>0</False2> 451 <Int>1</Int> 452 <Int8>-2</Int8> 453 <Int16>3</Int16> 454 <Int32>-4</Int32> 455 <Int64>5</Int64> 456 <Uint>6</Uint> 457 <Uint8>7</Uint8> 458 <Uint16>8</Uint16> 459 <Uint32>9</Uint32> 460 <Uint64>10</Uint64> 461 <Uintptr>11</Uintptr> 462 <Float>12.0</Float> 463 <Float32>13.0</Float32> 464 <Float64>14.0</Float64> 465 <String>15</String> 466 <PtrString>16</PtrString> 467 </allscalars>` 468 469 func TestAllScalars(t *testing.T) { 470 var a allScalars 471 err := Unmarshal([]byte(testScalarsInput), &a) 472 473 if err != nil { 474 t.Fatal(err) 475 } 476 if !reflect.DeepEqual(a, all) { 477 t.Errorf("have %+v want %+v", a, all) 478 } 479 } 480 481 type item struct { 482 FieldA string 483 } 484 485 func TestIssue569(t *testing.T) { 486 data := `<item><FieldA>abcd</FieldA></item>` 487 var i item 488 err := Unmarshal([]byte(data), &i) 489 490 if err != nil || i.FieldA != "abcd" { 491 t.Fatal("Expecting abcd") 492 } 493 } 494 495 func TestUnquotedAttrs(t *testing.T) { 496 data := "<tag attr=azAZ09:-_\t>" 497 d := NewDecoder(strings.NewReader(data)) 498 d.Strict = false 499 token, err := d.Token() 500 if _, ok := err.(*SyntaxError); ok { 501 t.Errorf("Unexpected error: %v", err) 502 } 503 if token.(StartElement).Name.Local != "tag" { 504 t.Errorf("Unexpected tag name: %v", token.(StartElement).Name.Local) 505 } 506 attr := token.(StartElement).Attr[0] 507 if attr.Value != "azAZ09:-_" { 508 t.Errorf("Unexpected attribute value: %v", attr.Value) 509 } 510 if attr.Name.Local != "attr" { 511 t.Errorf("Unexpected attribute name: %v", attr.Name.Local) 512 } 513 } 514 515 func TestValuelessAttrs(t *testing.T) { 516 tests := [][3]string{ 517 {"<p nowrap>", "p", "nowrap"}, 518 {"<p nowrap >", "p", "nowrap"}, 519 {"<input checked/>", "input", "checked"}, 520 {"<input checked />", "input", "checked"}, 521 } 522 for _, test := range tests { 523 d := NewDecoder(strings.NewReader(test[0])) 524 d.Strict = false 525 token, err := d.Token() 526 if _, ok := err.(*SyntaxError); ok { 527 t.Errorf("Unexpected error: %v", err) 528 } 529 if token.(StartElement).Name.Local != test[1] { 530 t.Errorf("Unexpected tag name: %v", token.(StartElement).Name.Local) 531 } 532 attr := token.(StartElement).Attr[0] 533 if attr.Value != test[2] { 534 t.Errorf("Unexpected attribute value: %v", attr.Value) 535 } 536 if attr.Name.Local != test[2] { 537 t.Errorf("Unexpected attribute name: %v", attr.Name.Local) 538 } 539 } 540 } 541 542 func TestCopyTokenCharData(t *testing.T) { 543 data := []byte("same data") 544 var tok1 Token = CharData(data) 545 tok2 := CopyToken(tok1) 546 if !reflect.DeepEqual(tok1, tok2) { 547 t.Error("CopyToken(CharData) != CharData") 548 } 549 data[1] = 'o' 550 if reflect.DeepEqual(tok1, tok2) { 551 t.Error("CopyToken(CharData) uses same buffer.") 552 } 553 } 554 555 func TestCopyTokenStartElement(t *testing.T) { 556 elt := StartElement{Name{"", "hello"}, []Attr{{Name{"", "lang"}, "en"}}} 557 var tok1 Token = elt 558 tok2 := CopyToken(tok1) 559 if tok1.(StartElement).Attr[0].Value != "en" { 560 t.Error("CopyToken overwrote Attr[0]") 561 } 562 if !reflect.DeepEqual(tok1, tok2) { 563 t.Error("CopyToken(StartElement) != StartElement") 564 } 565 tok1.(StartElement).Attr[0] = Attr{Name{"", "lang"}, "de"} 566 if reflect.DeepEqual(tok1, tok2) { 567 t.Error("CopyToken(CharData) uses same buffer.") 568 } 569 } 570 571 func TestSyntaxErrorLineNum(t *testing.T) { 572 testInput := "<P>Foo<P>\n\n<P>Bar</>\n" 573 d := NewDecoder(strings.NewReader(testInput)) 574 var err error 575 for _, err = d.Token(); err == nil; _, err = d.Token() { 576 } 577 synerr, ok := err.(*SyntaxError) 578 if !ok { 579 t.Error("Expected SyntaxError.") 580 } 581 if synerr.Line != 3 { 582 t.Error("SyntaxError didn't have correct line number.") 583 } 584 } 585 586 func TestTrailingRawToken(t *testing.T) { 587 input := `<FOO></FOO> ` 588 d := NewDecoder(strings.NewReader(input)) 589 var err error 590 for _, err = d.RawToken(); err == nil; _, err = d.RawToken() { 591 } 592 if err != io.EOF { 593 t.Fatalf("d.RawToken() = _, %v, want _, io.EOF", err) 594 } 595 } 596 597 func TestTrailingToken(t *testing.T) { 598 input := `<FOO></FOO> ` 599 d := NewDecoder(strings.NewReader(input)) 600 var err error 601 for _, err = d.Token(); err == nil; _, err = d.Token() { 602 } 603 if err != io.EOF { 604 t.Fatalf("d.Token() = _, %v, want _, io.EOF", err) 605 } 606 } 607 608 func TestEntityInsideCDATA(t *testing.T) { 609 input := `<test><![CDATA[ &val=foo ]]></test>` 610 d := NewDecoder(strings.NewReader(input)) 611 var err error 612 for _, err = d.Token(); err == nil; _, err = d.Token() { 613 } 614 if err != io.EOF { 615 t.Fatalf("d.Token() = _, %v, want _, io.EOF", err) 616 } 617 } 618 619 var characterTests = []struct { 620 in string 621 err string 622 }{ 623 {"\x12<doc/>", "illegal character code U+0012"}, 624 {"<?xml version=\"1.0\"?>\x0b<doc/>", "illegal character code U+000B"}, 625 {"\xef\xbf\xbe<doc/>", "illegal character code U+FFFE"}, 626 {"<?xml version=\"1.0\"?><doc>\r\n<hiya/>\x07<toots/></doc>", "illegal character code U+0007"}, 627 {"<?xml version=\"1.0\"?><doc \x12='value'>what's up</doc>", "expected attribute name in element"}, 628 {"<doc>&abc\x01;</doc>", "invalid character entity &abc (no semicolon)"}, 629 {"<doc>&\x01;</doc>", "invalid character entity & (no semicolon)"}, 630 {"<doc>&\xef\xbf\xbe;</doc>", "invalid character entity &\uFFFE;"}, 631 {"<doc>&hello;</doc>", "invalid character entity &hello;"}, 632 } 633 634 func TestDisallowedCharacters(t *testing.T) { 635 636 for i, tt := range characterTests { 637 d := NewDecoder(strings.NewReader(tt.in)) 638 var err error 639 640 for err == nil { 641 _, err = d.Token() 642 } 643 synerr, ok := err.(*SyntaxError) 644 if !ok { 645 t.Fatalf("input %d d.Token() = _, %v, want _, *SyntaxError", i, err) 646 } 647 if synerr.Msg != tt.err { 648 t.Fatalf("input %d synerr.Msg wrong: want %q, got %q", i, tt.err, synerr.Msg) 649 } 650 } 651 } 652 653 func TestIsInCharacterRange(t *testing.T) { 654 invalid := []rune{ 655 utf8.MaxRune + 1, 656 0xD800, // surrogate min 657 0xDFFF, // surrogate max 658 -1, 659 } 660 for _, r := range invalid { 661 if isInCharacterRange(r) { 662 t.Errorf("rune %U considered valid", r) 663 } 664 } 665 } 666 667 var procInstTests = []struct { 668 input string 669 expect [2]string 670 }{ 671 {`version="1.0" encoding="utf-8"`, [2]string{"1.0", "utf-8"}}, 672 {`version="1.0" encoding='utf-8'`, [2]string{"1.0", "utf-8"}}, 673 {`version="1.0" encoding='utf-8' `, [2]string{"1.0", "utf-8"}}, 674 {`version="1.0" encoding=utf-8`, [2]string{"1.0", ""}}, 675 {`encoding="FOO" `, [2]string{"", "FOO"}}, 676 } 677 678 func TestProcInstEncoding(t *testing.T) { 679 for _, test := range procInstTests { 680 if got := procInst("version", test.input); got != test.expect[0] { 681 t.Errorf("procInst(version, %q) = %q; want %q", test.input, got, test.expect[0]) 682 } 683 if got := procInst("encoding", test.input); got != test.expect[1] { 684 t.Errorf("procInst(encoding, %q) = %q; want %q", test.input, got, test.expect[1]) 685 } 686 } 687 } 688 689 // Ensure that directives with comments include the complete 690 // text of any nested directives. 691 692 var directivesWithCommentsInput = ` 693 <!DOCTYPE [<!-- a comment --><!ENTITY rdf "http://www.w3.org/1999/02/22-rdf-syntax-ns#">]> 694 <!DOCTYPE [<!ENTITY go "Golang"><!-- a comment-->]> 695 <!DOCTYPE <!-> <!> <!----> <!-->--> <!--->--> [<!ENTITY go "Golang"><!-- a comment-->]> 696 ` 697 698 var directivesWithCommentsTokens = []Token{ 699 CharData("\n"), 700 Directive(`DOCTYPE [<!ENTITY rdf "http://www.w3.org/1999/02/22-rdf-syntax-ns#">]`), 701 CharData("\n"), 702 Directive(`DOCTYPE [<!ENTITY go "Golang">]`), 703 CharData("\n"), 704 Directive(`DOCTYPE <!-> <!> [<!ENTITY go "Golang">]`), 705 CharData("\n"), 706 } 707 708 func TestDirectivesWithComments(t *testing.T) { 709 d := NewDecoder(strings.NewReader(directivesWithCommentsInput)) 710 711 for i, want := range directivesWithCommentsTokens { 712 have, err := d.Token() 713 if err != nil { 714 t.Fatalf("token %d: unexpected error: %s", i, err) 715 } 716 if !reflect.DeepEqual(have, want) { 717 t.Errorf("token %d = %#v want %#v", i, have, want) 718 } 719 } 720 } 721 722 // Writer whose Write method always returns an error. 723 type errWriter struct{} 724 725 func (errWriter) Write(p []byte) (n int, err error) { return 0, fmt.Errorf("unwritable") } 726 727 func TestEscapeTextIOErrors(t *testing.T) { 728 expectErr := "unwritable" 729 err := EscapeText(errWriter{}, []byte{'A'}) 730 731 if err == nil || err.Error() != expectErr { 732 t.Errorf("have %v, want %v", err, expectErr) 733 } 734 } 735 736 func TestEscapeTextInvalidChar(t *testing.T) { 737 input := []byte("A \x00 terminated string.") 738 expected := "A \uFFFD terminated string." 739 740 buff := new(bytes.Buffer) 741 if err := EscapeText(buff, input); err != nil { 742 t.Fatalf("have %v, want nil", err) 743 } 744 text := buff.String() 745 746 if text != expected { 747 t.Errorf("have %v, want %v", text, expected) 748 } 749 } 750 751 func TestIssue5880(t *testing.T) { 752 type T []byte 753 data, err := Marshal(T{192, 168, 0, 1}) 754 if err != nil { 755 t.Errorf("Marshal error: %v", err) 756 } 757 if !utf8.Valid(data) { 758 t.Errorf("Marshal generated invalid UTF-8: %x", data) 759 } 760 } 761 762 func TestIssue11405(t *testing.T) { 763 testCases := []string{ 764 "<root>", 765 "<root><foo>", 766 "<root><foo></foo>", 767 } 768 for _, tc := range testCases { 769 d := NewDecoder(strings.NewReader(tc)) 770 var err error 771 for { 772 _, err = d.Token() 773 if err != nil { 774 break 775 } 776 } 777 if _, ok := err.(*SyntaxError); !ok { 778 t.Errorf("%s: Token: Got error %v, want SyntaxError", tc, err) 779 } 780 } 781 } 782 783 func TestIssue12417(t *testing.T) { 784 testCases := []struct { 785 s string 786 ok bool 787 }{ 788 {`<?xml encoding="UtF-8" version="1.0"?><root/>`, true}, 789 {`<?xml encoding="UTF-8" version="1.0"?><root/>`, true}, 790 {`<?xml encoding="utf-8" version="1.0"?><root/>`, true}, 791 {`<?xml encoding="uuu-9" version="1.0"?><root/>`, false}, 792 } 793 for _, tc := range testCases { 794 d := NewDecoder(strings.NewReader(tc.s)) 795 var err error 796 for { 797 _, err = d.Token() 798 if err != nil { 799 if err == io.EOF { 800 err = nil 801 } 802 break 803 } 804 } 805 if err != nil && tc.ok { 806 t.Errorf("%q: Encoding charset: expected no error, got %s", tc.s, err) 807 continue 808 } 809 if err == nil && !tc.ok { 810 t.Errorf("%q: Encoding charset: expected error, got nil", tc.s) 811 } 812 } 813 } 814 815 func tokenMap(mapping func(t Token) Token) func(TokenReader) TokenReader { 816 return func(src TokenReader) TokenReader { 817 return mapper{ 818 t: src, 819 f: mapping, 820 } 821 } 822 } 823 824 type mapper struct { 825 t TokenReader 826 f func(Token) Token 827 } 828 829 func (m mapper) Token() (Token, error) { 830 tok, err := m.t.Token() 831 if err != nil { 832 return nil, err 833 } 834 return m.f(tok), nil 835 } 836 837 func TestNewTokenDecoderIdempotent(t *testing.T) { 838 d := NewDecoder(strings.NewReader(`<br/>`)) 839 d2 := NewTokenDecoder(d) 840 if d != d2 { 841 t.Error("NewTokenDecoder did not detect underlying Decoder") 842 } 843 } 844 845 func TestWrapDecoder(t *testing.T) { 846 d := NewDecoder(strings.NewReader(`<quote>[Re-enter Clown with a letter, and FABIAN]</quote>`)) 847 m := tokenMap(func(t Token) Token { 848 switch tok := t.(type) { 849 case StartElement: 850 if tok.Name.Local == "quote" { 851 tok.Name.Local = "blocking" 852 return tok 853 } 854 case EndElement: 855 if tok.Name.Local == "quote" { 856 tok.Name.Local = "blocking" 857 return tok 858 } 859 } 860 return t 861 }) 862 863 d = NewTokenDecoder(m(d)) 864 865 o := struct { 866 XMLName Name `xml:"blocking"` 867 Chardata string `xml:",chardata"` 868 }{} 869 870 if err := d.Decode(&o); err != nil { 871 t.Fatal("Got unexpected error while decoding:", err) 872 } 873 874 if o.Chardata != "[Re-enter Clown with a letter, and FABIAN]" { 875 t.Fatalf("Got unexpected chardata: `%s`\n", o.Chardata) 876 } 877 } 878 879 type tokReader struct{} 880 881 func (tokReader) Token() (Token, error) { 882 return StartElement{}, nil 883 } 884 885 type Failure struct{} 886 887 func (Failure) UnmarshalXML(*Decoder, StartElement) error { 888 return nil 889 } 890 891 func TestTokenUnmarshaler(t *testing.T) { 892 defer func() { 893 if r := recover(); r != nil { 894 t.Error("Unexpected panic using custom token unmarshaler") 895 } 896 }() 897 898 d := NewTokenDecoder(tokReader{}) 899 d.Decode(&Failure{}) 900 }