github.com/flyinox/gosm@v0.0.0-20171117061539-16768cb62077/src/encoding/xml/xml_test.go (about) 1 // Copyright 2009 The Go Authors. All rights reserved. 2 // Use of this source code is governed by a BSD-style 3 // license that can be found in the LICENSE file. 4 5 package xml 6 7 import ( 8 "bytes" 9 "fmt" 10 "io" 11 "reflect" 12 "strings" 13 "testing" 14 "unicode/utf8" 15 ) 16 17 const testInput = ` 18 <?xml version="1.0" encoding="UTF-8"?> 19 <!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" 20 "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd"> 21 <body xmlns:foo="ns1" xmlns="ns2" xmlns:tag="ns3" ` + 22 "\r\n\t" + ` > 23 <hello lang="en">World <>'" 白鵬翔</hello> 24 <query>&何; &is-it;</query> 25 <goodbye /> 26 <outer foo:attr="value" xmlns:tag="ns4"> 27 <inner/> 28 </outer> 29 <tag:name> 30 <![CDATA[Some text here.]]> 31 </tag:name> 32 </body><!-- missing final newline -->` 33 34 var testEntity = map[string]string{"何": "What", "is-it": "is it?"} 35 36 var rawTokens = []Token{ 37 CharData("\n"), 38 ProcInst{"xml", []byte(`version="1.0" encoding="UTF-8"`)}, 39 CharData("\n"), 40 Directive(`DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" 41 "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd"`), 42 CharData("\n"), 43 StartElement{Name{"", "body"}, []Attr{{Name{"xmlns", "foo"}, "ns1"}, {Name{"", "xmlns"}, "ns2"}, {Name{"xmlns", "tag"}, "ns3"}}}, 44 CharData("\n "), 45 StartElement{Name{"", "hello"}, []Attr{{Name{"", "lang"}, "en"}}}, 46 CharData("World <>'\" 白鵬翔"), 47 EndElement{Name{"", "hello"}}, 48 CharData("\n "), 49 StartElement{Name{"", "query"}, []Attr{}}, 50 CharData("What is it?"), 51 EndElement{Name{"", "query"}}, 52 CharData("\n "), 53 StartElement{Name{"", "goodbye"}, []Attr{}}, 54 EndElement{Name{"", "goodbye"}}, 55 CharData("\n "), 56 StartElement{Name{"", "outer"}, []Attr{{Name{"foo", "attr"}, "value"}, {Name{"xmlns", "tag"}, "ns4"}}}, 57 CharData("\n "), 58 StartElement{Name{"", "inner"}, []Attr{}}, 59 EndElement{Name{"", "inner"}}, 60 CharData("\n "), 61 EndElement{Name{"", "outer"}}, 62 CharData("\n "), 63 StartElement{Name{"tag", "name"}, []Attr{}}, 64 CharData("\n "), 65 CharData("Some text here."), 66 CharData("\n "), 67 EndElement{Name{"tag", "name"}}, 68 CharData("\n"), 69 EndElement{Name{"", "body"}}, 70 Comment(" missing final newline "), 71 } 72 73 var cookedTokens = []Token{ 74 CharData("\n"), 75 ProcInst{"xml", []byte(`version="1.0" encoding="UTF-8"`)}, 76 CharData("\n"), 77 Directive(`DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" 78 "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd"`), 79 CharData("\n"), 80 StartElement{Name{"ns2", "body"}, []Attr{{Name{"xmlns", "foo"}, "ns1"}, {Name{"", "xmlns"}, "ns2"}, {Name{"xmlns", "tag"}, "ns3"}}}, 81 CharData("\n "), 82 StartElement{Name{"ns2", "hello"}, []Attr{{Name{"", "lang"}, "en"}}}, 83 CharData("World <>'\" 白鵬翔"), 84 EndElement{Name{"ns2", "hello"}}, 85 CharData("\n "), 86 StartElement{Name{"ns2", "query"}, []Attr{}}, 87 CharData("What is it?"), 88 EndElement{Name{"ns2", "query"}}, 89 CharData("\n "), 90 StartElement{Name{"ns2", "goodbye"}, []Attr{}}, 91 EndElement{Name{"ns2", "goodbye"}}, 92 CharData("\n "), 93 StartElement{Name{"ns2", "outer"}, []Attr{{Name{"ns1", "attr"}, "value"}, {Name{"xmlns", "tag"}, "ns4"}}}, 94 CharData("\n "), 95 StartElement{Name{"ns2", "inner"}, []Attr{}}, 96 EndElement{Name{"ns2", "inner"}}, 97 CharData("\n "), 98 EndElement{Name{"ns2", "outer"}}, 99 CharData("\n "), 100 StartElement{Name{"ns3", "name"}, []Attr{}}, 101 CharData("\n "), 102 CharData("Some text here."), 103 CharData("\n "), 104 EndElement{Name{"ns3", "name"}}, 105 CharData("\n"), 106 EndElement{Name{"ns2", "body"}}, 107 Comment(" missing final newline "), 108 } 109 110 const testInputAltEncoding = ` 111 <?xml version="1.0" encoding="x-testing-uppercase"?> 112 <TAG>VALUE</TAG>` 113 114 var rawTokensAltEncoding = []Token{ 115 CharData("\n"), 116 ProcInst{"xml", []byte(`version="1.0" encoding="x-testing-uppercase"`)}, 117 CharData("\n"), 118 StartElement{Name{"", "tag"}, []Attr{}}, 119 CharData("value"), 120 EndElement{Name{"", "tag"}}, 121 } 122 123 var xmlInput = []string{ 124 // unexpected EOF cases 125 "<", 126 "<t", 127 "<t ", 128 "<t/", 129 "<!", 130 "<!-", 131 "<!--", 132 "<!--c-", 133 "<!--c--", 134 "<!d", 135 "<t></", 136 "<t></t", 137 "<?", 138 "<?p", 139 "<t a", 140 "<t a=", 141 "<t a='", 142 "<t a=''", 143 "<t/><![", 144 "<t/><![C", 145 "<t/><![CDATA[d", 146 "<t/><![CDATA[d]", 147 "<t/><![CDATA[d]]", 148 149 // other Syntax errors 150 "<>", 151 "<t/a", 152 "<0 />", 153 "<?0 >", 154 // "<!0 >", // let the Token() caller handle 155 "</0>", 156 "<t 0=''>", 157 "<t a='&'>", 158 "<t a='<'>", 159 "<t> c;</t>", 160 "<t a>", 161 "<t a=>", 162 "<t a=v>", 163 // "<![CDATA[d]]>", // let the Token() caller handle 164 "<t></e>", 165 "<t></>", 166 "<t></t!", 167 "<t>cdata]]></t>", 168 } 169 170 func TestRawToken(t *testing.T) { 171 d := NewDecoder(strings.NewReader(testInput)) 172 d.Entity = testEntity 173 testRawToken(t, d, testInput, rawTokens) 174 } 175 176 const nonStrictInput = ` 177 <tag>non&entity</tag> 178 <tag>&unknown;entity</tag> 179 <tag>{</tag> 180 <tag>&#zzz;</tag> 181 <tag>&なまえ3;</tag> 182 <tag><-gt;</tag> 183 <tag>&;</tag> 184 <tag>&0a;</tag> 185 ` 186 187 var nonStrictTokens = []Token{ 188 CharData("\n"), 189 StartElement{Name{"", "tag"}, []Attr{}}, 190 CharData("non&entity"), 191 EndElement{Name{"", "tag"}}, 192 CharData("\n"), 193 StartElement{Name{"", "tag"}, []Attr{}}, 194 CharData("&unknown;entity"), 195 EndElement{Name{"", "tag"}}, 196 CharData("\n"), 197 StartElement{Name{"", "tag"}, []Attr{}}, 198 CharData("{"), 199 EndElement{Name{"", "tag"}}, 200 CharData("\n"), 201 StartElement{Name{"", "tag"}, []Attr{}}, 202 CharData("&#zzz;"), 203 EndElement{Name{"", "tag"}}, 204 CharData("\n"), 205 StartElement{Name{"", "tag"}, []Attr{}}, 206 CharData("&なまえ3;"), 207 EndElement{Name{"", "tag"}}, 208 CharData("\n"), 209 StartElement{Name{"", "tag"}, []Attr{}}, 210 CharData("<-gt;"), 211 EndElement{Name{"", "tag"}}, 212 CharData("\n"), 213 StartElement{Name{"", "tag"}, []Attr{}}, 214 CharData("&;"), 215 EndElement{Name{"", "tag"}}, 216 CharData("\n"), 217 StartElement{Name{"", "tag"}, []Attr{}}, 218 CharData("&0a;"), 219 EndElement{Name{"", "tag"}}, 220 CharData("\n"), 221 } 222 223 func TestNonStrictRawToken(t *testing.T) { 224 d := NewDecoder(strings.NewReader(nonStrictInput)) 225 d.Strict = false 226 testRawToken(t, d, nonStrictInput, nonStrictTokens) 227 } 228 229 type downCaser struct { 230 t *testing.T 231 r io.ByteReader 232 } 233 234 func (d *downCaser) ReadByte() (c byte, err error) { 235 c, err = d.r.ReadByte() 236 if c >= 'A' && c <= 'Z' { 237 c += 'a' - 'A' 238 } 239 return 240 } 241 242 func (d *downCaser) Read(p []byte) (int, error) { 243 d.t.Fatalf("unexpected Read call on downCaser reader") 244 panic("unreachable") 245 } 246 247 func TestRawTokenAltEncoding(t *testing.T) { 248 d := NewDecoder(strings.NewReader(testInputAltEncoding)) 249 d.CharsetReader = func(charset string, input io.Reader) (io.Reader, error) { 250 if charset != "x-testing-uppercase" { 251 t.Fatalf("unexpected charset %q", charset) 252 } 253 return &downCaser{t, input.(io.ByteReader)}, nil 254 } 255 testRawToken(t, d, testInputAltEncoding, rawTokensAltEncoding) 256 } 257 258 func TestRawTokenAltEncodingNoConverter(t *testing.T) { 259 d := NewDecoder(strings.NewReader(testInputAltEncoding)) 260 token, err := d.RawToken() 261 if token == nil { 262 t.Fatalf("expected a token on first RawToken call") 263 } 264 if err != nil { 265 t.Fatal(err) 266 } 267 token, err = d.RawToken() 268 if token != nil { 269 t.Errorf("expected a nil token; got %#v", token) 270 } 271 if err == nil { 272 t.Fatalf("expected an error on second RawToken call") 273 } 274 const encoding = "x-testing-uppercase" 275 if !strings.Contains(err.Error(), encoding) { 276 t.Errorf("expected error to contain %q; got error: %v", 277 encoding, err) 278 } 279 } 280 281 func testRawToken(t *testing.T, d *Decoder, raw string, rawTokens []Token) { 282 lastEnd := int64(0) 283 for i, want := range rawTokens { 284 start := d.InputOffset() 285 have, err := d.RawToken() 286 end := d.InputOffset() 287 if err != nil { 288 t.Fatalf("token %d: unexpected error: %s", i, err) 289 } 290 if !reflect.DeepEqual(have, want) { 291 var shave, swant string 292 if _, ok := have.(CharData); ok { 293 shave = fmt.Sprintf("CharData(%q)", have) 294 } else { 295 shave = fmt.Sprintf("%#v", have) 296 } 297 if _, ok := want.(CharData); ok { 298 swant = fmt.Sprintf("CharData(%q)", want) 299 } else { 300 swant = fmt.Sprintf("%#v", want) 301 } 302 t.Errorf("token %d = %s, want %s", i, shave, swant) 303 } 304 305 // Check that InputOffset returned actual token. 306 switch { 307 case start < lastEnd: 308 t.Errorf("token %d: position [%d,%d) for %T is before previous token", i, start, end, have) 309 case start >= end: 310 // Special case: EndElement can be synthesized. 311 if start == end && end == lastEnd { 312 break 313 } 314 t.Errorf("token %d: position [%d,%d) for %T is empty", i, start, end, have) 315 case end > int64(len(raw)): 316 t.Errorf("token %d: position [%d,%d) for %T extends beyond input", i, start, end, have) 317 default: 318 text := raw[start:end] 319 if strings.ContainsAny(text, "<>") && (!strings.HasPrefix(text, "<") || !strings.HasSuffix(text, ">")) { 320 t.Errorf("token %d: misaligned raw token %#q for %T", i, text, have) 321 } 322 } 323 lastEnd = end 324 } 325 } 326 327 // Ensure that directives (specifically !DOCTYPE) include the complete 328 // text of any nested directives, noting that < and > do not change 329 // nesting depth if they are in single or double quotes. 330 331 var nestedDirectivesInput = ` 332 <!DOCTYPE [<!ENTITY rdf "http://www.w3.org/1999/02/22-rdf-syntax-ns#">]> 333 <!DOCTYPE [<!ENTITY xlt ">">]> 334 <!DOCTYPE [<!ENTITY xlt "<">]> 335 <!DOCTYPE [<!ENTITY xlt '>'>]> 336 <!DOCTYPE [<!ENTITY xlt '<'>]> 337 <!DOCTYPE [<!ENTITY xlt '">'>]> 338 <!DOCTYPE [<!ENTITY xlt "'<">]> 339 ` 340 341 var nestedDirectivesTokens = []Token{ 342 CharData("\n"), 343 Directive(`DOCTYPE [<!ENTITY rdf "http://www.w3.org/1999/02/22-rdf-syntax-ns#">]`), 344 CharData("\n"), 345 Directive(`DOCTYPE [<!ENTITY xlt ">">]`), 346 CharData("\n"), 347 Directive(`DOCTYPE [<!ENTITY xlt "<">]`), 348 CharData("\n"), 349 Directive(`DOCTYPE [<!ENTITY xlt '>'>]`), 350 CharData("\n"), 351 Directive(`DOCTYPE [<!ENTITY xlt '<'>]`), 352 CharData("\n"), 353 Directive(`DOCTYPE [<!ENTITY xlt '">'>]`), 354 CharData("\n"), 355 Directive(`DOCTYPE [<!ENTITY xlt "'<">]`), 356 CharData("\n"), 357 } 358 359 func TestNestedDirectives(t *testing.T) { 360 d := NewDecoder(strings.NewReader(nestedDirectivesInput)) 361 362 for i, want := range nestedDirectivesTokens { 363 have, err := d.Token() 364 if err != nil { 365 t.Fatalf("token %d: unexpected error: %s", i, err) 366 } 367 if !reflect.DeepEqual(have, want) { 368 t.Errorf("token %d = %#v want %#v", i, have, want) 369 } 370 } 371 } 372 373 func TestToken(t *testing.T) { 374 d := NewDecoder(strings.NewReader(testInput)) 375 d.Entity = testEntity 376 377 for i, want := range cookedTokens { 378 have, err := d.Token() 379 if err != nil { 380 t.Fatalf("token %d: unexpected error: %s", i, err) 381 } 382 if !reflect.DeepEqual(have, want) { 383 t.Errorf("token %d = %#v want %#v", i, have, want) 384 } 385 } 386 } 387 388 func TestSyntax(t *testing.T) { 389 for i := range xmlInput { 390 d := NewDecoder(strings.NewReader(xmlInput[i])) 391 var err error 392 for _, err = d.Token(); err == nil; _, err = d.Token() { 393 } 394 if _, ok := err.(*SyntaxError); !ok { 395 t.Fatalf(`xmlInput "%s": expected SyntaxError not received`, xmlInput[i]) 396 } 397 } 398 } 399 400 type allScalars struct { 401 True1 bool 402 True2 bool 403 False1 bool 404 False2 bool 405 Int int 406 Int8 int8 407 Int16 int16 408 Int32 int32 409 Int64 int64 410 Uint int 411 Uint8 uint8 412 Uint16 uint16 413 Uint32 uint32 414 Uint64 uint64 415 Uintptr uintptr 416 Float32 float32 417 Float64 float64 418 String string 419 PtrString *string 420 } 421 422 var all = allScalars{ 423 True1: true, 424 True2: true, 425 False1: false, 426 False2: false, 427 Int: 1, 428 Int8: -2, 429 Int16: 3, 430 Int32: -4, 431 Int64: 5, 432 Uint: 6, 433 Uint8: 7, 434 Uint16: 8, 435 Uint32: 9, 436 Uint64: 10, 437 Uintptr: 11, 438 Float32: 13.0, 439 Float64: 14.0, 440 String: "15", 441 PtrString: &sixteen, 442 } 443 444 var sixteen = "16" 445 446 const testScalarsInput = `<allscalars> 447 <True1>true</True1> 448 <True2>1</True2> 449 <False1>false</False1> 450 <False2>0</False2> 451 <Int>1</Int> 452 <Int8>-2</Int8> 453 <Int16>3</Int16> 454 <Int32>-4</Int32> 455 <Int64>5</Int64> 456 <Uint>6</Uint> 457 <Uint8>7</Uint8> 458 <Uint16>8</Uint16> 459 <Uint32>9</Uint32> 460 <Uint64>10</Uint64> 461 <Uintptr>11</Uintptr> 462 <Float>12.0</Float> 463 <Float32>13.0</Float32> 464 <Float64>14.0</Float64> 465 <String>15</String> 466 <PtrString>16</PtrString> 467 </allscalars>` 468 469 func TestAllScalars(t *testing.T) { 470 var a allScalars 471 err := Unmarshal([]byte(testScalarsInput), &a) 472 473 if err != nil { 474 t.Fatal(err) 475 } 476 if !reflect.DeepEqual(a, all) { 477 t.Errorf("have %+v want %+v", a, all) 478 } 479 } 480 481 type item struct { 482 Field_a string 483 } 484 485 func TestIssue569(t *testing.T) { 486 data := `<item><Field_a>abcd</Field_a></item>` 487 var i item 488 err := Unmarshal([]byte(data), &i) 489 490 if err != nil || i.Field_a != "abcd" { 491 t.Fatal("Expecting abcd") 492 } 493 } 494 495 func TestUnquotedAttrs(t *testing.T) { 496 data := "<tag attr=azAZ09:-_\t>" 497 d := NewDecoder(strings.NewReader(data)) 498 d.Strict = false 499 token, err := d.Token() 500 if _, ok := err.(*SyntaxError); ok { 501 t.Errorf("Unexpected error: %v", err) 502 } 503 if token.(StartElement).Name.Local != "tag" { 504 t.Errorf("Unexpected tag name: %v", token.(StartElement).Name.Local) 505 } 506 attr := token.(StartElement).Attr[0] 507 if attr.Value != "azAZ09:-_" { 508 t.Errorf("Unexpected attribute value: %v", attr.Value) 509 } 510 if attr.Name.Local != "attr" { 511 t.Errorf("Unexpected attribute name: %v", attr.Name.Local) 512 } 513 } 514 515 func TestValuelessAttrs(t *testing.T) { 516 tests := [][3]string{ 517 {"<p nowrap>", "p", "nowrap"}, 518 {"<p nowrap >", "p", "nowrap"}, 519 {"<input checked/>", "input", "checked"}, 520 {"<input checked />", "input", "checked"}, 521 } 522 for _, test := range tests { 523 d := NewDecoder(strings.NewReader(test[0])) 524 d.Strict = false 525 token, err := d.Token() 526 if _, ok := err.(*SyntaxError); ok { 527 t.Errorf("Unexpected error: %v", err) 528 } 529 if token.(StartElement).Name.Local != test[1] { 530 t.Errorf("Unexpected tag name: %v", token.(StartElement).Name.Local) 531 } 532 attr := token.(StartElement).Attr[0] 533 if attr.Value != test[2] { 534 t.Errorf("Unexpected attribute value: %v", attr.Value) 535 } 536 if attr.Name.Local != test[2] { 537 t.Errorf("Unexpected attribute name: %v", attr.Name.Local) 538 } 539 } 540 } 541 542 func TestCopyTokenCharData(t *testing.T) { 543 data := []byte("same data") 544 var tok1 Token = CharData(data) 545 tok2 := CopyToken(tok1) 546 if !reflect.DeepEqual(tok1, tok2) { 547 t.Error("CopyToken(CharData) != CharData") 548 } 549 data[1] = 'o' 550 if reflect.DeepEqual(tok1, tok2) { 551 t.Error("CopyToken(CharData) uses same buffer.") 552 } 553 } 554 555 func TestCopyTokenStartElement(t *testing.T) { 556 elt := StartElement{Name{"", "hello"}, []Attr{{Name{"", "lang"}, "en"}}} 557 var tok1 Token = elt 558 tok2 := CopyToken(tok1) 559 if tok1.(StartElement).Attr[0].Value != "en" { 560 t.Error("CopyToken overwrote Attr[0]") 561 } 562 if !reflect.DeepEqual(tok1, tok2) { 563 t.Error("CopyToken(StartElement) != StartElement") 564 } 565 tok1.(StartElement).Attr[0] = Attr{Name{"", "lang"}, "de"} 566 if reflect.DeepEqual(tok1, tok2) { 567 t.Error("CopyToken(CharData) uses same buffer.") 568 } 569 } 570 571 func TestSyntaxErrorLineNum(t *testing.T) { 572 testInput := "<P>Foo<P>\n\n<P>Bar</>\n" 573 d := NewDecoder(strings.NewReader(testInput)) 574 var err error 575 for _, err = d.Token(); err == nil; _, err = d.Token() { 576 } 577 synerr, ok := err.(*SyntaxError) 578 if !ok { 579 t.Error("Expected SyntaxError.") 580 } 581 if synerr.Line != 3 { 582 t.Error("SyntaxError didn't have correct line number.") 583 } 584 } 585 586 func TestTrailingRawToken(t *testing.T) { 587 input := `<FOO></FOO> ` 588 d := NewDecoder(strings.NewReader(input)) 589 var err error 590 for _, err = d.RawToken(); err == nil; _, err = d.RawToken() { 591 } 592 if err != io.EOF { 593 t.Fatalf("d.RawToken() = _, %v, want _, io.EOF", err) 594 } 595 } 596 597 func TestTrailingToken(t *testing.T) { 598 input := `<FOO></FOO> ` 599 d := NewDecoder(strings.NewReader(input)) 600 var err error 601 for _, err = d.Token(); err == nil; _, err = d.Token() { 602 } 603 if err != io.EOF { 604 t.Fatalf("d.Token() = _, %v, want _, io.EOF", err) 605 } 606 } 607 608 func TestEntityInsideCDATA(t *testing.T) { 609 input := `<test><![CDATA[ &val=foo ]]></test>` 610 d := NewDecoder(strings.NewReader(input)) 611 var err error 612 for _, err = d.Token(); err == nil; _, err = d.Token() { 613 } 614 if err != io.EOF { 615 t.Fatalf("d.Token() = _, %v, want _, io.EOF", err) 616 } 617 } 618 619 var characterTests = []struct { 620 in string 621 err string 622 }{ 623 {"\x12<doc/>", "illegal character code U+0012"}, 624 {"<?xml version=\"1.0\"?>\x0b<doc/>", "illegal character code U+000B"}, 625 {"\xef\xbf\xbe<doc/>", "illegal character code U+FFFE"}, 626 {"<?xml version=\"1.0\"?><doc>\r\n<hiya/>\x07<toots/></doc>", "illegal character code U+0007"}, 627 {"<?xml version=\"1.0\"?><doc \x12='value'>what's up</doc>", "expected attribute name in element"}, 628 {"<doc>&abc\x01;</doc>", "invalid character entity &abc (no semicolon)"}, 629 {"<doc>&\x01;</doc>", "invalid character entity & (no semicolon)"}, 630 {"<doc>&\xef\xbf\xbe;</doc>", "invalid character entity &\uFFFE;"}, 631 {"<doc>&hello;</doc>", "invalid character entity &hello;"}, 632 } 633 634 func TestDisallowedCharacters(t *testing.T) { 635 636 for i, tt := range characterTests { 637 d := NewDecoder(strings.NewReader(tt.in)) 638 var err error 639 640 for err == nil { 641 _, err = d.Token() 642 } 643 synerr, ok := err.(*SyntaxError) 644 if !ok { 645 t.Fatalf("input %d d.Token() = _, %v, want _, *SyntaxError", i, err) 646 } 647 if synerr.Msg != tt.err { 648 t.Fatalf("input %d synerr.Msg wrong: want %q, got %q", i, tt.err, synerr.Msg) 649 } 650 } 651 } 652 653 var procInstTests = []struct { 654 input string 655 expect [2]string 656 }{ 657 {`version="1.0" encoding="utf-8"`, [2]string{"1.0", "utf-8"}}, 658 {`version="1.0" encoding='utf-8'`, [2]string{"1.0", "utf-8"}}, 659 {`version="1.0" encoding='utf-8' `, [2]string{"1.0", "utf-8"}}, 660 {`version="1.0" encoding=utf-8`, [2]string{"1.0", ""}}, 661 {`encoding="FOO" `, [2]string{"", "FOO"}}, 662 } 663 664 func TestProcInstEncoding(t *testing.T) { 665 for _, test := range procInstTests { 666 if got := procInst("version", test.input); got != test.expect[0] { 667 t.Errorf("procInst(version, %q) = %q; want %q", test.input, got, test.expect[0]) 668 } 669 if got := procInst("encoding", test.input); got != test.expect[1] { 670 t.Errorf("procInst(encoding, %q) = %q; want %q", test.input, got, test.expect[1]) 671 } 672 } 673 } 674 675 // Ensure that directives with comments include the complete 676 // text of any nested directives. 677 678 var directivesWithCommentsInput = ` 679 <!DOCTYPE [<!-- a comment --><!ENTITY rdf "http://www.w3.org/1999/02/22-rdf-syntax-ns#">]> 680 <!DOCTYPE [<!ENTITY go "Golang"><!-- a comment-->]> 681 <!DOCTYPE <!-> <!> <!----> <!-->--> <!--->--> [<!ENTITY go "Golang"><!-- a comment-->]> 682 ` 683 684 var directivesWithCommentsTokens = []Token{ 685 CharData("\n"), 686 Directive(`DOCTYPE [<!ENTITY rdf "http://www.w3.org/1999/02/22-rdf-syntax-ns#">]`), 687 CharData("\n"), 688 Directive(`DOCTYPE [<!ENTITY go "Golang">]`), 689 CharData("\n"), 690 Directive(`DOCTYPE <!-> <!> [<!ENTITY go "Golang">]`), 691 CharData("\n"), 692 } 693 694 func TestDirectivesWithComments(t *testing.T) { 695 d := NewDecoder(strings.NewReader(directivesWithCommentsInput)) 696 697 for i, want := range directivesWithCommentsTokens { 698 have, err := d.Token() 699 if err != nil { 700 t.Fatalf("token %d: unexpected error: %s", i, err) 701 } 702 if !reflect.DeepEqual(have, want) { 703 t.Errorf("token %d = %#v want %#v", i, have, want) 704 } 705 } 706 } 707 708 // Writer whose Write method always returns an error. 709 type errWriter struct{} 710 711 func (errWriter) Write(p []byte) (n int, err error) { return 0, fmt.Errorf("unwritable") } 712 713 func TestEscapeTextIOErrors(t *testing.T) { 714 expectErr := "unwritable" 715 err := EscapeText(errWriter{}, []byte{'A'}) 716 717 if err == nil || err.Error() != expectErr { 718 t.Errorf("have %v, want %v", err, expectErr) 719 } 720 } 721 722 func TestEscapeTextInvalidChar(t *testing.T) { 723 input := []byte("A \x00 terminated string.") 724 expected := "A \uFFFD terminated string." 725 726 buff := new(bytes.Buffer) 727 if err := EscapeText(buff, input); err != nil { 728 t.Fatalf("have %v, want nil", err) 729 } 730 text := buff.String() 731 732 if text != expected { 733 t.Errorf("have %v, want %v", text, expected) 734 } 735 } 736 737 func TestIssue5880(t *testing.T) { 738 type T []byte 739 data, err := Marshal(T{192, 168, 0, 1}) 740 if err != nil { 741 t.Errorf("Marshal error: %v", err) 742 } 743 if !utf8.Valid(data) { 744 t.Errorf("Marshal generated invalid UTF-8: %x", data) 745 } 746 } 747 748 func TestIssue11405(t *testing.T) { 749 testCases := []string{ 750 "<root>", 751 "<root><foo>", 752 "<root><foo></foo>", 753 } 754 for _, tc := range testCases { 755 d := NewDecoder(strings.NewReader(tc)) 756 var err error 757 for { 758 _, err = d.Token() 759 if err != nil { 760 break 761 } 762 } 763 if _, ok := err.(*SyntaxError); !ok { 764 t.Errorf("%s: Token: Got error %v, want SyntaxError", tc, err) 765 } 766 } 767 } 768 769 func TestIssue12417(t *testing.T) { 770 testCases := []struct { 771 s string 772 ok bool 773 }{ 774 {`<?xml encoding="UtF-8" version="1.0"?><root/>`, true}, 775 {`<?xml encoding="UTF-8" version="1.0"?><root/>`, true}, 776 {`<?xml encoding="utf-8" version="1.0"?><root/>`, true}, 777 {`<?xml encoding="uuu-9" version="1.0"?><root/>`, false}, 778 } 779 for _, tc := range testCases { 780 d := NewDecoder(strings.NewReader(tc.s)) 781 var err error 782 for { 783 _, err = d.Token() 784 if err != nil { 785 if err == io.EOF { 786 err = nil 787 } 788 break 789 } 790 } 791 if err != nil && tc.ok { 792 t.Errorf("%q: Encoding charset: expected no error, got %s", tc.s, err) 793 continue 794 } 795 if err == nil && !tc.ok { 796 t.Errorf("%q: Encoding charset: expected error, got nil", tc.s) 797 } 798 } 799 }