golang.org/x/net@v0.25.1-0.20240516223405-c87a5b62e243/webdav/internal/xml/xml_test.go (about) 1 // Copyright 2009 The Go Authors. All rights reserved. 2 // Use of this source code is governed by a BSD-style 3 // license that can be found in the LICENSE file. 4 5 package xml 6 7 import ( 8 "bytes" 9 "fmt" 10 "io" 11 "reflect" 12 "strings" 13 "testing" 14 "unicode/utf8" 15 ) 16 17 const testInput = ` 18 <?xml version="1.0" encoding="UTF-8"?> 19 <!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" 20 "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd"> 21 <body xmlns:foo="ns1" xmlns="ns2" xmlns:tag="ns3" ` + 22 "\r\n\t" + ` > 23 <hello lang="en">World <>'" 白鵬翔</hello> 24 <query>&何; &is-it;</query> 25 <goodbye /> 26 <outer foo:attr="value" xmlns:tag="ns4"> 27 <inner/> 28 </outer> 29 <tag:name> 30 <![CDATA[Some text here.]]> 31 </tag:name> 32 </body><!-- missing final newline -->` 33 34 var testEntity = map[string]string{"何": "What", "is-it": "is it?"} 35 36 var rawTokens = []Token{ 37 CharData("\n"), 38 ProcInst{"xml", []byte(`version="1.0" encoding="UTF-8"`)}, 39 CharData("\n"), 40 Directive(`DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" 41 "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd"`), 42 CharData("\n"), 43 StartElement{Name{"", "body"}, []Attr{{Name{"xmlns", "foo"}, "ns1"}, {Name{"", "xmlns"}, "ns2"}, {Name{"xmlns", "tag"}, "ns3"}}}, 44 CharData("\n "), 45 StartElement{Name{"", "hello"}, []Attr{{Name{"", "lang"}, "en"}}}, 46 CharData("World <>'\" 白鵬翔"), 47 EndElement{Name{"", "hello"}}, 48 CharData("\n "), 49 StartElement{Name{"", "query"}, []Attr{}}, 50 CharData("What is it?"), 51 EndElement{Name{"", "query"}}, 52 CharData("\n "), 53 StartElement{Name{"", "goodbye"}, []Attr{}}, 54 EndElement{Name{"", "goodbye"}}, 55 CharData("\n "), 56 StartElement{Name{"", "outer"}, []Attr{{Name{"foo", "attr"}, "value"}, {Name{"xmlns", "tag"}, "ns4"}}}, 57 CharData("\n "), 58 StartElement{Name{"", "inner"}, []Attr{}}, 59 EndElement{Name{"", "inner"}}, 60 CharData("\n "), 61 EndElement{Name{"", "outer"}}, 62 CharData("\n "), 63 StartElement{Name{"tag", "name"}, []Attr{}}, 64 CharData("\n "), 65 CharData("Some text here."), 66 CharData("\n "), 67 EndElement{Name{"tag", "name"}}, 68 CharData("\n"), 69 EndElement{Name{"", "body"}}, 70 Comment(" missing final newline "), 71 } 72 73 var cookedTokens = []Token{ 74 CharData("\n"), 75 ProcInst{"xml", []byte(`version="1.0" encoding="UTF-8"`)}, 76 CharData("\n"), 77 Directive(`DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" 78 "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd"`), 79 CharData("\n"), 80 StartElement{Name{"ns2", "body"}, []Attr{{Name{"xmlns", "foo"}, "ns1"}, {Name{"", "xmlns"}, "ns2"}, {Name{"xmlns", "tag"}, "ns3"}}}, 81 CharData("\n "), 82 StartElement{Name{"ns2", "hello"}, []Attr{{Name{"", "lang"}, "en"}}}, 83 CharData("World <>'\" 白鵬翔"), 84 EndElement{Name{"ns2", "hello"}}, 85 CharData("\n "), 86 StartElement{Name{"ns2", "query"}, []Attr{}}, 87 CharData("What is it?"), 88 EndElement{Name{"ns2", "query"}}, 89 CharData("\n "), 90 StartElement{Name{"ns2", "goodbye"}, []Attr{}}, 91 EndElement{Name{"ns2", "goodbye"}}, 92 CharData("\n "), 93 StartElement{Name{"ns2", "outer"}, []Attr{{Name{"ns1", "attr"}, "value"}, {Name{"xmlns", "tag"}, "ns4"}}}, 94 CharData("\n "), 95 StartElement{Name{"ns2", "inner"}, []Attr{}}, 96 EndElement{Name{"ns2", "inner"}}, 97 CharData("\n "), 98 EndElement{Name{"ns2", "outer"}}, 99 CharData("\n "), 100 StartElement{Name{"ns3", "name"}, []Attr{}}, 101 CharData("\n "), 102 CharData("Some text here."), 103 CharData("\n "), 104 EndElement{Name{"ns3", "name"}}, 105 CharData("\n"), 106 EndElement{Name{"ns2", "body"}}, 107 Comment(" missing final newline "), 108 } 109 110 const testInputAltEncoding = ` 111 <?xml version="1.0" encoding="x-testing-uppercase"?> 112 <TAG>VALUE</TAG>` 113 114 var rawTokensAltEncoding = []Token{ 115 CharData("\n"), 116 ProcInst{"xml", []byte(`version="1.0" encoding="x-testing-uppercase"`)}, 117 CharData("\n"), 118 StartElement{Name{"", "tag"}, []Attr{}}, 119 CharData("value"), 120 EndElement{Name{"", "tag"}}, 121 } 122 123 var xmlInput = []string{ 124 // unexpected EOF cases 125 "<", 126 "<t", 127 "<t ", 128 "<t/", 129 "<!", 130 "<!-", 131 "<!--", 132 "<!--c-", 133 "<!--c--", 134 "<!d", 135 "<t></", 136 "<t></t", 137 "<?", 138 "<?p", 139 "<t a", 140 "<t a=", 141 "<t a='", 142 "<t a=''", 143 "<t/><![", 144 "<t/><![C", 145 "<t/><![CDATA[d", 146 "<t/><![CDATA[d]", 147 "<t/><![CDATA[d]]", 148 149 // other Syntax errors 150 "<>", 151 "<t/a", 152 "<0 />", 153 "<?0 >", 154 // "<!0 >", // let the Token() caller handle 155 "</0>", 156 "<t 0=''>", 157 "<t a='&'>", 158 "<t a='<'>", 159 "<t> c;</t>", 160 "<t a>", 161 "<t a=>", 162 "<t a=v>", 163 // "<![CDATA[d]]>", // let the Token() caller handle 164 "<t></e>", 165 "<t></>", 166 "<t></t!", 167 "<t>cdata]]></t>", 168 } 169 170 func TestRawToken(t *testing.T) { 171 d := NewDecoder(strings.NewReader(testInput)) 172 d.Entity = testEntity 173 testRawToken(t, d, testInput, rawTokens) 174 } 175 176 const nonStrictInput = ` 177 <tag>non&entity</tag> 178 <tag>&unknown;entity</tag> 179 <tag>{</tag> 180 <tag>&#zzz;</tag> 181 <tag>&なまえ3;</tag> 182 <tag><-gt;</tag> 183 <tag>&;</tag> 184 <tag>&0a;</tag> 185 ` 186 187 var nonStringEntity = map[string]string{"": "oops!", "0a": "oops!"} 188 189 var nonStrictTokens = []Token{ 190 CharData("\n"), 191 StartElement{Name{"", "tag"}, []Attr{}}, 192 CharData("non&entity"), 193 EndElement{Name{"", "tag"}}, 194 CharData("\n"), 195 StartElement{Name{"", "tag"}, []Attr{}}, 196 CharData("&unknown;entity"), 197 EndElement{Name{"", "tag"}}, 198 CharData("\n"), 199 StartElement{Name{"", "tag"}, []Attr{}}, 200 CharData("{"), 201 EndElement{Name{"", "tag"}}, 202 CharData("\n"), 203 StartElement{Name{"", "tag"}, []Attr{}}, 204 CharData("&#zzz;"), 205 EndElement{Name{"", "tag"}}, 206 CharData("\n"), 207 StartElement{Name{"", "tag"}, []Attr{}}, 208 CharData("&なまえ3;"), 209 EndElement{Name{"", "tag"}}, 210 CharData("\n"), 211 StartElement{Name{"", "tag"}, []Attr{}}, 212 CharData("<-gt;"), 213 EndElement{Name{"", "tag"}}, 214 CharData("\n"), 215 StartElement{Name{"", "tag"}, []Attr{}}, 216 CharData("&;"), 217 EndElement{Name{"", "tag"}}, 218 CharData("\n"), 219 StartElement{Name{"", "tag"}, []Attr{}}, 220 CharData("&0a;"), 221 EndElement{Name{"", "tag"}}, 222 CharData("\n"), 223 } 224 225 func TestNonStrictRawToken(t *testing.T) { 226 d := NewDecoder(strings.NewReader(nonStrictInput)) 227 d.Strict = false 228 testRawToken(t, d, nonStrictInput, nonStrictTokens) 229 } 230 231 type downCaser struct { 232 t *testing.T 233 r io.ByteReader 234 } 235 236 func (d *downCaser) ReadByte() (c byte, err error) { 237 c, err = d.r.ReadByte() 238 if c >= 'A' && c <= 'Z' { 239 c += 'a' - 'A' 240 } 241 return 242 } 243 244 func (d *downCaser) Read(p []byte) (int, error) { 245 d.t.Fatalf("unexpected Read call on downCaser reader") 246 panic("unreachable") 247 } 248 249 func TestRawTokenAltEncoding(t *testing.T) { 250 d := NewDecoder(strings.NewReader(testInputAltEncoding)) 251 d.CharsetReader = func(charset string, input io.Reader) (io.Reader, error) { 252 if charset != "x-testing-uppercase" { 253 t.Fatalf("unexpected charset %q", charset) 254 } 255 return &downCaser{t, input.(io.ByteReader)}, nil 256 } 257 testRawToken(t, d, testInputAltEncoding, rawTokensAltEncoding) 258 } 259 260 func TestRawTokenAltEncodingNoConverter(t *testing.T) { 261 d := NewDecoder(strings.NewReader(testInputAltEncoding)) 262 token, err := d.RawToken() 263 if token == nil { 264 t.Fatalf("expected a token on first RawToken call") 265 } 266 if err != nil { 267 t.Fatal(err) 268 } 269 token, err = d.RawToken() 270 if token != nil { 271 t.Errorf("expected a nil token; got %#v", token) 272 } 273 if err == nil { 274 t.Fatalf("expected an error on second RawToken call") 275 } 276 const encoding = "x-testing-uppercase" 277 if !strings.Contains(err.Error(), encoding) { 278 t.Errorf("expected error to contain %q; got error: %v", 279 encoding, err) 280 } 281 } 282 283 func testRawToken(t *testing.T, d *Decoder, raw string, rawTokens []Token) { 284 lastEnd := int64(0) 285 for i, want := range rawTokens { 286 start := d.InputOffset() 287 have, err := d.RawToken() 288 end := d.InputOffset() 289 if err != nil { 290 t.Fatalf("token %d: unexpected error: %s", i, err) 291 } 292 if !reflect.DeepEqual(have, want) { 293 var shave, swant string 294 if _, ok := have.(CharData); ok { 295 shave = fmt.Sprintf("CharData(%q)", have) 296 } else { 297 shave = fmt.Sprintf("%#v", have) 298 } 299 if _, ok := want.(CharData); ok { 300 swant = fmt.Sprintf("CharData(%q)", want) 301 } else { 302 swant = fmt.Sprintf("%#v", want) 303 } 304 t.Errorf("token %d = %s, want %s", i, shave, swant) 305 } 306 307 // Check that InputOffset returned actual token. 308 switch { 309 case start < lastEnd: 310 t.Errorf("token %d: position [%d,%d) for %T is before previous token", i, start, end, have) 311 case start >= end: 312 // Special case: EndElement can be synthesized. 313 if start == end && end == lastEnd { 314 break 315 } 316 t.Errorf("token %d: position [%d,%d) for %T is empty", i, start, end, have) 317 case end > int64(len(raw)): 318 t.Errorf("token %d: position [%d,%d) for %T extends beyond input", i, start, end, have) 319 default: 320 text := raw[start:end] 321 if strings.ContainsAny(text, "<>") && (!strings.HasPrefix(text, "<") || !strings.HasSuffix(text, ">")) { 322 t.Errorf("token %d: misaligned raw token %#q for %T", i, text, have) 323 } 324 } 325 lastEnd = end 326 } 327 } 328 329 // Ensure that directives (specifically !DOCTYPE) include the complete 330 // text of any nested directives, noting that < and > do not change 331 // nesting depth if they are in single or double quotes. 332 333 var nestedDirectivesInput = ` 334 <!DOCTYPE [<!ENTITY rdf "http://www.w3.org/1999/02/22-rdf-syntax-ns#">]> 335 <!DOCTYPE [<!ENTITY xlt ">">]> 336 <!DOCTYPE [<!ENTITY xlt "<">]> 337 <!DOCTYPE [<!ENTITY xlt '>'>]> 338 <!DOCTYPE [<!ENTITY xlt '<'>]> 339 <!DOCTYPE [<!ENTITY xlt '">'>]> 340 <!DOCTYPE [<!ENTITY xlt "'<">]> 341 ` 342 343 var nestedDirectivesTokens = []Token{ 344 CharData("\n"), 345 Directive(`DOCTYPE [<!ENTITY rdf "http://www.w3.org/1999/02/22-rdf-syntax-ns#">]`), 346 CharData("\n"), 347 Directive(`DOCTYPE [<!ENTITY xlt ">">]`), 348 CharData("\n"), 349 Directive(`DOCTYPE [<!ENTITY xlt "<">]`), 350 CharData("\n"), 351 Directive(`DOCTYPE [<!ENTITY xlt '>'>]`), 352 CharData("\n"), 353 Directive(`DOCTYPE [<!ENTITY xlt '<'>]`), 354 CharData("\n"), 355 Directive(`DOCTYPE [<!ENTITY xlt '">'>]`), 356 CharData("\n"), 357 Directive(`DOCTYPE [<!ENTITY xlt "'<">]`), 358 CharData("\n"), 359 } 360 361 func TestNestedDirectives(t *testing.T) { 362 d := NewDecoder(strings.NewReader(nestedDirectivesInput)) 363 364 for i, want := range nestedDirectivesTokens { 365 have, err := d.Token() 366 if err != nil { 367 t.Fatalf("token %d: unexpected error: %s", i, err) 368 } 369 if !reflect.DeepEqual(have, want) { 370 t.Errorf("token %d = %#v want %#v", i, have, want) 371 } 372 } 373 } 374 375 func TestToken(t *testing.T) { 376 d := NewDecoder(strings.NewReader(testInput)) 377 d.Entity = testEntity 378 379 for i, want := range cookedTokens { 380 have, err := d.Token() 381 if err != nil { 382 t.Fatalf("token %d: unexpected error: %s", i, err) 383 } 384 if !reflect.DeepEqual(have, want) { 385 t.Errorf("token %d = %#v want %#v", i, have, want) 386 } 387 } 388 } 389 390 func TestSyntax(t *testing.T) { 391 for i := range xmlInput { 392 d := NewDecoder(strings.NewReader(xmlInput[i])) 393 var err error 394 for _, err = d.Token(); err == nil; _, err = d.Token() { 395 } 396 if _, ok := err.(*SyntaxError); !ok { 397 t.Fatalf(`xmlInput "%s": expected SyntaxError not received`, xmlInput[i]) 398 } 399 } 400 } 401 402 type allScalars struct { 403 True1 bool 404 True2 bool 405 False1 bool 406 False2 bool 407 Int int 408 Int8 int8 409 Int16 int16 410 Int32 int32 411 Int64 int64 412 Uint int 413 Uint8 uint8 414 Uint16 uint16 415 Uint32 uint32 416 Uint64 uint64 417 Uintptr uintptr 418 Float32 float32 419 Float64 float64 420 String string 421 PtrString *string 422 } 423 424 var all = allScalars{ 425 True1: true, 426 True2: true, 427 False1: false, 428 False2: false, 429 Int: 1, 430 Int8: -2, 431 Int16: 3, 432 Int32: -4, 433 Int64: 5, 434 Uint: 6, 435 Uint8: 7, 436 Uint16: 8, 437 Uint32: 9, 438 Uint64: 10, 439 Uintptr: 11, 440 Float32: 13.0, 441 Float64: 14.0, 442 String: "15", 443 PtrString: &sixteen, 444 } 445 446 var sixteen = "16" 447 448 const testScalarsInput = `<allscalars> 449 <True1>true</True1> 450 <True2>1</True2> 451 <False1>false</False1> 452 <False2>0</False2> 453 <Int>1</Int> 454 <Int8>-2</Int8> 455 <Int16>3</Int16> 456 <Int32>-4</Int32> 457 <Int64>5</Int64> 458 <Uint>6</Uint> 459 <Uint8>7</Uint8> 460 <Uint16>8</Uint16> 461 <Uint32>9</Uint32> 462 <Uint64>10</Uint64> 463 <Uintptr>11</Uintptr> 464 <Float>12.0</Float> 465 <Float32>13.0</Float32> 466 <Float64>14.0</Float64> 467 <String>15</String> 468 <PtrString>16</PtrString> 469 </allscalars>` 470 471 func TestAllScalars(t *testing.T) { 472 var a allScalars 473 err := Unmarshal([]byte(testScalarsInput), &a) 474 475 if err != nil { 476 t.Fatal(err) 477 } 478 if !reflect.DeepEqual(a, all) { 479 t.Errorf("have %+v want %+v", a, all) 480 } 481 } 482 483 type item struct { 484 Field_a string 485 } 486 487 func TestIssue569(t *testing.T) { 488 data := `<item><Field_a>abcd</Field_a></item>` 489 var i item 490 err := Unmarshal([]byte(data), &i) 491 492 if err != nil || i.Field_a != "abcd" { 493 t.Fatal("Expecting abcd") 494 } 495 } 496 497 func TestUnquotedAttrs(t *testing.T) { 498 data := "<tag attr=azAZ09:-_\t>" 499 d := NewDecoder(strings.NewReader(data)) 500 d.Strict = false 501 token, err := d.Token() 502 if _, ok := err.(*SyntaxError); ok { 503 t.Errorf("Unexpected error: %v", err) 504 } 505 if token.(StartElement).Name.Local != "tag" { 506 t.Errorf("Unexpected tag name: %v", token.(StartElement).Name.Local) 507 } 508 attr := token.(StartElement).Attr[0] 509 if attr.Value != "azAZ09:-_" { 510 t.Errorf("Unexpected attribute value: %v", attr.Value) 511 } 512 if attr.Name.Local != "attr" { 513 t.Errorf("Unexpected attribute name: %v", attr.Name.Local) 514 } 515 } 516 517 func TestValuelessAttrs(t *testing.T) { 518 tests := [][3]string{ 519 {"<p nowrap>", "p", "nowrap"}, 520 {"<p nowrap >", "p", "nowrap"}, 521 {"<input checked/>", "input", "checked"}, 522 {"<input checked />", "input", "checked"}, 523 } 524 for _, test := range tests { 525 d := NewDecoder(strings.NewReader(test[0])) 526 d.Strict = false 527 token, err := d.Token() 528 if _, ok := err.(*SyntaxError); ok { 529 t.Errorf("Unexpected error: %v", err) 530 } 531 if token.(StartElement).Name.Local != test[1] { 532 t.Errorf("Unexpected tag name: %v", token.(StartElement).Name.Local) 533 } 534 attr := token.(StartElement).Attr[0] 535 if attr.Value != test[2] { 536 t.Errorf("Unexpected attribute value: %v", attr.Value) 537 } 538 if attr.Name.Local != test[2] { 539 t.Errorf("Unexpected attribute name: %v", attr.Name.Local) 540 } 541 } 542 } 543 544 func TestCopyTokenCharData(t *testing.T) { 545 data := []byte("same data") 546 var tok1 Token = CharData(data) 547 tok2 := CopyToken(tok1) 548 if !reflect.DeepEqual(tok1, tok2) { 549 t.Error("CopyToken(CharData) != CharData") 550 } 551 data[1] = 'o' 552 if reflect.DeepEqual(tok1, tok2) { 553 t.Error("CopyToken(CharData) uses same buffer.") 554 } 555 } 556 557 func TestCopyTokenStartElement(t *testing.T) { 558 elt := StartElement{Name{"", "hello"}, []Attr{{Name{"", "lang"}, "en"}}} 559 var tok1 Token = elt 560 tok2 := CopyToken(tok1) 561 if tok1.(StartElement).Attr[0].Value != "en" { 562 t.Error("CopyToken overwrote Attr[0]") 563 } 564 if !reflect.DeepEqual(tok1, tok2) { 565 t.Error("CopyToken(StartElement) != StartElement") 566 } 567 tok1.(StartElement).Attr[0] = Attr{Name{"", "lang"}, "de"} 568 if reflect.DeepEqual(tok1, tok2) { 569 t.Error("CopyToken(CharData) uses same buffer.") 570 } 571 } 572 573 func TestSyntaxErrorLineNum(t *testing.T) { 574 testInput := "<P>Foo<P>\n\n<P>Bar</>\n" 575 d := NewDecoder(strings.NewReader(testInput)) 576 var err error 577 for _, err = d.Token(); err == nil; _, err = d.Token() { 578 } 579 synerr, ok := err.(*SyntaxError) 580 if !ok { 581 t.Error("Expected SyntaxError.") 582 } 583 if synerr.Line != 3 { 584 t.Error("SyntaxError didn't have correct line number.") 585 } 586 } 587 588 func TestTrailingRawToken(t *testing.T) { 589 input := `<FOO></FOO> ` 590 d := NewDecoder(strings.NewReader(input)) 591 var err error 592 for _, err = d.RawToken(); err == nil; _, err = d.RawToken() { 593 } 594 if err != io.EOF { 595 t.Fatalf("d.RawToken() = _, %v, want _, io.EOF", err) 596 } 597 } 598 599 func TestTrailingToken(t *testing.T) { 600 input := `<FOO></FOO> ` 601 d := NewDecoder(strings.NewReader(input)) 602 var err error 603 for _, err = d.Token(); err == nil; _, err = d.Token() { 604 } 605 if err != io.EOF { 606 t.Fatalf("d.Token() = _, %v, want _, io.EOF", err) 607 } 608 } 609 610 func TestEntityInsideCDATA(t *testing.T) { 611 input := `<test><![CDATA[ &val=foo ]]></test>` 612 d := NewDecoder(strings.NewReader(input)) 613 var err error 614 for _, err = d.Token(); err == nil; _, err = d.Token() { 615 } 616 if err != io.EOF { 617 t.Fatalf("d.Token() = _, %v, want _, io.EOF", err) 618 } 619 } 620 621 var characterTests = []struct { 622 in string 623 err string 624 }{ 625 {"\x12<doc/>", "illegal character code U+0012"}, 626 {"<?xml version=\"1.0\"?>\x0b<doc/>", "illegal character code U+000B"}, 627 {"\xef\xbf\xbe<doc/>", "illegal character code U+FFFE"}, 628 {"<?xml version=\"1.0\"?><doc>\r\n<hiya/>\x07<toots/></doc>", "illegal character code U+0007"}, 629 {"<?xml version=\"1.0\"?><doc \x12='value'>what's up</doc>", "expected attribute name in element"}, 630 {"<doc>&abc\x01;</doc>", "invalid character entity &abc (no semicolon)"}, 631 {"<doc>&\x01;</doc>", "invalid character entity & (no semicolon)"}, 632 {"<doc>&\xef\xbf\xbe;</doc>", "invalid character entity &\uFFFE;"}, 633 {"<doc>&hello;</doc>", "invalid character entity &hello;"}, 634 } 635 636 func TestDisallowedCharacters(t *testing.T) { 637 638 for i, tt := range characterTests { 639 d := NewDecoder(strings.NewReader(tt.in)) 640 var err error 641 642 for err == nil { 643 _, err = d.Token() 644 } 645 synerr, ok := err.(*SyntaxError) 646 if !ok { 647 t.Fatalf("input %d d.Token() = _, %v, want _, *SyntaxError", i, err) 648 } 649 if synerr.Msg != tt.err { 650 t.Fatalf("input %d synerr.Msg wrong: want %q, got %q", i, tt.err, synerr.Msg) 651 } 652 } 653 } 654 655 type procInstEncodingTest struct { 656 expect, got string 657 } 658 659 var procInstTests = []struct { 660 input string 661 expect [2]string 662 }{ 663 {`version="1.0" encoding="utf-8"`, [2]string{"1.0", "utf-8"}}, 664 {`version="1.0" encoding='utf-8'`, [2]string{"1.0", "utf-8"}}, 665 {`version="1.0" encoding='utf-8' `, [2]string{"1.0", "utf-8"}}, 666 {`version="1.0" encoding=utf-8`, [2]string{"1.0", ""}}, 667 {`encoding="FOO" `, [2]string{"", "FOO"}}, 668 } 669 670 func TestProcInstEncoding(t *testing.T) { 671 for _, test := range procInstTests { 672 if got := procInst("version", test.input); got != test.expect[0] { 673 t.Errorf("procInst(version, %q) = %q; want %q", test.input, got, test.expect[0]) 674 } 675 if got := procInst("encoding", test.input); got != test.expect[1] { 676 t.Errorf("procInst(encoding, %q) = %q; want %q", test.input, got, test.expect[1]) 677 } 678 } 679 } 680 681 // Ensure that directives with comments include the complete 682 // text of any nested directives. 683 684 var directivesWithCommentsInput = ` 685 <!DOCTYPE [<!-- a comment --><!ENTITY rdf "http://www.w3.org/1999/02/22-rdf-syntax-ns#">]> 686 <!DOCTYPE [<!ENTITY go "Golang"><!-- a comment-->]> 687 <!DOCTYPE <!-> <!> <!----> <!-->--> <!--->--> [<!ENTITY go "Golang"><!-- a comment-->]> 688 ` 689 690 var directivesWithCommentsTokens = []Token{ 691 CharData("\n"), 692 Directive(`DOCTYPE [<!ENTITY rdf "http://www.w3.org/1999/02/22-rdf-syntax-ns#">]`), 693 CharData("\n"), 694 Directive(`DOCTYPE [<!ENTITY go "Golang">]`), 695 CharData("\n"), 696 Directive(`DOCTYPE <!-> <!> [<!ENTITY go "Golang">]`), 697 CharData("\n"), 698 } 699 700 func TestDirectivesWithComments(t *testing.T) { 701 d := NewDecoder(strings.NewReader(directivesWithCommentsInput)) 702 703 for i, want := range directivesWithCommentsTokens { 704 have, err := d.Token() 705 if err != nil { 706 t.Fatalf("token %d: unexpected error: %s", i, err) 707 } 708 if !reflect.DeepEqual(have, want) { 709 t.Errorf("token %d = %#v want %#v", i, have, want) 710 } 711 } 712 } 713 714 // Writer whose Write method always returns an error. 715 type errWriter struct{} 716 717 func (errWriter) Write(p []byte) (n int, err error) { return 0, fmt.Errorf("unwritable") } 718 719 func TestEscapeTextIOErrors(t *testing.T) { 720 expectErr := "unwritable" 721 err := EscapeText(errWriter{}, []byte{'A'}) 722 723 if err == nil || err.Error() != expectErr { 724 t.Errorf("have %v, want %v", err, expectErr) 725 } 726 } 727 728 func TestEscapeTextInvalidChar(t *testing.T) { 729 input := []byte("A \x00 terminated string.") 730 expected := "A \uFFFD terminated string." 731 732 buff := new(bytes.Buffer) 733 if err := EscapeText(buff, input); err != nil { 734 t.Fatalf("have %v, want nil", err) 735 } 736 text := buff.String() 737 738 if text != expected { 739 t.Errorf("have %v, want %v", text, expected) 740 } 741 } 742 743 func TestIssue5880(t *testing.T) { 744 type T []byte 745 data, err := Marshal(T{192, 168, 0, 1}) 746 if err != nil { 747 t.Errorf("Marshal error: %v", err) 748 } 749 if !utf8.Valid(data) { 750 t.Errorf("Marshal generated invalid UTF-8: %x", data) 751 } 752 }