golang.org/x/text@v0.14.0/encoding/unicode/unicode_test.go (about) 1 // Copyright 2015 The Go Authors. All rights reserved. 2 // Use of this source code is governed by a BSD-style 3 // license that can be found in the LICENSE file. 4 5 package unicode 6 7 import ( 8 "testing" 9 10 "golang.org/x/text/encoding" 11 "golang.org/x/text/encoding/charmap" 12 "golang.org/x/text/encoding/internal/enctest" 13 "golang.org/x/text/transform" 14 ) 15 16 func TestBasics(t *testing.T) { 17 testCases := []struct { 18 e encoding.Encoding 19 encPrefix string 20 encSuffix string 21 encoded string 22 utf8 string 23 }{{ 24 e: utf16BEIB, 25 encoded: "\x00\x57\x00\xe4\xd8\x35\xdd\x65", 26 utf8: "\x57\u00e4\U0001d565", 27 }, { 28 e: utf16BEEB, 29 encPrefix: "\xfe\xff", 30 encoded: "\x00\x57\x00\xe4\xd8\x35\xdd\x65", 31 utf8: "\x57\u00e4\U0001d565", 32 }, { 33 e: utf16LEIB, 34 encoded: "\x57\x00\xe4\x00\x35\xd8\x65\xdd", 35 utf8: "\x57\u00e4\U0001d565", 36 }, { 37 e: utf16LEEB, 38 encPrefix: "\xff\xfe", 39 encoded: "\x57\x00\xe4\x00\x35\xd8\x65\xdd", 40 utf8: "\x57\u00e4\U0001d565", 41 }} 42 43 for _, tc := range testCases { 44 enctest.TestEncoding(t, tc.e, tc.encoded, tc.utf8, tc.encPrefix, tc.encSuffix) 45 } 46 } 47 48 func TestFiles(t *testing.T) { 49 enctest.TestFile(t, UTF8) 50 enctest.TestFile(t, utf16LEIB) 51 } 52 53 func BenchmarkEncoding(b *testing.B) { 54 enctest.Benchmark(b, UTF8) 55 enctest.Benchmark(b, utf16LEIB) 56 } 57 58 var ( 59 utf16LEIB = UTF16(LittleEndian, IgnoreBOM) // UTF-16LE (atypical interpretation) 60 utf16LEUB = UTF16(LittleEndian, UseBOM) // UTF-16, LE 61 utf16LEEB = UTF16(LittleEndian, ExpectBOM) // UTF-16, LE, Expect 62 utf16BEIB = UTF16(BigEndian, IgnoreBOM) // UTF-16BE (atypical interpretation) 63 utf16BEUB = UTF16(BigEndian, UseBOM) // UTF-16 default 64 utf16BEEB = UTF16(BigEndian, ExpectBOM) // UTF-16 Expect 65 ) 66 67 func TestUTF16(t *testing.T) { 68 testCases := []struct { 69 desc string 70 src string 71 notEOF bool // the inverse of atEOF 72 sizeDst int 73 want string 74 nSrc int 75 err error 76 t transform.Transformer 77 }{{ 78 desc: "utf-16 IgnoreBOM dec: empty string", 79 t: utf16BEIB.NewDecoder(), 80 }, { 81 desc: "utf-16 UseBOM dec: empty string", 82 t: utf16BEUB.NewDecoder(), 83 }, { 84 desc: "utf-16 ExpectBOM dec: empty string", 85 err: ErrMissingBOM, 86 t: utf16BEEB.NewDecoder(), 87 }, { 88 desc: "utf-16 dec: BOM determines encoding BE (RFC 2781:3.3)", 89 src: "\xFE\xFF\xD8\x08\xDF\x45\x00\x3D\x00\x52\x00\x61", 90 sizeDst: 100, 91 want: "\U00012345=Ra", 92 nSrc: 12, 93 t: utf16BEUB.NewDecoder(), 94 }, { 95 desc: "utf-16 dec: BOM determines encoding LE (RFC 2781:3.3)", 96 src: "\xFF\xFE\x08\xD8\x45\xDF\x3D\x00\x52\x00\x61\x00", 97 sizeDst: 100, 98 want: "\U00012345=Ra", 99 nSrc: 12, 100 t: utf16LEUB.NewDecoder(), 101 }, { 102 desc: "utf-16 dec: BOM determines encoding LE, change default (RFC 2781:3.3)", 103 src: "\xFF\xFE\x08\xD8\x45\xDF\x3D\x00\x52\x00\x61\x00", 104 sizeDst: 100, 105 want: "\U00012345=Ra", 106 nSrc: 12, 107 t: utf16BEUB.NewDecoder(), 108 }, { 109 desc: "utf-16 dec: Fail on missing BOM when required", 110 src: "\x08\xD8\x45\xDF\x3D\x00\xFF\xFE\xFE\xFF\x00\x52\x00\x61", 111 sizeDst: 100, 112 want: "", 113 nSrc: 0, 114 err: ErrMissingBOM, 115 t: utf16BEEB.NewDecoder(), 116 }, { 117 desc: "utf-16 dec: Fail on single byte missing BOM when required", 118 src: "\x00", 119 sizeDst: 4, 120 t: utf16BEEB.NewDecoder(), 121 err: ErrMissingBOM, 122 }, { 123 desc: "utf-16 dec: Fail on short src missing BOM when required", 124 src: "\x00", 125 notEOF: true, 126 sizeDst: 4, 127 t: utf16BEEB.NewDecoder(), 128 err: transform.ErrShortSrc, 129 }, { 130 desc: "utf-16 dec: SHOULD interpret text as big-endian when BOM not present (RFC 2781:4.3)", 131 src: "\xD8\x08\xDF\x45\x00\x3D\x00\x52\x00\x61", 132 sizeDst: 100, 133 want: "\U00012345=Ra", 134 nSrc: 10, 135 t: utf16BEUB.NewDecoder(), 136 }, { 137 desc: "utf-16 dec: incorrect UTF-16: odd bytes", 138 src: "\x00", 139 sizeDst: 100, 140 want: "\uFFFD", 141 nSrc: 1, 142 t: utf16BEUB.NewDecoder(), 143 }, { 144 desc: "utf-16 dec: Fail on incorrect UTF-16: short source odd bytes", 145 src: "\x00", 146 notEOF: true, 147 sizeDst: 100, 148 t: utf16BEUB.NewDecoder(), 149 err: transform.ErrShortSrc, 150 }, { 151 // This is an error according to RFC 2781. But errors in RFC 2781 are 152 // open to interpretations, so I guess this is fine. 153 desc: "utf-16le dec: incorrect BOM is an error (RFC 2781:4.1)", 154 src: "\xFE\xFF\x08\xD8\x45\xDF\x3D\x00\x52\x00\x61\x00", 155 sizeDst: 100, 156 want: "\uFFFE\U00012345=Ra", 157 nSrc: 12, 158 t: utf16LEIB.NewDecoder(), 159 }, { 160 desc: "utf-16 enc: SHOULD write BOM (RFC 2781:3.3)", 161 src: "\U00012345=Ra", 162 sizeDst: 100, 163 want: "\xFF\xFE\x08\xD8\x45\xDF\x3D\x00\x52\x00\x61\x00", 164 nSrc: 7, 165 t: utf16LEUB.NewEncoder(), 166 }, { 167 desc: "utf-16 enc: SHOULD write BOM (RFC 2781:3.3)", 168 src: "\U00012345=Ra", 169 sizeDst: 100, 170 want: "\xFE\xFF\xD8\x08\xDF\x45\x00\x3D\x00\x52\x00\x61", 171 nSrc: 7, 172 t: utf16BEUB.NewEncoder(), 173 }, { 174 desc: "utf-16le enc: MUST NOT write BOM (RFC 2781:3.3)", 175 src: "\U00012345=Ra", 176 sizeDst: 100, 177 want: "\x08\xD8\x45\xDF\x3D\x00\x52\x00\x61\x00", 178 nSrc: 7, 179 t: utf16LEIB.NewEncoder(), 180 }, { 181 desc: "utf-16be dec: incorrect UTF-16: odd bytes", 182 src: "\x00", 183 sizeDst: 100, 184 want: "\uFFFD", 185 nSrc: 1, 186 t: utf16BEIB.NewDecoder(), 187 }, { 188 desc: "utf-16be dec: unpaired surrogate, odd bytes", 189 src: "\xD8\x45\x00", 190 sizeDst: 100, 191 want: "\uFFFD\uFFFD", 192 nSrc: 3, 193 t: utf16BEIB.NewDecoder(), 194 }, { 195 desc: "utf-16be dec: unpaired low surrogate + valid text", 196 src: "\xD8\x45\x00a", 197 sizeDst: 100, 198 want: "\uFFFDa", 199 nSrc: 4, 200 t: utf16BEIB.NewDecoder(), 201 }, { 202 desc: "utf-16be dec: unpaired low surrogate + valid text + single byte", 203 src: "\xD8\x45\x00ab", 204 sizeDst: 100, 205 want: "\uFFFDa\uFFFD", 206 nSrc: 5, 207 t: utf16BEIB.NewDecoder(), 208 }, { 209 desc: "utf-16le dec: unpaired high surrogate", 210 src: "\x00\x00\x00\xDC\x12\xD8", 211 sizeDst: 100, 212 want: "\x00\uFFFD\uFFFD", 213 nSrc: 6, 214 t: utf16LEIB.NewDecoder(), 215 }, { 216 desc: "utf-16be dec: two unpaired low surrogates", 217 src: "\xD8\x45\xD8\x12", 218 sizeDst: 100, 219 want: "\uFFFD\uFFFD", 220 nSrc: 4, 221 t: utf16BEIB.NewDecoder(), 222 }, { 223 desc: "utf-16be dec: short dst", 224 src: "\x00a", 225 sizeDst: 0, 226 want: "", 227 nSrc: 0, 228 t: utf16BEIB.NewDecoder(), 229 err: transform.ErrShortDst, 230 }, { 231 desc: "utf-16be dec: short dst surrogate", 232 src: "\xD8\xF5\xDC\x12", 233 sizeDst: 3, 234 want: "", 235 nSrc: 0, 236 t: utf16BEIB.NewDecoder(), 237 err: transform.ErrShortDst, 238 }, { 239 desc: "utf-16be dec: short dst trailing byte", 240 src: "\x00", 241 sizeDst: 2, 242 want: "", 243 nSrc: 0, 244 t: utf16BEIB.NewDecoder(), 245 err: transform.ErrShortDst, 246 }, { 247 desc: "utf-16be dec: short src", 248 src: "\x00", 249 notEOF: true, 250 sizeDst: 3, 251 want: "", 252 nSrc: 0, 253 t: utf16BEIB.NewDecoder(), 254 err: transform.ErrShortSrc, 255 }, { 256 desc: "utf-16 enc", 257 src: "\U00012345=Ra", 258 sizeDst: 100, 259 want: "\xFE\xFF\xD8\x08\xDF\x45\x00\x3D\x00\x52\x00\x61", 260 nSrc: 7, 261 t: utf16BEUB.NewEncoder(), 262 }, { 263 desc: "utf-16 enc: short dst normal", 264 src: "\U00012345=Ra", 265 sizeDst: 9, 266 want: "\xD8\x08\xDF\x45\x00\x3D\x00\x52", 267 nSrc: 6, 268 t: utf16BEIB.NewEncoder(), 269 err: transform.ErrShortDst, 270 }, { 271 desc: "utf-16 enc: short dst surrogate", 272 src: "\U00012345=Ra", 273 sizeDst: 3, 274 want: "", 275 nSrc: 0, 276 t: utf16BEIB.NewEncoder(), 277 err: transform.ErrShortDst, 278 }, { 279 desc: "utf-16 enc: short src", 280 src: "\U00012345=Ra\xC2", 281 notEOF: true, 282 sizeDst: 100, 283 want: "\xD8\x08\xDF\x45\x00\x3D\x00\x52\x00\x61", 284 nSrc: 7, 285 t: utf16BEIB.NewEncoder(), 286 err: transform.ErrShortSrc, 287 }, { 288 desc: "utf-16be dec: don't change byte order mid-stream", 289 src: "\xFE\xFF\xD8\x08\xDF\x45\x00\x3D\xFF\xFE\x00\x52\x00\x61", 290 sizeDst: 100, 291 want: "\U00012345=\ufffeRa", 292 nSrc: 14, 293 t: utf16BEUB.NewDecoder(), 294 }, { 295 desc: "utf-16le dec: don't change byte order mid-stream", 296 src: "\xFF\xFE\x08\xD8\x45\xDF\x3D\x00\xFF\xFE\xFE\xFF\x52\x00\x61\x00", 297 sizeDst: 100, 298 want: "\U00012345=\ufeff\ufffeRa", 299 nSrc: 16, 300 t: utf16LEUB.NewDecoder(), 301 }} 302 for i, tc := range testCases { 303 for j := 0; j < 2; j++ { 304 b := make([]byte, tc.sizeDst) 305 nDst, nSrc, err := tc.t.Transform(b, []byte(tc.src), !tc.notEOF) 306 if err != tc.err { 307 t.Errorf("%d:%s: error was %v; want %v", i, tc.desc, err, tc.err) 308 } 309 if got := string(b[:nDst]); got != tc.want { 310 t.Errorf("%d:%s: result was %q: want %q", i, tc.desc, got, tc.want) 311 } 312 if nSrc != tc.nSrc { 313 t.Errorf("%d:%s: nSrc was %d; want %d", i, tc.desc, nSrc, tc.nSrc) 314 } 315 // Since Transform is stateful, run failures again 316 // to ensure that the same error occurs a second time. 317 if err == nil { 318 break 319 } 320 } 321 } 322 } 323 324 func TestUTF8Decoder(t *testing.T) { 325 testCases := []struct { 326 desc string 327 src string 328 notEOF bool // the inverse of atEOF 329 sizeDst int 330 want string 331 nSrc int 332 err error 333 }{{ 334 desc: "empty string, empty dest buffer", 335 }, { 336 desc: "empty string", 337 sizeDst: 8, 338 }, { 339 desc: "empty string, streaming", 340 notEOF: true, 341 sizeDst: 8, 342 }, { 343 desc: "ascii", 344 src: "abcde", 345 sizeDst: 8, 346 want: "abcde", 347 nSrc: 5, 348 }, { 349 desc: "ascii and error", 350 src: "ab\x80de", 351 sizeDst: 7, 352 want: "ab\ufffdde", 353 nSrc: 5, 354 }, { 355 desc: "valid two-byte sequence", 356 src: "a\u0300bc", 357 sizeDst: 7, 358 want: "a\u0300bc", 359 nSrc: 5, 360 }, { 361 desc: "valid three-byte sequence", 362 src: "a\u0300中", 363 sizeDst: 7, 364 want: "a\u0300中", 365 nSrc: 6, 366 }, { 367 desc: "valid four-byte sequence", 368 src: "a中\U00016F50", 369 sizeDst: 8, 370 want: "a中\U00016F50", 371 nSrc: 8, 372 }, { 373 desc: "short source buffer", 374 src: "abc\xf0\x90", 375 notEOF: true, 376 sizeDst: 10, 377 want: "abc", 378 nSrc: 3, 379 err: transform.ErrShortSrc, 380 }, { 381 // We don't check for the maximal subpart of an ill-formed subsequence 382 // at the end of an open segment. 383 desc: "complete invalid that looks like short at end", 384 src: "abc\xf0\x80", 385 notEOF: true, 386 sizeDst: 10, 387 want: "abc", // instead of "abc\ufffd\ufffd", 388 nSrc: 3, 389 err: transform.ErrShortSrc, 390 }, { 391 desc: "incomplete sequence at end", 392 src: "a\x80bc\xf0\x90", 393 sizeDst: 9, 394 want: "a\ufffdbc\ufffd", 395 nSrc: 6, 396 }, { 397 desc: "invalid second byte", 398 src: "abc\xf0dddd", 399 sizeDst: 10, 400 want: "abc\ufffddddd", 401 nSrc: 8, 402 }, { 403 desc: "invalid second byte at end", 404 src: "abc\xf0d", 405 sizeDst: 10, 406 want: "abc\ufffdd", 407 nSrc: 5, 408 }, { 409 desc: "invalid third byte", 410 src: "a\u0300bc\xf0\x90dddd", 411 sizeDst: 12, 412 want: "a\u0300bc\ufffddddd", 413 nSrc: 11, 414 }, { 415 desc: "invalid third byte at end", 416 src: "a\u0300bc\xf0\x90d", 417 sizeDst: 12, 418 want: "a\u0300bc\ufffdd", 419 nSrc: 8, 420 }, { 421 desc: "invalid fourth byte, tight buffer", 422 src: "a\u0300bc\xf0\x90\x80d", 423 sizeDst: 9, 424 want: "a\u0300bc\ufffdd", 425 nSrc: 9, 426 }, { 427 desc: "invalid fourth byte at end", 428 src: "a\u0300bc\xf0\x90\x80", 429 sizeDst: 8, 430 want: "a\u0300bc\ufffd", 431 nSrc: 8, 432 }, { 433 desc: "invalid fourth byte and short four byte sequence", 434 src: "a\u0300bc\xf0\x90\x80\xf0\x90\x80", 435 notEOF: true, 436 sizeDst: 20, 437 want: "a\u0300bc\ufffd", 438 nSrc: 8, 439 err: transform.ErrShortSrc, 440 }, { 441 desc: "valid four-byte sequence overflowing short buffer", 442 src: "a\u0300bc\xf0\x90\x80\x80", 443 notEOF: true, 444 sizeDst: 8, 445 want: "a\u0300bc", 446 nSrc: 5, 447 err: transform.ErrShortDst, 448 }, { 449 desc: "invalid fourth byte at end short, but short dst", 450 src: "a\u0300bc\xf0\x90\x80\xf0\x90\x80", 451 notEOF: true, 452 sizeDst: 8, 453 // More bytes would fit in the buffer, but this seems to require a more 454 // complicated and slower algorithm. 455 want: "a\u0300bc", // instead of "a\u0300bc" 456 nSrc: 5, 457 err: transform.ErrShortDst, 458 }, { 459 desc: "short dst for error", 460 src: "abc\x80", 461 notEOF: true, 462 sizeDst: 5, 463 want: "abc", 464 nSrc: 3, 465 err: transform.ErrShortDst, 466 }, { 467 desc: "adjusting short dst buffer", 468 src: "abc\x80ef", 469 notEOF: true, 470 sizeDst: 6, 471 want: "abc\ufffd", 472 nSrc: 4, 473 err: transform.ErrShortDst, 474 }} 475 tr := UTF8.NewDecoder() 476 for i, tc := range testCases { 477 b := make([]byte, tc.sizeDst) 478 nDst, nSrc, err := tr.Transform(b, []byte(tc.src), !tc.notEOF) 479 if err != tc.err { 480 t.Errorf("%d:%s: error was %v; want %v", i, tc.desc, err, tc.err) 481 } 482 if got := string(b[:nDst]); got != tc.want { 483 t.Errorf("%d:%s: result was %q: want %q", i, tc.desc, got, tc.want) 484 } 485 if nSrc != tc.nSrc { 486 t.Errorf("%d:%s: nSrc was %d; want %d", i, tc.desc, nSrc, tc.nSrc) 487 } 488 } 489 } 490 491 func TestUTF8BOMDecoder(t *testing.T) { 492 testCases := []struct { 493 desc string 494 src string 495 notEOF bool // the inverse of atEOF 496 sizeDst int 497 want string 498 nSrc int 499 err error 500 wantAll string 501 }{{ 502 desc: "empty string, empty dest buffer", 503 }, { 504 desc: "empty string", 505 sizeDst: 8, 506 }, { 507 desc: "empty string, streaming", 508 notEOF: true, 509 sizeDst: 8, 510 }, { 511 desc: "ascii", 512 src: "abcde", 513 sizeDst: 8, 514 want: "abcde", 515 nSrc: 5, 516 wantAll: "abcde", 517 }, { 518 desc: "ascii with bom", 519 src: utf8BOM + "abcde", 520 sizeDst: 11, 521 want: "abcde", 522 nSrc: 8, 523 wantAll: "abcde", 524 }, { 525 desc: "error with bom", 526 src: utf8BOM + "ab\x80de", 527 sizeDst: 11, 528 want: "ab\ufffdde", 529 nSrc: 8, 530 wantAll: "ab\ufffdde", 531 }, { 532 desc: "short bom", 533 src: utf8BOM[:2], 534 notEOF: true, 535 sizeDst: 7, 536 want: "", 537 nSrc: 0, 538 wantAll: "\ufffd", // needs to be 1 replacement 539 err: transform.ErrShortSrc, 540 }, { 541 desc: "short bom at end", 542 src: utf8BOM[:2], 543 sizeDst: 7, 544 want: "\ufffd", // needs to be 1 replacement 545 nSrc: 2, 546 wantAll: "\ufffd", // needs to be 1 replacement 547 err: nil, 548 }, { 549 desc: "short source buffer", 550 src: "abc\xf0\x90", 551 notEOF: true, 552 sizeDst: 10, 553 want: "abc", 554 nSrc: 3, 555 wantAll: "abc\ufffd", 556 err: transform.ErrShortSrc, 557 }, { 558 desc: "short source buffer with bom", 559 src: utf8BOM + "abc\xf0\x90", 560 notEOF: true, 561 sizeDst: 15, 562 want: "abc", 563 nSrc: 6, 564 wantAll: "abc\ufffd", 565 err: transform.ErrShortSrc, 566 }, { 567 desc: "short dst for error", 568 src: utf8BOM + "abc\x80", 569 notEOF: true, 570 sizeDst: 5, 571 want: "abc", 572 nSrc: 6, 573 wantAll: "abc\ufffd", 574 err: transform.ErrShortDst, 575 }} 576 tr := UTF8BOM.NewDecoder() 577 for i, tc := range testCases { 578 tr.Reset() 579 b := make([]byte, tc.sizeDst) 580 nDst, nSrc, err := tr.Transform(b, []byte(tc.src), !tc.notEOF) 581 if err != tc.err { 582 t.Errorf("%d:%s: error was %v; want %v", i, tc.desc, err, tc.err) 583 } 584 if got := string(b[:nDst]); got != tc.want { 585 t.Errorf("%d:%s: result was %q: want %q", i, tc.desc, got, tc.want) 586 } 587 if nSrc != tc.nSrc { 588 t.Errorf("%d:%s: nSrc was %d; want %d", i, tc.desc, nSrc, tc.nSrc) 589 } 590 if got, _ := tr.String(tc.src); got != tc.wantAll { 591 t.Errorf("%d:%s: String was %s; want %s", i, tc.desc, got, tc.wantAll) 592 } 593 } 594 } 595 596 func TestUTF8SigEncoder(t *testing.T) { 597 testCases := []struct { 598 desc string 599 src string 600 notEOF bool // the inverse of atEOF 601 sizeDst int 602 want string 603 wantAll string // converting all bytes 604 nSrc int 605 err error 606 }{{ 607 desc: "empty string, empty dest buffer", 608 err: transform.ErrShortDst, 609 wantAll: utf8BOM, 610 }, { 611 desc: "empty string", 612 sizeDst: 8, 613 want: utf8BOM, 614 wantAll: utf8BOM, 615 }, { 616 desc: "empty string, streaming", 617 notEOF: true, 618 sizeDst: 8, 619 want: utf8BOM, 620 wantAll: utf8BOM, 621 }, { 622 desc: "ascii", 623 src: "abcde", 624 sizeDst: 8, 625 want: utf8BOM + "abcde", 626 nSrc: 5, 627 wantAll: utf8BOM + "abcde", 628 }, { 629 desc: "short bom at end", 630 src: utf8BOM[:2], 631 sizeDst: 11, 632 want: utf8BOM + "\ufffd", 633 nSrc: 2, 634 wantAll: utf8BOM + "\ufffd", 635 }, { 636 desc: "short bom", 637 src: utf8BOM[:2], 638 notEOF: true, 639 sizeDst: 7, 640 want: utf8BOM, 641 nSrc: 0, 642 err: transform.ErrShortSrc, 643 wantAll: utf8BOM + "\ufffd", 644 }, { 645 desc: "short bom at end", 646 src: utf8BOM[:2], 647 sizeDst: 7, 648 want: utf8BOM + "\ufffd", // needs to be 1 replacement 649 nSrc: 2, 650 err: nil, 651 wantAll: utf8BOM + "\ufffd", 652 }, { 653 desc: "short dst buffer 2", 654 src: "ab", 655 sizeDst: 2, 656 want: "", 657 nSrc: 0, 658 err: transform.ErrShortDst, 659 wantAll: utf8BOM + "ab", 660 }, { 661 desc: "short dst buffer 3", 662 src: "ab", 663 sizeDst: 3, 664 want: utf8BOM, 665 nSrc: 0, 666 err: transform.ErrShortDst, 667 wantAll: utf8BOM + "ab", 668 }, { 669 desc: "short dst buffer 4", 670 src: "ab", 671 sizeDst: 4, 672 want: utf8BOM + "a", 673 nSrc: 1, 674 err: transform.ErrShortDst, 675 wantAll: utf8BOM + "ab", 676 }} 677 tr := UTF8BOM.NewEncoder() 678 for i, tc := range testCases { 679 tr.Reset() 680 b := make([]byte, tc.sizeDst) 681 nDst, nSrc, err := tr.Transform(b, []byte(tc.src), !tc.notEOF) 682 if err != tc.err { 683 t.Errorf("%d:%s: error was %v; want %v", i, tc.desc, err, tc.err) 684 } 685 if got := string(b[:nDst]); got != tc.want { 686 t.Errorf("%d:%s: result was %q: want %q", i, tc.desc, got, tc.want) 687 } 688 if nSrc != tc.nSrc { 689 t.Errorf("%d:%s: nSrc was %d; want %d", i, tc.desc, nSrc, tc.nSrc) 690 } 691 if got, _ := tr.String(tc.src); got != tc.wantAll { 692 t.Errorf("%d:%s: String was %s; want %s", i, tc.desc, got, tc.wantAll) 693 } 694 } 695 } 696 697 func TestBOMOverride(t *testing.T) { 698 dec := BOMOverride(charmap.CodePage437.NewDecoder()) 699 dst := make([]byte, 100) 700 for i, tc := range []struct { 701 src string 702 atEOF bool 703 dst string 704 nSrc int 705 err error 706 }{ 707 0: {"H\x82ll\x93", true, "Héllô", 5, nil}, 708 1: {"\uFEFFHéllö", true, "Héllö", 10, nil}, 709 2: {"\xFE\xFF\x00H\x00e\x00l\x00l\x00o", true, "Hello", 12, nil}, 710 3: {"\xFF\xFEH\x00e\x00l\x00l\x00o\x00", true, "Hello", 12, nil}, 711 4: {"\uFEFF", true, "", 3, nil}, 712 5: {"\xFE\xFF", true, "", 2, nil}, 713 6: {"\xFF\xFE", true, "", 2, nil}, 714 7: {"\xEF\xBB", true, "\u2229\u2557", 2, nil}, 715 8: {"\xEF", true, "\u2229", 1, nil}, 716 9: {"", true, "", 0, nil}, 717 10: {"\xFE", true, "\u25a0", 1, nil}, 718 11: {"\xFF", true, "\u00a0", 1, nil}, 719 12: {"\xEF\xBB", false, "", 0, transform.ErrShortSrc}, 720 13: {"\xEF", false, "", 0, transform.ErrShortSrc}, 721 14: {"", false, "", 0, transform.ErrShortSrc}, 722 15: {"\xFE", false, "", 0, transform.ErrShortSrc}, 723 16: {"\xFF", false, "", 0, transform.ErrShortSrc}, 724 17: {"\xFF\xFE", false, "", 0, transform.ErrShortSrc}, 725 } { 726 dec.Reset() 727 nDst, nSrc, err := dec.Transform(dst, []byte(tc.src), tc.atEOF) 728 got := string(dst[:nDst]) 729 if nSrc != tc.nSrc { 730 t.Errorf("%d: nSrc: got %d; want %d", i, nSrc, tc.nSrc) 731 } 732 if got != tc.dst { 733 t.Errorf("%d: got %+q; want %+q", i, got, tc.dst) 734 } 735 if err != tc.err { 736 t.Errorf("%d: error: got %v; want %v", i, err, tc.err) 737 } 738 } 739 }