golang.org/x/text@v0.14.0/width/transform_test.go (about) 1 // Copyright 2015 The Go Authors. All rights reserved. 2 // Use of this source code is governed by a BSD-style 3 // license that can be found in the LICENSE file. 4 5 package width 6 7 import ( 8 "bytes" 9 "strings" 10 "testing" 11 12 "golang.org/x/text/internal/testtext" 13 "golang.org/x/text/transform" 14 ) 15 16 func foldRune(r rune) (folded rune, ok bool) { 17 alt, ok := mapRunes[r] 18 if ok && alt.e&tagNeedsFold != 0 { 19 return alt.r, true 20 } 21 return r, false 22 } 23 24 func widenRune(r rune) (wide rune, ok bool) { 25 alt, ok := mapRunes[r] 26 if k := alt.e.kind(); k == EastAsianHalfwidth || k == EastAsianNarrow { 27 return alt.r, true 28 } 29 return r, false 30 } 31 32 func narrowRune(r rune) (narrow rune, ok bool) { 33 alt, ok := mapRunes[r] 34 if k := alt.e.kind(); k == EastAsianFullwidth || k == EastAsianWide || k == EastAsianAmbiguous { 35 return alt.r, true 36 } 37 return r, false 38 } 39 40 func TestFoldSingleRunes(t *testing.T) { 41 for r := rune(0); r < 0x1FFFF; r++ { 42 if loSurrogate <= r && r <= hiSurrogate { 43 continue 44 } 45 x, _ := foldRune(r) 46 want := string(x) 47 got := Fold.String(string(r)) 48 if got != want { 49 t.Errorf("Fold().String(%U) = %+q; want %+q", r, got, want) 50 } 51 } 52 } 53 54 type transformTest struct { 55 desc string 56 src string 57 nBuf int 58 nDst int 59 atEOF bool 60 dst string 61 nSrc int 62 err error 63 nSpan int 64 errSpan error 65 } 66 67 func (tc *transformTest) doTest(t *testing.T, tr Transformer) { 68 testtext.Run(t, tc.desc, func(t *testing.T) { 69 b := make([]byte, tc.nBuf) 70 nDst, nSrc, err := tr.Transform(b, []byte(tc.src), tc.atEOF) 71 if got := string(b[:nDst]); got != tc.dst[:nDst] { 72 t.Errorf("dst was %+q; want %+q", got, tc.dst) 73 } 74 if nDst != tc.nDst { 75 t.Errorf("nDst was %d; want %d", nDst, tc.nDst) 76 } 77 if nSrc != tc.nSrc { 78 t.Errorf("nSrc was %d; want %d", nSrc, tc.nSrc) 79 } 80 if err != tc.err { 81 t.Errorf("error was %v; want %v", err, tc.err) 82 } 83 if got := tr.String(tc.src); got != tc.dst { 84 t.Errorf("String(%q) = %q; want %q", tc.src, got, tc.dst) 85 } 86 n, err := tr.Span([]byte(tc.src), tc.atEOF) 87 if n != tc.nSpan || err != tc.errSpan { 88 t.Errorf("Span: got %d, %v; want %d, %v", n, err, tc.nSpan, tc.errSpan) 89 } 90 }) 91 } 92 93 func TestFold(t *testing.T) { 94 for _, tc := range []transformTest{{ 95 desc: "empty", 96 src: "", 97 nBuf: 10, 98 dst: "", 99 nDst: 0, 100 nSrc: 0, 101 atEOF: false, 102 err: nil, 103 nSpan: 0, 104 errSpan: nil, 105 }, { 106 desc: "short source 1", 107 src: "a\xc2", 108 nBuf: 10, 109 dst: "a\xc2", 110 nDst: 1, 111 nSrc: 1, 112 atEOF: false, 113 err: transform.ErrShortSrc, 114 nSpan: 1, 115 errSpan: transform.ErrShortSrc, 116 }, { 117 desc: "short source 2", 118 src: "a\xe0\x80", 119 nBuf: 10, 120 dst: "a\xe0\x80", 121 nDst: 1, 122 nSrc: 1, 123 atEOF: false, 124 err: transform.ErrShortSrc, 125 nSpan: 1, 126 errSpan: transform.ErrShortSrc, 127 }, { 128 desc: "incomplete but terminated source 1", 129 src: "a\xc2", 130 nBuf: 10, 131 dst: "a\xc2", 132 nDst: 2, 133 nSrc: 2, 134 atEOF: true, 135 err: nil, 136 nSpan: 2, 137 errSpan: nil, 138 }, { 139 desc: "incomplete but terminated source 2", 140 src: "a\xe0\x80", 141 nBuf: 10, 142 dst: "a\xe0\x80", 143 nDst: 3, 144 nSrc: 3, 145 atEOF: true, 146 err: nil, 147 nSpan: 3, 148 errSpan: nil, 149 }, { 150 desc: "exact fit dst", 151 src: "a\uff01", 152 nBuf: 2, 153 dst: "a!", 154 nDst: 2, 155 nSrc: 4, 156 atEOF: false, 157 err: nil, 158 nSpan: 1, 159 errSpan: transform.ErrEndOfSpan, 160 }, { 161 desc: "exact fit dst and src ascii", 162 src: "ab", 163 nBuf: 2, 164 dst: "ab", 165 nDst: 2, 166 nSrc: 2, 167 atEOF: true, 168 err: nil, 169 nSpan: 2, 170 errSpan: nil, 171 }, { 172 desc: "empty dst", 173 src: "\u0300", 174 nBuf: 0, 175 dst: "\u0300", 176 nDst: 0, 177 nSrc: 0, 178 atEOF: true, 179 err: transform.ErrShortDst, 180 nSpan: 2, 181 errSpan: nil, 182 }, { 183 desc: "empty dst ascii", 184 src: "a", 185 nBuf: 0, 186 dst: "a", 187 nDst: 0, 188 nSrc: 0, 189 atEOF: true, 190 err: transform.ErrShortDst, 191 nSpan: 1, 192 errSpan: nil, 193 }, { 194 desc: "short dst 1", 195 src: "a\uffe0", // ¢ 196 nBuf: 2, 197 dst: "a\u00a2", // ¢ 198 nDst: 1, 199 nSrc: 1, 200 atEOF: false, 201 err: transform.ErrShortDst, 202 nSpan: 1, 203 errSpan: transform.ErrEndOfSpan, 204 }, { 205 desc: "short dst 2", 206 src: "不夠", 207 nBuf: 3, 208 dst: "不夠", 209 nDst: 3, 210 nSrc: 3, 211 atEOF: true, 212 err: transform.ErrShortDst, 213 nSpan: 6, 214 errSpan: nil, 215 }, { 216 desc: "short dst fast path", 217 src: "fast", 218 nDst: 3, 219 dst: "fast", 220 nBuf: 3, 221 nSrc: 3, 222 atEOF: true, 223 err: transform.ErrShortDst, 224 nSpan: 4, 225 errSpan: nil, 226 }, { 227 desc: "short dst larger buffer", 228 src: "\uff21" + strings.Repeat("0", 127) + "B", 229 nBuf: 128, 230 dst: "A" + strings.Repeat("0", 127) + "B", 231 nDst: 128, 232 nSrc: 130, 233 atEOF: true, 234 err: transform.ErrShortDst, 235 nSpan: 0, 236 errSpan: transform.ErrEndOfSpan, 237 }, { 238 desc: "fast path alternation", 239 src: "fast路徑fast路徑", 240 nBuf: 20, 241 dst: "fast路徑fast路徑", 242 nDst: 20, 243 nSrc: 20, 244 atEOF: true, 245 err: nil, 246 nSpan: 20, 247 errSpan: nil, 248 }} { 249 tc.doTest(t, Fold) 250 } 251 } 252 253 func TestWidenSingleRunes(t *testing.T) { 254 for r := rune(0); r < 0x1FFFF; r++ { 255 if loSurrogate <= r && r <= hiSurrogate { 256 continue 257 } 258 alt, _ := widenRune(r) 259 want := string(alt) 260 got := Widen.String(string(r)) 261 if got != want { 262 t.Errorf("Widen().String(%U) = %+q; want %+q", r, got, want) 263 } 264 } 265 } 266 267 func TestWiden(t *testing.T) { 268 for _, tc := range []transformTest{{ 269 desc: "empty", 270 src: "", 271 nBuf: 10, 272 dst: "", 273 nDst: 0, 274 nSrc: 0, 275 atEOF: false, 276 err: nil, 277 nSpan: 0, 278 errSpan: nil, 279 }, { 280 desc: "short source 1", 281 src: "a\xc2", 282 nBuf: 10, 283 dst: "a\xc2", 284 nDst: 3, 285 nSrc: 1, 286 atEOF: false, 287 err: transform.ErrShortSrc, 288 nSpan: 0, 289 errSpan: transform.ErrEndOfSpan, 290 }, { 291 desc: "short source 2", 292 src: "a\xe0\x80", 293 nBuf: 10, 294 dst: "a\xe0\x80", 295 nDst: 3, 296 nSrc: 1, 297 atEOF: false, 298 err: transform.ErrShortSrc, 299 nSpan: 0, 300 errSpan: transform.ErrEndOfSpan, 301 }, { 302 desc: "incomplete but terminated source 1", 303 src: "a\xc2", 304 nBuf: 10, 305 dst: "a\xc2", 306 nDst: 4, 307 nSrc: 2, 308 atEOF: true, 309 err: nil, 310 nSpan: 0, 311 errSpan: transform.ErrEndOfSpan, 312 }, { 313 desc: "incomplete but terminated source 2", 314 src: "a\xe0\x80", 315 nBuf: 10, 316 dst: "a\xe0\x80", 317 nDst: 5, 318 nSrc: 3, 319 atEOF: true, 320 err: nil, 321 nSpan: 0, 322 errSpan: transform.ErrEndOfSpan, 323 }, { 324 desc: "short source 1 some span", 325 src: "a\xc2", 326 nBuf: 10, 327 dst: "a\xc2", 328 nDst: 3, 329 nSrc: 3, 330 atEOF: false, 331 err: transform.ErrShortSrc, 332 nSpan: 3, 333 errSpan: transform.ErrShortSrc, 334 }, { 335 desc: "short source 2 some span", 336 src: "a\xe0\x80", 337 nBuf: 10, 338 dst: "a\xe0\x80", 339 nDst: 3, 340 nSrc: 3, 341 atEOF: false, 342 err: transform.ErrShortSrc, 343 nSpan: 3, 344 errSpan: transform.ErrShortSrc, 345 }, { 346 desc: "incomplete but terminated source 1 some span", 347 src: "a\xc2", 348 nBuf: 10, 349 dst: "a\xc2", 350 nDst: 4, 351 nSrc: 4, 352 atEOF: true, 353 err: nil, 354 nSpan: 4, 355 errSpan: nil, 356 }, { 357 desc: "incomplete but terminated source 2 some span", 358 src: "a\xe0\x80", 359 nBuf: 10, 360 dst: "a\xe0\x80", 361 nDst: 5, 362 nSrc: 5, 363 atEOF: true, 364 err: nil, 365 nSpan: 5, 366 errSpan: nil, 367 }, { 368 desc: "exact fit dst", 369 src: "a!", 370 nBuf: 6, 371 dst: "a\uff01", 372 nDst: 6, 373 nSrc: 2, 374 atEOF: false, 375 err: nil, 376 nSpan: 0, 377 errSpan: transform.ErrEndOfSpan, 378 }, { 379 desc: "empty dst", 380 src: "\u0300", 381 nBuf: 0, 382 dst: "\u0300", 383 nDst: 0, 384 nSrc: 0, 385 atEOF: true, 386 err: transform.ErrShortDst, 387 nSpan: 2, 388 errSpan: nil, 389 }, { 390 desc: "empty dst ascii", 391 src: "a", 392 nBuf: 0, 393 dst: "a", 394 nDst: 0, 395 nSrc: 0, 396 atEOF: true, 397 err: transform.ErrShortDst, 398 nSpan: 0, 399 errSpan: transform.ErrEndOfSpan, 400 }, { 401 desc: "short dst 1", 402 src: "a\uffe0", 403 nBuf: 4, 404 dst: "a\uffe0", 405 nDst: 3, 406 nSrc: 1, 407 atEOF: false, 408 err: transform.ErrShortDst, 409 nSpan: 0, 410 errSpan: transform.ErrEndOfSpan, 411 }, { 412 desc: "short dst 2", 413 src: "不夠", 414 nBuf: 3, 415 dst: "不夠", 416 nDst: 3, 417 nSrc: 3, 418 atEOF: true, 419 err: transform.ErrShortDst, 420 nSpan: 6, 421 errSpan: nil, 422 }, { 423 desc: "short dst ascii", 424 src: "ascii", 425 nBuf: 3, 426 dst: "ascii", // U+ff41, ... 427 nDst: 3, 428 nSrc: 1, 429 atEOF: true, 430 err: transform.ErrShortDst, 431 nSpan: 0, 432 errSpan: transform.ErrEndOfSpan, 433 }, { 434 desc: "ambiguous", 435 src: "\uffe9", 436 nBuf: 4, 437 dst: "\u2190", 438 nDst: 3, 439 nSrc: 3, 440 atEOF: false, 441 err: nil, 442 nSpan: 0, 443 errSpan: transform.ErrEndOfSpan, 444 }} { 445 tc.doTest(t, Widen) 446 } 447 } 448 449 func TestNarrowSingleRunes(t *testing.T) { 450 for r := rune(0); r < 0x1FFFF; r++ { 451 if loSurrogate <= r && r <= hiSurrogate { 452 continue 453 } 454 alt, _ := narrowRune(r) 455 want := string(alt) 456 got := Narrow.String(string(r)) 457 if got != want { 458 t.Errorf("Narrow().String(%U) = %+q; want %+q", r, got, want) 459 } 460 } 461 } 462 463 func TestNarrow(t *testing.T) { 464 for _, tc := range []transformTest{{ 465 desc: "empty", 466 src: "", 467 nBuf: 10, 468 dst: "", 469 nDst: 0, 470 nSrc: 0, 471 atEOF: false, 472 err: nil, 473 nSpan: 0, 474 errSpan: nil, 475 }, { 476 desc: "short source 1", 477 src: "a\xc2", 478 nBuf: 10, 479 dst: "a\xc2", 480 nDst: 1, 481 nSrc: 1, 482 atEOF: false, 483 err: transform.ErrShortSrc, 484 nSpan: 1, 485 errSpan: transform.ErrShortSrc, 486 }, { 487 desc: "short source 2", 488 src: "a\xe0\x80", 489 nBuf: 10, 490 dst: "a\xe0\x80", 491 nDst: 1, 492 nSrc: 3, 493 atEOF: false, 494 err: transform.ErrShortSrc, 495 nSpan: 0, 496 errSpan: transform.ErrEndOfSpan, 497 }, { 498 desc: "incomplete but terminated source 1", 499 src: "a\xc2", 500 nBuf: 10, 501 dst: "a\xc2", 502 nDst: 2, 503 nSrc: 4, 504 atEOF: true, 505 err: nil, 506 nSpan: 0, 507 errSpan: transform.ErrEndOfSpan, 508 }, { 509 desc: "incomplete but terminated source 2", 510 src: "a\xe0\x80", 511 nBuf: 10, 512 dst: "a\xe0\x80", 513 nDst: 3, 514 nSrc: 5, 515 atEOF: true, 516 err: nil, 517 nSpan: 0, 518 errSpan: transform.ErrEndOfSpan, 519 }, { 520 desc: "exact fit dst", 521 src: "a\uff01", 522 nBuf: 2, 523 dst: "a!", 524 nDst: 2, 525 nSrc: 6, 526 atEOF: false, 527 err: nil, 528 nSpan: 0, 529 errSpan: transform.ErrEndOfSpan, 530 }, { 531 desc: "exact fit dst some span", 532 src: "a\uff01", 533 nBuf: 2, 534 dst: "a!", 535 nDst: 2, 536 nSrc: 4, 537 atEOF: false, 538 err: nil, 539 nSpan: 1, 540 errSpan: transform.ErrEndOfSpan, 541 }, { 542 desc: "empty dst", 543 src: "\u0300", 544 nBuf: 0, 545 dst: "\u0300", 546 nDst: 0, 547 nSrc: 0, 548 atEOF: true, 549 err: transform.ErrShortDst, 550 nSpan: 2, 551 errSpan: nil, 552 }, { 553 desc: "empty dst ascii", 554 src: "a", 555 nBuf: 0, 556 dst: "a", 557 nDst: 0, 558 nSrc: 0, 559 atEOF: true, 560 err: transform.ErrShortDst, 561 nSpan: 1, 562 errSpan: nil, 563 }, { 564 desc: "short dst 1", 565 src: "a\uffe0", // ¢ 566 nBuf: 2, 567 dst: "a\u00a2", // ¢ 568 nDst: 1, 569 nSrc: 3, 570 atEOF: false, 571 err: transform.ErrShortDst, 572 nSpan: 0, 573 errSpan: transform.ErrEndOfSpan, 574 }, { 575 desc: "short dst 2", 576 src: "不夠", 577 nBuf: 3, 578 dst: "不夠", 579 nDst: 3, 580 nSrc: 3, 581 atEOF: true, 582 err: transform.ErrShortDst, 583 nSpan: 6, 584 errSpan: nil, 585 }, { 586 // Create a narrow variant of ambiguous runes, if they exist. 587 desc: "ambiguous", 588 src: "\u2190", 589 nBuf: 4, 590 dst: "\uffe9", 591 nDst: 3, 592 nSrc: 3, 593 atEOF: false, 594 err: nil, 595 nSpan: 0, 596 errSpan: transform.ErrEndOfSpan, 597 }, { 598 desc: "short dst fast path", 599 src: "fast", 600 nBuf: 3, 601 dst: "fast", 602 nDst: 3, 603 nSrc: 3, 604 atEOF: true, 605 err: transform.ErrShortDst, 606 nSpan: 4, 607 errSpan: nil, 608 }, { 609 desc: "short dst larger buffer", 610 src: "\uff21" + strings.Repeat("0", 127) + "B", 611 nBuf: 128, 612 dst: "A" + strings.Repeat("0", 127) + "B", 613 nDst: 128, 614 nSrc: 130, 615 atEOF: true, 616 err: transform.ErrShortDst, 617 nSpan: 0, 618 errSpan: transform.ErrEndOfSpan, 619 }, { 620 desc: "fast path alternation", 621 src: "fast路徑fast路徑", 622 nBuf: 20, 623 dst: "fast路徑fast路徑", 624 nDst: 20, 625 nSrc: 20, 626 atEOF: true, 627 err: nil, 628 nSpan: 20, 629 errSpan: nil, 630 }} { 631 tc.doTest(t, Narrow) 632 } 633 } 634 635 func bench(b *testing.B, t Transformer, s string) { 636 dst := make([]byte, 1024) 637 src := []byte(s) 638 b.SetBytes(int64(len(src))) 639 b.ResetTimer() 640 for i := 0; i < b.N; i++ { 641 t.Transform(dst, src, true) 642 } 643 } 644 645 func changingRunes(f func(r rune) (rune, bool)) string { 646 buf := &bytes.Buffer{} 647 for r := rune(0); r <= 0xFFFF; r++ { 648 if _, ok := foldRune(r); ok { 649 buf.WriteRune(r) 650 } 651 } 652 return buf.String() 653 } 654 655 func BenchmarkFoldASCII(b *testing.B) { 656 bench(b, Fold, testtext.ASCII) 657 } 658 659 func BenchmarkFoldCJK(b *testing.B) { 660 bench(b, Fold, testtext.CJK) 661 } 662 663 func BenchmarkFoldNonCanonical(b *testing.B) { 664 bench(b, Fold, changingRunes(foldRune)) 665 } 666 667 func BenchmarkFoldOther(b *testing.B) { 668 bench(b, Fold, testtext.TwoByteUTF8+testtext.ThreeByteUTF8) 669 } 670 671 func BenchmarkWideASCII(b *testing.B) { 672 bench(b, Widen, testtext.ASCII) 673 } 674 675 func BenchmarkWideCJK(b *testing.B) { 676 bench(b, Widen, testtext.CJK) 677 } 678 679 func BenchmarkWideNonCanonical(b *testing.B) { 680 bench(b, Widen, changingRunes(widenRune)) 681 } 682 683 func BenchmarkWideOther(b *testing.B) { 684 bench(b, Widen, testtext.TwoByteUTF8+testtext.ThreeByteUTF8) 685 } 686 687 func BenchmarkNarrowASCII(b *testing.B) { 688 bench(b, Narrow, testtext.ASCII) 689 } 690 691 func BenchmarkNarrowCJK(b *testing.B) { 692 bench(b, Narrow, testtext.CJK) 693 } 694 695 func BenchmarkNarrowNonCanonical(b *testing.B) { 696 bench(b, Narrow, changingRunes(narrowRune)) 697 } 698 699 func BenchmarkNarrowOther(b *testing.B) { 700 bench(b, Narrow, testtext.TwoByteUTF8+testtext.ThreeByteUTF8) 701 }