github.com/go-xe2/third@v1.0.3/golang.org/x/text/transform/transform_test.go (about) 1 // Copyright 2013 The Go Authors. All rights reserved. 2 // Use of this source code is governed by a BSD-style 3 // license that can be found in the LICENSE file. 4 5 package transform 6 7 import ( 8 "bytes" 9 "errors" 10 "fmt" 11 "io/ioutil" 12 "strconv" 13 "strings" 14 "testing" 15 "time" 16 "unicode/utf8" 17 18 "github.com/go-xe2/third/golang.org/x/text/internal/testtext" 19 ) 20 21 type lowerCaseASCII struct{ NopResetter } 22 23 func (lowerCaseASCII) Transform(dst, src []byte, atEOF bool) (nDst, nSrc int, err error) { 24 n := len(src) 25 if n > len(dst) { 26 n, err = len(dst), ErrShortDst 27 } 28 for i, c := range src[:n] { 29 if 'A' <= c && c <= 'Z' { 30 c += 'a' - 'A' 31 } 32 dst[i] = c 33 } 34 return n, n, err 35 } 36 37 // lowerCaseASCIILookahead lowercases the string and reports ErrShortSrc as long 38 // as the input is not atEOF. 39 type lowerCaseASCIILookahead struct{ NopResetter } 40 41 func (lowerCaseASCIILookahead) Transform(dst, src []byte, atEOF bool) (nDst, nSrc int, err error) { 42 n := len(src) 43 if n > len(dst) { 44 n, err = len(dst), ErrShortDst 45 } 46 for i, c := range src[:n] { 47 if 'A' <= c && c <= 'Z' { 48 c += 'a' - 'A' 49 } 50 dst[i] = c 51 } 52 if !atEOF { 53 err = ErrShortSrc 54 } 55 return n, n, err 56 } 57 58 var errYouMentionedX = errors.New("you mentioned X") 59 60 type dontMentionX struct{ NopResetter } 61 62 func (dontMentionX) Transform(dst, src []byte, atEOF bool) (nDst, nSrc int, err error) { 63 n := len(src) 64 if n > len(dst) { 65 n, err = len(dst), ErrShortDst 66 } 67 for i, c := range src[:n] { 68 if c == 'X' { 69 return i, i, errYouMentionedX 70 } 71 dst[i] = c 72 } 73 return n, n, err 74 } 75 76 var errAtEnd = errors.New("error after all text") 77 78 type errorAtEnd struct{ NopResetter } 79 80 func (errorAtEnd) Transform(dst, src []byte, atEOF bool) (nDst, nSrc int, err error) { 81 n := copy(dst, src) 82 if n < len(src) { 83 return n, n, ErrShortDst 84 } 85 if atEOF { 86 return n, n, errAtEnd 87 } 88 return n, n, nil 89 } 90 91 type replaceWithConstant struct { 92 replacement string 93 written int 94 } 95 96 func (t *replaceWithConstant) Reset() { 97 t.written = 0 98 } 99 100 func (t *replaceWithConstant) Transform(dst, src []byte, atEOF bool) (nDst, nSrc int, err error) { 101 if atEOF { 102 nDst = copy(dst, t.replacement[t.written:]) 103 t.written += nDst 104 if t.written < len(t.replacement) { 105 err = ErrShortDst 106 } 107 } 108 return nDst, len(src), err 109 } 110 111 type addAnXAtTheEnd struct{ NopResetter } 112 113 func (addAnXAtTheEnd) Transform(dst, src []byte, atEOF bool) (nDst, nSrc int, err error) { 114 n := copy(dst, src) 115 if n < len(src) { 116 return n, n, ErrShortDst 117 } 118 if !atEOF { 119 return n, n, nil 120 } 121 if len(dst) == n { 122 return n, n, ErrShortDst 123 } 124 dst[n] = 'X' 125 return n + 1, n, nil 126 } 127 128 // doublerAtEOF is a strange Transformer that transforms "this" to "tthhiiss", 129 // but only if atEOF is true. 130 type doublerAtEOF struct{ NopResetter } 131 132 func (doublerAtEOF) Transform(dst, src []byte, atEOF bool) (nDst, nSrc int, err error) { 133 if !atEOF { 134 return 0, 0, ErrShortSrc 135 } 136 for i, c := range src { 137 if 2*i+2 >= len(dst) { 138 return 2 * i, i, ErrShortDst 139 } 140 dst[2*i+0] = c 141 dst[2*i+1] = c 142 } 143 return 2 * len(src), len(src), nil 144 } 145 146 // rleDecode and rleEncode implement a toy run-length encoding: "aabbbbbbbbbb" 147 // is encoded as "2a10b". The decoding is assumed to not contain any numbers. 148 149 type rleDecode struct{ NopResetter } 150 151 func (rleDecode) Transform(dst, src []byte, atEOF bool) (nDst, nSrc int, err error) { 152 loop: 153 for len(src) > 0 { 154 n := 0 155 for i, c := range src { 156 if '0' <= c && c <= '9' { 157 n = 10*n + int(c-'0') 158 continue 159 } 160 if i == 0 { 161 return nDst, nSrc, errors.New("rleDecode: bad input") 162 } 163 if n > len(dst) { 164 return nDst, nSrc, ErrShortDst 165 } 166 for j := 0; j < n; j++ { 167 dst[j] = c 168 } 169 dst, src = dst[n:], src[i+1:] 170 nDst, nSrc = nDst+n, nSrc+i+1 171 continue loop 172 } 173 if atEOF { 174 return nDst, nSrc, errors.New("rleDecode: bad input") 175 } 176 return nDst, nSrc, ErrShortSrc 177 } 178 return nDst, nSrc, nil 179 } 180 181 type rleEncode struct { 182 NopResetter 183 184 // allowStutter means that "xxxxxxxx" can be encoded as "5x3x" 185 // instead of always as "8x". 186 allowStutter bool 187 } 188 189 func (e rleEncode) Transform(dst, src []byte, atEOF bool) (nDst, nSrc int, err error) { 190 for len(src) > 0 { 191 n, c0 := len(src), src[0] 192 for i, c := range src[1:] { 193 if c != c0 { 194 n = i + 1 195 break 196 } 197 } 198 if n == len(src) && !atEOF && !e.allowStutter { 199 return nDst, nSrc, ErrShortSrc 200 } 201 s := strconv.Itoa(n) 202 if len(s) >= len(dst) { 203 return nDst, nSrc, ErrShortDst 204 } 205 copy(dst, s) 206 dst[len(s)] = c0 207 dst, src = dst[len(s)+1:], src[n:] 208 nDst, nSrc = nDst+len(s)+1, nSrc+n 209 } 210 return nDst, nSrc, nil 211 } 212 213 // trickler consumes all input bytes, but writes a single byte at a time to dst. 214 type trickler []byte 215 216 func (t *trickler) Reset() { 217 *t = nil 218 } 219 220 func (t *trickler) Transform(dst, src []byte, atEOF bool) (nDst, nSrc int, err error) { 221 *t = append(*t, src...) 222 if len(*t) == 0 { 223 return 0, 0, nil 224 } 225 if len(dst) == 0 { 226 return 0, len(src), ErrShortDst 227 } 228 dst[0] = (*t)[0] 229 *t = (*t)[1:] 230 if len(*t) > 0 { 231 err = ErrShortDst 232 } 233 return 1, len(src), err 234 } 235 236 // delayedTrickler is like trickler, but delays writing output to dst. This is 237 // highly unlikely to be relevant in practice, but it seems like a good idea 238 // to have some tolerance as long as progress can be detected. 239 type delayedTrickler []byte 240 241 func (t *delayedTrickler) Reset() { 242 *t = nil 243 } 244 245 func (t *delayedTrickler) Transform(dst, src []byte, atEOF bool) (nDst, nSrc int, err error) { 246 if len(*t) > 0 && len(dst) > 0 { 247 dst[0] = (*t)[0] 248 *t = (*t)[1:] 249 nDst = 1 250 } 251 *t = append(*t, src...) 252 if len(*t) > 0 { 253 err = ErrShortDst 254 } 255 return nDst, len(src), err 256 } 257 258 type testCase struct { 259 desc string 260 t Transformer 261 src string 262 dstSize int 263 srcSize int 264 ioSize int 265 wantStr string 266 wantErr error 267 wantIter int // number of iterations taken; 0 means we don't care. 268 } 269 270 func (t testCase) String() string { 271 return tstr(t.t) + "; " + t.desc 272 } 273 274 func tstr(t Transformer) string { 275 if stringer, ok := t.(fmt.Stringer); ok { 276 return stringer.String() 277 } 278 s := fmt.Sprintf("%T", t) 279 return s[1+strings.Index(s, "."):] 280 } 281 282 func (c chain) String() string { 283 buf := &bytes.Buffer{} 284 buf.WriteString("Chain(") 285 for i, l := range c.link[:len(c.link)-1] { 286 if i != 0 { 287 fmt.Fprint(buf, ", ") 288 } 289 buf.WriteString(tstr(l.t)) 290 } 291 buf.WriteString(")") 292 return buf.String() 293 } 294 295 var testCases = []testCase{ 296 { 297 desc: "empty", 298 t: lowerCaseASCII{}, 299 src: "", 300 dstSize: 100, 301 srcSize: 100, 302 wantStr: "", 303 }, 304 305 { 306 desc: "basic", 307 t: lowerCaseASCII{}, 308 src: "Hello WORLD.", 309 dstSize: 100, 310 srcSize: 100, 311 wantStr: "hello world.", 312 }, 313 314 { 315 desc: "small dst", 316 t: lowerCaseASCII{}, 317 src: "Hello WORLD.", 318 dstSize: 3, 319 srcSize: 100, 320 wantStr: "hello world.", 321 }, 322 323 { 324 desc: "small src", 325 t: lowerCaseASCII{}, 326 src: "Hello WORLD.", 327 dstSize: 100, 328 srcSize: 4, 329 wantStr: "hello world.", 330 }, 331 332 { 333 desc: "small buffers", 334 t: lowerCaseASCII{}, 335 src: "Hello WORLD.", 336 dstSize: 3, 337 srcSize: 4, 338 wantStr: "hello world.", 339 }, 340 341 { 342 desc: "very small buffers", 343 t: lowerCaseASCII{}, 344 src: "Hello WORLD.", 345 dstSize: 1, 346 srcSize: 1, 347 wantStr: "hello world.", 348 }, 349 350 { 351 desc: "small dst with lookahead", 352 t: lowerCaseASCIILookahead{}, 353 src: "Hello WORLD.", 354 dstSize: 3, 355 srcSize: 100, 356 wantStr: "hello world.", 357 }, 358 359 { 360 desc: "small src with lookahead", 361 t: lowerCaseASCIILookahead{}, 362 src: "Hello WORLD.", 363 dstSize: 100, 364 srcSize: 4, 365 wantStr: "hello world.", 366 }, 367 368 { 369 desc: "small buffers with lookahead", 370 t: lowerCaseASCIILookahead{}, 371 src: "Hello WORLD.", 372 dstSize: 3, 373 srcSize: 4, 374 wantStr: "hello world.", 375 }, 376 377 { 378 desc: "very small buffers with lookahead", 379 t: lowerCaseASCIILookahead{}, 380 src: "Hello WORLD.", 381 dstSize: 1, 382 srcSize: 2, 383 wantStr: "hello world.", 384 }, 385 386 { 387 desc: "user error", 388 t: dontMentionX{}, 389 src: "The First Rule of Transform Club: don't mention Mister X, ever.", 390 dstSize: 100, 391 srcSize: 100, 392 wantStr: "The First Rule of Transform Club: don't mention Mister ", 393 wantErr: errYouMentionedX, 394 }, 395 396 { 397 desc: "user error at end", 398 t: errorAtEnd{}, 399 src: "All goes well until it doesn't.", 400 dstSize: 100, 401 srcSize: 100, 402 wantStr: "All goes well until it doesn't.", 403 wantErr: errAtEnd, 404 }, 405 406 { 407 desc: "user error at end, incremental", 408 t: errorAtEnd{}, 409 src: "All goes well until it doesn't.", 410 dstSize: 10, 411 srcSize: 10, 412 wantStr: "All goes well until it doesn't.", 413 wantErr: errAtEnd, 414 }, 415 416 { 417 desc: "replace entire non-empty string with one byte", 418 t: &replaceWithConstant{replacement: "X"}, 419 src: "none of this will be copied", 420 dstSize: 1, 421 srcSize: 10, 422 wantStr: "X", 423 }, 424 425 { 426 desc: "replace entire empty string with one byte", 427 t: &replaceWithConstant{replacement: "X"}, 428 src: "", 429 dstSize: 1, 430 srcSize: 10, 431 wantStr: "X", 432 }, 433 434 { 435 desc: "replace entire empty string with seven bytes", 436 t: &replaceWithConstant{replacement: "ABCDEFG"}, 437 src: "", 438 dstSize: 3, 439 srcSize: 10, 440 wantStr: "ABCDEFG", 441 }, 442 443 { 444 desc: "add an X (initialBufSize-1)", 445 t: addAnXAtTheEnd{}, 446 src: aaa[:initialBufSize-1], 447 dstSize: 10, 448 srcSize: 10, 449 wantStr: aaa[:initialBufSize-1] + "X", 450 }, 451 452 { 453 desc: "add an X (initialBufSize+0)", 454 t: addAnXAtTheEnd{}, 455 src: aaa[:initialBufSize+0], 456 dstSize: 10, 457 srcSize: 10, 458 wantStr: aaa[:initialBufSize+0] + "X", 459 }, 460 461 { 462 desc: "add an X (initialBufSize+1)", 463 t: addAnXAtTheEnd{}, 464 src: aaa[:initialBufSize+1], 465 dstSize: 10, 466 srcSize: 10, 467 wantStr: aaa[:initialBufSize+1] + "X", 468 }, 469 470 { 471 desc: "small buffers", 472 t: dontMentionX{}, 473 src: "The First Rule of Transform Club: don't mention Mister X, ever.", 474 dstSize: 10, 475 srcSize: 10, 476 wantStr: "The First Rule of Transform Club: don't mention Mister ", 477 wantErr: errYouMentionedX, 478 }, 479 480 { 481 desc: "very small buffers", 482 t: dontMentionX{}, 483 src: "The First Rule of Transform Club: don't mention Mister X, ever.", 484 dstSize: 1, 485 srcSize: 1, 486 wantStr: "The First Rule of Transform Club: don't mention Mister ", 487 wantErr: errYouMentionedX, 488 }, 489 490 { 491 desc: "only transform at EOF", 492 t: doublerAtEOF{}, 493 src: "this", 494 dstSize: 100, 495 srcSize: 100, 496 wantStr: "tthhiiss", 497 }, 498 499 { 500 desc: "basic", 501 t: rleDecode{}, 502 src: "1a2b3c10d11e0f1g", 503 dstSize: 100, 504 srcSize: 100, 505 wantStr: "abbcccddddddddddeeeeeeeeeeeg", 506 }, 507 508 { 509 desc: "long", 510 t: rleDecode{}, 511 src: "12a23b34c45d56e99z", 512 dstSize: 100, 513 srcSize: 100, 514 wantStr: strings.Repeat("a", 12) + 515 strings.Repeat("b", 23) + 516 strings.Repeat("c", 34) + 517 strings.Repeat("d", 45) + 518 strings.Repeat("e", 56) + 519 strings.Repeat("z", 99), 520 }, 521 522 { 523 desc: "tight buffers", 524 t: rleDecode{}, 525 src: "1a2b3c10d11e0f1g", 526 dstSize: 11, 527 srcSize: 3, 528 wantStr: "abbcccddddddddddeeeeeeeeeeeg", 529 }, 530 531 { 532 desc: "short dst", 533 t: rleDecode{}, 534 src: "1a2b3c10d11e0f1g", 535 dstSize: 10, 536 srcSize: 3, 537 wantStr: "abbcccdddddddddd", 538 wantErr: ErrShortDst, 539 }, 540 541 { 542 desc: "short src", 543 t: rleDecode{}, 544 src: "1a2b3c10d11e0f1g", 545 dstSize: 11, 546 srcSize: 2, 547 ioSize: 2, 548 wantStr: "abbccc", 549 wantErr: ErrShortSrc, 550 }, 551 552 { 553 desc: "basic", 554 t: rleEncode{}, 555 src: "abbcccddddddddddeeeeeeeeeeeg", 556 dstSize: 100, 557 srcSize: 100, 558 wantStr: "1a2b3c10d11e1g", 559 }, 560 561 { 562 desc: "long", 563 t: rleEncode{}, 564 src: strings.Repeat("a", 12) + 565 strings.Repeat("b", 23) + 566 strings.Repeat("c", 34) + 567 strings.Repeat("d", 45) + 568 strings.Repeat("e", 56) + 569 strings.Repeat("z", 99), 570 dstSize: 100, 571 srcSize: 100, 572 wantStr: "12a23b34c45d56e99z", 573 }, 574 575 { 576 desc: "tight buffers", 577 t: rleEncode{}, 578 src: "abbcccddddddddddeeeeeeeeeeeg", 579 dstSize: 3, 580 srcSize: 12, 581 wantStr: "1a2b3c10d11e1g", 582 }, 583 584 { 585 desc: "short dst", 586 t: rleEncode{}, 587 src: "abbcccddddddddddeeeeeeeeeeeg", 588 dstSize: 2, 589 srcSize: 12, 590 wantStr: "1a2b3c", 591 wantErr: ErrShortDst, 592 }, 593 594 { 595 desc: "short src", 596 t: rleEncode{}, 597 src: "abbcccddddddddddeeeeeeeeeeeg", 598 dstSize: 3, 599 srcSize: 11, 600 ioSize: 11, 601 wantStr: "1a2b3c10d", 602 wantErr: ErrShortSrc, 603 }, 604 605 { 606 desc: "allowStutter = false", 607 t: rleEncode{allowStutter: false}, 608 src: "aaaabbbbbbbbccccddddd", 609 dstSize: 10, 610 srcSize: 10, 611 wantStr: "4a8b4c5d", 612 }, 613 614 { 615 desc: "allowStutter = true", 616 t: rleEncode{allowStutter: true}, 617 src: "aaaabbbbbbbbccccddddd", 618 dstSize: 10, 619 srcSize: 10, 620 ioSize: 10, 621 wantStr: "4a6b2b4c4d1d", 622 }, 623 624 { 625 desc: "trickler", 626 t: &trickler{}, 627 src: "abcdefghijklm", 628 dstSize: 3, 629 srcSize: 15, 630 wantStr: "abcdefghijklm", 631 }, 632 633 { 634 desc: "delayedTrickler", 635 t: &delayedTrickler{}, 636 src: "abcdefghijklm", 637 dstSize: 3, 638 srcSize: 15, 639 wantStr: "abcdefghijklm", 640 }, 641 } 642 643 func TestReader(t *testing.T) { 644 for _, tc := range testCases { 645 testtext.Run(t, tc.desc, func(t *testing.T) { 646 r := NewReader(strings.NewReader(tc.src), tc.t) 647 // Differently sized dst and src buffers are not part of the 648 // exported API. We override them manually. 649 r.dst = make([]byte, tc.dstSize) 650 r.src = make([]byte, tc.srcSize) 651 got, err := ioutil.ReadAll(r) 652 str := string(got) 653 if str != tc.wantStr || err != tc.wantErr { 654 t.Errorf("\ngot %q, %v\nwant %q, %v", str, err, tc.wantStr, tc.wantErr) 655 } 656 }) 657 } 658 } 659 660 func TestWriter(t *testing.T) { 661 tests := append(testCases, chainTests()...) 662 for _, tc := range tests { 663 sizes := []int{1, 2, 3, 4, 5, 10, 100, 1000} 664 if tc.ioSize > 0 { 665 sizes = []int{tc.ioSize} 666 } 667 for _, sz := range sizes { 668 testtext.Run(t, fmt.Sprintf("%s/%d", tc.desc, sz), func(t *testing.T) { 669 bb := &bytes.Buffer{} 670 w := NewWriter(bb, tc.t) 671 // Differently sized dst and src buffers are not part of the 672 // exported API. We override them manually. 673 w.dst = make([]byte, tc.dstSize) 674 w.src = make([]byte, tc.srcSize) 675 src := make([]byte, sz) 676 var err error 677 for b := tc.src; len(b) > 0 && err == nil; { 678 n := copy(src, b) 679 b = b[n:] 680 m := 0 681 m, err = w.Write(src[:n]) 682 if m != n && err == nil { 683 t.Errorf("did not consume all bytes %d < %d", m, n) 684 } 685 } 686 if err == nil { 687 err = w.Close() 688 } 689 str := bb.String() 690 if str != tc.wantStr || err != tc.wantErr { 691 t.Errorf("\ngot %q, %v\nwant %q, %v", str, err, tc.wantStr, tc.wantErr) 692 } 693 }) 694 } 695 } 696 } 697 698 func TestNop(t *testing.T) { 699 testCases := []struct { 700 str string 701 dstSize int 702 err error 703 }{ 704 {"", 0, nil}, 705 {"", 10, nil}, 706 {"a", 0, ErrShortDst}, 707 {"a", 1, nil}, 708 {"a", 10, nil}, 709 } 710 for i, tc := range testCases { 711 dst := make([]byte, tc.dstSize) 712 nDst, nSrc, err := Nop.Transform(dst, []byte(tc.str), true) 713 want := tc.str 714 if tc.dstSize < len(want) { 715 want = want[:tc.dstSize] 716 } 717 if got := string(dst[:nDst]); got != want || err != tc.err || nSrc != nDst { 718 t.Errorf("%d:\ngot %q, %d, %v\nwant %q, %d, %v", i, got, nSrc, err, want, nDst, tc.err) 719 } 720 } 721 } 722 723 func TestDiscard(t *testing.T) { 724 testCases := []struct { 725 str string 726 dstSize int 727 }{ 728 {"", 0}, 729 {"", 10}, 730 {"a", 0}, 731 {"ab", 10}, 732 } 733 for i, tc := range testCases { 734 nDst, nSrc, err := Discard.Transform(make([]byte, tc.dstSize), []byte(tc.str), true) 735 if nDst != 0 || nSrc != len(tc.str) || err != nil { 736 t.Errorf("%d:\ngot %q, %d, %v\nwant 0, %d, nil", i, nDst, nSrc, err, len(tc.str)) 737 } 738 } 739 } 740 741 // mkChain creates a Chain transformer. x must be alternating between transformer 742 // and bufSize, like T, (sz, T)* 743 func mkChain(x ...interface{}) *chain { 744 t := []Transformer{} 745 for i := 0; i < len(x); i += 2 { 746 t = append(t, x[i].(Transformer)) 747 } 748 c := Chain(t...).(*chain) 749 for i, j := 1, 1; i < len(x); i, j = i+2, j+1 { 750 c.link[j].b = make([]byte, x[i].(int)) 751 } 752 return c 753 } 754 755 func chainTests() []testCase { 756 return []testCase{ 757 { 758 desc: "nil error", 759 t: mkChain(rleEncode{}, 100, lowerCaseASCII{}), 760 src: "ABB", 761 dstSize: 100, 762 srcSize: 100, 763 wantStr: "1a2b", 764 wantErr: nil, 765 wantIter: 1, 766 }, 767 768 { 769 desc: "short dst buffer", 770 t: mkChain(lowerCaseASCII{}, 3, rleDecode{}), 771 src: "1a2b3c10d11e0f1g", 772 dstSize: 10, 773 srcSize: 3, 774 wantStr: "abbcccdddddddddd", 775 wantErr: ErrShortDst, 776 }, 777 778 { 779 desc: "short internal dst buffer", 780 t: mkChain(lowerCaseASCII{}, 3, rleDecode{}, 10, Nop), 781 src: "1a2b3c10d11e0f1g", 782 dstSize: 100, 783 srcSize: 3, 784 wantStr: "abbcccdddddddddd", 785 wantErr: errShortInternal, 786 }, 787 788 { 789 desc: "short internal dst buffer from input", 790 t: mkChain(rleDecode{}, 10, Nop), 791 src: "1a2b3c10d11e0f1g", 792 dstSize: 100, 793 srcSize: 3, 794 wantStr: "abbcccdddddddddd", 795 wantErr: errShortInternal, 796 }, 797 798 { 799 desc: "empty short internal dst buffer", 800 t: mkChain(lowerCaseASCII{}, 3, rleDecode{}, 10, Nop), 801 src: "4a7b11e0f1g", 802 dstSize: 100, 803 srcSize: 3, 804 wantStr: "aaaabbbbbbb", 805 wantErr: errShortInternal, 806 }, 807 808 { 809 desc: "empty short internal dst buffer from input", 810 t: mkChain(rleDecode{}, 10, Nop), 811 src: "4a7b11e0f1g", 812 dstSize: 100, 813 srcSize: 3, 814 wantStr: "aaaabbbbbbb", 815 wantErr: errShortInternal, 816 }, 817 818 { 819 desc: "short internal src buffer after full dst buffer", 820 t: mkChain(Nop, 5, rleEncode{}, 10, Nop), 821 src: "cccccddddd", 822 dstSize: 100, 823 srcSize: 100, 824 wantStr: "", 825 wantErr: errShortInternal, 826 wantIter: 1, 827 }, 828 829 { 830 desc: "short internal src buffer after short dst buffer; test lastFull", 831 t: mkChain(rleDecode{}, 5, rleEncode{}, 4, Nop), 832 src: "2a1b4c6d", 833 dstSize: 100, 834 srcSize: 100, 835 wantStr: "2a1b", 836 wantErr: errShortInternal, 837 }, 838 839 { 840 desc: "short internal src buffer after successful complete fill", 841 t: mkChain(Nop, 3, rleDecode{}), 842 src: "123a4b", 843 dstSize: 4, 844 srcSize: 3, 845 wantStr: "", 846 wantErr: errShortInternal, 847 wantIter: 1, 848 }, 849 850 { 851 desc: "short internal src buffer after short dst buffer; test lastFull", 852 t: mkChain(rleDecode{}, 5, rleEncode{}), 853 src: "2a1b4c6d", 854 dstSize: 4, 855 srcSize: 100, 856 wantStr: "2a1b", 857 wantErr: errShortInternal, 858 }, 859 860 { 861 desc: "short src buffer", 862 t: mkChain(rleEncode{}, 5, Nop), 863 src: "abbcccddddeeeee", 864 dstSize: 4, 865 srcSize: 4, 866 ioSize: 4, 867 wantStr: "1a2b3c", 868 wantErr: ErrShortSrc, 869 }, 870 871 { 872 desc: "process all in one go", 873 t: mkChain(rleEncode{}, 5, Nop), 874 src: "abbcccddddeeeeeffffff", 875 dstSize: 100, 876 srcSize: 100, 877 wantStr: "1a2b3c4d5e6f", 878 wantErr: nil, 879 wantIter: 1, 880 }, 881 882 { 883 desc: "complete processing downstream after error", 884 t: mkChain(dontMentionX{}, 2, rleDecode{}, 5, Nop), 885 src: "3a4b5eX", 886 dstSize: 100, 887 srcSize: 100, 888 ioSize: 100, 889 wantStr: "aaabbbbeeeee", 890 wantErr: errYouMentionedX, 891 }, 892 893 { 894 desc: "return downstream fatal errors first (followed by short dst)", 895 t: mkChain(dontMentionX{}, 8, rleDecode{}, 4, Nop), 896 src: "3a4b5eX", 897 dstSize: 100, 898 srcSize: 100, 899 ioSize: 100, 900 wantStr: "aaabbbb", 901 wantErr: errShortInternal, 902 }, 903 904 { 905 desc: "return downstream fatal errors first (followed by short src)", 906 t: mkChain(dontMentionX{}, 5, Nop, 1, rleDecode{}), 907 src: "1a5bX", 908 dstSize: 100, 909 srcSize: 100, 910 ioSize: 100, 911 wantStr: "", 912 wantErr: errShortInternal, 913 }, 914 915 { 916 desc: "short internal", 917 t: mkChain(Nop, 11, rleEncode{}, 3, Nop), 918 src: "abbcccddddddddddeeeeeeeeeeeg", 919 dstSize: 3, 920 srcSize: 100, 921 wantStr: "1a2b3c10d", 922 wantErr: errShortInternal, 923 }, 924 } 925 } 926 927 func doTransform(tc testCase) (res string, iter int, err error) { 928 tc.t.Reset() 929 dst := make([]byte, tc.dstSize) 930 out, in := make([]byte, 0, 2*len(tc.src)), []byte(tc.src) 931 for { 932 iter++ 933 src, atEOF := in, true 934 if len(src) > tc.srcSize { 935 src, atEOF = src[:tc.srcSize], false 936 } 937 nDst, nSrc, err := tc.t.Transform(dst, src, atEOF) 938 out = append(out, dst[:nDst]...) 939 in = in[nSrc:] 940 switch { 941 case err == nil && len(in) != 0: 942 case err == ErrShortSrc && nSrc > 0: 943 case err == ErrShortDst && (nDst > 0 || nSrc > 0): 944 default: 945 return string(out), iter, err 946 } 947 } 948 } 949 950 func TestChain(t *testing.T) { 951 if c, ok := Chain().(nop); !ok { 952 t.Errorf("empty chain: %v; want Nop", c) 953 } 954 955 // Test Chain for a single Transformer. 956 for _, tc := range testCases { 957 tc.t = Chain(tc.t) 958 str, _, err := doTransform(tc) 959 if str != tc.wantStr || err != tc.wantErr { 960 t.Errorf("%s:\ngot %q, %v\nwant %q, %v", tc, str, err, tc.wantStr, tc.wantErr) 961 } 962 } 963 964 tests := chainTests() 965 sizes := []int{1, 2, 3, 4, 5, 7, 10, 100, 1000} 966 addTest := func(tc testCase, t *chain) { 967 if t.link[0].t != tc.t && tc.wantErr == ErrShortSrc { 968 tc.wantErr = errShortInternal 969 } 970 if t.link[len(t.link)-2].t != tc.t && tc.wantErr == ErrShortDst { 971 tc.wantErr = errShortInternal 972 } 973 tc.t = t 974 tests = append(tests, tc) 975 } 976 for _, tc := range testCases { 977 for _, sz := range sizes { 978 tt := tc 979 tt.dstSize = sz 980 addTest(tt, mkChain(tc.t, tc.dstSize, Nop)) 981 addTest(tt, mkChain(tc.t, tc.dstSize, Nop, 2, Nop)) 982 addTest(tt, mkChain(Nop, tc.srcSize, tc.t, tc.dstSize, Nop)) 983 if sz >= tc.dstSize && (tc.wantErr != ErrShortDst || sz == tc.dstSize) { 984 addTest(tt, mkChain(Nop, tc.srcSize, tc.t)) 985 addTest(tt, mkChain(Nop, 100, Nop, tc.srcSize, tc.t)) 986 } 987 } 988 } 989 for _, tc := range testCases { 990 tt := tc 991 tt.dstSize = 1 992 tt.wantStr = "" 993 addTest(tt, mkChain(tc.t, tc.dstSize, Discard)) 994 addTest(tt, mkChain(Nop, tc.srcSize, tc.t, tc.dstSize, Discard)) 995 addTest(tt, mkChain(Nop, tc.srcSize, tc.t, tc.dstSize, Nop, tc.dstSize, Discard)) 996 } 997 for _, tc := range testCases { 998 tt := tc 999 tt.dstSize = 100 1000 tt.wantStr = strings.Replace(tc.src, "0f", "", -1) 1001 // Chain encoders and decoders. 1002 if _, ok := tc.t.(rleEncode); ok && tc.wantErr == nil { 1003 addTest(tt, mkChain(tc.t, tc.dstSize, Nop, 1000, rleDecode{})) 1004 addTest(tt, mkChain(tc.t, tc.dstSize, Nop, tc.dstSize, rleDecode{})) 1005 addTest(tt, mkChain(Nop, tc.srcSize, tc.t, tc.dstSize, Nop, 100, rleDecode{})) 1006 // decoding needs larger destinations 1007 addTest(tt, mkChain(Nop, tc.srcSize, tc.t, tc.dstSize, rleDecode{}, 100, Nop)) 1008 addTest(tt, mkChain(Nop, tc.srcSize, tc.t, tc.dstSize, Nop, 100, rleDecode{}, 100, Nop)) 1009 } else if _, ok := tc.t.(rleDecode); ok && tc.wantErr == nil { 1010 // The internal buffer size may need to be the sum of the maximum segment 1011 // size of the two encoders! 1012 addTest(tt, mkChain(tc.t, 2*tc.dstSize, rleEncode{})) 1013 addTest(tt, mkChain(tc.t, tc.dstSize, Nop, 101, rleEncode{})) 1014 addTest(tt, mkChain(Nop, tc.srcSize, tc.t, tc.dstSize, Nop, 100, rleEncode{})) 1015 addTest(tt, mkChain(Nop, tc.srcSize, tc.t, tc.dstSize, Nop, 200, rleEncode{}, 100, Nop)) 1016 } 1017 } 1018 for _, tc := range tests { 1019 str, iter, err := doTransform(tc) 1020 mi := tc.wantIter != 0 && tc.wantIter != iter 1021 if str != tc.wantStr || err != tc.wantErr || mi { 1022 t.Errorf("%s:\ngot iter:%d, %q, %v\nwant iter:%d, %q, %v", tc, iter, str, err, tc.wantIter, tc.wantStr, tc.wantErr) 1023 } 1024 break 1025 } 1026 } 1027 1028 func TestRemoveFunc(t *testing.T) { 1029 filter := RemoveFunc(func(r rune) bool { 1030 return strings.IndexRune("ab\u0300\u1234,", r) != -1 1031 }) 1032 tests := []testCase{ 1033 { 1034 src: ",", 1035 wantStr: "", 1036 }, 1037 1038 { 1039 src: "c", 1040 wantStr: "c", 1041 }, 1042 1043 { 1044 src: "\u2345", 1045 wantStr: "\u2345", 1046 }, 1047 1048 { 1049 src: "tschüß", 1050 wantStr: "tschüß", 1051 }, 1052 1053 { 1054 src: ",до,свидания,", 1055 wantStr: "досвидания", 1056 }, 1057 1058 { 1059 src: "a\xbd\xb2=\xbc ⌘", 1060 wantStr: "\uFFFD\uFFFD=\uFFFD ⌘", 1061 }, 1062 1063 { 1064 // If we didn't replace illegal bytes with RuneError, the result 1065 // would be \u0300 or the code would need to be more complex. 1066 src: "\xcc\u0300\x80", 1067 wantStr: "\uFFFD\uFFFD", 1068 }, 1069 1070 { 1071 src: "\xcc\u0300\x80", 1072 dstSize: 3, 1073 wantStr: "\uFFFD\uFFFD", 1074 wantIter: 2, 1075 }, 1076 1077 { 1078 // Test a long buffer greater than the internal buffer size 1079 src: "hello\xcc\xcc\xccworld", 1080 srcSize: 13, 1081 wantStr: "hello\uFFFD\uFFFD\uFFFDworld", 1082 wantIter: 1, 1083 }, 1084 1085 { 1086 src: "\u2345", 1087 dstSize: 2, 1088 wantStr: "", 1089 wantErr: ErrShortDst, 1090 }, 1091 1092 { 1093 src: "\xcc", 1094 dstSize: 2, 1095 wantStr: "", 1096 wantErr: ErrShortDst, 1097 }, 1098 1099 { 1100 src: "\u0300", 1101 dstSize: 2, 1102 srcSize: 1, 1103 wantStr: "", 1104 wantErr: ErrShortSrc, 1105 }, 1106 1107 { 1108 t: RemoveFunc(func(r rune) bool { 1109 return r == utf8.RuneError 1110 }), 1111 src: "\xcc\u0300\x80", 1112 wantStr: "\u0300", 1113 }, 1114 } 1115 1116 for _, tc := range tests { 1117 tc.desc = tc.src 1118 if tc.t == nil { 1119 tc.t = filter 1120 } 1121 if tc.dstSize == 0 { 1122 tc.dstSize = 100 1123 } 1124 if tc.srcSize == 0 { 1125 tc.srcSize = 100 1126 } 1127 str, iter, err := doTransform(tc) 1128 mi := tc.wantIter != 0 && tc.wantIter != iter 1129 if str != tc.wantStr || err != tc.wantErr || mi { 1130 t.Errorf("%+q:\ngot iter:%d, %+q, %v\nwant iter:%d, %+q, %v", tc.src, iter, str, err, tc.wantIter, tc.wantStr, tc.wantErr) 1131 } 1132 1133 tc.src = str 1134 idem, _, _ := doTransform(tc) 1135 if str != idem { 1136 t.Errorf("%+q: found %+q; want %+q", tc.src, idem, str) 1137 } 1138 } 1139 } 1140 1141 func testString(t *testing.T, f func(Transformer, string) (string, int, error)) { 1142 for _, tt := range append(testCases, chainTests()...) { 1143 if tt.desc == "allowStutter = true" { 1144 // We don't have control over the buffer size, so we eliminate tests 1145 // that depend on a specific buffer size being set. 1146 continue 1147 } 1148 if tt.wantErr == ErrShortDst || tt.wantErr == ErrShortSrc { 1149 // The result string will be different. 1150 continue 1151 } 1152 testtext.Run(t, tt.desc, func(t *testing.T) { 1153 got, n, err := f(tt.t, tt.src) 1154 if tt.wantErr != err { 1155 t.Errorf("error: got %v; want %v", err, tt.wantErr) 1156 } 1157 // Check that err == nil implies that n == len(tt.src). Note that vice 1158 // versa isn't necessarily true. 1159 if err == nil && n != len(tt.src) { 1160 t.Errorf("err == nil: got %d bytes, want %d", n, err) 1161 } 1162 if got != tt.wantStr { 1163 t.Errorf("string: got %q; want %q", got, tt.wantStr) 1164 } 1165 }) 1166 } 1167 } 1168 1169 func TestBytes(t *testing.T) { 1170 testString(t, func(z Transformer, s string) (string, int, error) { 1171 b, n, err := Bytes(z, []byte(s)) 1172 return string(b), n, err 1173 }) 1174 } 1175 1176 func TestAppend(t *testing.T) { 1177 // Create a bunch of subtests for different buffer sizes. 1178 testCases := [][]byte{ 1179 nil, 1180 make([]byte, 0, 0), 1181 make([]byte, 0, 1), 1182 make([]byte, 1, 1), 1183 make([]byte, 1, 5), 1184 make([]byte, 100, 100), 1185 make([]byte, 100, 200), 1186 } 1187 for _, tc := range testCases { 1188 testString(t, func(z Transformer, s string) (string, int, error) { 1189 b, n, err := Append(z, tc, []byte(s)) 1190 return string(b[len(tc):]), n, err 1191 }) 1192 } 1193 } 1194 1195 func TestString(t *testing.T) { 1196 testtext.Run(t, "transform", func(t *testing.T) { testString(t, String) }) 1197 1198 // Overrun the internal destination buffer. 1199 for i, s := range []string{ 1200 aaa[:1*initialBufSize-1], 1201 aaa[:1*initialBufSize+0], 1202 aaa[:1*initialBufSize+1], 1203 AAA[:1*initialBufSize-1], 1204 AAA[:1*initialBufSize+0], 1205 AAA[:1*initialBufSize+1], 1206 AAA[:2*initialBufSize-1], 1207 AAA[:2*initialBufSize+0], 1208 AAA[:2*initialBufSize+1], 1209 aaa[:1*initialBufSize-2] + "A", 1210 aaa[:1*initialBufSize-1] + "A", 1211 aaa[:1*initialBufSize+0] + "A", 1212 aaa[:1*initialBufSize+1] + "A", 1213 } { 1214 testtext.Run(t, fmt.Sprint("dst buffer test using lower/", i), func(t *testing.T) { 1215 got, _, _ := String(lowerCaseASCII{}, s) 1216 if want := strings.ToLower(s); got != want { 1217 t.Errorf("got %s (%d); want %s (%d)", got, len(got), want, len(want)) 1218 } 1219 }) 1220 } 1221 1222 // Overrun the internal source buffer. 1223 for i, s := range []string{ 1224 aaa[:1*initialBufSize-1], 1225 aaa[:1*initialBufSize+0], 1226 aaa[:1*initialBufSize+1], 1227 aaa[:2*initialBufSize+1], 1228 aaa[:2*initialBufSize+0], 1229 aaa[:2*initialBufSize+1], 1230 } { 1231 testtext.Run(t, fmt.Sprint("src buffer test using rleEncode/", i), func(t *testing.T) { 1232 got, _, _ := String(rleEncode{}, s) 1233 if want := fmt.Sprintf("%da", len(s)); got != want { 1234 t.Errorf("got %s (%d); want %s (%d)", got, len(got), want, len(want)) 1235 } 1236 }) 1237 } 1238 1239 // Test allocations for non-changing strings. 1240 // Note we still need to allocate a single buffer. 1241 for i, s := range []string{ 1242 "", 1243 "123456789", 1244 aaa[:initialBufSize-1], 1245 aaa[:initialBufSize+0], 1246 aaa[:initialBufSize+1], 1247 aaa[:10*initialBufSize], 1248 } { 1249 testtext.Run(t, fmt.Sprint("alloc/", i), func(t *testing.T) { 1250 if n := testtext.AllocsPerRun(5, func() { String(&lowerCaseASCIILookahead{}, s) }); n > 1 { 1251 t.Errorf("#allocs was %f; want 1", n) 1252 } 1253 }) 1254 } 1255 } 1256 1257 // TestBytesAllocation tests that buffer growth stays limited with the trickler 1258 // transformer, which behaves oddly but within spec. In case buffer growth is 1259 // not correctly handled, the test will either panic with a failed allocation or 1260 // thrash. To ensure the tests terminate under the last condition, we time out 1261 // after some sufficiently long period of time. 1262 func TestBytesAllocation(t *testing.T) { 1263 done := make(chan bool) 1264 go func() { 1265 in := bytes.Repeat([]byte{'a'}, 1000) 1266 tr := trickler(make([]byte, 1)) 1267 Bytes(&tr, in) 1268 done <- true 1269 }() 1270 select { 1271 case <-done: 1272 case <-time.After(3 * time.Second): 1273 t.Error("time out, likely due to excessive allocation") 1274 } 1275 } 1276 1277 // TestStringAllocation tests that buffer growth stays limited with the trickler 1278 // transformer, which behaves oddly but within spec. In case buffer growth is 1279 // not correctly handled, the test will either panic with a failed allocation or 1280 // thrash. To ensure the tests terminate under the last condition, we time out 1281 // after some sufficiently long period of time. 1282 func TestStringAllocation(t *testing.T) { 1283 done := make(chan bool) 1284 go func() { 1285 tr := trickler(make([]byte, 1)) 1286 String(&tr, aaa[:1000]) 1287 done <- true 1288 }() 1289 select { 1290 case <-done: 1291 case <-time.After(3 * time.Second): 1292 t.Error("time out, likely due to excessive allocation") 1293 } 1294 } 1295 1296 func BenchmarkStringLowerEmpty(b *testing.B) { 1297 for i := 0; i < b.N; i++ { 1298 String(&lowerCaseASCIILookahead{}, "") 1299 } 1300 } 1301 1302 func BenchmarkStringLowerIdentical(b *testing.B) { 1303 for i := 0; i < b.N; i++ { 1304 String(&lowerCaseASCIILookahead{}, aaa[:4096]) 1305 } 1306 } 1307 1308 func BenchmarkStringLowerChanged(b *testing.B) { 1309 for i := 0; i < b.N; i++ { 1310 String(&lowerCaseASCIILookahead{}, AAA[:4096]) 1311 } 1312 } 1313 1314 var ( 1315 aaa = strings.Repeat("a", 4096) 1316 AAA = strings.Repeat("A", 4096) 1317 )