golang.org/x/text@v0.14.0/runes/runes_test.go (about) 1 // Copyright 2015 The Go Authors. All rights reserved. 2 // Use of this source code is governed by a BSD-style 3 // license that can be found in the LICENSE file. 4 5 package runes 6 7 import ( 8 "strings" 9 "testing" 10 "unicode/utf8" 11 12 "golang.org/x/text/internal/testtext" 13 "golang.org/x/text/transform" 14 ) 15 16 type transformTest struct { 17 desc string 18 szDst int 19 atEOF bool 20 repl string 21 in string 22 out string // result string of first call to Transform 23 outFull string // transform of entire input string 24 err error 25 errSpan error 26 nSpan int 27 28 t transform.SpanningTransformer 29 } 30 31 const large = 10240 32 33 func (tt *transformTest) check(t *testing.T, i int) { 34 if tt.t == nil { 35 return 36 } 37 dst := make([]byte, tt.szDst) 38 src := []byte(tt.in) 39 nDst, nSrc, err := tt.t.Transform(dst, src, tt.atEOF) 40 if err != tt.err { 41 t.Errorf("%d:%s:error: got %v; want %v", i, tt.desc, err, tt.err) 42 } 43 if got := string(dst[:nDst]); got != tt.out { 44 t.Errorf("%d:%s:out: got %q; want %q", i, tt.desc, got, tt.out) 45 } 46 47 // Calls tt.t.Transform for the remainder of the input. We use this to test 48 // the nSrc return value. 49 out := make([]byte, large) 50 n := copy(out, dst[:nDst]) 51 nDst, _, _ = tt.t.Transform(out[n:], src[nSrc:], true) 52 if got, want := string(out[:n+nDst]), tt.outFull; got != want { 53 t.Errorf("%d:%s:outFull: got %q; want %q", i, tt.desc, got, want) 54 } 55 56 tt.t.Reset() 57 p := 0 58 for ; p < len(tt.in) && p < len(tt.outFull) && tt.in[p] == tt.outFull[p]; p++ { 59 } 60 if tt.nSpan != 0 { 61 p = tt.nSpan 62 } 63 if n, err = tt.t.Span([]byte(tt.in), tt.atEOF); n != p || err != tt.errSpan { 64 t.Errorf("%d:%s:span: got %d, %v; want %d, %v", i, tt.desc, n, err, p, tt.errSpan) 65 } 66 } 67 68 func idem(r rune) rune { return r } 69 70 func TestMap(t *testing.T) { 71 runes := []rune{'a', 'ç', '中', '\U00012345', 'a'} 72 // Default mapper used for this test. 73 rotate := Map(func(r rune) rune { 74 for i, m := range runes { 75 if m == r { 76 return runes[i+1] 77 } 78 } 79 return r 80 }) 81 82 for i, tt := range []transformTest{{ 83 desc: "empty", 84 szDst: large, 85 atEOF: true, 86 in: "", 87 out: "", 88 outFull: "", 89 t: rotate, 90 }, { 91 desc: "no change", 92 szDst: 1, 93 atEOF: true, 94 in: "b", 95 out: "b", 96 outFull: "b", 97 t: rotate, 98 }, { 99 desc: "short dst", 100 szDst: 2, 101 atEOF: true, 102 in: "aaaa", 103 out: "ç", 104 outFull: "çççç", 105 err: transform.ErrShortDst, 106 errSpan: transform.ErrEndOfSpan, 107 t: rotate, 108 }, { 109 desc: "short dst ascii, no change", 110 szDst: 2, 111 atEOF: true, 112 in: "bbb", 113 out: "bb", 114 outFull: "bbb", 115 err: transform.ErrShortDst, 116 t: rotate, 117 }, { 118 desc: "short dst writing error", 119 szDst: 2, 120 atEOF: false, 121 in: "a\x80", 122 out: "ç", 123 outFull: "ç\ufffd", 124 err: transform.ErrShortDst, 125 errSpan: transform.ErrEndOfSpan, 126 t: rotate, 127 }, { 128 desc: "short dst writing incomplete rune", 129 szDst: 2, 130 atEOF: true, 131 in: "a\xc0", 132 out: "ç", 133 outFull: "ç\ufffd", 134 err: transform.ErrShortDst, 135 errSpan: transform.ErrEndOfSpan, 136 t: rotate, 137 }, { 138 desc: "short dst, longer", 139 szDst: 5, 140 atEOF: true, 141 in: "Hellø", 142 out: "Hell", 143 outFull: "Hellø", 144 err: transform.ErrShortDst, 145 t: rotate, 146 }, { 147 desc: "short dst, single", 148 szDst: 1, 149 atEOF: false, 150 in: "ø", 151 out: "", 152 outFull: "ø", 153 err: transform.ErrShortDst, 154 t: Map(idem), 155 }, { 156 desc: "short dst, longer, writing error", 157 szDst: 8, 158 atEOF: false, 159 in: "\x80Hello\x80", 160 out: "\ufffdHello", 161 outFull: "\ufffdHello\ufffd", 162 err: transform.ErrShortDst, 163 errSpan: transform.ErrEndOfSpan, 164 t: rotate, 165 }, { 166 desc: "short src", 167 szDst: 2, 168 atEOF: false, 169 in: "a\xc2", 170 out: "ç", 171 outFull: "ç\ufffd", 172 err: transform.ErrShortSrc, 173 errSpan: transform.ErrEndOfSpan, 174 t: rotate, 175 }, { 176 desc: "invalid input, atEOF", 177 szDst: large, 178 atEOF: true, 179 in: "\x80", 180 out: "\ufffd", 181 outFull: "\ufffd", 182 errSpan: transform.ErrEndOfSpan, 183 t: rotate, 184 }, { 185 desc: "invalid input, !atEOF", 186 szDst: large, 187 atEOF: false, 188 in: "\x80", 189 out: "\ufffd", 190 outFull: "\ufffd", 191 errSpan: transform.ErrEndOfSpan, 192 t: rotate, 193 }, { 194 desc: "incomplete rune !atEOF", 195 szDst: large, 196 atEOF: false, 197 in: "\xc2", 198 out: "", 199 outFull: "\ufffd", 200 err: transform.ErrShortSrc, 201 errSpan: transform.ErrShortSrc, 202 t: rotate, 203 }, { 204 desc: "invalid input, incomplete rune atEOF", 205 szDst: large, 206 atEOF: true, 207 in: "\xc2", 208 out: "\ufffd", 209 outFull: "\ufffd", 210 errSpan: transform.ErrEndOfSpan, 211 t: rotate, 212 }, { 213 desc: "misc correct", 214 szDst: large, 215 atEOF: true, 216 in: "a\U00012345 ç!", 217 out: "ça 中!", 218 outFull: "ça 中!", 219 errSpan: transform.ErrEndOfSpan, 220 t: rotate, 221 }, { 222 desc: "misc correct and invalid", 223 szDst: large, 224 atEOF: true, 225 in: "Hello\x80 w\x80orl\xc0d!\xc0", 226 out: "Hello\ufffd w\ufffdorl\ufffdd!\ufffd", 227 outFull: "Hello\ufffd w\ufffdorl\ufffdd!\ufffd", 228 errSpan: transform.ErrEndOfSpan, 229 t: rotate, 230 }, { 231 desc: "misc correct and invalid, short src", 232 szDst: large, 233 atEOF: false, 234 in: "Hello\x80 w\x80orl\xc0d!\xc2", 235 out: "Hello\ufffd w\ufffdorl\ufffdd!", 236 outFull: "Hello\ufffd w\ufffdorl\ufffdd!\ufffd", 237 err: transform.ErrShortSrc, 238 errSpan: transform.ErrEndOfSpan, 239 t: rotate, 240 }, { 241 desc: "misc correct and invalid, short src, replacing RuneError", 242 szDst: large, 243 atEOF: false, 244 in: "Hel\ufffdlo\x80 w\x80orl\xc0d!\xc2", 245 out: "Hel?lo? w?orl?d!", 246 outFull: "Hel?lo? w?orl?d!?", 247 errSpan: transform.ErrEndOfSpan, 248 err: transform.ErrShortSrc, 249 t: Map(func(r rune) rune { 250 if r == utf8.RuneError { 251 return '?' 252 } 253 return r 254 }), 255 }} { 256 tt.check(t, i) 257 } 258 } 259 260 func TestRemove(t *testing.T) { 261 remove := Remove(Predicate(func(r rune) bool { 262 return strings.ContainsRune("aeiou\u0300\uFF24\U00012345", r) 263 })) 264 265 for i, tt := range []transformTest{ 266 0: { 267 szDst: large, 268 atEOF: true, 269 in: "", 270 out: "", 271 outFull: "", 272 t: remove, 273 }, 274 1: { 275 szDst: 0, 276 atEOF: true, 277 in: "aaaa", 278 out: "", 279 outFull: "", 280 errSpan: transform.ErrEndOfSpan, 281 t: remove, 282 }, 283 2: { 284 szDst: 1, 285 atEOF: true, 286 in: "aaaa", 287 out: "", 288 outFull: "", 289 errSpan: transform.ErrEndOfSpan, 290 t: remove, 291 }, 292 3: { 293 szDst: 1, 294 atEOF: true, 295 in: "baaaa", 296 out: "b", 297 outFull: "b", 298 errSpan: transform.ErrEndOfSpan, 299 t: remove, 300 }, 301 4: { 302 szDst: 2, 303 atEOF: true, 304 in: "açaaa", 305 out: "ç", 306 outFull: "ç", 307 errSpan: transform.ErrEndOfSpan, 308 t: remove, 309 }, 310 5: { 311 szDst: 2, 312 atEOF: true, 313 in: "aaaç", 314 out: "ç", 315 outFull: "ç", 316 errSpan: transform.ErrEndOfSpan, 317 t: remove, 318 }, 319 6: { 320 szDst: 2, 321 atEOF: false, 322 in: "a\x80", 323 out: "", 324 outFull: "\ufffd", 325 err: transform.ErrShortDst, 326 errSpan: transform.ErrEndOfSpan, 327 t: remove, 328 }, 329 7: { 330 szDst: 1, 331 atEOF: true, 332 in: "a\xc0", 333 out: "", 334 outFull: "\ufffd", 335 err: transform.ErrShortDst, 336 errSpan: transform.ErrEndOfSpan, 337 t: remove, 338 }, 339 8: { 340 szDst: 1, 341 atEOF: false, 342 in: "a\xc2", 343 out: "", 344 outFull: "\ufffd", 345 err: transform.ErrShortSrc, 346 errSpan: transform.ErrEndOfSpan, 347 t: remove, 348 }, 349 9: { 350 szDst: large, 351 atEOF: true, 352 in: "\x80", 353 out: "\ufffd", 354 outFull: "\ufffd", 355 errSpan: transform.ErrEndOfSpan, 356 t: remove, 357 }, 358 10: { 359 szDst: large, 360 atEOF: false, 361 in: "\x80", 362 out: "\ufffd", 363 outFull: "\ufffd", 364 errSpan: transform.ErrEndOfSpan, 365 t: remove, 366 }, 367 11: { 368 szDst: large, 369 atEOF: true, 370 in: "\xc2", 371 out: "\ufffd", 372 outFull: "\ufffd", 373 errSpan: transform.ErrEndOfSpan, 374 t: remove, 375 }, 376 12: { 377 szDst: large, 378 atEOF: false, 379 in: "\xc2", 380 out: "", 381 outFull: "\ufffd", 382 err: transform.ErrShortSrc, 383 errSpan: transform.ErrShortSrc, 384 t: remove, 385 }, 386 13: { 387 szDst: large, 388 atEOF: true, 389 in: "Hello \U00012345world!", 390 out: "Hll wrld!", 391 outFull: "Hll wrld!", 392 errSpan: transform.ErrEndOfSpan, 393 t: remove, 394 }, 395 14: { 396 szDst: large, 397 atEOF: true, 398 in: "Hello\x80 w\x80orl\xc0d!\xc0", 399 out: "Hll\ufffd w\ufffdrl\ufffdd!\ufffd", 400 outFull: "Hll\ufffd w\ufffdrl\ufffdd!\ufffd", 401 errSpan: transform.ErrEndOfSpan, 402 t: remove, 403 }, 404 15: { 405 szDst: large, 406 atEOF: false, 407 in: "Hello\x80 w\x80orl\xc0d!\xc2", 408 out: "Hll\ufffd w\ufffdrl\ufffdd!", 409 outFull: "Hll\ufffd w\ufffdrl\ufffdd!\ufffd", 410 err: transform.ErrShortSrc, 411 errSpan: transform.ErrEndOfSpan, 412 t: remove, 413 }, 414 16: { 415 szDst: large, 416 atEOF: false, 417 in: "Hel\ufffdlo\x80 w\x80orl\xc0d!\xc2", 418 out: "Hello world!", 419 outFull: "Hello world!", 420 err: transform.ErrShortSrc, 421 errSpan: transform.ErrEndOfSpan, 422 t: Remove(Predicate(func(r rune) bool { return r == utf8.RuneError })), 423 }, 424 17: { 425 szDst: 4, 426 atEOF: true, 427 in: "Hellø", 428 out: "Hll", 429 outFull: "Hllø", 430 err: transform.ErrShortDst, 431 errSpan: transform.ErrEndOfSpan, 432 t: remove, 433 }, 434 18: { 435 szDst: 4, 436 atEOF: false, 437 in: "Hellø", 438 out: "Hll", 439 outFull: "Hllø", 440 err: transform.ErrShortDst, 441 errSpan: transform.ErrEndOfSpan, 442 t: remove, 443 }, 444 19: { 445 szDst: 8, 446 atEOF: false, 447 in: "\x80Hello\uFF24\x80", 448 out: "\ufffdHll", 449 outFull: "\ufffdHll\ufffd", 450 err: transform.ErrShortDst, 451 errSpan: transform.ErrEndOfSpan, 452 t: remove, 453 }, 454 20: { 455 szDst: 8, 456 atEOF: false, 457 in: "Hllll", 458 out: "Hllll", 459 outFull: "Hllll", 460 t: remove, 461 }} { 462 tt.check(t, i) 463 } 464 } 465 466 func TestReplaceIllFormed(t *testing.T) { 467 replace := ReplaceIllFormed() 468 469 for i, tt := range []transformTest{ 470 0: { 471 szDst: large, 472 atEOF: true, 473 in: "", 474 out: "", 475 outFull: "", 476 t: replace, 477 }, 478 1: { 479 szDst: 1, 480 atEOF: true, 481 in: "aa", 482 out: "a", 483 outFull: "aa", 484 err: transform.ErrShortDst, 485 t: replace, 486 }, 487 2: { 488 szDst: 1, 489 atEOF: true, 490 in: "a\x80", 491 out: "a", 492 outFull: "a\ufffd", 493 err: transform.ErrShortDst, 494 errSpan: transform.ErrEndOfSpan, 495 t: replace, 496 }, 497 3: { 498 szDst: 1, 499 atEOF: true, 500 in: "a\xc2", 501 out: "a", 502 outFull: "a\ufffd", 503 err: transform.ErrShortDst, 504 errSpan: transform.ErrEndOfSpan, 505 t: replace, 506 }, 507 4: { 508 szDst: large, 509 atEOF: true, 510 in: "\x80", 511 out: "\ufffd", 512 outFull: "\ufffd", 513 errSpan: transform.ErrEndOfSpan, 514 t: replace, 515 }, 516 5: { 517 szDst: large, 518 atEOF: false, 519 in: "\x80", 520 out: "\ufffd", 521 outFull: "\ufffd", 522 errSpan: transform.ErrEndOfSpan, 523 t: replace, 524 }, 525 6: { 526 szDst: large, 527 atEOF: true, 528 in: "\xc2", 529 out: "\ufffd", 530 outFull: "\ufffd", 531 errSpan: transform.ErrEndOfSpan, 532 t: replace, 533 }, 534 7: { 535 szDst: large, 536 atEOF: false, 537 in: "\xc2", 538 out: "", 539 outFull: "\ufffd", 540 err: transform.ErrShortSrc, 541 errSpan: transform.ErrShortSrc, 542 t: replace, 543 }, 544 8: { 545 szDst: large, 546 atEOF: true, 547 in: "Hello world!", 548 out: "Hello world!", 549 outFull: "Hello world!", 550 t: replace, 551 }, 552 9: { 553 szDst: large, 554 atEOF: true, 555 in: "Hello\x80 w\x80orl\xc2d!\xc2", 556 out: "Hello\ufffd w\ufffdorl\ufffdd!\ufffd", 557 outFull: "Hello\ufffd w\ufffdorl\ufffdd!\ufffd", 558 errSpan: transform.ErrEndOfSpan, 559 t: replace, 560 }, 561 10: { 562 szDst: large, 563 atEOF: false, 564 in: "Hello\x80 w\x80orl\xc2d!\xc2", 565 out: "Hello\ufffd w\ufffdorl\ufffdd!", 566 outFull: "Hello\ufffd w\ufffdorl\ufffdd!\ufffd", 567 err: transform.ErrShortSrc, 568 errSpan: transform.ErrEndOfSpan, 569 t: replace, 570 }, 571 16: { 572 szDst: 10, 573 atEOF: false, 574 in: "\x80Hello\x80", 575 out: "\ufffdHello", 576 outFull: "\ufffdHello\ufffd", 577 err: transform.ErrShortDst, 578 errSpan: transform.ErrEndOfSpan, 579 t: replace, 580 }, 581 17: { 582 szDst: 10, 583 atEOF: false, 584 in: "\ufffdHello\ufffd", 585 out: "\ufffdHello", 586 outFull: "\ufffdHello\ufffd", 587 err: transform.ErrShortDst, 588 t: replace, 589 }, 590 } { 591 tt.check(t, i) 592 } 593 } 594 595 func TestMapAlloc(t *testing.T) { 596 if n := testtext.AllocsPerRun(3, func() { 597 Map(idem).Transform(nil, nil, false) 598 }); n > 0 { 599 t.Errorf("got %f; want 0", n) 600 } 601 } 602 603 func rmNop(r rune) bool { return false } 604 605 func TestRemoveAlloc(t *testing.T) { 606 if n := testtext.AllocsPerRun(3, func() { 607 Remove(Predicate(rmNop)).Transform(nil, nil, false) 608 }); n > 0 { 609 t.Errorf("got %f; want 0", n) 610 } 611 } 612 613 func TestReplaceIllFormedAlloc(t *testing.T) { 614 if n := testtext.AllocsPerRun(3, func() { 615 ReplaceIllFormed().Transform(nil, nil, false) 616 }); n > 0 { 617 t.Errorf("got %f; want 0", n) 618 } 619 } 620 621 func doBench(b *testing.B, t Transformer) { 622 for _, bc := range []struct{ name, data string }{ 623 {"ascii", testtext.ASCII}, 624 {"3byte", testtext.ThreeByteUTF8}, 625 } { 626 dst := make([]byte, 2*len(bc.data)) 627 src := []byte(bc.data) 628 629 testtext.Bench(b, bc.name+"/transform", func(b *testing.B) { 630 b.SetBytes(int64(len(src))) 631 for i := 0; i < b.N; i++ { 632 t.Transform(dst, src, true) 633 } 634 }) 635 src = t.Bytes(src) 636 t.Reset() 637 testtext.Bench(b, bc.name+"/span", func(b *testing.B) { 638 b.SetBytes(int64(len(src))) 639 for i := 0; i < b.N; i++ { 640 t.Span(src, true) 641 } 642 }) 643 } 644 } 645 646 func BenchmarkRemove(b *testing.B) { 647 doBench(b, Remove(Predicate(func(r rune) bool { return r == 'e' }))) 648 } 649 650 func BenchmarkMapAll(b *testing.B) { 651 doBench(b, Map(func(r rune) rune { return 'a' })) 652 } 653 654 func BenchmarkMapNone(b *testing.B) { 655 doBench(b, Map(func(r rune) rune { return r })) 656 } 657 658 func BenchmarkReplaceIllFormed(b *testing.B) { 659 doBench(b, ReplaceIllFormed()) 660 } 661 662 var ( 663 input = strings.Repeat("Thé qüick brøwn føx jumps øver the lazy døg. ", 100) 664 )