github.com/varialus/godfly@v0.0.0-20130904042352-1934f9f095ab/src/pkg/unicode/letter_test.go (about) 1 // Copyright 2009 The Go Authors. All rights reserved. 2 // Use of this source code is governed by a BSD-style 3 // license that can be found in the LICENSE file. 4 5 package unicode_test 6 7 import ( 8 "flag" 9 "fmt" 10 "runtime" 11 "sort" 12 "testing" 13 . "unicode" 14 ) 15 16 var upperTest = []rune{ 17 0x41, 18 0xc0, 19 0xd8, 20 0x100, 21 0x139, 22 0x14a, 23 0x178, 24 0x181, 25 0x376, 26 0x3cf, 27 0x1f2a, 28 0x2102, 29 0x2c00, 30 0x2c10, 31 0x2c20, 32 0xa650, 33 0xa722, 34 0xff3a, 35 0x10400, 36 0x1d400, 37 0x1d7ca, 38 } 39 40 var notupperTest = []rune{ 41 0x40, 42 0x5b, 43 0x61, 44 0x185, 45 0x1b0, 46 0x377, 47 0x387, 48 0x2150, 49 0xffff, 50 0x10000, 51 } 52 53 var letterTest = []rune{ 54 0x41, 55 0x61, 56 0xaa, 57 0xba, 58 0xc8, 59 0xdb, 60 0xf9, 61 0x2ec, 62 0x535, 63 0x620, 64 0x6e6, 65 0x93d, 66 0xa15, 67 0xb99, 68 0xdc0, 69 0xedd, 70 0x1000, 71 0x1200, 72 0x1312, 73 0x1401, 74 0x1885, 75 0x2c00, 76 0xa800, 77 0xf900, 78 0xfa30, 79 0xffda, 80 0xffdc, 81 0x10000, 82 0x10300, 83 0x10400, 84 0x20000, 85 0x2f800, 86 0x2fa1d, 87 } 88 89 var notletterTest = []rune{ 90 0x20, 91 0x35, 92 0x375, 93 0x619, 94 0x700, 95 0xfffe, 96 0x1ffff, 97 0x10ffff, 98 } 99 100 // Contains all the special cased Latin-1 chars. 
101 var spaceTest = []rune{ 102 0x09, 103 0x0a, 104 0x0b, 105 0x0c, 106 0x0d, 107 0x20, 108 0x85, 109 0xA0, 110 0x2000, 111 0x3000, 112 } 113 114 type caseT struct { 115 cas int 116 in, out rune 117 } 118 119 var caseTest = []caseT{ 120 // errors 121 {-1, '\n', 0xFFFD}, 122 {UpperCase, -1, -1}, 123 {UpperCase, 1 << 30, 1 << 30}, 124 125 // ASCII (special-cased so test carefully) 126 {UpperCase, '\n', '\n'}, 127 {UpperCase, 'a', 'A'}, 128 {UpperCase, 'A', 'A'}, 129 {UpperCase, '7', '7'}, 130 {LowerCase, '\n', '\n'}, 131 {LowerCase, 'a', 'a'}, 132 {LowerCase, 'A', 'a'}, 133 {LowerCase, '7', '7'}, 134 {TitleCase, '\n', '\n'}, 135 {TitleCase, 'a', 'A'}, 136 {TitleCase, 'A', 'A'}, 137 {TitleCase, '7', '7'}, 138 139 // Latin-1: easy to read the tests! 140 {UpperCase, 0x80, 0x80}, 141 {UpperCase, 'Å', 'Å'}, 142 {UpperCase, 'å', 'Å'}, 143 {LowerCase, 0x80, 0x80}, 144 {LowerCase, 'Å', 'å'}, 145 {LowerCase, 'å', 'å'}, 146 {TitleCase, 0x80, 0x80}, 147 {TitleCase, 'Å', 'Å'}, 148 {TitleCase, 'å', 'Å'}, 149 150 // 0131;LATIN SMALL LETTER DOTLESS I;Ll;0;L;;;;;N;;;0049;;0049 151 {UpperCase, 0x0131, 'I'}, 152 {LowerCase, 0x0131, 0x0131}, 153 {TitleCase, 0x0131, 'I'}, 154 155 // 0133;LATIN SMALL LIGATURE IJ;Ll;0;L;<compat> 0069 006A;;;;N;LATIN SMALL LETTER I J;;0132;;0132 156 {UpperCase, 0x0133, 0x0132}, 157 {LowerCase, 0x0133, 0x0133}, 158 {TitleCase, 0x0133, 0x0132}, 159 160 // 212A;KELVIN SIGN;Lu;0;L;004B;;;;N;DEGREES KELVIN;;;006B; 161 {UpperCase, 0x212A, 0x212A}, 162 {LowerCase, 0x212A, 'k'}, 163 {TitleCase, 0x212A, 0x212A}, 164 165 // From an UpperLower sequence 166 // A640;CYRILLIC CAPITAL LETTER ZEMLYA;Lu;0;L;;;;;N;;;;A641; 167 {UpperCase, 0xA640, 0xA640}, 168 {LowerCase, 0xA640, 0xA641}, 169 {TitleCase, 0xA640, 0xA640}, 170 // A641;CYRILLIC SMALL LETTER ZEMLYA;Ll;0;L;;;;;N;;;A640;;A640 171 {UpperCase, 0xA641, 0xA640}, 172 {LowerCase, 0xA641, 0xA641}, 173 {TitleCase, 0xA641, 0xA640}, 174 // A64E;CYRILLIC CAPITAL LETTER NEUTRAL YER;Lu;0;L;;;;;N;;;;A64F; 175 {UpperCase, 
0xA64E, 0xA64E}, 176 {LowerCase, 0xA64E, 0xA64F}, 177 {TitleCase, 0xA64E, 0xA64E}, 178 // A65F;CYRILLIC SMALL LETTER YN;Ll;0;L;;;;;N;;;A65E;;A65E 179 {UpperCase, 0xA65F, 0xA65E}, 180 {LowerCase, 0xA65F, 0xA65F}, 181 {TitleCase, 0xA65F, 0xA65E}, 182 183 // From another UpperLower sequence 184 // 0139;LATIN CAPITAL LETTER L WITH ACUTE;Lu;0;L;004C 0301;;;;N;LATIN CAPITAL LETTER L ACUTE;;;013A; 185 {UpperCase, 0x0139, 0x0139}, 186 {LowerCase, 0x0139, 0x013A}, 187 {TitleCase, 0x0139, 0x0139}, 188 // 013F;LATIN CAPITAL LETTER L WITH MIDDLE DOT;Lu;0;L;<compat> 004C 00B7;;;;N;;;;0140; 189 {UpperCase, 0x013f, 0x013f}, 190 {LowerCase, 0x013f, 0x0140}, 191 {TitleCase, 0x013f, 0x013f}, 192 // 0148;LATIN SMALL LETTER N WITH CARON;Ll;0;L;006E 030C;;;;N;LATIN SMALL LETTER N HACEK;;0147;;0147 193 {UpperCase, 0x0148, 0x0147}, 194 {LowerCase, 0x0148, 0x0148}, 195 {TitleCase, 0x0148, 0x0147}, 196 197 // Last block in the 5.1.0 table 198 // 10400;DESERET CAPITAL LETTER LONG I;Lu;0;L;;;;;N;;;;10428; 199 {UpperCase, 0x10400, 0x10400}, 200 {LowerCase, 0x10400, 0x10428}, 201 {TitleCase, 0x10400, 0x10400}, 202 // 10427;DESERET CAPITAL LETTER EW;Lu;0;L;;;;;N;;;;1044F; 203 {UpperCase, 0x10427, 0x10427}, 204 {LowerCase, 0x10427, 0x1044F}, 205 {TitleCase, 0x10427, 0x10427}, 206 // 10428;DESERET SMALL LETTER LONG I;Ll;0;L;;;;;N;;;10400;;10400 207 {UpperCase, 0x10428, 0x10400}, 208 {LowerCase, 0x10428, 0x10428}, 209 {TitleCase, 0x10428, 0x10400}, 210 // 1044F;DESERET SMALL LETTER EW;Ll;0;L;;;;;N;;;10427;;10427 211 {UpperCase, 0x1044F, 0x10427}, 212 {LowerCase, 0x1044F, 0x1044F}, 213 {TitleCase, 0x1044F, 0x10427}, 214 215 // First one not in the 5.1.0 table 216 // 10450;SHAVIAN LETTER PEEP;Lo;0;L;;;;;N;;;;; 217 {UpperCase, 0x10450, 0x10450}, 218 {LowerCase, 0x10450, 0x10450}, 219 {TitleCase, 0x10450, 0x10450}, 220 221 // Non-letters with case. 
222 {LowerCase, 0x2161, 0x2171}, 223 {UpperCase, 0x0345, 0x0399}, 224 } 225 226 func TestIsLetter(t *testing.T) { 227 for _, r := range upperTest { 228 if !IsLetter(r) { 229 t.Errorf("IsLetter(U+%04X) = false, want true", r) 230 } 231 } 232 for _, r := range letterTest { 233 if !IsLetter(r) { 234 t.Errorf("IsLetter(U+%04X) = false, want true", r) 235 } 236 } 237 for _, r := range notletterTest { 238 if IsLetter(r) { 239 t.Errorf("IsLetter(U+%04X) = true, want false", r) 240 } 241 } 242 } 243 244 func TestIsUpper(t *testing.T) { 245 for _, r := range upperTest { 246 if !IsUpper(r) { 247 t.Errorf("IsUpper(U+%04X) = false, want true", r) 248 } 249 } 250 for _, r := range notupperTest { 251 if IsUpper(r) { 252 t.Errorf("IsUpper(U+%04X) = true, want false", r) 253 } 254 } 255 for _, r := range notletterTest { 256 if IsUpper(r) { 257 t.Errorf("IsUpper(U+%04X) = true, want false", r) 258 } 259 } 260 } 261 262 func caseString(c int) string { 263 switch c { 264 case UpperCase: 265 return "UpperCase" 266 case LowerCase: 267 return "LowerCase" 268 case TitleCase: 269 return "TitleCase" 270 } 271 return "ErrorCase" 272 } 273 274 func TestTo(t *testing.T) { 275 for _, c := range caseTest { 276 r := To(c.cas, c.in) 277 if c.out != r { 278 t.Errorf("To(U+%04X, %s) = U+%04X want U+%04X", c.in, caseString(c.cas), r, c.out) 279 } 280 } 281 } 282 283 func TestToUpperCase(t *testing.T) { 284 for _, c := range caseTest { 285 if c.cas != UpperCase { 286 continue 287 } 288 r := ToUpper(c.in) 289 if c.out != r { 290 t.Errorf("ToUpper(U+%04X) = U+%04X want U+%04X", c.in, r, c.out) 291 } 292 } 293 } 294 295 func TestToLowerCase(t *testing.T) { 296 for _, c := range caseTest { 297 if c.cas != LowerCase { 298 continue 299 } 300 r := ToLower(c.in) 301 if c.out != r { 302 t.Errorf("ToLower(U+%04X) = U+%04X want U+%04X", c.in, r, c.out) 303 } 304 } 305 } 306 307 func TestToTitleCase(t *testing.T) { 308 for _, c := range caseTest { 309 if c.cas != TitleCase { 310 continue 311 } 312 r := 
ToTitle(c.in) 313 if c.out != r { 314 t.Errorf("ToTitle(U+%04X) = U+%04X want U+%04X", c.in, r, c.out) 315 } 316 } 317 } 318 319 func TestIsSpace(t *testing.T) { 320 for _, c := range spaceTest { 321 if !IsSpace(c) { 322 t.Errorf("IsSpace(U+%04X) = false; want true", c) 323 } 324 } 325 for _, c := range letterTest { 326 if IsSpace(c) { 327 t.Errorf("IsSpace(U+%04X) = true; want false", c) 328 } 329 } 330 } 331 332 // Check that the optimizations for IsLetter etc. agree with the tables. 333 // We only need to check the Latin-1 range. 334 func TestLetterOptimizations(t *testing.T) { 335 for i := rune(0); i <= MaxLatin1; i++ { 336 if Is(Letter, i) != IsLetter(i) { 337 t.Errorf("IsLetter(U+%04X) disagrees with Is(Letter)", i) 338 } 339 if Is(Upper, i) != IsUpper(i) { 340 t.Errorf("IsUpper(U+%04X) disagrees with Is(Upper)", i) 341 } 342 if Is(Lower, i) != IsLower(i) { 343 t.Errorf("IsLower(U+%04X) disagrees with Is(Lower)", i) 344 } 345 if Is(Title, i) != IsTitle(i) { 346 t.Errorf("IsTitle(U+%04X) disagrees with Is(Title)", i) 347 } 348 if Is(White_Space, i) != IsSpace(i) { 349 t.Errorf("IsSpace(U+%04X) disagrees with Is(White_Space)", i) 350 } 351 if To(UpperCase, i) != ToUpper(i) { 352 t.Errorf("ToUpper(U+%04X) disagrees with To(Upper)", i) 353 } 354 if To(LowerCase, i) != ToLower(i) { 355 t.Errorf("ToLower(U+%04X) disagrees with To(Lower)", i) 356 } 357 if To(TitleCase, i) != ToTitle(i) { 358 t.Errorf("ToTitle(U+%04X) disagrees with To(Title)", i) 359 } 360 } 361 } 362 363 func TestTurkishCase(t *testing.T) { 364 lower := []rune("abcçdefgğhıijklmnoöprsştuüvyz") 365 upper := []rune("ABCÇDEFGĞHIİJKLMNOÖPRSŞTUÜVYZ") 366 for i, l := range lower { 367 u := upper[i] 368 if TurkishCase.ToLower(l) != l { 369 t.Errorf("lower(U+%04X) is U+%04X not U+%04X", l, TurkishCase.ToLower(l), l) 370 } 371 if TurkishCase.ToUpper(u) != u { 372 t.Errorf("upper(U+%04X) is U+%04X not U+%04X", u, TurkishCase.ToUpper(u), u) 373 } 374 if TurkishCase.ToUpper(l) != u { 375 t.Errorf("upper(U+%04X) 
is U+%04X not U+%04X", l, TurkishCase.ToUpper(l), u) 376 } 377 if TurkishCase.ToLower(u) != l { 378 t.Errorf("lower(U+%04X) is U+%04X not U+%04X", u, TurkishCase.ToLower(l), l) 379 } 380 if TurkishCase.ToTitle(u) != u { 381 t.Errorf("title(U+%04X) is U+%04X not U+%04X", u, TurkishCase.ToTitle(u), u) 382 } 383 if TurkishCase.ToTitle(l) != u { 384 t.Errorf("title(U+%04X) is U+%04X not U+%04X", l, TurkishCase.ToTitle(l), u) 385 } 386 } 387 } 388 389 var simpleFoldTests = []string{ 390 // SimpleFold could order its returned slices in any order it wants, 391 // but we know it orders them in increasing order starting at in 392 // and looping around from MaxRune to 0. 393 394 // Easy cases. 395 "Aa", 396 "aA", 397 "δΔ", 398 "Δδ", 399 400 // ASCII special cases. 401 "KkK", 402 "kKK", 403 "KKk", 404 "Ssſ", 405 "sſS", 406 "ſSs", 407 408 // Non-ASCII special cases. 409 "ρϱΡ", 410 "ϱΡρ", 411 "Ρρϱ", 412 "ͅΙιι", 413 "Ιιιͅ", 414 "ιιͅΙ", 415 "ιͅΙι", 416 417 // Extra special cases: has lower/upper but no case fold. 418 "İ", 419 "ı", 420 } 421 422 func TestSimpleFold(t *testing.T) { 423 for _, tt := range simpleFoldTests { 424 cycle := []rune(tt) 425 r := cycle[len(cycle)-1] 426 for _, out := range cycle { 427 if r := SimpleFold(r); r != out { 428 t.Errorf("SimpleFold(%#U) = %#U, want %#U", r, r, out) 429 } 430 r = out 431 } 432 } 433 } 434 435 // Running 'go test -calibrate' runs the calibration to find a plausible 436 // cutoff point for linear search of a range list vs. binary search. 437 // We create a fake table and then time how long it takes to do a 438 // sequence of searches within that table, for all possible inputs 439 // relative to the ranges (something before all, in each, between each, after all). 440 // This assumes that all possible runes are equally likely. 441 // In practice most runes are ASCII so this is a conservative estimate 442 // of an effective cutoff value. In practice we could probably set it higher 443 // than what this function recommends. 
var calibrate = flag.Bool("calibrate", false, "compute crossover for linear vs. binary search")

// TestCalibrate does nothing unless the -calibrate flag is set. When it
// is, the test benchmarks linear and binary over fake tables of growing
// size and prints the smallest size at which binary search wins.
func TestCalibrate(t *testing.T) {
	if !*calibrate {
		return
	}

	if runtime.GOARCH == "amd64" {
		fmt.Printf("warning: running calibration on %s\n", runtime.GOARCH)
	}

	// Find the point where binary search wins by more than 10%.
	// The 10% bias gives linear search an edge when they're close,
	// because on predominantly ASCII inputs linear search is even
	// better than our benchmarks measure.
	cutoff := sort.Search(64, func(size int) bool {
		table := fakeTable(size)

		// Each benchmark probes every value from 0 through size*5+20, which
		// covers values below, inside, between and above the fake ranges.
		benchLinear := func(b *testing.B) {
			local := table
			last := size*5 + 20
			for iter := 0; iter < b.N; iter++ {
				for probe := 0; probe <= last; probe++ {
					linear(local, uint16(probe))
				}
			}
		}
		benchBinary := func(b *testing.B) {
			local := table
			last := size*5 + 20
			for iter := 0; iter < b.N; iter++ {
				for probe := 0; probe <= last; probe++ {
					binary(local, uint16(probe))
				}
			}
		}

		linearResult := testing.Benchmark(benchLinear)
		binaryResult := testing.Benchmark(benchBinary)
		linearNs, binaryNs := linearResult.NsPerOp(), binaryResult.NsPerOp()
		fmt.Printf("n=%d: linear=%d binary=%d\n", size, linearNs, binaryNs)
		return linearNs*100 > binaryNs*110
	})
	fmt.Printf("calibration: linear cutoff = %d\n", cutoff)
}

// fakeTable builds n stride-1 ranges of three values each, spaced five
// apart: [10,12], [15,17], [20,22], and so on. It returns a nil slice
// when n is not positive.
func fakeTable(n int) []Range16 {
	var table []Range16
	for i := 0; i < n; i++ {
		lo := uint16(i*5 + 10)
		table = append(table, Range16{Lo: lo, Hi: lo + 2, Stride: 1})
	}
	return table
}

// linear reports whether r is a member of ranges, scanning from the
// front and stopping at the first range whose Lo exceeds r or whose
// [Lo, Hi] span contains r.
func linear(ranges []Range16, r uint16) bool {
	for i := range ranges {
		cur := &ranges[i]
		switch {
		case r < cur.Lo:
			return false
		case r <= cur.Hi:
			return (r-cur.Lo)%cur.Stride == 0
		}
	}
	return false
}

// binary reports whether r is a member of ranges, using binary search
// over the half-open index window [low, high).
func binary(ranges []Range16, r uint16) bool {
	low, high := 0, len(ranges)
	for low < high {
		mid := low + (high-low)/2
		cur := &ranges[mid]
		switch {
		case r < cur.Lo:
			high = mid
		case r > cur.Hi:
			low = mid + 1
		default:
			// cur.Lo <= r <= cur.Hi: membership depends only on the stride.
			return (r-cur.Lo)%cur.Stride == 0
		}
	}
	return false
}

// TestLatinOffset checks, for every table in the exported table maps,
// that LatinOffset equals the number of leading R16 entries whose Hi
// does not exceed MaxLatin1.
func TestLatinOffset(t *testing.T) {
	tableSets := []map[string]*RangeTable{
		Categories,
		FoldCategory,
		FoldScript,
		Properties,
		Scripts,
	}
	for _, set := range tableSets {
		for name, tab := range set {
			want := 0
			for _, entry := range tab.R16 {
				if entry.Hi > MaxLatin1 {
					break
				}
				want++
			}
			if tab.LatinOffset != want {
				t.Errorf("%s: LatinOffset=%d, want %d", name, tab.LatinOffset, want)
			}
		}
	}
}