golang.org/x/text@v0.14.0/collate/collate_test.go (about) 1 // Copyright 2012 The Go Authors. All rights reserved. 2 // Use of this source code is governed by a BSD-style 3 // license that can be found in the LICENSE file. 4 5 package collate 6 7 import ( 8 "bytes" 9 "testing" 10 11 "golang.org/x/text/internal/colltab" 12 "golang.org/x/text/language" 13 ) 14 15 type weightsTest struct { 16 opt opts 17 in, out ColElems 18 } 19 20 type opts struct { 21 lev int 22 alt alternateHandling 23 top int 24 25 backwards bool 26 caseLevel bool 27 } 28 29 // ignore returns an initialized boolean array based on the given Level. 30 // A negative value means using the default setting of quaternary. 31 func ignore(level colltab.Level) (ignore [colltab.NumLevels]bool) { 32 if level < 0 { 33 level = colltab.Quaternary 34 } 35 for i := range ignore { 36 ignore[i] = level < colltab.Level(i) 37 } 38 return ignore 39 } 40 41 func makeCE(w []int) colltab.Elem { 42 ce, err := colltab.MakeElem(w[0], w[1], w[2], uint8(w[3])) 43 if err != nil { 44 panic(err) 45 } 46 return ce 47 } 48 49 func (o opts) collator() *Collator { 50 c := &Collator{ 51 options: options{ 52 ignore: ignore(colltab.Level(o.lev - 1)), 53 alternate: o.alt, 54 backwards: o.backwards, 55 caseLevel: o.caseLevel, 56 variableTop: uint32(o.top), 57 }, 58 } 59 return c 60 } 61 62 const ( 63 maxQ = 0x1FFFFF 64 ) 65 66 func wpq(p, q int) Weights { 67 return W(p, defaults.Secondary, defaults.Tertiary, q) 68 } 69 70 func wsq(s, q int) Weights { 71 return W(0, s, defaults.Tertiary, q) 72 } 73 74 func wq(q int) Weights { 75 return W(0, 0, 0, q) 76 } 77 78 var zero = W(0, 0, 0, 0) 79 80 var processTests = []weightsTest{ 81 // Shifted 82 { // simple sequence of non-variables 83 opt: opts{alt: altShifted, top: 100}, 84 in: ColElems{W(200), W(300), W(400)}, 85 out: ColElems{wpq(200, maxQ), wpq(300, maxQ), wpq(400, maxQ)}, 86 }, 87 { // first is a variable 88 opt: opts{alt: altShifted, top: 250}, 89 in: ColElems{W(200), W(300), W(400)}, 90 out: ColElems{wq(200), wpq(300, maxQ), wpq(400, maxQ)}, 91 }, 92 { // all but first are variable 93 opt: opts{alt: altShifted, top: 999}, 94 in: ColElems{W(1000), W(200), W(300), W(400)}, 95 out: ColElems{wpq(1000, maxQ), wq(200), wq(300), wq(400)}, 96 }, 97 { // first is a modifier 98 opt: opts{alt: altShifted, top: 999}, 99 in: ColElems{W(0, 10), W(1000)}, 100 out: ColElems{wsq(10, maxQ), wpq(1000, maxQ)}, 101 }, 102 { // primary ignorables 103 opt: opts{alt: altShifted, top: 250}, 104 in: ColElems{W(200), W(0, 10), W(300), W(0, 15), W(400)}, 105 out: ColElems{wq(200), zero, wpq(300, maxQ), wsq(15, maxQ), wpq(400, maxQ)}, 106 }, 107 { // secondary ignorables 108 opt: opts{alt: altShifted, top: 250}, 109 in: ColElems{W(200), W(0, 0, 10), W(300), W(0, 0, 15), W(400)}, 110 out: ColElems{wq(200), zero, wpq(300, maxQ), W(0, 0, 15, maxQ), wpq(400, maxQ)}, 111 }, 112 { // tertiary ignorables, no change 113 opt: opts{alt: altShifted, top: 250}, 114 in: ColElems{W(200), zero, W(300), zero, W(400)}, 115 out: ColElems{wq(200), zero, wpq(300, maxQ), zero, wpq(400, maxQ)}, 116 }, 117 118 // ShiftTrimmed (same as Shifted) 119 { // simple sequence of non-variables 120 opt: opts{alt: altShiftTrimmed, top: 100}, 121 in: ColElems{W(200), W(300), W(400)}, 122 out: ColElems{wpq(200, maxQ), wpq(300, maxQ), wpq(400, maxQ)}, 123 }, 124 { // first is a variable 125 opt: opts{alt: altShiftTrimmed, top: 250}, 126 in: ColElems{W(200), W(300), W(400)}, 127 out: ColElems{wq(200), wpq(300, maxQ), wpq(400, maxQ)}, 128 }, 129 { // all but first are variable 130 opt: opts{alt: altShiftTrimmed, top: 999}, 131 in: ColElems{W(1000), W(200), W(300), W(400)}, 132 out: ColElems{wpq(1000, maxQ), wq(200), wq(300), wq(400)}, 133 }, 134 { // first is a modifier 135 opt: opts{alt: altShiftTrimmed, top: 999}, 136 in: ColElems{W(0, 10), W(1000)}, 137 out: ColElems{wsq(10, maxQ), wpq(1000, maxQ)}, 138 }, 139 { // primary ignorables 140 opt: opts{alt: altShiftTrimmed, top: 250}, 141 in: ColElems{W(200), W(0, 10), W(300), W(0, 15), W(400)}, 142 out: ColElems{wq(200), zero, wpq(300, maxQ), wsq(15, maxQ), wpq(400, maxQ)}, 143 }, 144 { // secondary ignorables 145 opt: opts{alt: altShiftTrimmed, top: 250}, 146 in: ColElems{W(200), W(0, 0, 10), W(300), W(0, 0, 15), W(400)}, 147 out: ColElems{wq(200), zero, wpq(300, maxQ), W(0, 0, 15, maxQ), wpq(400, maxQ)}, 148 }, 149 { // tertiary ignorables, no change 150 opt: opts{alt: altShiftTrimmed, top: 250}, 151 in: ColElems{W(200), zero, W(300), zero, W(400)}, 152 out: ColElems{wq(200), zero, wpq(300, maxQ), zero, wpq(400, maxQ)}, 153 }, 154 155 // Blanked 156 { // simple sequence of non-variables 157 opt: opts{alt: altBlanked, top: 100}, 158 in: ColElems{W(200), W(300), W(400)}, 159 out: ColElems{W(200), W(300), W(400)}, 160 }, 161 { // first is a variable 162 opt: opts{alt: altBlanked, top: 250}, 163 in: ColElems{W(200), W(300), W(400)}, 164 out: ColElems{zero, W(300), W(400)}, 165 }, 166 { // all but first are variable 167 opt: opts{alt: altBlanked, top: 999}, 168 in: ColElems{W(1000), W(200), W(300), W(400)}, 169 out: ColElems{W(1000), zero, zero, zero}, 170 }, 171 { // first is a modifier 172 opt: opts{alt: altBlanked, top: 999}, 173 in: ColElems{W(0, 10), W(1000)}, 174 out: ColElems{W(0, 10), W(1000)}, 175 }, 176 { // primary ignorables 177 opt: opts{alt: altBlanked, top: 250}, 178 in: ColElems{W(200), W(0, 10), W(300), W(0, 15), W(400)}, 179 out: ColElems{zero, zero, W(300), W(0, 15), W(400)}, 180 }, 181 { // secondary ignorables 182 opt: opts{alt: altBlanked, top: 250}, 183 in: ColElems{W(200), W(0, 0, 10), W(300), W(0, 0, 15), W(400)}, 184 out: ColElems{zero, zero, W(300), W(0, 0, 15), W(400)}, 185 }, 186 { // tertiary ignorables, no change 187 opt: opts{alt: altBlanked, top: 250}, 188 in: ColElems{W(200), zero, W(300), zero, W(400)}, 189 out: ColElems{zero, zero, W(300), zero, W(400)}, 190 }, 191 192 // Non-ignorable: input is always equal to output. 193 { // all but first are variable 194 opt: opts{alt: altNonIgnorable, top: 999}, 195 in: ColElems{W(1000), W(200), W(300), W(400)}, 196 out: ColElems{W(1000), W(200), W(300), W(400)}, 197 }, 198 { // primary ignorables 199 opt: opts{alt: altNonIgnorable, top: 250}, 200 in: ColElems{W(200), W(0, 10), W(300), W(0, 15), W(400)}, 201 out: ColElems{W(200), W(0, 10), W(300), W(0, 15), W(400)}, 202 }, 203 { // secondary ignorables 204 opt: opts{alt: altNonIgnorable, top: 250}, 205 in: ColElems{W(200), W(0, 0, 10), W(300), W(0, 0, 15), W(400)}, 206 out: ColElems{W(200), W(0, 0, 10), W(300), W(0, 0, 15), W(400)}, 207 }, 208 { // tertiary ignorables, no change 209 opt: opts{alt: altNonIgnorable, top: 250}, 210 in: ColElems{W(200), zero, W(300), zero, W(400)}, 211 out: ColElems{W(200), zero, W(300), zero, W(400)}, 212 }, 213 } 214 215 func TestProcessWeights(t *testing.T) { 216 for i, tt := range processTests { 217 in := convertFromWeights(tt.in) 218 out := convertFromWeights(tt.out) 219 processWeights(tt.opt.alt, uint32(tt.opt.top), in) 220 for j, w := range in { 221 if w != out[j] { 222 t.Errorf("%d: Weights %d was %v; want %v", i, j, w, out[j]) 223 } 224 } 225 } 226 } 227 228 type keyFromElemTest struct { 229 opt opts 230 in ColElems 231 out []byte 232 } 233 234 var defS = byte(defaults.Secondary) 235 var defT = byte(defaults.Tertiary) 236 237 const sep = 0 // separator byte 238 239 var keyFromElemTests = []keyFromElemTest{ 240 { // simple primary and secondary weights. 241 opts{alt: altShifted}, 242 ColElems{W(0x200), W(0x7FFF), W(0, 0x30), W(0x100)}, 243 []byte{0x2, 0, 0x7F, 0xFF, 0x1, 0x00, // primary 244 sep, sep, 0, defS, 0, defS, 0, 0x30, 0, defS, // secondary 245 sep, sep, defT, defT, defT, defT, // tertiary 246 sep, 0xFF, 0xFF, 0xFF, 0xFF, // quaternary 247 }, 248 }, 249 { // same as first, but with zero element that need to be removed 250 opts{alt: altShifted}, 251 ColElems{W(0x200), zero, W(0x7FFF), W(0, 0x30), zero, W(0x100)}, 252 []byte{0x2, 0, 0x7F, 0xFF, 0x1, 0x00, // primary 253 sep, sep, 0, defS, 0, defS, 0, 0x30, 0, defS, // secondary 254 sep, sep, defT, defT, defT, defT, // tertiary 255 sep, 0xFF, 0xFF, 0xFF, 0xFF, // quaternary 256 }, 257 }, 258 { // same as first, with large primary values 259 opts{alt: altShifted}, 260 ColElems{W(0x200), W(0x8000), W(0, 0x30), W(0x12345)}, 261 []byte{0x2, 0, 0x80, 0x80, 0x00, 0x81, 0x23, 0x45, // primary 262 sep, sep, 0, defS, 0, defS, 0, 0x30, 0, defS, // secondary 263 sep, sep, defT, defT, defT, defT, // tertiary 264 sep, 0xFF, 0xFF, 0xFF, 0xFF, // quaternary 265 }, 266 }, 267 { // same as first, but with the secondary level backwards 268 opts{alt: altShifted, backwards: true}, 269 ColElems{W(0x200), W(0x7FFF), W(0, 0x30), W(0x100)}, 270 []byte{0x2, 0, 0x7F, 0xFF, 0x1, 0x00, // primary 271 sep, sep, 0, defS, 0, 0x30, 0, defS, 0, defS, // secondary 272 sep, sep, defT, defT, defT, defT, // tertiary 273 sep, 0xFF, 0xFF, 0xFF, 0xFF, // quaternary 274 }, 275 }, 276 { // same as first, ignoring quaternary level 277 opts{alt: altShifted, lev: 3}, 278 ColElems{W(0x200), zero, W(0x7FFF), W(0, 0x30), zero, W(0x100)}, 279 []byte{0x2, 0, 0x7F, 0xFF, 0x1, 0x00, // primary 280 sep, sep, 0, defS, 0, defS, 0, 0x30, 0, defS, // secondary 281 sep, sep, defT, defT, defT, defT, // tertiary 282 }, 283 }, 284 { // same as first, ignoring tertiary level 285 opts{alt: altShifted, lev: 2}, 286 ColElems{W(0x200), zero, W(0x7FFF), W(0, 0x30), zero, W(0x100)}, 287 []byte{0x2, 0, 0x7F, 0xFF, 0x1, 0x00, // primary 288 sep, sep, 0, defS, 0, defS, 0, 0x30, 0, defS, // secondary 289 }, 290 }, 291 { // same as first, ignoring secondary level 292 opts{alt: altShifted, lev: 1}, 293 ColElems{W(0x200), zero, W(0x7FFF), W(0, 0x30), zero, W(0x100)}, 294 []byte{0x2, 0, 0x7F, 0xFF, 0x1, 0x00}, 295 }, 296 { // simple primary and secondary weights. 297 opts{alt: altShiftTrimmed, top: 0x250}, 298 ColElems{W(0x300), W(0x200), W(0x7FFF), W(0, 0x30), W(0x800)}, 299 []byte{0x3, 0, 0x7F, 0xFF, 0x8, 0x00, // primary 300 sep, sep, 0, defS, 0, defS, 0, 0x30, 0, defS, // secondary 301 sep, sep, defT, defT, defT, defT, // tertiary 302 sep, 0xFF, 0x2, 0, // quaternary 303 }, 304 }, 305 { // as first, primary with case level enabled 306 opts{alt: altShifted, lev: 1, caseLevel: true}, 307 ColElems{W(0x200), W(0x7FFF), W(0, 0x30), W(0x100)}, 308 []byte{0x2, 0, 0x7F, 0xFF, 0x1, 0x00, // primary 309 sep, sep, // secondary 310 sep, sep, defT, defT, defT, defT, // tertiary 311 }, 312 }, 313 } 314 315 func TestKeyFromElems(t *testing.T) { 316 buf := Buffer{} 317 for i, tt := range keyFromElemTests { 318 buf.Reset() 319 in := convertFromWeights(tt.in) 320 processWeights(tt.opt.alt, uint32(tt.opt.top), in) 321 tt.opt.collator().keyFromElems(&buf, in) 322 res := buf.key 323 if len(res) != len(tt.out) { 324 t.Errorf("%d: len(ws) was %d; want %d (%X should be %X)", i, len(res), len(tt.out), res, tt.out) 325 } 326 n := len(res) 327 if len(tt.out) < n { 328 n = len(tt.out) 329 } 330 for j, c := range res[:n] { 331 if c != tt.out[j] { 332 t.Errorf("%d: byte %d was %X; want %X", i, j, c, tt.out[j]) 333 } 334 } 335 } 336 } 337 338 func TestGetColElems(t *testing.T) { 339 for i, tt := range appendNextTests { 340 c, err := makeTable(tt.in) 341 if err != nil { 342 // error is reported in TestAppendNext 343 continue 344 } 345 // Create one large test per table 346 str := make([]byte, 0, 4000) 347 out := ColElems{} 348 for len(str) < 3000 { 349 for _, chk := range tt.chk { 350 str = append(str, chk.in[:chk.n]...) 351 out = append(out, chk.out...) 352 } 353 } 354 for j, chk := range append(tt.chk, check{string(str), len(str), out}) { 355 out := convertFromWeights(chk.out) 356 ce := c.getColElems([]byte(chk.in)[:chk.n]) 357 if len(ce) != len(out) { 358 t.Errorf("%d:%d: len(ws) was %d; want %d", i, j, len(ce), len(out)) 359 continue 360 } 361 cnt := 0 362 for k, w := range ce { 363 w, _ = colltab.MakeElem(w.Primary(), w.Secondary(), int(w.Tertiary()), 0) 364 if w != out[k] { 365 t.Errorf("%d:%d: Weights %d was %X; want %X", i, j, k, w, out[k]) 366 cnt++ 367 } 368 if cnt > 10 { 369 break 370 } 371 } 372 } 373 } 374 } 375 376 type keyTest struct { 377 in string 378 out []byte 379 } 380 381 var keyTests = []keyTest{ 382 {"abc", 383 []byte{0, 100, 0, 200, 1, 44, 0, 0, 0, 32, 0, 32, 0, 32, 0, 0, 2, 2, 2, 0, 255, 255, 255}, 384 }, 385 {"a\u0301", 386 []byte{0, 102, 0, 0, 0, 32, 0, 0, 2, 0, 255}, 387 }, 388 {"aaaaa", 389 []byte{0, 100, 0, 100, 0, 100, 0, 100, 0, 100, 0, 0, 390 0, 32, 0, 32, 0, 32, 0, 32, 0, 32, 0, 0, 391 2, 2, 2, 2, 2, 0, 392 255, 255, 255, 255, 255, 393 }, 394 }, 395 // Issue 16391: incomplete rune at end of UTF-8 sequence. 396 {"\xc2", []byte{133, 255, 253, 0, 0, 0, 32, 0, 0, 2, 0, 255}}, 397 {"\xc2a", []byte{133, 255, 253, 0, 100, 0, 0, 0, 32, 0, 32, 0, 0, 2, 2, 0, 255, 255}}, 398 } 399 400 func TestKey(t *testing.T) { 401 c, _ := makeTable(appendNextTests[4].in) 402 c.alternate = altShifted 403 c.ignore = ignore(colltab.Quaternary) 404 buf := Buffer{} 405 keys1 := [][]byte{} 406 keys2 := [][]byte{} 407 for _, tt := range keyTests { 408 keys1 = append(keys1, c.Key(&buf, []byte(tt.in))) 409 keys2 = append(keys2, c.KeyFromString(&buf, tt.in)) 410 } 411 // Separate generation from testing to ensure buffers are not overwritten. 412 for i, tt := range keyTests { 413 if !bytes.Equal(keys1[i], tt.out) { 414 t.Errorf("%d: Key(%q) = %d; want %d", i, tt.in, keys1[i], tt.out) 415 } 416 if !bytes.Equal(keys2[i], tt.out) { 417 t.Errorf("%d: KeyFromString(%q) = %d; want %d", i, tt.in, keys2[i], tt.out) 418 } 419 } 420 } 421 422 type compareTest struct { 423 a, b string 424 res int // comparison result 425 } 426 427 var compareTests = []compareTest{ 428 {"a\u0301", "a", 1}, 429 {"a\u0301b", "ab", 1}, 430 {"a", "a\u0301", -1}, 431 {"ab", "a\u0301b", -1}, 432 {"bc", "a\u0301c", 1}, 433 {"ab", "aB", -1}, 434 {"a\u0301", "a\u0301", 0}, 435 {"a", "a", 0}, 436 // Only clip prefixes of whole runes. 437 {"\u302E", "\u302F", 1}, 438 // Don't clip prefixes when last rune of prefix may be part of contraction. 439 {"a\u035E", "a\u0301\u035F", -1}, 440 {"a\u0301\u035Fb", "a\u0301\u035F", -1}, 441 } 442 443 func TestCompare(t *testing.T) { 444 c, _ := makeTable(appendNextTests[4].in) 445 for i, tt := range compareTests { 446 if res := c.Compare([]byte(tt.a), []byte(tt.b)); res != tt.res { 447 t.Errorf("%d: Compare(%q, %q) == %d; want %d", i, tt.a, tt.b, res, tt.res) 448 } 449 if res := c.CompareString(tt.a, tt.b); res != tt.res { 450 t.Errorf("%d: CompareString(%q, %q) == %d; want %d", i, tt.a, tt.b, res, tt.res) 451 } 452 } 453 } 454 455 func TestNumeric(t *testing.T) { 456 c := New(language.English, Loose, Numeric) 457 458 for i, tt := range []struct { 459 a, b string 460 want int 461 }{ 462 {"1", "2", -1}, 463 {"2", "12", -1}, 464 {"2", "12", -1}, // Fullwidth is sorted as usual. 465 {"₂", "₁₂", 1}, // Subscript is not sorted as numbers. 466 {"②", "①②", 1}, // Circled is not sorted as numbers. 467 { // Imperial Aramaic, is not sorted as number. 468 "\U00010859", 469 "\U00010858\U00010859", 470 1, 471 }, 472 {"12", "2", 1}, 473 {"A-1", "A-2", -1}, 474 {"A-2", "A-12", -1}, 475 {"A-12", "A-2", 1}, 476 {"A-0001", "A-1", 0}, 477 } { 478 if got := c.CompareString(tt.a, tt.b); got != tt.want { 479 t.Errorf("%d: CompareString(%s, %s) = %d; want %d", i, tt.a, tt.b, got, tt.want) 480 } 481 } 482 }