vitess.io/vitess@v0.16.2/go/mysql/collations/uca.go (about) 1 /* 2 Copyright 2021 The Vitess Authors. 3 4 Licensed under the Apache License, Version 2.0 (the "License"); 5 you may not use this file except in compliance with the License. 6 You may obtain a copy of the License at 7 8 http://www.apache.org/licenses/LICENSE-2.0 9 10 Unless required by applicable law or agreed to in writing, software 11 distributed under the License is distributed on an "AS IS" BASIS, 12 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 See the License for the specific language governing permissions and 14 limitations under the License. 15 */ 16 17 package collations 18 19 import ( 20 "bytes" 21 "math/bits" 22 "sync" 23 "unsafe" 24 25 "vitess.io/vitess/go/mysql/collations/internal/charset" 26 "vitess.io/vitess/go/mysql/collations/internal/uca" 27 ) 28 29 func init() { 30 register(&Collation_utf8mb4_0900_bin{}) 31 } 32 33 type Collation_utf8mb4_uca_0900 struct { 34 name string 35 id ID 36 37 weights uca.Weights 38 tailoring []uca.Patch 39 contract uca.Contractor 40 reorder []uca.Reorder 41 upperCaseFirst bool 42 levelsForCompare int 43 44 uca *uca.Collation900 45 ucainit sync.Once 46 } 47 48 func (c *Collation_utf8mb4_uca_0900) Init() { 49 c.ucainit.Do(func() { 50 c.uca = uca.NewCollation(c.name, c.weights, c.tailoring, c.reorder, c.contract, c.upperCaseFirst, c.levelsForCompare) 51 52 // Clear the external metadata for this collation, so it can be picked up by the GC 53 c.weights = nil 54 c.tailoring = nil 55 c.reorder = nil 56 }) 57 } 58 59 func (c *Collation_utf8mb4_uca_0900) Name() string { 60 return c.name 61 } 62 63 func (c *Collation_utf8mb4_uca_0900) ID() ID { 64 return c.id 65 } 66 67 func (c *Collation_utf8mb4_uca_0900) Charset() charset.Charset { 68 return charset.Charset_utf8mb4{} 69 } 70 71 func (c *Collation_utf8mb4_uca_0900) IsBinary() bool { 72 return false 73 } 74 75 func (c *Collation_utf8mb4_uca_0900) Collate(left, right []byte, rightIsPrefix bool) int { 76 var ( 77 l, r uint16 78 lok, rok bool 79 level int 80 levelsToCompare = c.levelsForCompare 81 itleft = c.uca.Iterator(left) 82 itright = c.uca.Iterator(right) 83 84 fastleft, _ = itleft.(*uca.FastIterator900) 85 fastright, _ = itright.(*uca.FastIterator900) 86 ) 87 88 defer itleft.Done() 89 defer itright.Done() 90 91 nextLevel: 92 if fastleft != nil { 93 for { 94 if cmp := fastleft.FastForward32(fastright); cmp != 0 { 95 return cmp 96 } 97 98 l, lok = fastleft.Next() 99 r, rok = fastright.Next() 100 101 if l != r || !lok || !rok { 102 break 103 } 104 if fastleft.Level() != level || fastright.Level() != level { 105 break 106 } 107 } 108 } else { 109 for { 110 l, lok = itleft.Next() 111 r, rok = itright.Next() 112 113 if l != r || !lok || !rok { 114 break 115 } 116 if itleft.Level() != level || itright.Level() != level { 117 break 118 } 119 } 120 } 121 122 switch { 123 case itleft.Level() == itright.Level(): 124 if l == r && lok && rok { 125 level++ 126 if level < levelsToCompare { 127 goto nextLevel 128 } 129 } 130 case itleft.Level() > level: 131 return -1 132 case itright.Level() > level: 133 if rightIsPrefix { 134 level = itleft.SkipLevel() 135 if level < levelsToCompare { 136 goto nextLevel 137 } 138 return -int(r) 139 } 140 return 1 141 } 142 143 return int(l) - int(r) 144 } 145 146 func (c *Collation_utf8mb4_uca_0900) WeightString(dst, src []byte, numCodepoints int) []byte { 147 it := c.uca.Iterator(src) 148 defer it.Done() 149 150 if fast, ok := it.(*uca.FastIterator900); ok { 151 var chunk [16]byte 152 for { 153 for cap(dst)-len(dst) >= 16 { 154 n := fast.NextWeightBlock64(dst[len(dst) : len(dst)+16]) 155 if n <= 0 { 156 goto performPadding 157 } 158 dst = dst[:len(dst)+n] 159 } 160 n := fast.NextWeightBlock64(chunk[:16]) 161 if n <= 0 { 162 goto performPadding 163 } 164 dst = append(dst, chunk[:n]...) 165 } 166 } else { 167 for { 168 w, ok := it.Next() 169 if !ok { 170 break 171 } 172 dst = append(dst, byte(w>>8), byte(w)) 173 } 174 } 175 176 performPadding: 177 if numCodepoints == PadToMax { 178 for len(dst) < cap(dst) { 179 dst = append(dst, 0x00) 180 } 181 } 182 183 return dst 184 } 185 186 func (c *Collation_utf8mb4_uca_0900) Hash(src []byte, _ int) HashCode { 187 var hash = uintptr(c.id) 188 189 it := c.uca.Iterator(src) 190 defer it.Done() 191 192 if fast, ok := it.(*uca.FastIterator900); ok { 193 var chunk [16]byte 194 var n int 195 for { 196 n = fast.NextWeightBlock64(chunk[:16]) 197 if n < 16 { 198 break 199 } 200 hash = memhash128(unsafe.Pointer(&chunk), hash) 201 } 202 return memhashraw(unsafe.Pointer(&chunk), hash, uintptr(n)) 203 } 204 205 for { 206 w, ok := it.Next() 207 if !ok { 208 break 209 } 210 hash = memhash16(bits.ReverseBytes16(w), hash) 211 } 212 return hash 213 } 214 215 func (c *Collation_utf8mb4_uca_0900) WeightStringLen(numBytes int) int { 216 if numBytes%4 != 0 { 217 panic("WeightStringLen called with non-MOD4 length") 218 } 219 levels := int(c.levelsForCompare) 220 weights := (numBytes / 4) * uca.MaxCollationElementsPerCodepoint * levels 221 weights += levels - 1 // one NULL byte as a separator between levels 222 return weights * 2 // two bytes per weight 223 } 224 225 func (c *Collation_utf8mb4_uca_0900) Wildcard(pat []byte, matchOne rune, matchMany rune, escape rune) WildcardPattern { 226 return newUnicodeWildcardMatcher(charset.Charset_utf8mb4{}, c.uca.WeightsEqual, c.Collate, pat, matchOne, matchMany, escape) 227 } 228 229 func (c *Collation_utf8mb4_uca_0900) ToLower(dst, src []byte) []byte { 230 dst = append(dst, bytes.ToLower(src)...) 231 return dst 232 } 233 234 func (c *Collation_utf8mb4_uca_0900) ToUpper(dst, src []byte) []byte { 235 dst = append(dst, bytes.ToUpper(src)...) 236 return dst 237 } 238 239 type Collation_utf8mb4_0900_bin struct{} 240 241 func (c *Collation_utf8mb4_0900_bin) Init() {} 242 243 func (c *Collation_utf8mb4_0900_bin) ID() ID { 244 return 309 245 } 246 247 func (c *Collation_utf8mb4_0900_bin) Name() string { 248 return "utf8mb4_0900_bin" 249 } 250 251 func (c *Collation_utf8mb4_0900_bin) Charset() charset.Charset { 252 return charset.Charset_utf8mb4{} 253 } 254 255 func (c *Collation_utf8mb4_0900_bin) IsBinary() bool { 256 return true 257 } 258 259 func (c *Collation_utf8mb4_0900_bin) Collate(left, right []byte, isPrefix bool) int { 260 return collationBinary(left, right, isPrefix) 261 } 262 263 func (c *Collation_utf8mb4_0900_bin) WeightString(dst, src []byte, numCodepoints int) []byte { 264 dst = append(dst, src...) 265 if numCodepoints == PadToMax { 266 for len(dst) < cap(dst) { 267 dst = append(dst, 0x0) 268 } 269 } 270 return dst 271 } 272 273 func (c *Collation_utf8mb4_0900_bin) Hash(src []byte, _ int) HashCode { 274 return memhash(src, 0xb900b900) 275 } 276 277 func (c *Collation_utf8mb4_0900_bin) WeightStringLen(numBytes int) int { 278 return numBytes 279 } 280 281 func (c *Collation_utf8mb4_0900_bin) Wildcard(pat []byte, matchOne rune, matchMany rune, escape rune) WildcardPattern { 282 equals := func(a, b rune) bool { 283 return a == b 284 } 285 return newUnicodeWildcardMatcher(charset.Charset_utf8mb4{}, equals, c.Collate, pat, matchOne, matchMany, escape) 286 } 287 288 func (c *Collation_utf8mb4_0900_bin) ToLower(dst, src []byte) []byte { 289 dst = append(dst, bytes.ToLower(src)...) 290 return dst 291 } 292 293 func (c *Collation_utf8mb4_0900_bin) ToUpper(dst, src []byte) []byte { 294 dst = append(dst, bytes.ToUpper(src)...) 295 return dst 296 } 297 298 type Collation_uca_legacy struct { 299 name string 300 id ID 301 302 charset charset.Charset 303 weights uca.Weights 304 tailoring []uca.Patch 305 contract uca.Contractor 306 maxCodepoint rune 307 308 uca *uca.CollationLegacy 309 ucainit sync.Once 310 } 311 312 func (c *Collation_uca_legacy) Init() { 313 c.ucainit.Do(func() { 314 c.uca = uca.NewCollationLegacy(c.charset, c.weights, c.tailoring, c.contract, c.maxCodepoint) 315 c.weights = nil 316 c.tailoring = nil 317 }) 318 } 319 320 func (c *Collation_uca_legacy) ID() ID { 321 return c.id 322 } 323 324 func (c *Collation_uca_legacy) Name() string { 325 return c.name 326 } 327 328 func (c *Collation_uca_legacy) Charset() charset.Charset { 329 return c.charset 330 } 331 332 func (c *Collation_uca_legacy) IsBinary() bool { 333 return false 334 } 335 336 func (c *Collation_uca_legacy) Collate(left, right []byte, isPrefix bool) int { 337 var ( 338 l, r uint16 339 lok, rok bool 340 itleft = c.uca.Iterator(left) 341 itright = c.uca.Iterator(right) 342 ) 343 344 defer itleft.Done() 345 defer itright.Done() 346 347 for { 348 l, lok = itleft.Next() 349 r, rok = itright.Next() 350 351 if l == r && lok && rok { 352 continue 353 } 354 if !rok && isPrefix { 355 return 0 356 } 357 return int(l) - int(r) 358 } 359 } 360 361 func (c *Collation_uca_legacy) WeightString(dst, src []byte, numCodepoints int) []byte { 362 it := c.uca.Iterator(src) 363 defer it.Done() 364 365 for { 366 w, ok := it.Next() 367 if !ok { 368 break 369 } 370 dst = append(dst, byte(w>>8), byte(w)) 371 } 372 373 if numCodepoints > 0 { 374 weightForSpace := c.uca.WeightForSpace() 375 w1, w2 := byte(weightForSpace>>8), byte(weightForSpace) 376 377 if numCodepoints == PadToMax { 378 for len(dst)+1 < cap(dst) { 379 dst = append(dst, w1, w2) 380 } 381 if len(dst) < cap(dst) { 382 dst = append(dst, w1) 383 } 384 } else { 385 numCodepoints -= it.Length() 386 for numCodepoints > 0 { 387 dst = append(dst, w1, w2) 388 numCodepoints-- 389 } 390 } 391 } 392 393 return dst 394 } 395 396 func (c *Collation_uca_legacy) Hash(src []byte, numCodepoints int) HashCode { 397 it := c.uca.Iterator(src) 398 defer it.Done() 399 400 var hash = uintptr(c.id) 401 for { 402 w, ok := it.Next() 403 if !ok { 404 break 405 } 406 hash = memhash16(bits.ReverseBytes16(w), hash) 407 } 408 409 if numCodepoints > 0 { 410 weightForSpace := bits.ReverseBytes16(c.uca.WeightForSpace()) 411 numCodepoints -= it.Length() 412 for numCodepoints > 0 { 413 hash = memhash16(weightForSpace, hash) 414 numCodepoints-- 415 } 416 } 417 418 return hash 419 } 420 421 func (c *Collation_uca_legacy) WeightStringLen(numBytes int) int { 422 // TODO: This is literally the worst case scenario. Improve on this. 423 return numBytes * 8 424 } 425 426 func (c *Collation_uca_legacy) Wildcard(pat []byte, matchOne rune, matchMany rune, escape rune) WildcardPattern { 427 return newUnicodeWildcardMatcher(c.charset, c.uca.WeightsEqual, c.Collate, pat, matchOne, matchMany, escape) 428 }