vitess.io/vitess@v0.16.2/go/mysql/collations/unicode.go (about) 1 /* 2 Copyright 2021 The Vitess Authors. 3 4 Licensed under the Apache License, Version 2.0 (the "License"); 5 you may not use this file except in compliance with the License. 6 You may obtain a copy of the License at 7 8 http://www.apache.org/licenses/LICENSE-2.0 9 10 Unless required by applicable law or agreed to in writing, software 11 distributed under the License is distributed on an "AS IS" BASIS, 12 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 See the License for the specific language governing permissions and 14 limitations under the License. 15 */ 16 17 package collations 18 19 import ( 20 "bytes" 21 "math" 22 "math/bits" 23 24 "vitess.io/vitess/go/mysql/collations/internal/charset" 25 ) 26 27 type Collation_unicode_general_ci struct { 28 id ID 29 name string 30 unicase *UnicaseInfo 31 charset charset.Charset 32 } 33 34 func (c *Collation_unicode_general_ci) Init() {} 35 36 func (c *Collation_unicode_general_ci) ID() ID { 37 return c.id 38 } 39 40 func (c *Collation_unicode_general_ci) Name() string { 41 return c.name 42 } 43 44 func (c *Collation_unicode_general_ci) Charset() charset.Charset { 45 return c.charset 46 } 47 48 func (c *Collation_unicode_general_ci) IsBinary() bool { 49 return false 50 } 51 52 func (c *Collation_unicode_general_ci) Collate(left, right []byte, isPrefix bool) int { 53 unicaseInfo := c.unicase 54 cs := c.charset 55 56 for len(left) > 0 && len(right) > 0 { 57 l, lWidth := cs.DecodeRune(left) 58 r, rWidth := cs.DecodeRune(right) 59 60 if (l == charset.RuneError && lWidth < 3) || (r == charset.RuneError && rWidth < 3) { 61 return bytes.Compare(left, right) 62 } 63 64 lRune := unicaseInfo.unicodeSort(l) 65 rRune := unicaseInfo.unicodeSort(r) 66 67 if lRune > rRune { 68 return 1 69 } else if lRune < rRune { 70 return -1 71 } 72 73 left = left[lWidth:] 74 right = right[rWidth:] 75 } 76 if isPrefix { 77 return len(right) 78 } 79 return len(left) - len(right) 80 } 81 82 func (c *Collation_unicode_general_ci) WeightString(dst, src []byte, numCodepoints int) []byte { 83 unicaseInfo := c.unicase 84 cs := c.charset 85 86 if numCodepoints == 0 || numCodepoints == PadToMax { 87 for { 88 r, width := cs.DecodeRune(src) 89 if r == charset.RuneError && width < 3 { 90 break 91 } 92 93 src = src[width:] 94 sorted := unicaseInfo.unicodeSort(r) 95 dst = append(dst, byte(sorted>>8), byte(sorted)) 96 } 97 98 if numCodepoints == PadToMax { 99 for len(dst)+1 < cap(dst) { 100 dst = append(dst, 0x00, 0x20) 101 } 102 if len(dst) < cap(dst) { 103 dst = append(dst, 0x00) 104 } 105 } 106 } else { 107 for numCodepoints > 0 { 108 r, width := cs.DecodeRune(src) 109 if r == charset.RuneError && width < 3 { 110 break 111 } 112 113 src = src[width:] 114 sorted := unicaseInfo.unicodeSort(r) 115 dst = append(dst, byte(sorted>>8), byte(sorted)) 116 numCodepoints-- 117 } 118 for numCodepoints > 0 { 119 dst = append(dst, 0x00, 0x20) 120 numCodepoints-- 121 } 122 } 123 124 return dst 125 } 126 127 func (c *Collation_unicode_general_ci) Hash(src []byte, numCodepoints int) HashCode { 128 unicaseInfo := c.unicase 129 cs := c.charset 130 131 var hash = uintptr(c.id) 132 var left = numCodepoints 133 if left == 0 { 134 left = math.MaxInt32 135 } 136 137 for left > 0 { 138 r, width := cs.DecodeRune(src) 139 if r == charset.RuneError && width < 3 { 140 break 141 } 142 src = src[width:] 143 hash = memhash16(bits.ReverseBytes16(uint16(unicaseInfo.unicodeSort(r))), hash) 144 left-- 145 } 146 147 if numCodepoints > 0 { 148 for left > 0 { 149 hash = memhash16(bits.ReverseBytes16(0x0020), hash) 150 left-- 151 } 152 } 153 return hash 154 } 155 156 func (c *Collation_unicode_general_ci) WeightStringLen(numBytes int) int { 157 return ((numBytes + 3) / 4) * 2 158 } 159 160 func (c *Collation_unicode_general_ci) Wildcard(pat []byte, matchOne rune, matchMany rune, escape rune) WildcardPattern { 161 var sort = c.unicase.unicodeSort 162 var equals = func(a, b rune) bool { 163 return sort(a) == sort(b) 164 } 165 return newUnicodeWildcardMatcher(c.charset, equals, c.Collate, pat, matchOne, matchMany, escape) 166 } 167 168 type Collation_unicode_bin struct { 169 id ID 170 name string 171 charset charset.Charset 172 } 173 174 func (c *Collation_unicode_bin) Init() {} 175 176 func (c *Collation_unicode_bin) ID() ID { 177 return c.id 178 } 179 180 func (c *Collation_unicode_bin) Name() string { 181 return c.name 182 } 183 184 func (c *Collation_unicode_bin) Charset() charset.Charset { 185 return c.charset 186 } 187 188 func (c *Collation_unicode_bin) IsBinary() bool { 189 return true 190 } 191 192 func (c *Collation_unicode_bin) Collate(left, right []byte, isPrefix bool) int { 193 return collationBinary(left, right, isPrefix) 194 } 195 196 func (c *Collation_unicode_bin) WeightString(dst, src []byte, numCodepoints int) []byte { 197 if c.charset.SupportsSupplementaryChars() { 198 return c.weightStringUnicode(dst, src, numCodepoints) 199 } 200 return c.weightStringBMP(dst, src, numCodepoints) 201 } 202 203 func (c *Collation_unicode_bin) weightStringBMP(dst, src []byte, numCodepoints int) []byte { 204 cs := c.charset 205 if numCodepoints == 0 || numCodepoints == PadToMax { 206 for { 207 r, width := cs.DecodeRune(src) 208 if r == charset.RuneError && width < 3 { 209 break 210 } 211 src = src[width:] 212 dst = append(dst, byte(r>>8), byte(r)) 213 } 214 215 if numCodepoints == PadToMax { 216 for len(dst)+1 < cap(dst) { 217 dst = append(dst, 0x00, 0x20) 218 } 219 if len(dst) < cap(dst) { 220 dst = append(dst, 0x00) 221 } 222 } 223 } else { 224 for numCodepoints > 0 { 225 r, width := cs.DecodeRune(src) 226 if r == charset.RuneError && width < 3 { 227 break 228 } 229 src = src[width:] 230 dst = append(dst, byte(r>>8), byte(r)) 231 numCodepoints-- 232 } 233 for numCodepoints > 0 { 234 dst = append(dst, 0x00, 0x20) 235 numCodepoints-- 236 } 237 } 238 239 return dst 240 } 241 242 func (c *Collation_unicode_bin) weightStringUnicode(dst, src []byte, numCodepoints int) []byte { 243 cs := c.charset 244 if numCodepoints == 0 || numCodepoints == PadToMax { 245 for { 246 r, width := cs.DecodeRune(src) 247 if r == charset.RuneError && width < 3 { 248 break 249 } 250 251 src = src[width:] 252 dst = append(dst, byte((r>>16)&0xFF), byte((r>>8)&0xFF), byte(r&0xFF)) 253 } 254 255 if numCodepoints == PadToMax { 256 for len(dst)+2 < cap(dst) { 257 dst = append(dst, 0x00, 0x00, 0x20) 258 } 259 switch cap(dst) - len(dst) { 260 case 0: 261 case 1: 262 dst = append(dst, 0x00) 263 case 2: 264 dst = append(dst, 0x00, 0x00) 265 default: 266 panic("unreachable") 267 } 268 } 269 } else { 270 for numCodepoints > 0 { 271 r, width := cs.DecodeRune(src) 272 if r == charset.RuneError && width < 3 { 273 break 274 } 275 276 src = src[width:] 277 dst = append(dst, byte((r>>16)&0xFF), byte((r>>8)&0xFF), byte(r&0xFF)) 278 numCodepoints-- 279 } 280 for numCodepoints > 0 { 281 dst = append(dst, 0x00, 0x00, 0x20) 282 numCodepoints-- 283 } 284 } 285 286 return dst 287 } 288 289 func (c *Collation_unicode_bin) Hash(src []byte, numCodepoints int) HashCode { 290 if c.charset.SupportsSupplementaryChars() { 291 return c.hashUnicode(src, numCodepoints) 292 } 293 return c.hashBMP(src, numCodepoints) 294 } 295 296 func (c *Collation_unicode_bin) hashUnicode(src []byte, numCodepoints int) uintptr { 297 cs := c.charset 298 299 var hash = uintptr(c.id) 300 var left = numCodepoints 301 if left == 0 { 302 left = math.MaxInt32 303 } 304 for left > 0 { 305 r, width := cs.DecodeRune(src) 306 if r == charset.RuneError && width < 3 { 307 break 308 } 309 src = src[width:] 310 hash = memhash32(bits.ReverseBytes32(uint32(r)), hash) 311 left-- 312 } 313 if numCodepoints > 0 { 314 for left > 0 { 315 hash = memhash32(bits.ReverseBytes32(0x20), hash) 316 left-- 317 } 318 } 319 return hash 320 } 321 322 func (c *Collation_unicode_bin) hashBMP(src []byte, numCodepoints int) uintptr { 323 cs := c.charset 324 325 var hash = uintptr(c.id) 326 var left = numCodepoints 327 if left == 0 { 328 left = math.MaxInt32 329 } 330 for left > 0 { 331 r, width := cs.DecodeRune(src) 332 if r == charset.RuneError && width < 3 { 333 break 334 } 335 src = src[width:] 336 hash = memhash16(bits.ReverseBytes16(uint16(r)), hash) 337 left-- 338 } 339 if numCodepoints > 0 { 340 for left > 0 { 341 hash = memhash16(bits.ReverseBytes16(0x20), hash) 342 left-- 343 } 344 } 345 return hash 346 } 347 348 func (c *Collation_unicode_bin) WeightStringLen(numBytes int) int { 349 return ((numBytes + 3) / 4) * 3 350 } 351 352 func (c *Collation_unicode_bin) Wildcard(pat []byte, matchOne rune, matchMany rune, escape rune) WildcardPattern { 353 equals := func(a, b rune) bool { 354 return a == b 355 } 356 return newUnicodeWildcardMatcher(c.charset, equals, c.Collate, pat, matchOne, matchMany, escape) 357 } 358 359 func collationBinary(left, right []byte, rightPrefix bool) int { 360 minLen := minInt(len(left), len(right)) 361 if diff := bytes.Compare(left[:minLen], right[:minLen]); diff != 0 { 362 return diff 363 } 364 if rightPrefix { 365 left = left[:minLen] 366 } 367 return len(left) - len(right) 368 }