vitess.io/vitess@v0.16.2/go/mysql/collations/8bit.go (about) 1 /* 2 Copyright 2021 The Vitess Authors. 3 4 Licensed under the Apache License, Version 2.0 (the "License"); 5 you may not use this file except in compliance with the License. 6 You may obtain a copy of the License at 7 8 http://www.apache.org/licenses/LICENSE-2.0 9 10 Unless required by applicable law or agreed to in writing, software 11 distributed under the License is distributed on an "AS IS" BASIS, 12 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 See the License for the specific language governing permissions and 14 limitations under the License. 15 */ 16 17 package collations 18 19 import ( 20 "vitess.io/vitess/go/mysql/collations/internal/charset" 21 ) 22 23 var sortOrderIdentity [256]byte 24 25 func init() { 26 for i := range sortOrderIdentity { 27 sortOrderIdentity[i] = byte(i) 28 } 29 30 register(&Collation_binary{}) 31 } 32 33 type simpletables struct { 34 // By default we're not building in the tables for lower/upper-casing and 35 // character classes, because we're not using them for collation and they 36 // take up a lot of binary space. 37 // Uncomment these fields and pass `-full8bit` to `makemysqldata` to generate 38 // these tables. 39 tolower *[256]byte 40 toupper *[256]byte 41 ctype *[256]byte 42 sort *[256]byte 43 } 44 45 type Collation_8bit_bin struct { 46 id ID 47 name string 48 simpletables 49 charset charset.Charset 50 } 51 52 func (c *Collation_8bit_bin) Init() {} 53 54 func (c *Collation_8bit_bin) Name() string { 55 return c.name 56 } 57 58 func (c *Collation_8bit_bin) ID() ID { 59 return c.id 60 } 61 62 func (c *Collation_8bit_bin) Charset() charset.Charset { 63 return c.charset 64 } 65 66 func (c *Collation_8bit_bin) IsBinary() bool { 67 return true 68 } 69 70 func (c *Collation_8bit_bin) Collate(left, right []byte, rightIsPrefix bool) int { 71 return collationBinary(left, right, rightIsPrefix) 72 } 73 74 func (c *Collation_8bit_bin) WeightString(dst, src []byte, numCodepoints int) []byte { 75 copyCodepoints := len(src) 76 77 var padToMax bool 78 switch numCodepoints { 79 case 0: 80 numCodepoints = copyCodepoints 81 case PadToMax: 82 padToMax = true 83 default: 84 copyCodepoints = minInt(copyCodepoints, numCodepoints) 85 } 86 87 dst = append(dst, src[:copyCodepoints]...) 88 return weightStringPadingSimple(' ', dst, numCodepoints-copyCodepoints, padToMax) 89 } 90 91 func (c *Collation_8bit_bin) Hash(src []byte, numCodepoints int) HashCode { 92 hash := 0x8b8b0000 | uintptr(c.id) 93 if numCodepoints == 0 { 94 return memhash(src, hash) 95 } 96 97 tocopy := minInt(len(src), numCodepoints) 98 hash = memhash(src[:tocopy], hash) 99 100 numCodepoints -= tocopy 101 for numCodepoints > 0 { 102 hash = memhash8(' ', hash) 103 numCodepoints-- 104 } 105 return hash 106 } 107 108 func (c *Collation_8bit_bin) WeightStringLen(numBytes int) int { 109 return numBytes 110 } 111 112 func (c *Collation_8bit_bin) Wildcard(pat []byte, matchOne rune, matchMany rune, escape rune) WildcardPattern { 113 return newEightbitWildcardMatcher(&sortOrderIdentity, c.Collate, pat, matchOne, matchMany, escape) 114 } 115 116 func (c *Collation_8bit_bin) ToLower(dst, src []byte) []byte { 117 lowerTable := c.simpletables.tolower 118 119 for _, c := range src { 120 dst = append(dst, lowerTable[c]) 121 } 122 return dst 123 } 124 125 func (c *Collation_8bit_bin) ToUpper(dst, src []byte) []byte { 126 upperTable := c.simpletables.toupper 127 128 for _, c := range src { 129 dst = append(dst, upperTable[c]) 130 } 131 return dst 132 } 133 134 type Collation_8bit_simple_ci struct { 135 id ID 136 name string 137 simpletables 138 charset charset.Charset 139 } 140 141 func (c *Collation_8bit_simple_ci) Init() { 142 if c.sort == nil { 143 panic("8bit_simple_ci collation without sort table") 144 } 145 } 146 147 func (c *Collation_8bit_simple_ci) Name() string { 148 return c.name 149 } 150 151 func (c *Collation_8bit_simple_ci) ID() ID { 152 return c.id 153 } 154 155 func (c *Collation_8bit_simple_ci) Charset() charset.Charset { 156 return c.charset 157 } 158 159 func (c *Collation_8bit_simple_ci) IsBinary() bool { 160 return false 161 } 162 163 func (c *Collation_8bit_simple_ci) Collate(left, right []byte, rightIsPrefix bool) int { 164 sortOrder := c.sort 165 cmpLen := minInt(len(left), len(right)) 166 167 for i := 0; i < cmpLen; i++ { 168 sortL, sortR := sortOrder[left[i]], sortOrder[right[i]] 169 if sortL != sortR { 170 return int(sortL) - int(sortR) 171 } 172 } 173 if rightIsPrefix { 174 left = left[:cmpLen] 175 } 176 return len(left) - len(right) 177 } 178 179 func (c *Collation_8bit_simple_ci) WeightString(dst, src []byte, numCodepoints int) []byte { 180 padToMax := false 181 sortOrder := c.sort 182 copyCodepoints := len(src) 183 184 switch numCodepoints { 185 case 0: 186 numCodepoints = copyCodepoints 187 case PadToMax: 188 padToMax = true 189 default: 190 copyCodepoints = minInt(copyCodepoints, numCodepoints) 191 } 192 193 for _, ch := range src[:copyCodepoints] { 194 dst = append(dst, sortOrder[ch]) 195 } 196 return weightStringPadingSimple(' ', dst, numCodepoints-copyCodepoints, padToMax) 197 } 198 199 func (c *Collation_8bit_simple_ci) Hash(src []byte, numCodepoints int) HashCode { 200 sortOrder := c.sort 201 202 var tocopy = len(src) 203 if numCodepoints > 0 { 204 tocopy = minInt(tocopy, numCodepoints) 205 } 206 207 var hash = uintptr(c.id) 208 for _, ch := range src[:tocopy] { 209 hash = memhash8(sortOrder[ch], hash) 210 } 211 212 if numCodepoints > 0 { 213 numCodepoints -= tocopy 214 for numCodepoints > 0 { 215 hash = memhash8(' ', hash) 216 numCodepoints-- 217 } 218 } 219 220 return hash 221 } 222 223 func (c *Collation_8bit_simple_ci) WeightStringLen(numBytes int) int { 224 return numBytes 225 } 226 227 func (c *Collation_8bit_simple_ci) Wildcard(pat []byte, matchOne rune, matchMany rune, escape rune) WildcardPattern { 228 return newEightbitWildcardMatcher(c.sort, c.Collate, pat, matchOne, matchMany, escape) 229 } 230 231 func weightStringPadingSimple(padChar byte, dst []byte, numCodepoints int, padToMax bool) []byte { 232 if padToMax { 233 for len(dst) < cap(dst) { 234 dst = append(dst, padChar) 235 } 236 } else { 237 for numCodepoints > 0 { 238 dst = append(dst, padChar) 239 numCodepoints-- 240 } 241 } 242 return dst 243 } 244 245 func (c *Collation_8bit_simple_ci) ToLower(dst, src []byte) []byte { 246 lowerTable := c.simpletables.tolower 247 248 for _, c := range src { 249 dst = append(dst, lowerTable[c]) 250 } 251 return dst 252 } 253 254 func (c *Collation_8bit_simple_ci) ToUpper(dst, src []byte) []byte { 255 upperTable := c.simpletables.toupper 256 257 for _, c := range src { 258 dst = append(dst, upperTable[c]) 259 } 260 return dst 261 } 262 263 type Collation_binary struct{} 264 265 func (c *Collation_binary) Init() {} 266 267 func (c *Collation_binary) ID() ID { 268 return CollationBinaryID 269 } 270 271 func (c *Collation_binary) Name() string { 272 return "binary" 273 } 274 275 func (c *Collation_binary) Charset() charset.Charset { 276 return charset.Charset_binary{} 277 } 278 279 func (c *Collation_binary) IsBinary() bool { 280 return true 281 } 282 283 func (c *Collation_binary) Collate(left, right []byte, isPrefix bool) int { 284 return collationBinary(left, right, isPrefix) 285 } 286 287 func (c *Collation_binary) WeightString(dst, src []byte, numCodepoints int) []byte { 288 padToMax := false 289 copyCodepoints := len(src) 290 291 switch numCodepoints { 292 case 0: // no-op 293 case PadToMax: 294 padToMax = true 295 default: 296 copyCodepoints = minInt(copyCodepoints, numCodepoints) 297 } 298 299 dst = append(dst, src[:copyCodepoints]...) 300 if padToMax { 301 for len(dst) < cap(dst) { 302 dst = append(dst, 0x0) 303 } 304 } 305 return dst 306 } 307 308 func (c *Collation_binary) Hash(src []byte, numCodepoints int) HashCode { 309 if numCodepoints > 0 { 310 src = src[:numCodepoints] 311 } 312 return memhash(src, 0xBBBBBBBB) 313 } 314 315 func (c *Collation_binary) WeightStringLen(numBytes int) int { 316 return numBytes 317 } 318 319 func (c *Collation_binary) Wildcard(pat []byte, matchOne rune, matchMany rune, escape rune) WildcardPattern { 320 return newEightbitWildcardMatcher(&sortOrderIdentity, c.Collate, pat, matchOne, matchMany, escape) 321 } 322 323 func (c *Collation_binary) ToLower(dst, raw []byte) []byte { 324 dst = append(dst, raw...) 325 return dst 326 } 327 328 func (c *Collation_binary) ToUpper(dst, raw []byte) []byte { 329 dst = append(dst, raw...) 330 return dst 331 }