vitess.io/vitess@v0.16.2/go/mysql/collations/8bit.go (about)

     1  /*
     2  Copyright 2021 The Vitess Authors.
     3  
     4  Licensed under the Apache License, Version 2.0 (the "License");
     5  you may not use this file except in compliance with the License.
     6  You may obtain a copy of the License at
     7  
     8      http://www.apache.org/licenses/LICENSE-2.0
     9  
    10  Unless required by applicable law or agreed to in writing, software
    11  distributed under the License is distributed on an "AS IS" BASIS,
    12  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    13  See the License for the specific language governing permissions and
    14  limitations under the License.
    15  */
    16  
    17  package collations
    18  
    19  import (
    20  	"vitess.io/vitess/go/mysql/collations/internal/charset"
    21  )
    22  
    23  var sortOrderIdentity [256]byte
    24  
    25  func init() {
    26  	for i := range sortOrderIdentity {
    27  		sortOrderIdentity[i] = byte(i)
    28  	}
    29  
    30  	register(&Collation_binary{})
    31  }
    32  
    33  type simpletables struct {
    34  	// By default we're not building in the tables for lower/upper-casing and
    35  	// character classes, because we're not using them for collation and they
    36  	// take up a lot of binary space.
    37  	// Uncomment these fields and pass `-full8bit` to `makemysqldata` to generate
    38  	// these tables.
    39  	tolower *[256]byte
    40  	toupper *[256]byte
    41  	ctype   *[256]byte
    42  	sort    *[256]byte
    43  }
    44  
    45  type Collation_8bit_bin struct {
    46  	id   ID
    47  	name string
    48  	simpletables
    49  	charset charset.Charset
    50  }
    51  
    52  func (c *Collation_8bit_bin) Init() {}
    53  
    54  func (c *Collation_8bit_bin) Name() string {
    55  	return c.name
    56  }
    57  
    58  func (c *Collation_8bit_bin) ID() ID {
    59  	return c.id
    60  }
    61  
    62  func (c *Collation_8bit_bin) Charset() charset.Charset {
    63  	return c.charset
    64  }
    65  
    66  func (c *Collation_8bit_bin) IsBinary() bool {
    67  	return true
    68  }
    69  
    70  func (c *Collation_8bit_bin) Collate(left, right []byte, rightIsPrefix bool) int {
    71  	return collationBinary(left, right, rightIsPrefix)
    72  }
    73  
    74  func (c *Collation_8bit_bin) WeightString(dst, src []byte, numCodepoints int) []byte {
    75  	copyCodepoints := len(src)
    76  
    77  	var padToMax bool
    78  	switch numCodepoints {
    79  	case 0:
    80  		numCodepoints = copyCodepoints
    81  	case PadToMax:
    82  		padToMax = true
    83  	default:
    84  		copyCodepoints = minInt(copyCodepoints, numCodepoints)
    85  	}
    86  
    87  	dst = append(dst, src[:copyCodepoints]...)
    88  	return weightStringPadingSimple(' ', dst, numCodepoints-copyCodepoints, padToMax)
    89  }
    90  
    91  func (c *Collation_8bit_bin) Hash(src []byte, numCodepoints int) HashCode {
    92  	hash := 0x8b8b0000 | uintptr(c.id)
    93  	if numCodepoints == 0 {
    94  		return memhash(src, hash)
    95  	}
    96  
    97  	tocopy := minInt(len(src), numCodepoints)
    98  	hash = memhash(src[:tocopy], hash)
    99  
   100  	numCodepoints -= tocopy
   101  	for numCodepoints > 0 {
   102  		hash = memhash8(' ', hash)
   103  		numCodepoints--
   104  	}
   105  	return hash
   106  }
   107  
   108  func (c *Collation_8bit_bin) WeightStringLen(numBytes int) int {
   109  	return numBytes
   110  }
   111  
   112  func (c *Collation_8bit_bin) Wildcard(pat []byte, matchOne rune, matchMany rune, escape rune) WildcardPattern {
   113  	return newEightbitWildcardMatcher(&sortOrderIdentity, c.Collate, pat, matchOne, matchMany, escape)
   114  }
   115  
   116  func (c *Collation_8bit_bin) ToLower(dst, src []byte) []byte {
   117  	lowerTable := c.simpletables.tolower
   118  
   119  	for _, c := range src {
   120  		dst = append(dst, lowerTable[c])
   121  	}
   122  	return dst
   123  }
   124  
   125  func (c *Collation_8bit_bin) ToUpper(dst, src []byte) []byte {
   126  	upperTable := c.simpletables.toupper
   127  
   128  	for _, c := range src {
   129  		dst = append(dst, upperTable[c])
   130  	}
   131  	return dst
   132  }
   133  
   134  type Collation_8bit_simple_ci struct {
   135  	id   ID
   136  	name string
   137  	simpletables
   138  	charset charset.Charset
   139  }
   140  
   141  func (c *Collation_8bit_simple_ci) Init() {
   142  	if c.sort == nil {
   143  		panic("8bit_simple_ci collation without sort table")
   144  	}
   145  }
   146  
   147  func (c *Collation_8bit_simple_ci) Name() string {
   148  	return c.name
   149  }
   150  
   151  func (c *Collation_8bit_simple_ci) ID() ID {
   152  	return c.id
   153  }
   154  
   155  func (c *Collation_8bit_simple_ci) Charset() charset.Charset {
   156  	return c.charset
   157  }
   158  
   159  func (c *Collation_8bit_simple_ci) IsBinary() bool {
   160  	return false
   161  }
   162  
   163  func (c *Collation_8bit_simple_ci) Collate(left, right []byte, rightIsPrefix bool) int {
   164  	sortOrder := c.sort
   165  	cmpLen := minInt(len(left), len(right))
   166  
   167  	for i := 0; i < cmpLen; i++ {
   168  		sortL, sortR := sortOrder[left[i]], sortOrder[right[i]]
   169  		if sortL != sortR {
   170  			return int(sortL) - int(sortR)
   171  		}
   172  	}
   173  	if rightIsPrefix {
   174  		left = left[:cmpLen]
   175  	}
   176  	return len(left) - len(right)
   177  }
   178  
   179  func (c *Collation_8bit_simple_ci) WeightString(dst, src []byte, numCodepoints int) []byte {
   180  	padToMax := false
   181  	sortOrder := c.sort
   182  	copyCodepoints := len(src)
   183  
   184  	switch numCodepoints {
   185  	case 0:
   186  		numCodepoints = copyCodepoints
   187  	case PadToMax:
   188  		padToMax = true
   189  	default:
   190  		copyCodepoints = minInt(copyCodepoints, numCodepoints)
   191  	}
   192  
   193  	for _, ch := range src[:copyCodepoints] {
   194  		dst = append(dst, sortOrder[ch])
   195  	}
   196  	return weightStringPadingSimple(' ', dst, numCodepoints-copyCodepoints, padToMax)
   197  }
   198  
   199  func (c *Collation_8bit_simple_ci) Hash(src []byte, numCodepoints int) HashCode {
   200  	sortOrder := c.sort
   201  
   202  	var tocopy = len(src)
   203  	if numCodepoints > 0 {
   204  		tocopy = minInt(tocopy, numCodepoints)
   205  	}
   206  
   207  	var hash = uintptr(c.id)
   208  	for _, ch := range src[:tocopy] {
   209  		hash = memhash8(sortOrder[ch], hash)
   210  	}
   211  
   212  	if numCodepoints > 0 {
   213  		numCodepoints -= tocopy
   214  		for numCodepoints > 0 {
   215  			hash = memhash8(' ', hash)
   216  			numCodepoints--
   217  		}
   218  	}
   219  
   220  	return hash
   221  }
   222  
   223  func (c *Collation_8bit_simple_ci) WeightStringLen(numBytes int) int {
   224  	return numBytes
   225  }
   226  
   227  func (c *Collation_8bit_simple_ci) Wildcard(pat []byte, matchOne rune, matchMany rune, escape rune) WildcardPattern {
   228  	return newEightbitWildcardMatcher(c.sort, c.Collate, pat, matchOne, matchMany, escape)
   229  }
   230  
   231  func weightStringPadingSimple(padChar byte, dst []byte, numCodepoints int, padToMax bool) []byte {
   232  	if padToMax {
   233  		for len(dst) < cap(dst) {
   234  			dst = append(dst, padChar)
   235  		}
   236  	} else {
   237  		for numCodepoints > 0 {
   238  			dst = append(dst, padChar)
   239  			numCodepoints--
   240  		}
   241  	}
   242  	return dst
   243  }
   244  
   245  func (c *Collation_8bit_simple_ci) ToLower(dst, src []byte) []byte {
   246  	lowerTable := c.simpletables.tolower
   247  
   248  	for _, c := range src {
   249  		dst = append(dst, lowerTable[c])
   250  	}
   251  	return dst
   252  }
   253  
   254  func (c *Collation_8bit_simple_ci) ToUpper(dst, src []byte) []byte {
   255  	upperTable := c.simpletables.toupper
   256  
   257  	for _, c := range src {
   258  		dst = append(dst, upperTable[c])
   259  	}
   260  	return dst
   261  }
   262  
   263  type Collation_binary struct{}
   264  
   265  func (c *Collation_binary) Init() {}
   266  
   267  func (c *Collation_binary) ID() ID {
   268  	return CollationBinaryID
   269  }
   270  
   271  func (c *Collation_binary) Name() string {
   272  	return "binary"
   273  }
   274  
   275  func (c *Collation_binary) Charset() charset.Charset {
   276  	return charset.Charset_binary{}
   277  }
   278  
   279  func (c *Collation_binary) IsBinary() bool {
   280  	return true
   281  }
   282  
   283  func (c *Collation_binary) Collate(left, right []byte, isPrefix bool) int {
   284  	return collationBinary(left, right, isPrefix)
   285  }
   286  
   287  func (c *Collation_binary) WeightString(dst, src []byte, numCodepoints int) []byte {
   288  	padToMax := false
   289  	copyCodepoints := len(src)
   290  
   291  	switch numCodepoints {
   292  	case 0: // no-op
   293  	case PadToMax:
   294  		padToMax = true
   295  	default:
   296  		copyCodepoints = minInt(copyCodepoints, numCodepoints)
   297  	}
   298  
   299  	dst = append(dst, src[:copyCodepoints]...)
   300  	if padToMax {
   301  		for len(dst) < cap(dst) {
   302  			dst = append(dst, 0x0)
   303  		}
   304  	}
   305  	return dst
   306  }
   307  
   308  func (c *Collation_binary) Hash(src []byte, numCodepoints int) HashCode {
   309  	if numCodepoints > 0 {
   310  		src = src[:numCodepoints]
   311  	}
   312  	return memhash(src, 0xBBBBBBBB)
   313  }
   314  
   315  func (c *Collation_binary) WeightStringLen(numBytes int) int {
   316  	return numBytes
   317  }
   318  
   319  func (c *Collation_binary) Wildcard(pat []byte, matchOne rune, matchMany rune, escape rune) WildcardPattern {
   320  	return newEightbitWildcardMatcher(&sortOrderIdentity, c.Collate, pat, matchOne, matchMany, escape)
   321  }
   322  
   323  func (c *Collation_binary) ToLower(dst, raw []byte) []byte {
   324  	dst = append(dst, raw...)
   325  	return dst
   326  }
   327  
   328  func (c *Collation_binary) ToUpper(dst, raw []byte) []byte {
   329  	dst = append(dst, raw...)
   330  	return dst
   331  }