vitess.io/vitess@v0.16.2/go/mysql/collations/uca.go (about)

     1  /*
     2  Copyright 2021 The Vitess Authors.
     3  
     4  Licensed under the Apache License, Version 2.0 (the "License");
     5  you may not use this file except in compliance with the License.
     6  You may obtain a copy of the License at
     7  
     8      http://www.apache.org/licenses/LICENSE-2.0
     9  
    10  Unless required by applicable law or agreed to in writing, software
    11  distributed under the License is distributed on an "AS IS" BASIS,
    12  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    13  See the License for the specific language governing permissions and
    14  limitations under the License.
    15  */
    16  
    17  package collations
    18  
    19  import (
    20  	"bytes"
    21  	"math/bits"
    22  	"sync"
    23  	"unsafe"
    24  
    25  	"vitess.io/vitess/go/mysql/collations/internal/charset"
    26  	"vitess.io/vitess/go/mysql/collations/internal/uca"
    27  )
    28  
    29  func init() {
    30  	register(&Collation_utf8mb4_0900_bin{})
    31  }
    32  
// Collation_utf8mb4_uca_0900 is a non-binary utf8mb4 collation whose
// comparisons are carried out by a uca.Collation900 that Init builds on
// first use.
type Collation_utf8mb4_uca_0900 struct {
	name string
	id   ID

	// Inputs handed to uca.NewCollation by Init. weights, tailoring and
	// reorder are reset to nil once the collation has been built.
	weights          uca.Weights
	tailoring        []uca.Patch
	contract         uca.Contractor
	reorder          []uca.Reorder
	upperCaseFirst   bool
	levelsForCompare int

	// uca is the collation constructed by Init; ucainit guards that
	// construction so it runs a single time.
	uca     *uca.Collation900
	ucainit sync.Once
}
    47  
    48  func (c *Collation_utf8mb4_uca_0900) Init() {
    49  	c.ucainit.Do(func() {
    50  		c.uca = uca.NewCollation(c.name, c.weights, c.tailoring, c.reorder, c.contract, c.upperCaseFirst, c.levelsForCompare)
    51  
    52  		// Clear the external metadata for this collation, so it can be picked up by the GC
    53  		c.weights = nil
    54  		c.tailoring = nil
    55  		c.reorder = nil
    56  	})
    57  }
    58  
    59  func (c *Collation_utf8mb4_uca_0900) Name() string {
    60  	return c.name
    61  }
    62  
    63  func (c *Collation_utf8mb4_uca_0900) ID() ID {
    64  	return c.id
    65  }
    66  
    67  func (c *Collation_utf8mb4_uca_0900) Charset() charset.Charset {
    68  	return charset.Charset_utf8mb4{}
    69  }
    70  
    71  func (c *Collation_utf8mb4_uca_0900) IsBinary() bool {
    72  	return false
    73  }
    74  
    75  func (c *Collation_utf8mb4_uca_0900) Collate(left, right []byte, rightIsPrefix bool) int {
    76  	var (
    77  		l, r            uint16
    78  		lok, rok        bool
    79  		level           int
    80  		levelsToCompare = c.levelsForCompare
    81  		itleft          = c.uca.Iterator(left)
    82  		itright         = c.uca.Iterator(right)
    83  
    84  		fastleft, _  = itleft.(*uca.FastIterator900)
    85  		fastright, _ = itright.(*uca.FastIterator900)
    86  	)
    87  
    88  	defer itleft.Done()
    89  	defer itright.Done()
    90  
    91  nextLevel:
    92  	if fastleft != nil {
    93  		for {
    94  			if cmp := fastleft.FastForward32(fastright); cmp != 0 {
    95  				return cmp
    96  			}
    97  
    98  			l, lok = fastleft.Next()
    99  			r, rok = fastright.Next()
   100  
   101  			if l != r || !lok || !rok {
   102  				break
   103  			}
   104  			if fastleft.Level() != level || fastright.Level() != level {
   105  				break
   106  			}
   107  		}
   108  	} else {
   109  		for {
   110  			l, lok = itleft.Next()
   111  			r, rok = itright.Next()
   112  
   113  			if l != r || !lok || !rok {
   114  				break
   115  			}
   116  			if itleft.Level() != level || itright.Level() != level {
   117  				break
   118  			}
   119  		}
   120  	}
   121  
   122  	switch {
   123  	case itleft.Level() == itright.Level():
   124  		if l == r && lok && rok {
   125  			level++
   126  			if level < levelsToCompare {
   127  				goto nextLevel
   128  			}
   129  		}
   130  	case itleft.Level() > level:
   131  		return -1
   132  	case itright.Level() > level:
   133  		if rightIsPrefix {
   134  			level = itleft.SkipLevel()
   135  			if level < levelsToCompare {
   136  				goto nextLevel
   137  			}
   138  			return -int(r)
   139  		}
   140  		return 1
   141  	}
   142  
   143  	return int(l) - int(r)
   144  }
   145  
   146  func (c *Collation_utf8mb4_uca_0900) WeightString(dst, src []byte, numCodepoints int) []byte {
   147  	it := c.uca.Iterator(src)
   148  	defer it.Done()
   149  
   150  	if fast, ok := it.(*uca.FastIterator900); ok {
   151  		var chunk [16]byte
   152  		for {
   153  			for cap(dst)-len(dst) >= 16 {
   154  				n := fast.NextWeightBlock64(dst[len(dst) : len(dst)+16])
   155  				if n <= 0 {
   156  					goto performPadding
   157  				}
   158  				dst = dst[:len(dst)+n]
   159  			}
   160  			n := fast.NextWeightBlock64(chunk[:16])
   161  			if n <= 0 {
   162  				goto performPadding
   163  			}
   164  			dst = append(dst, chunk[:n]...)
   165  		}
   166  	} else {
   167  		for {
   168  			w, ok := it.Next()
   169  			if !ok {
   170  				break
   171  			}
   172  			dst = append(dst, byte(w>>8), byte(w))
   173  		}
   174  	}
   175  
   176  performPadding:
   177  	if numCodepoints == PadToMax {
   178  		for len(dst) < cap(dst) {
   179  			dst = append(dst, 0x00)
   180  		}
   181  	}
   182  
   183  	return dst
   184  }
   185  
   186  func (c *Collation_utf8mb4_uca_0900) Hash(src []byte, _ int) HashCode {
   187  	var hash = uintptr(c.id)
   188  
   189  	it := c.uca.Iterator(src)
   190  	defer it.Done()
   191  
   192  	if fast, ok := it.(*uca.FastIterator900); ok {
   193  		var chunk [16]byte
   194  		var n int
   195  		for {
   196  			n = fast.NextWeightBlock64(chunk[:16])
   197  			if n < 16 {
   198  				break
   199  			}
   200  			hash = memhash128(unsafe.Pointer(&chunk), hash)
   201  		}
   202  		return memhashraw(unsafe.Pointer(&chunk), hash, uintptr(n))
   203  	}
   204  
   205  	for {
   206  		w, ok := it.Next()
   207  		if !ok {
   208  			break
   209  		}
   210  		hash = memhash16(bits.ReverseBytes16(w), hash)
   211  	}
   212  	return hash
   213  }
   214  
   215  func (c *Collation_utf8mb4_uca_0900) WeightStringLen(numBytes int) int {
   216  	if numBytes%4 != 0 {
   217  		panic("WeightStringLen called with non-MOD4 length")
   218  	}
   219  	levels := int(c.levelsForCompare)
   220  	weights := (numBytes / 4) * uca.MaxCollationElementsPerCodepoint * levels
   221  	weights += levels - 1 // one NULL byte as a separator between levels
   222  	return weights * 2    // two bytes per weight
   223  }
   224  
   225  func (c *Collation_utf8mb4_uca_0900) Wildcard(pat []byte, matchOne rune, matchMany rune, escape rune) WildcardPattern {
   226  	return newUnicodeWildcardMatcher(charset.Charset_utf8mb4{}, c.uca.WeightsEqual, c.Collate, pat, matchOne, matchMany, escape)
   227  }
   228  
   229  func (c *Collation_utf8mb4_uca_0900) ToLower(dst, src []byte) []byte {
   230  	dst = append(dst, bytes.ToLower(src)...)
   231  	return dst
   232  }
   233  
   234  func (c *Collation_utf8mb4_uca_0900) ToUpper(dst, src []byte) []byte {
   235  	dst = append(dst, bytes.ToUpper(src)...)
   236  	return dst
   237  }
   238  
// Collation_utf8mb4_0900_bin is the stateless binary collation for utf8mb4
// named "utf8mb4_0900_bin" (ID 309).
type Collation_utf8mb4_0900_bin struct{}
   240  
   241  func (c *Collation_utf8mb4_0900_bin) Init() {}
   242  
   243  func (c *Collation_utf8mb4_0900_bin) ID() ID {
   244  	return 309
   245  }
   246  
   247  func (c *Collation_utf8mb4_0900_bin) Name() string {
   248  	return "utf8mb4_0900_bin"
   249  }
   250  
   251  func (c *Collation_utf8mb4_0900_bin) Charset() charset.Charset {
   252  	return charset.Charset_utf8mb4{}
   253  }
   254  
   255  func (c *Collation_utf8mb4_0900_bin) IsBinary() bool {
   256  	return true
   257  }
   258  
   259  func (c *Collation_utf8mb4_0900_bin) Collate(left, right []byte, isPrefix bool) int {
   260  	return collationBinary(left, right, isPrefix)
   261  }
   262  
   263  func (c *Collation_utf8mb4_0900_bin) WeightString(dst, src []byte, numCodepoints int) []byte {
   264  	dst = append(dst, src...)
   265  	if numCodepoints == PadToMax {
   266  		for len(dst) < cap(dst) {
   267  			dst = append(dst, 0x0)
   268  		}
   269  	}
   270  	return dst
   271  }
   272  
   273  func (c *Collation_utf8mb4_0900_bin) Hash(src []byte, _ int) HashCode {
   274  	return memhash(src, 0xb900b900)
   275  }
   276  
   277  func (c *Collation_utf8mb4_0900_bin) WeightStringLen(numBytes int) int {
   278  	return numBytes
   279  }
   280  
   281  func (c *Collation_utf8mb4_0900_bin) Wildcard(pat []byte, matchOne rune, matchMany rune, escape rune) WildcardPattern {
   282  	equals := func(a, b rune) bool {
   283  		return a == b
   284  	}
   285  	return newUnicodeWildcardMatcher(charset.Charset_utf8mb4{}, equals, c.Collate, pat, matchOne, matchMany, escape)
   286  }
   287  
   288  func (c *Collation_utf8mb4_0900_bin) ToLower(dst, src []byte) []byte {
   289  	dst = append(dst, bytes.ToLower(src)...)
   290  	return dst
   291  }
   292  
   293  func (c *Collation_utf8mb4_0900_bin) ToUpper(dst, src []byte) []byte {
   294  	dst = append(dst, bytes.ToUpper(src)...)
   295  	return dst
   296  }
   297  
// Collation_uca_legacy is a non-binary collation over a configurable charset
// whose comparisons are carried out by a uca.CollationLegacy that Init builds
// on first use.
type Collation_uca_legacy struct {
	name string
	id   ID

	// Inputs handed to uca.NewCollationLegacy by Init. weights and tailoring
	// are reset to nil once the collation has been built.
	charset      charset.Charset
	weights      uca.Weights
	tailoring    []uca.Patch
	contract     uca.Contractor
	maxCodepoint rune

	// uca is the collation constructed by Init; ucainit guards that
	// construction so it runs a single time.
	uca     *uca.CollationLegacy
	ucainit sync.Once
}
   311  
   312  func (c *Collation_uca_legacy) Init() {
   313  	c.ucainit.Do(func() {
   314  		c.uca = uca.NewCollationLegacy(c.charset, c.weights, c.tailoring, c.contract, c.maxCodepoint)
   315  		c.weights = nil
   316  		c.tailoring = nil
   317  	})
   318  }
   319  
   320  func (c *Collation_uca_legacy) ID() ID {
   321  	return c.id
   322  }
   323  
   324  func (c *Collation_uca_legacy) Name() string {
   325  	return c.name
   326  }
   327  
   328  func (c *Collation_uca_legacy) Charset() charset.Charset {
   329  	return c.charset
   330  }
   331  
   332  func (c *Collation_uca_legacy) IsBinary() bool {
   333  	return false
   334  }
   335  
   336  func (c *Collation_uca_legacy) Collate(left, right []byte, isPrefix bool) int {
   337  	var (
   338  		l, r     uint16
   339  		lok, rok bool
   340  		itleft   = c.uca.Iterator(left)
   341  		itright  = c.uca.Iterator(right)
   342  	)
   343  
   344  	defer itleft.Done()
   345  	defer itright.Done()
   346  
   347  	for {
   348  		l, lok = itleft.Next()
   349  		r, rok = itright.Next()
   350  
   351  		if l == r && lok && rok {
   352  			continue
   353  		}
   354  		if !rok && isPrefix {
   355  			return 0
   356  		}
   357  		return int(l) - int(r)
   358  	}
   359  }
   360  
   361  func (c *Collation_uca_legacy) WeightString(dst, src []byte, numCodepoints int) []byte {
   362  	it := c.uca.Iterator(src)
   363  	defer it.Done()
   364  
   365  	for {
   366  		w, ok := it.Next()
   367  		if !ok {
   368  			break
   369  		}
   370  		dst = append(dst, byte(w>>8), byte(w))
   371  	}
   372  
   373  	if numCodepoints > 0 {
   374  		weightForSpace := c.uca.WeightForSpace()
   375  		w1, w2 := byte(weightForSpace>>8), byte(weightForSpace)
   376  
   377  		if numCodepoints == PadToMax {
   378  			for len(dst)+1 < cap(dst) {
   379  				dst = append(dst, w1, w2)
   380  			}
   381  			if len(dst) < cap(dst) {
   382  				dst = append(dst, w1)
   383  			}
   384  		} else {
   385  			numCodepoints -= it.Length()
   386  			for numCodepoints > 0 {
   387  				dst = append(dst, w1, w2)
   388  				numCodepoints--
   389  			}
   390  		}
   391  	}
   392  
   393  	return dst
   394  }
   395  
   396  func (c *Collation_uca_legacy) Hash(src []byte, numCodepoints int) HashCode {
   397  	it := c.uca.Iterator(src)
   398  	defer it.Done()
   399  
   400  	var hash = uintptr(c.id)
   401  	for {
   402  		w, ok := it.Next()
   403  		if !ok {
   404  			break
   405  		}
   406  		hash = memhash16(bits.ReverseBytes16(w), hash)
   407  	}
   408  
   409  	if numCodepoints > 0 {
   410  		weightForSpace := bits.ReverseBytes16(c.uca.WeightForSpace())
   411  		numCodepoints -= it.Length()
   412  		for numCodepoints > 0 {
   413  			hash = memhash16(weightForSpace, hash)
   414  			numCodepoints--
   415  		}
   416  	}
   417  
   418  	return hash
   419  }
   420  
   421  func (c *Collation_uca_legacy) WeightStringLen(numBytes int) int {
   422  	// TODO: This is literally the worst case scenario. Improve on this.
   423  	return numBytes * 8
   424  }
   425  
   426  func (c *Collation_uca_legacy) Wildcard(pat []byte, matchOne rune, matchMany rune, escape rune) WildcardPattern {
   427  	return newUnicodeWildcardMatcher(c.charset, c.uca.WeightsEqual, c.Collate, pat, matchOne, matchMany, escape)
   428  }