vitess.io/vitess@v0.16.2/go/mysql/collations/internal/uca/iter_900.go (about)

     1  /*
     2  Copyright 2021 The Vitess Authors.
     3  
     4  Licensed under the Apache License, Version 2.0 (the "License");
     5  you may not use this file except in compliance with the License.
     6  You may obtain a copy of the License at
     7  
     8      http://www.apache.org/licenses/LICENSE-2.0
     9  
    10  Unless required by applicable law or agreed to in writing, software
    11  distributed under the License is distributed on an "AS IS" BASIS,
    12  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    13  See the License for the specific language governing permissions and
    14  limitations under the License.
    15  */
    16  
    17  package uca
    18  
    19  import (
    20  	"unicode/utf8"
    21  
    22  	"vitess.io/vitess/go/mysql/collations/internal/charset"
    23  )
    24  
// iterator900 carries the state shared by the weight iterators in this file:
// the embedded Collation900 and a cursor over the input being weighed.
type iterator900 struct {
	// Constant
	// Collation900 is embedded; the methods in this file reach its table,
	// contract, param, maxLevel, implicits and iterpool members through it.
	Collation900
	// original is the complete input given to reset. input is rewound to it
	// every time a new level begins.
	original []byte

	// Internal state
	// codepoint yields the weights of the most recently decoded codepoint
	// (or contraction) at the current level.
	codepoint codepointIterator
	// input is the portion of original not yet decoded at the current level.
	input     []byte
	// level is the zero-based weight level currently being iterated.
	level     int
}
    35  
// codepointIterator walks the weights of the collation elements (CEs) that
// belong to a single codepoint or contraction, for one level at a time.
type codepointIterator struct {
	// weights begins at the weight of the next CE to be visited.
	weights []uint16
	// scratch is the backing storage for weights when they are computed
	// instead of being read from a weight table (see initImplicit).
	scratch [16]uint16
	// ce is the number of CEs that remain to be visited.
	ce      int
	// stride is the distance, in elements of weights, between the weight of
	// one CE and the weight of the following CE.
	stride  int
}
    42  
    43  func (it *codepointIterator) next() (uint16, bool) {
    44  	for it.ce > 0 && it.weights[0] == 0x0 {
    45  		if it.stride <= len(it.weights) {
    46  			it.weights = it.weights[it.stride:]
    47  		}
    48  		it.ce--
    49  	}
    50  	if it.ce == 0 {
    51  		return 0, false
    52  	}
    53  	weight := it.weights[0]
    54  	if it.stride <= len(it.weights) {
    55  		it.weights = it.weights[it.stride:]
    56  	}
    57  	it.ce--
    58  	return weight, true
    59  }
    60  
    61  func (it *codepointIterator) init(parent *iterator900, cp rune) {
    62  	p, offset := PageOffset(cp)
    63  	page := parent.table[p]
    64  	if page == nil {
    65  		it.initImplicit(parent, cp)
    66  		return
    67  	}
    68  
    69  	it.stride = CodepointsPerPage * 3
    70  	it.weights = (*page)[(parent.level+1)*CodepointsPerPage+offset:]
    71  	if it.weights[0] == 0x0 {
    72  		it.ce = 0
    73  	} else {
    74  		it.ce = int((*page)[offset])
    75  	}
    76  }
    77  
    78  func (it *codepointIterator) initContraction(weights []uint16, level int) {
    79  	it.ce = len(weights) / 3
    80  	it.weights = weights[level:]
    81  	it.stride = 3
    82  }
    83  
    84  func (it *codepointIterator) initImplicit(parent *iterator900, cp rune) {
    85  	if jamos := UnicodeDecomposeHangulSyllable(cp); jamos != nil {
    86  		jweight := it.scratch[:0]
    87  		for _, jamo := range jamos {
    88  			p, offset := PageOffset(jamo)
    89  			page := *parent.table[p]
    90  			jweight = append(jweight,
    91  				page[1*CodepointsPerPage+offset],
    92  				page[2*CodepointsPerPage+offset],
    93  				page[3*CodepointsPerPage+offset],
    94  			)
    95  		}
    96  
    97  		it.weights = jweight[parent.level:]
    98  		it.ce = len(jamos)
    99  		it.stride = 3
   100  		return
   101  	}
   102  
   103  	parent.implicits(it.scratch[:], cp)
   104  	it.weights = it.scratch[parent.level:]
   105  	it.ce = 2
   106  	it.stride = 3
   107  }
   108  
   109  func (it *iterator900) Level() int {
   110  	return it.level
   111  }
   112  
   113  func (it *iterator900) SkipLevel() int {
   114  	it.codepoint.ce = 0
   115  	it.input = it.original
   116  	it.level++
   117  	return it.level
   118  }
   119  
   120  func (it *iterator900) reset(input []byte) {
   121  	it.input = input
   122  	it.original = input
   123  	it.level = 0
   124  	it.codepoint.ce = 0
   125  }
   126  
// WeightIterator iterates over the collation weights of an input, level by
// level. The per-method notes below describe the behavior of the
// slowIterator900 implementation in this file.
type WeightIterator interface {
	// Next returns the next weight. The boolean is false once the last
	// level has been exhausted; a (0, true) result separates two levels.
	Next() (uint16, bool)
	// Level returns the level currently being iterated.
	Level() int
	// SkipLevel moves to the start of the next level and returns it.
	SkipLevel() int
	// Done drops the iterator's references to its input and hands the
	// iterator back to the collation's iterator pool.
	Done()
	// reset rebinds the iterator to a new input, starting at level 0.
	reset(input []byte)
}
   134  
// slowIterator900 is a weight iterator built on iterator900 that decodes
// the input one codepoint at a time (see its Next method).
type slowIterator900 struct {
	iterator900
}
   138  
   139  func (it *slowIterator900) Done() {
   140  	it.original = nil
   141  	it.input = nil
   142  	it.iterpool.Put(it)
   143  }
   144  
   145  func (it *slowIterator900) Next() (uint16, bool) {
   146  	for {
   147  		if w, ok := it.codepoint.next(); ok {
   148  			return it.param.adjust(it.level, w), true
   149  		}
   150  
   151  		cp, width := utf8.DecodeRune(it.input)
   152  		if cp == utf8.RuneError && width < 3 {
   153  			it.level++
   154  
   155  			if it.level < it.maxLevel {
   156  				it.input = it.original
   157  				return 0, true
   158  			}
   159  			return 0, false
   160  		}
   161  
   162  		it.input = it.input[width:]
   163  		if it.contract != nil {
   164  			if weights, remainder, _ := it.contract.Find(charset.Charset_utf8mb4{}, cp, it.input); weights != nil {
   165  				it.codepoint.initContraction(weights, it.level)
   166  				it.input = remainder
   167  				continue
   168  			}
   169  		}
   170  		it.codepoint.init(&it.iterator900, cp)
   171  	}
   172  }