vitess.io/vitess@v0.16.2/go/mysql/collations/internal/uca/contractions.go (about) 1 /* 2 Copyright 2021 The Vitess Authors. 3 4 Licensed under the Apache License, Version 2.0 (the "License"); 5 you may not use this file except in compliance with the License. 6 You may obtain a copy of the License at 7 8 http://www.apache.org/licenses/LICENSE-2.0 9 10 Unless required by applicable law or agreed to in writing, software 11 distributed under the License is distributed on an "AS IS" BASIS, 12 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 See the License for the specific language governing permissions and 14 limitations under the License. 15 */ 16 17 package uca 18 19 import ( 20 "fmt" 21 "unicode/utf8" 22 23 "vitess.io/vitess/go/mysql/collations/internal/charset" 24 ) 25 26 type trie struct { 27 children map[rune]*trie 28 weights []uint16 29 } 30 31 func (t *trie) walkUTF8(remainder []byte) ([]uint16, []byte) { 32 if len(remainder) > 0 { 33 cp, width := utf8.DecodeRune(remainder) 34 if cp == utf8.RuneError && width < 3 { 35 return nil, nil 36 } 37 if ch := t.children[cp]; ch != nil { 38 return ch.walkUTF8(remainder[width:]) 39 } 40 } 41 return t.weights, remainder 42 } 43 44 func (t *trie) walkCharset(cs charset.Charset, remainder []byte, depth int) ([]uint16, []byte, int) { 45 if len(remainder) > 0 { 46 cp, width := cs.DecodeRune(remainder) 47 if cp == charset.RuneError && width < 3 { 48 return nil, nil, 0 49 } 50 if ch := t.children[cp]; ch != nil { 51 return ch.walkCharset(cs, remainder[width:], depth+1) 52 } 53 } 54 return t.weights, remainder, depth + 1 55 } 56 57 func (t *trie) insert(path []rune, weights []uint16) { 58 if len(path) == 0 { 59 if t.weights != nil { 60 panic("duplicate contraction") 61 } 62 t.weights = weights 63 return 64 } 65 66 if t.children == nil { 67 t.children = make(map[rune]*trie) 68 } 69 ch := t.children[path[0]] 70 if ch == nil { 71 ch = &trie{} 72 t.children[path[0]] = ch 73 } 74 ch.insert(path[1:], weights) 75 } 76 77 type trieContractor struct { 78 tr trie 79 } 80 81 func (ctr *trieContractor) insert(c *Contraction) { 82 if len(c.Path) < 2 { 83 panic("contraction is too short") 84 } 85 if len(c.Weights)%3 != 0 { 86 panic(fmt.Sprintf("weights are not well-formed: %#v has len=%d", c.Weights, len(c.Weights))) 87 } 88 if c.Contextual && len(c.Path) != 2 { 89 panic("contextual contractions can only span 2 codepoints") 90 } 91 ctr.tr.insert(c.Path, c.Weights) 92 } 93 94 func (ctr *trieContractor) Find(cs charset.Charset, cp rune, remainder []byte) ([]uint16, []byte, int) { 95 if tr := ctr.tr.children[cp]; tr != nil { 96 return tr.walkCharset(cs, remainder, 0) 97 } 98 return nil, nil, 0 99 } 100 101 func (ctr *trieContractor) FindContextual(cp, prev rune) []uint16 { 102 if tr := ctr.tr.children[cp]; tr != nil { 103 if trc := tr.children[prev]; trc != nil { 104 return trc.weights 105 } 106 } 107 return nil 108 } 109 110 func NewTrieContractor(all []Contraction) Contractor { 111 if len(all) == 0 { 112 return nil 113 } 114 ctr := &trieContractor{} 115 for _, c := range all { 116 ctr.insert(&c) 117 } 118 return ctr 119 } 120 121 type Contraction struct { 122 Path []rune 123 Weights []uint16 124 Contextual bool 125 } 126 127 type Contractor interface { 128 Find(cs charset.Charset, cp rune, remainder []byte) ([]uint16, []byte, int) 129 FindContextual(cp1, cp0 rune) []uint16 130 }