vitess.io/vitess@v0.16.2/go/mysql/collations/internal/uca/iter_ja.go (about) 1 /* 2 Copyright 2021 The Vitess Authors. 3 4 Licensed under the Apache License, Version 2.0 (the "License"); 5 you may not use this file except in compliance with the License. 6 You may obtain a copy of the License at 7 8 http://www.apache.org/licenses/LICENSE-2.0 9 10 Unless required by applicable law or agreed to in writing, software 11 distributed under the License is distributed on an "AS IS" BASIS, 12 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 See the License for the specific language governing permissions and 14 limitations under the License. 15 */ 16 17 package uca 18 19 import "unicode/utf8" 20 21 type jaIterator900 struct { 22 iterator900 23 queuedWeight uint16 24 prevCodepoint rune 25 kanas map[rune]byte 26 } 27 28 func (it *jaIterator900) adjustJapaneseWeights(weight uint16) uint16 { 29 // based on the following weights dumped from MySQL: 30 // {0x1C47, 0x1FB5, 0x1C47, 0x1FB5}, // ?? this is a no-op 31 // {0x3D5A, 0x3D8B, 0x1FB6, 0x1FE7}, 32 // {0x1FB6, 0x3D59, 0x0000, 0x0000}, 33 // {0x3D8C, 0x54A3, 0x0000, 0x0000}, 34 if it.level == 0 && weight >= 0x1FB6 && weight <= 0x54A3 { 35 switch { 36 // FIXME: this weight adjustment seems like a no-op, but it comes from the MySQL dump 37 // case weight >= 0x1C47 && weight <= 0x1FB5: 38 // return weight 39 case weight >= 0x3D5A && weight <= 0x3D8B: 40 return weight - 0x3D5A + 0x1FB6 41 case weight >= 0x1FB6 && weight <= 0x3D59 || weight >= 0x3D8C && weight <= 0x54A3: 42 it.queuedWeight = weight 43 return 0xFB86 44 } 45 } 46 return weight 47 } 48 49 func (it *jaIterator900) cacheKana(cp rune) { 50 if unicodeIsHiragana(cp) { 51 if it.kanas == nil { 52 it.kanas = make(map[rune]byte) 53 } 54 it.kanas[cp] = 0x2 55 } else if unicodeIsKatakana(cp) { 56 if it.kanas == nil { 57 it.kanas = make(map[rune]byte) 58 } 59 it.kanas[cp] = 0x8 60 } 61 } 62 63 func (it *jaIterator900) Done() { 64 it.queuedWeight = 0x0 65 it.prevCodepoint = 0 66 it.kanas = nil 67 it.original = nil 68 it.input = nil 69 it.iterpool.Put(it) 70 } 71 72 func (it *jaIterator900) Next() (uint16, bool) { 73 for { 74 if it.queuedWeight != 0x0 { 75 var w uint16 76 w, it.queuedWeight = it.queuedWeight, 0x0 77 return w, true 78 } 79 if w, ok := it.codepoint.next(); ok { 80 return it.adjustJapaneseWeights(w), true 81 } 82 83 decodeNext: 84 cp, width := utf8.DecodeRune(it.input) 85 if cp == utf8.RuneError && width < 3 { 86 it.level++ 87 // if we're at level 3 (Kana-sensitive) and we haven't seen 88 // any Kanas in the previous levels, there's nothing to yield 89 if it.level == 3 && it.kanas == nil { 90 return 0, false 91 } 92 if it.level < it.maxLevel { 93 it.input = it.original 94 return 0, true 95 } 96 return 0, false 97 } 98 99 it.input = it.input[width:] 100 if weights := it.contract.FindContextual(cp, it.prevCodepoint); weights != nil { 101 // if this is a Kana-sensitive iterator and we're at level 3 (the Kana level), 102 // we cannot return the contraction's weight here, we need the actual weights in 103 // our Kana cache. 104 if it.level == 3 { 105 if w, ok := it.kanas[it.prevCodepoint]; ok { 106 it.prevCodepoint = 0 107 return uint16(w), true 108 } 109 } 110 it.codepoint.initContraction(weights, it.level) 111 it.prevCodepoint = 0 112 continue 113 } 114 it.prevCodepoint = cp 115 116 // if this is a Kana-sensitive iterator, we want to keep track of any 117 // kanas we've seen in a cache, so that when we reach level 3, we can 118 // quickly skip over codepoints that are not Kanas, as level 3 will 119 // only yield Kana-weights 120 if it.maxLevel == 4 { 121 switch it.level { 122 case 0: 123 if _, ok := it.kanas[cp]; !ok { 124 it.cacheKana(cp) 125 } 126 case 3: 127 if w, ok := it.kanas[cp]; ok { 128 return uint16(w), true 129 } 130 goto decodeNext 131 } 132 } 133 134 it.codepoint.init(&it.iterator900, cp) 135 } 136 }