github.com/whtcorpsinc/MilevaDB-Prod@v0.0.0-20211104133533-f57f4be3b597/soliton/collate/collate.go (about) 1 // Copyright 2020 WHTCORPS INC, Inc. 2 // 3 // Licensed under the Apache License, Version 2.0 (the "License"); 4 // you may not use this file except in compliance with the License. 5 // You may obtain a copy of the License at 6 // 7 // http://www.apache.org/licenses/LICENSE-2.0 8 // 9 // Unless required by applicable law or agreed to in writing, software 10 // distributed under the License is distributed on an "AS IS" BASIS, 11 // See the License for the specific language governing permissions and 12 // limitations under the License. 13 14 package defCauslate 15 16 import ( 17 "sort" 18 "sync/atomic" 19 20 "github.com/whtcorpsinc/errors" 21 "github.com/whtcorpsinc/BerolinaSQL/charset" 22 "github.com/whtcorpsinc/BerolinaSQL/allegrosql" 23 "github.com/whtcorpsinc/BerolinaSQL/terror" 24 "github.com/whtcorpsinc/milevadb/soliton/logutil" 25 "go.uber.org/zap" 26 ) 27 28 var ( 29 newDefCauslatorMap map[string]DefCauslator 30 newDefCauslatorIDMap map[int]DefCauslator 31 newDefCauslationEnabled int32 32 33 // binDefCauslatorInstance is a singleton used for all defCauslations when newDefCauslationEnabled is false. 34 binDefCauslatorInstance = &binDefCauslator{} 35 36 // ErrUnsupportedDefCauslation is returned when an unsupported defCauslation is specified. 37 ErrUnsupportedDefCauslation = terror.ClassDBS.New(allegrosql.ErrUnknownDefCauslation, "Unsupported defCauslation when new defCauslation is enabled: '%-.64s'") 38 // ErrIllegalMixDefCauslation is returned when illegal mix of defCauslations. 39 ErrIllegalMixDefCauslation = terror.ClassExpression.New(allegrosql.ErrCantAggregateNdefCauslations, allegrosql.MyALLEGROSQLErrName[allegrosql.ErrCantAggregateNdefCauslations]) 40 // ErrIllegalMix2DefCauslation is returned when illegal mix of 2 defCauslations. 41 ErrIllegalMix2DefCauslation = terror.ClassExpression.New(allegrosql.ErrCantAggregate2defCauslations, allegrosql.MyALLEGROSQLErrName[allegrosql.ErrCantAggregate2defCauslations]) 42 // ErrIllegalMix3DefCauslation is returned when illegal mix of 3 defCauslations. 43 ErrIllegalMix3DefCauslation = terror.ClassExpression.New(allegrosql.ErrCantAggregate3defCauslations, allegrosql.MyALLEGROSQLErrName[allegrosql.ErrCantAggregate3defCauslations]) 44 ) 45 46 // DefaultLen is set for causet if the string causet don't know its length. 47 const ( 48 DefaultLen = 0 49 ) 50 51 // DefCauslator provides functionality for comparing strings for a given 52 // defCauslation order. 53 type DefCauslator interface { 54 // Compare returns an integer comparing the two strings. The result will be 0 if a == b, -1 if a < b, and +1 if a > b. 55 Compare(a, b string) int 56 // Key returns the defCauslate key for str. If the defCauslation is padding, make sure the PadLen >= len(rune[]str) in opt. 57 Key(str string) []byte 58 // Pattern get a defCauslation-aware WildcardPattern. 59 Pattern() WildcardPattern 60 } 61 62 // WildcardPattern is the interface used for wildcard pattern match. 63 type WildcardPattern interface { 64 // Compile compiles the patternStr with specified escape character. 65 Compile(patternStr string, escape byte) 66 // DoMatch tries to match the str with compiled pattern, `Compile()` must be called before calling it. 67 DoMatch(str string) bool 68 } 69 70 // EnableNewDefCauslations enables the new defCauslation. 71 func EnableNewDefCauslations() { 72 SetNewDefCauslationEnabledForTest(true) 73 } 74 75 // SetNewDefCauslationEnabledForTest sets if the new defCauslation are enabled in test. 76 // Note: Be careful to use this function, if this functions is used in tests, make sure the tests are serial. 77 func SetNewDefCauslationEnabledForTest(flag bool) { 78 if flag { 79 atomic.StoreInt32(&newDefCauslationEnabled, 1) 80 return 81 } 82 atomic.StoreInt32(&newDefCauslationEnabled, 0) 83 } 84 85 // NewDefCauslationEnabled returns if the new defCauslations are enabled. 86 func NewDefCauslationEnabled() bool { 87 return atomic.LoadInt32(&newDefCauslationEnabled) == 1 88 } 89 90 // CompatibleDefCauslate checks whether the two defCauslate are the same. 91 func CompatibleDefCauslate(defCauslate1, defCauslate2 string) bool { 92 if (defCauslate1 == "utf8mb4_general_ci" || defCauslate1 == "utf8_general_ci") && (defCauslate2 == "utf8mb4_general_ci" || defCauslate2 == "utf8_general_ci") { 93 return true 94 } else if (defCauslate1 == "utf8mb4_bin" || defCauslate1 == "utf8_bin") && (defCauslate2 == "utf8mb4_bin" || defCauslate2 == "utf8_bin") { 95 return true 96 } else if (defCauslate1 == "utf8mb4_unicode_ci" || defCauslate1 == "utf8_unicode_ci") && (defCauslate2 == "utf8mb4_unicode_ci" || defCauslate2 == "utf8_unicode_ci") { 97 return true 98 } else { 99 return defCauslate1 == defCauslate2 100 } 101 } 102 103 // RewriteNewDefCauslationIDIfNeeded rewrites a defCauslation id if the new defCauslations are enabled. 104 // When new defCauslations are enabled, we turn the defCauslation id to negative so that other the 105 // components of the cluster(for example, EinsteinDB) is able to aware of it without any change to 106 // the protodefCaus definition. 107 // When new defCauslations are not enabled, defCauslation id remains the same. 108 func RewriteNewDefCauslationIDIfNeeded(id int32) int32 { 109 if atomic.LoadInt32(&newDefCauslationEnabled) == 1 { 110 if id < 0 { 111 logutil.BgLogger().Warn("Unexpected negative defCauslation ID for rewrite.", zap.Int32("ID", id)) 112 } else { 113 return -id 114 } 115 } 116 return id 117 } 118 119 // RestoreDefCauslationIDIfNeeded restores a defCauslation id if the new defCauslations are enabled. 120 func RestoreDefCauslationIDIfNeeded(id int32) int32 { 121 if atomic.LoadInt32(&newDefCauslationEnabled) == 1 { 122 if id > 0 { 123 logutil.BgLogger().Warn("Unexpected positive defCauslation ID for restore.", zap.Int32("ID", id)) 124 } else { 125 return -id 126 } 127 } 128 return id 129 } 130 131 // GetDefCauslator get the defCauslator according to defCauslate, it will return the binary defCauslator if the corresponding defCauslator doesn't exist. 132 func GetDefCauslator(defCauslate string) DefCauslator { 133 if atomic.LoadInt32(&newDefCauslationEnabled) == 1 { 134 ctor, ok := newDefCauslatorMap[defCauslate] 135 if !ok { 136 logutil.BgLogger().Warn( 137 "Unable to get defCauslator by name, use binDefCauslator instead.", 138 zap.String("name", defCauslate), 139 zap.Stack("stack")) 140 return newDefCauslatorMap["utf8mb4_bin"] 141 } 142 return ctor 143 } 144 return binDefCauslatorInstance 145 } 146 147 // GetDefCauslatorByID get the defCauslator according to id, it will return the binary defCauslator if the corresponding defCauslator doesn't exist. 148 func GetDefCauslatorByID(id int) DefCauslator { 149 if atomic.LoadInt32(&newDefCauslationEnabled) == 1 { 150 ctor, ok := newDefCauslatorIDMap[id] 151 if !ok { 152 logutil.BgLogger().Warn( 153 "Unable to get defCauslator by ID, use binDefCauslator instead.", 154 zap.Int("ID", id), 155 zap.Stack("stack")) 156 return newDefCauslatorMap["utf8mb4_bin"] 157 } 158 return ctor 159 } 160 return binDefCauslatorInstance 161 } 162 163 // DefCauslationID2Name return the defCauslation name by the given id. 164 // If the id is not found in the map, the default defCauslation is returned. 165 func DefCauslationID2Name(id int32) string { 166 name, ok := allegrosql.DefCauslations[uint8(id)] 167 if !ok { 168 // TODO(bb7133): fix repeating logs when the following code is uncommented. 169 //logutil.BgLogger().Warn( 170 // "Unable to get defCauslation name from ID, use default defCauslation instead.", 171 // zap.Int32("ID", id), 172 // zap.Stack("stack")) 173 return allegrosql.DefaultDefCauslationName 174 } 175 return name 176 } 177 178 // GetDefCauslationByName wraps charset.GetDefCauslationByName, it checks the defCauslation. 179 func GetDefCauslationByName(name string) (defCausl *charset.DefCauslation, err error) { 180 if defCausl, err = charset.GetDefCauslationByName(name); err != nil { 181 return nil, errors.Trace(err) 182 } 183 if atomic.LoadInt32(&newDefCauslationEnabled) == 1 { 184 if _, ok := newDefCauslatorIDMap[defCausl.ID]; !ok { 185 return nil, ErrUnsupportedDefCauslation.GenWithStackByArgs(name) 186 } 187 } 188 return 189 } 190 191 // GetSupportedDefCauslations gets information for all defCauslations supported so far. 192 func GetSupportedDefCauslations() []*charset.DefCauslation { 193 if atomic.LoadInt32(&newDefCauslationEnabled) == 1 { 194 newSupportedDefCauslations := make([]*charset.DefCauslation, 0, len(newDefCauslatorMap)) 195 for name := range newDefCauslatorMap { 196 if defCausl, err := charset.GetDefCauslationByName(name); err != nil { 197 // Should never happens. 198 terror.Log(err) 199 } else { 200 newSupportedDefCauslations = append(newSupportedDefCauslations, defCausl) 201 } 202 } 203 sort.Slice(newSupportedDefCauslations, func(i int, j int) bool { 204 return newSupportedDefCauslations[i].Name < newSupportedDefCauslations[j].Name 205 }) 206 return newSupportedDefCauslations 207 } 208 return charset.GetSupportedDefCauslations() 209 } 210 211 func truncateTailingSpace(str string) string { 212 byteLen := len(str) 213 i := byteLen - 1 214 for ; i >= 0; i-- { 215 if str[i] != ' ' { 216 break 217 } 218 } 219 str = str[:i+1] 220 return str 221 } 222 223 // IsCIDefCauslation returns if the defCauslation is case-sensitive 224 func IsCIDefCauslation(defCauslate string) bool { 225 return defCauslate == "utf8_general_ci" || defCauslate == "utf8mb4_general_ci" || 226 defCauslate == "utf8_unicode_ci" || defCauslate == "utf8mb4_unicode_ci" 227 } 228 229 func init() { 230 newDefCauslatorMap = make(map[string]DefCauslator) 231 newDefCauslatorIDMap = make(map[int]DefCauslator) 232 233 newDefCauslatorMap["binary"] = &binDefCauslator{} 234 newDefCauslatorIDMap[int(allegrosql.DefCauslationNames["binary"])] = &binDefCauslator{} 235 newDefCauslatorMap["ascii_bin"] = &binPaddingDefCauslator{} 236 newDefCauslatorIDMap[int(allegrosql.DefCauslationNames["ascii_bin"])] = &binPaddingDefCauslator{} 237 newDefCauslatorMap["latin1_bin"] = &binPaddingDefCauslator{} 238 newDefCauslatorIDMap[int(allegrosql.DefCauslationNames["latin1_bin"])] = &binPaddingDefCauslator{} 239 newDefCauslatorMap["utf8mb4_bin"] = &binPaddingDefCauslator{} 240 newDefCauslatorIDMap[int(allegrosql.DefCauslationNames["utf8mb4_bin"])] = &binPaddingDefCauslator{} 241 newDefCauslatorMap["utf8_bin"] = &binPaddingDefCauslator{} 242 newDefCauslatorIDMap[int(allegrosql.DefCauslationNames["utf8_bin"])] = &binPaddingDefCauslator{} 243 newDefCauslatorMap["utf8mb4_general_ci"] = &generalCIDefCauslator{} 244 newDefCauslatorIDMap[int(allegrosql.DefCauslationNames["utf8mb4_general_ci"])] = &generalCIDefCauslator{} 245 newDefCauslatorMap["utf8_general_ci"] = &generalCIDefCauslator{} 246 newDefCauslatorIDMap[int(allegrosql.DefCauslationNames["utf8_general_ci"])] = &generalCIDefCauslator{} 247 newDefCauslatorMap["utf8mb4_unicode_ci"] = &unicodeCIDefCauslator{} 248 newDefCauslatorIDMap[int(allegrosql.DefCauslationNames["utf8mb4_unicode_ci"])] = &unicodeCIDefCauslator{} 249 newDefCauslatorMap["utf8_unicode_ci"] = &unicodeCIDefCauslator{} 250 newDefCauslatorIDMap[int(allegrosql.DefCauslationNames["utf8_unicode_ci"])] = &unicodeCIDefCauslator{} 251 }