vitess.io/vitess@v0.16.2/go/mysql/collations/env.go (about) 1 /* 2 Copyright 2021 The Vitess Authors. 3 4 Licensed under the Apache License, Version 2.0 (the "License"); 5 you may not use this file except in compliance with the License. 6 You may obtain a copy of the License at 7 8 http://www.apache.org/licenses/LICENSE-2.0 9 10 Unless required by applicable law or agreed to in writing, software 11 distributed under the License is distributed on an "AS IS" BASIS, 12 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 See the License for the specific language governing permissions and 14 limitations under the License. 15 */ 16 17 package collations 18 19 import ( 20 "fmt" 21 "strings" 22 "sync" 23 ) 24 25 type colldefaults struct { 26 Default Collation 27 Binary Collation 28 } 29 30 // Environment is a collation environment for a MySQL version, which contains 31 // a database of collations and defaults for that specific version. 32 type Environment struct { 33 version collver 34 byName map[string]Collation 35 byID map[ID]Collation 36 byCharset map[string]*colldefaults 37 unsupported map[string]ID 38 } 39 40 // LookupByName returns the collation with the given name. The collation 41 // is initialized if it's the first time being accessed. 42 func (env *Environment) LookupByName(name string) Collation { 43 if coll, ok := env.byName[name]; ok { 44 coll.Init() 45 return coll 46 } 47 return nil 48 } 49 50 // LookupByID returns the collation with the given numerical identifier. The collation 51 // is initialized if it's the first time being accessed. 52 func (env *Environment) LookupByID(id ID) Collation { 53 if coll, ok := env.byID[id]; ok { 54 coll.Init() 55 return coll 56 } 57 return nil 58 } 59 60 // LookupID returns the collation ID for the given name, and whether 61 // the collation is supported by this package. 62 func (env *Environment) LookupID(name string) (ID, bool) { 63 if supported, ok := env.byName[name]; ok { 64 return supported.ID(), true 65 } 66 if unsupported, ok := env.unsupported[name]; ok { 67 return unsupported, false 68 } 69 return Unknown, false 70 } 71 72 // DefaultCollationForCharset returns the default collation for a charset 73 func (env *Environment) DefaultCollationForCharset(charset string) Collation { 74 if defaults, ok := env.byCharset[charset]; ok { 75 if defaults.Default != nil { 76 defaults.Default.Init() 77 return defaults.Default 78 } 79 } 80 return nil 81 } 82 83 // BinaryCollationForCharset returns the default binary collation for a charset 84 func (env *Environment) BinaryCollationForCharset(charset string) Collation { 85 if defaults, ok := env.byCharset[charset]; ok { 86 if defaults.Binary != nil { 87 defaults.Binary.Init() 88 return defaults.Binary 89 } 90 } 91 return nil 92 } 93 94 // AllCollations returns a slice with all known collations in Vitess. This is an expensive call because 95 // it will initialize the internal state of all the collations before returning them. 96 // Used for testing/debugging. 97 func (env *Environment) AllCollations() (all []Collation) { 98 all = make([]Collation, 0, len(env.byID)) 99 for _, col := range env.byID { 100 col.Init() 101 all = append(all, col) 102 } 103 return 104 } 105 106 var globalEnvironments = make(map[collver]*Environment) 107 var globalEnvironmentsMu sync.Mutex 108 109 // fetchCacheEnvironment returns a cached Environment from a global cache. 110 // We can keep a single Environment per collver version because Environment 111 // objects are immutable once constructed. 112 func fetchCacheEnvironment(version collver) *Environment { 113 globalEnvironmentsMu.Lock() 114 defer globalEnvironmentsMu.Unlock() 115 116 var env *Environment 117 if env = globalEnvironments[version]; env == nil { 118 env = makeEnv(version) 119 globalEnvironments[version] = env 120 } 121 return env 122 } 123 124 // NewEnvironment creates a collation Environment for the given MySQL version string. 125 // The version string must be in the format that is sent by the server as the version packet 126 // when opening a new MySQL connection 127 func NewEnvironment(serverVersion string) *Environment { 128 // 5.7 is the oldest version we support today, so use that as 129 // the default. 130 // NOTE: this should be changed when we EOL MySQL 5.7 support 131 var version collver = collverMySQL57 132 serverVersion = strings.TrimSpace(strings.ToLower(serverVersion)) 133 switch { 134 case strings.HasSuffix(serverVersion, "-ripple"): 135 // the ripple binlog server can mask the actual version of mysqld; 136 // assume we have the highest 137 version = collverMySQL80 138 case strings.Contains(serverVersion, "mariadb"): 139 switch { 140 case strings.Contains(serverVersion, "10.0."): 141 version = collverMariaDB100 142 case strings.Contains(serverVersion, "10.1."): 143 version = collverMariaDB101 144 case strings.Contains(serverVersion, "10.2."): 145 version = collverMariaDB102 146 case strings.Contains(serverVersion, "10.3."): 147 version = collverMariaDB103 148 } 149 case strings.HasPrefix(serverVersion, "5.6."): 150 version = collverMySQL56 151 case strings.HasPrefix(serverVersion, "5.7."): 152 version = collverMySQL57 153 case strings.HasPrefix(serverVersion, "8.0."): 154 version = collverMySQL80 155 } 156 return fetchCacheEnvironment(version) 157 } 158 159 func makeEnv(version collver) *Environment { 160 env := &Environment{ 161 version: version, 162 byName: make(map[string]Collation), 163 byID: make(map[ID]Collation), 164 byCharset: make(map[string]*colldefaults), 165 unsupported: make(map[string]ID), 166 } 167 168 for collid, vi := range globalVersionInfo { 169 var ournames []string 170 for _, alias := range vi.alias { 171 if alias.mask&version != 0 { 172 ournames = append(ournames, alias.name) 173 } 174 } 175 if len(ournames) == 0 { 176 continue 177 } 178 179 collation, ok := globalAllCollations[collid] 180 if !ok { 181 for _, name := range ournames { 182 env.unsupported[name] = collid 183 } 184 continue 185 } 186 187 for _, name := range ournames { 188 env.byName[name] = collation 189 } 190 env.byID[collid] = collation 191 192 csname := collation.Charset().Name() 193 if _, ok := env.byCharset[csname]; !ok { 194 env.byCharset[csname] = &colldefaults{} 195 } 196 defaults := env.byCharset[csname] 197 if vi.isdefault&version != 0 { 198 defaults.Default = collation 199 } 200 if collation.IsBinary() { 201 if defaults.Binary != nil && defaults.Binary.ID() > collation.ID() { 202 // If there's more than one binary collation, the one with the 203 // highest ID (i.e. the newest one) takes precedence. This applies 204 // to utf8mb4_bin vs utf8mb4_0900_bin 205 continue 206 } 207 defaults.Binary = collation 208 } 209 } 210 211 for from, to := range version.charsetAliases() { 212 env.byCharset[from] = env.byCharset[to] 213 } 214 215 return env 216 } 217 218 // A few interesting character set values. 219 // See http://dev.mysql.com/doc/internals/en/character-set.html#packet-Protocol::CharacterSet 220 const ( 221 CollationUtf8ID = 33 222 CollationUtf8mb4ID = 255 223 CollationBinaryID = 63 224 ) 225 226 // CharsetAlias returns the internal charset name for the given charset. 227 // For now, this only maps `utf8` to `utf8mb3`; in future versions of MySQL, 228 // this mapping will change, so it's important to use this helper so that 229 // Vitess code has a consistent mapping for the active collations environment. 230 func (env *Environment) CharsetAlias(charset string) (alias string, ok bool) { 231 alias, ok = env.version.charsetAliases()[charset] 232 return 233 } 234 235 // CollationAlias returns the internal collaction name for the given charset. 236 // For now, this maps all `utf8` to `utf8mb3` collation names; in future versions of MySQL, 237 // this mapping will change, so it's important to use this helper so that 238 // Vitess code has a consistent mapping for the active collations environment. 239 func (env *Environment) CollationAlias(collation string) (string, bool) { 240 col := env.LookupByName(collation) 241 if col == nil { 242 return collation, false 243 } 244 allCols, ok := globalVersionInfo[col.ID()] 245 if !ok { 246 return collation, false 247 } 248 if len(allCols.alias) == 1 { 249 return collation, false 250 } 251 for _, alias := range allCols.alias { 252 for source, dest := range env.version.charsetAliases() { 253 if strings.HasPrefix(collation, fmt.Sprintf("%s_", source)) && 254 strings.HasPrefix(alias.name, fmt.Sprintf("%s_", dest)) { 255 return alias.name, true 256 } 257 } 258 } 259 return collation, false 260 } 261 262 // DefaultConnectionCharset is the default charset that Vitess will use when negotiating a 263 // charset in a MySQL connection handshake. Note that in this context, a 'charset' is equivalent 264 // to a Collation ID, with the exception that it can only fit in 1 byte. 265 // For MySQL 8.0+ environments, the default charset is `utf8mb4_0900_ai_ci`. 266 // For older MySQL environments, the default charset is `utf8mb4_general_ci`. 267 func (env *Environment) DefaultConnectionCharset() uint8 { 268 switch env.version { 269 case collverMySQL80: 270 return CollationUtf8mb4ID 271 default: 272 return 45 273 } 274 } 275 276 // ParseConnectionCharset parses the given charset name and returns its numerical 277 // identifier to be used in a MySQL connection handshake. The charset name can be: 278 // - the name of a character set, in which case the default collation ID for the 279 // character set is returned. 280 // - the name of a collation, in which case the ID for the collation is returned, 281 // UNLESS the collation itself has an ID greater than 255; such collations are not 282 // supported because they cannot be negotiated in a single byte in our connection 283 // handshake. 284 // - empty, in which case the default connection charset for this MySQL version 285 // is returned. 286 func (env *Environment) ParseConnectionCharset(csname string) (uint8, error) { 287 if csname == "" { 288 return env.DefaultConnectionCharset(), nil 289 } 290 291 var collid ID = 0 292 csname = strings.ToLower(csname) 293 if defaults, ok := env.byCharset[csname]; ok { 294 collid = defaults.Default.ID() 295 } else if coll, ok := env.byName[csname]; ok { 296 collid = coll.ID() 297 } 298 if collid == 0 || collid > 255 { 299 return 0, fmt.Errorf("unsupported connection charset: %q", csname) 300 } 301 return uint8(collid), nil 302 }