vitess.io/vitess@v0.16.2/go/mysql/collations/env.go (about)

     1  /*
     2  Copyright 2021 The Vitess Authors.
     3  
     4  Licensed under the Apache License, Version 2.0 (the "License");
     5  you may not use this file except in compliance with the License.
     6  You may obtain a copy of the License at
     7  
     8      http://www.apache.org/licenses/LICENSE-2.0
     9  
    10  Unless required by applicable law or agreed to in writing, software
    11  distributed under the License is distributed on an "AS IS" BASIS,
    12  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    13  See the License for the specific language governing permissions and
    14  limitations under the License.
    15  */
    16  
    17  package collations
    18  
    19  import (
    20  	"fmt"
    21  	"strings"
    22  	"sync"
    23  )
    24  
    25  type colldefaults struct {
    26  	Default Collation
    27  	Binary  Collation
    28  }
    29  
    30  // Environment is a collation environment for a MySQL version, which contains
    31  // a database of collations and defaults for that specific version.
    32  type Environment struct {
    33  	version     collver
    34  	byName      map[string]Collation
    35  	byID        map[ID]Collation
    36  	byCharset   map[string]*colldefaults
    37  	unsupported map[string]ID
    38  }
    39  
    40  // LookupByName returns the collation with the given name. The collation
    41  // is initialized if it's the first time being accessed.
    42  func (env *Environment) LookupByName(name string) Collation {
    43  	if coll, ok := env.byName[name]; ok {
    44  		coll.Init()
    45  		return coll
    46  	}
    47  	return nil
    48  }
    49  
    50  // LookupByID returns the collation with the given numerical identifier. The collation
    51  // is initialized if it's the first time being accessed.
    52  func (env *Environment) LookupByID(id ID) Collation {
    53  	if coll, ok := env.byID[id]; ok {
    54  		coll.Init()
    55  		return coll
    56  	}
    57  	return nil
    58  }
    59  
    60  // LookupID returns the collation ID for the given name, and whether
    61  // the collation is supported by this package.
    62  func (env *Environment) LookupID(name string) (ID, bool) {
    63  	if supported, ok := env.byName[name]; ok {
    64  		return supported.ID(), true
    65  	}
    66  	if unsupported, ok := env.unsupported[name]; ok {
    67  		return unsupported, false
    68  	}
    69  	return Unknown, false
    70  }
    71  
    72  // DefaultCollationForCharset returns the default collation for a charset
    73  func (env *Environment) DefaultCollationForCharset(charset string) Collation {
    74  	if defaults, ok := env.byCharset[charset]; ok {
    75  		if defaults.Default != nil {
    76  			defaults.Default.Init()
    77  			return defaults.Default
    78  		}
    79  	}
    80  	return nil
    81  }
    82  
    83  // BinaryCollationForCharset returns the default binary collation for a charset
    84  func (env *Environment) BinaryCollationForCharset(charset string) Collation {
    85  	if defaults, ok := env.byCharset[charset]; ok {
    86  		if defaults.Binary != nil {
    87  			defaults.Binary.Init()
    88  			return defaults.Binary
    89  		}
    90  	}
    91  	return nil
    92  }
    93  
    94  // AllCollations returns a slice with all known collations in Vitess. This is an expensive call because
    95  // it will initialize the internal state of all the collations before returning them.
    96  // Used for testing/debugging.
    97  func (env *Environment) AllCollations() (all []Collation) {
    98  	all = make([]Collation, 0, len(env.byID))
    99  	for _, col := range env.byID {
   100  		col.Init()
   101  		all = append(all, col)
   102  	}
   103  	return
   104  }
   105  
   106  var globalEnvironments = make(map[collver]*Environment)
   107  var globalEnvironmentsMu sync.Mutex
   108  
   109  // fetchCacheEnvironment returns a cached Environment from a global cache.
   110  // We can keep a single Environment per collver version because Environment
   111  // objects are immutable once constructed.
   112  func fetchCacheEnvironment(version collver) *Environment {
   113  	globalEnvironmentsMu.Lock()
   114  	defer globalEnvironmentsMu.Unlock()
   115  
   116  	var env *Environment
   117  	if env = globalEnvironments[version]; env == nil {
   118  		env = makeEnv(version)
   119  		globalEnvironments[version] = env
   120  	}
   121  	return env
   122  }
   123  
   124  // NewEnvironment creates a collation Environment for the given MySQL version string.
   125  // The version string must be in the format that is sent by the server as the version packet
   126  // when opening a new MySQL connection
   127  func NewEnvironment(serverVersion string) *Environment {
   128  	// 5.7 is the oldest version we support today, so use that as
   129  	// the default.
   130  	// NOTE: this should be changed when we EOL MySQL 5.7 support
   131  	var version collver = collverMySQL57
   132  	serverVersion = strings.TrimSpace(strings.ToLower(serverVersion))
   133  	switch {
   134  	case strings.HasSuffix(serverVersion, "-ripple"):
   135  		// the ripple binlog server can mask the actual version of mysqld;
   136  		// assume we have the highest
   137  		version = collverMySQL80
   138  	case strings.Contains(serverVersion, "mariadb"):
   139  		switch {
   140  		case strings.Contains(serverVersion, "10.0."):
   141  			version = collverMariaDB100
   142  		case strings.Contains(serverVersion, "10.1."):
   143  			version = collverMariaDB101
   144  		case strings.Contains(serverVersion, "10.2."):
   145  			version = collverMariaDB102
   146  		case strings.Contains(serverVersion, "10.3."):
   147  			version = collverMariaDB103
   148  		}
   149  	case strings.HasPrefix(serverVersion, "5.6."):
   150  		version = collverMySQL56
   151  	case strings.HasPrefix(serverVersion, "5.7."):
   152  		version = collverMySQL57
   153  	case strings.HasPrefix(serverVersion, "8.0."):
   154  		version = collverMySQL80
   155  	}
   156  	return fetchCacheEnvironment(version)
   157  }
   158  
   159  func makeEnv(version collver) *Environment {
   160  	env := &Environment{
   161  		version:     version,
   162  		byName:      make(map[string]Collation),
   163  		byID:        make(map[ID]Collation),
   164  		byCharset:   make(map[string]*colldefaults),
   165  		unsupported: make(map[string]ID),
   166  	}
   167  
   168  	for collid, vi := range globalVersionInfo {
   169  		var ournames []string
   170  		for _, alias := range vi.alias {
   171  			if alias.mask&version != 0 {
   172  				ournames = append(ournames, alias.name)
   173  			}
   174  		}
   175  		if len(ournames) == 0 {
   176  			continue
   177  		}
   178  
   179  		collation, ok := globalAllCollations[collid]
   180  		if !ok {
   181  			for _, name := range ournames {
   182  				env.unsupported[name] = collid
   183  			}
   184  			continue
   185  		}
   186  
   187  		for _, name := range ournames {
   188  			env.byName[name] = collation
   189  		}
   190  		env.byID[collid] = collation
   191  
   192  		csname := collation.Charset().Name()
   193  		if _, ok := env.byCharset[csname]; !ok {
   194  			env.byCharset[csname] = &colldefaults{}
   195  		}
   196  		defaults := env.byCharset[csname]
   197  		if vi.isdefault&version != 0 {
   198  			defaults.Default = collation
   199  		}
   200  		if collation.IsBinary() {
   201  			if defaults.Binary != nil && defaults.Binary.ID() > collation.ID() {
   202  				// If there's more than one binary collation, the one with the
   203  				// highest ID (i.e. the newest one) takes precedence. This applies
   204  				// to utf8mb4_bin vs utf8mb4_0900_bin
   205  				continue
   206  			}
   207  			defaults.Binary = collation
   208  		}
   209  	}
   210  
   211  	for from, to := range version.charsetAliases() {
   212  		env.byCharset[from] = env.byCharset[to]
   213  	}
   214  
   215  	return env
   216  }
   217  
   218  // A few interesting character set values.
   219  // See http://dev.mysql.com/doc/internals/en/character-set.html#packet-Protocol::CharacterSet
   220  const (
   221  	CollationUtf8ID    = 33
   222  	CollationUtf8mb4ID = 255
   223  	CollationBinaryID  = 63
   224  )
   225  
   226  // CharsetAlias returns the internal charset name for the given charset.
   227  // For now, this only maps `utf8` to `utf8mb3`; in future versions of MySQL,
   228  // this mapping will change, so it's important to use this helper so that
   229  // Vitess code has a consistent mapping for the active collations environment.
   230  func (env *Environment) CharsetAlias(charset string) (alias string, ok bool) {
   231  	alias, ok = env.version.charsetAliases()[charset]
   232  	return
   233  }
   234  
   235  // CollationAlias returns the internal collaction name for the given charset.
   236  // For now, this maps all `utf8` to `utf8mb3` collation names; in future versions of MySQL,
   237  // this mapping will change, so it's important to use this helper so that
   238  // Vitess code has a consistent mapping for the active collations environment.
   239  func (env *Environment) CollationAlias(collation string) (string, bool) {
   240  	col := env.LookupByName(collation)
   241  	if col == nil {
   242  		return collation, false
   243  	}
   244  	allCols, ok := globalVersionInfo[col.ID()]
   245  	if !ok {
   246  		return collation, false
   247  	}
   248  	if len(allCols.alias) == 1 {
   249  		return collation, false
   250  	}
   251  	for _, alias := range allCols.alias {
   252  		for source, dest := range env.version.charsetAliases() {
   253  			if strings.HasPrefix(collation, fmt.Sprintf("%s_", source)) &&
   254  				strings.HasPrefix(alias.name, fmt.Sprintf("%s_", dest)) {
   255  				return alias.name, true
   256  			}
   257  		}
   258  	}
   259  	return collation, false
   260  }
   261  
   262  // DefaultConnectionCharset is the default charset that Vitess will use when negotiating a
   263  // charset in a MySQL connection handshake. Note that in this context, a 'charset' is equivalent
   264  // to a Collation ID, with the exception that it can only fit in 1 byte.
   265  // For MySQL 8.0+ environments, the default charset is `utf8mb4_0900_ai_ci`.
   266  // For older MySQL environments, the default charset is `utf8mb4_general_ci`.
   267  func (env *Environment) DefaultConnectionCharset() uint8 {
   268  	switch env.version {
   269  	case collverMySQL80:
   270  		return CollationUtf8mb4ID
   271  	default:
   272  		return 45
   273  	}
   274  }
   275  
   276  // ParseConnectionCharset parses the given charset name and returns its numerical
   277  // identifier to be used in a MySQL connection handshake. The charset name can be:
   278  // - the name of a character set, in which case the default collation ID for the
   279  // character set is returned.
   280  // - the name of a collation, in which case the ID for the collation is returned,
   281  // UNLESS the collation itself has an ID greater than 255; such collations are not
   282  // supported because they cannot be negotiated in a single byte in our connection
   283  // handshake.
   284  // - empty, in which case the default connection charset for this MySQL version
   285  // is returned.
   286  func (env *Environment) ParseConnectionCharset(csname string) (uint8, error) {
   287  	if csname == "" {
   288  		return env.DefaultConnectionCharset(), nil
   289  	}
   290  
   291  	var collid ID = 0
   292  	csname = strings.ToLower(csname)
   293  	if defaults, ok := env.byCharset[csname]; ok {
   294  		collid = defaults.Default.ID()
   295  	} else if coll, ok := env.byName[csname]; ok {
   296  		collid = coll.ID()
   297  	}
   298  	if collid == 0 || collid > 255 {
   299  		return 0, fmt.Errorf("unsupported connection charset: %q", csname)
   300  	}
   301  	return uint8(collid), nil
   302  }