github.com/pingcap/tidb/parser@v0.0.0-20231013125129-93a834a6bf8d/charset/charset.go (about)

     1  // Copyright 2015 PingCAP, Inc.
     2  //
     3  // Licensed under the Apache License, Version 2.0 (the "License");
     4  // you may not use this file except in compliance with the License.
     5  // You may obtain a copy of the License at
     6  //
     7  //     http://www.apache.org/licenses/LICENSE-2.0
     8  //
     9  // Unless required by applicable law or agreed to in writing, software
    10  // distributed under the License is distributed on an "AS IS" BASIS,
    11  // See the License for the specific language governing permissions and
    12  // limitations under the License.
    13  
    14  package charset
    15  
    16  import (
    17  	"cmp"
    18  	"slices"
    19  	"strings"
    20  
    21  	"github.com/pingcap/errors"
    22  	"github.com/pingcap/log"
    23  	"github.com/pingcap/tidb/parser/mysql"
    24  	"github.com/pingcap/tidb/parser/terror"
    25  	"go.uber.org/zap"
    26  )
    27  
var (
	// ErrUnknownCollation is the DDL-class error built from mysql.ErrUnknownCollation.
	// GetCollationByName returns it when the requested name is not registered.
	ErrUnknownCollation = terror.ClassDDL.NewStd(mysql.ErrUnknownCollation)
	// ErrCollationCharsetMismatch is the DDL-class error built from
	// mysql.ErrCollationCharsetMismatch.
	ErrCollationCharsetMismatch = terror.ClassDDL.NewStd(mysql.ErrCollationCharsetMismatch)
)
    34  
// Charset describes one character set known to this package.
// Now we only support MySQL.
//
// Fields:
//   - Name: the charset name; the package's charset tables use it as map key.
//   - DefaultCollation: the collation name GetDefaultCollation reports for
//     this charset.
//   - Collations: the charset's collations keyed by collation name; entries
//     are inserted by AddCollation.
//   - Desc: a human-readable description such as "UTF-8 Unicode".
//   - Maxlen: presumably the maximum number of bytes per character (3 for
//     utf8, 4 for utf8mb4 in the tables of this file) — TODO confirm.
type Charset struct {
	Name             string
	DefaultCollation string
	Collations       map[string]*Collation
	Desc             string
	Maxlen           int
}
    44  
// Collation describes one collation known to this package.
// Now we only support MySQL.
//
// Fields:
//   - ID: the numeric collation ID; GetCollationByID and GetCharsetInfoByID
//     look collations up by it.
//   - CharsetName: the name of the charset the collation belongs to.
//   - Name: the collation name; GetCollationByName looks collations up by it.
//   - IsDefault: flag carried with each entry of the collations table;
//     presumably marks the charset's default collation — TODO confirm.
type Collation struct {
	ID          int
	CharsetName string
	Name        string
	IsDefault   bool
}
    53  
// collationsIDMap indexes every collation registered through AddCollation by its ID.
var collationsIDMap = make(map[int]*Collation)

// collationsNameMap indexes every collation registered through AddCollation by its Name.
var collationsNameMap = make(map[string]*Collation)

// supportedCollations holds the collations appended by AddSupportedCollation;
// GetSupportedCollations returns it as-is. Its initial capacity is the number
// of names in supportedCollationNames.
var supportedCollations = make([]*Collation, 0, len(supportedCollationNames))
    57  
    58  // CharacterSetInfos contains all the supported charsets.
    59  var CharacterSetInfos = map[string]*Charset{
    60  	CharsetUTF8:    {CharsetUTF8, CollationUTF8, make(map[string]*Collation), "UTF-8 Unicode", 3},
    61  	CharsetUTF8MB4: {CharsetUTF8MB4, CollationUTF8MB4, make(map[string]*Collation), "UTF-8 Unicode", 4},
    62  	CharsetASCII:   {CharsetASCII, CollationASCII, make(map[string]*Collation), "US ASCII", 1},
    63  	CharsetLatin1:  {CharsetLatin1, CollationLatin1, make(map[string]*Collation), "Latin1", 1},
    64  	CharsetBin:     {CharsetBin, CollationBin, make(map[string]*Collation), "binary", 1},
    65  	CharsetGBK:     {CharsetGBK, CollationGBKBin, make(map[string]*Collation), "Chinese Internal Code Specification", 2},
    66  }
    67  
// supportedCollationNames is the set of names of the supported collations.
// AddCollation forwards a collation to AddSupportedCollation only when its
// Name is a key of this table.
var supportedCollationNames = map[string]struct{}{
	CollationUTF8:    {},
	CollationUTF8MB4: {},
	CollationASCII:   {},
	CollationLatin1:  {},
	CollationBin:     {},
	CollationGBKBin:  {},
}
    77  
// TiFlashSupportedCharsets is the set of charset names that TiFlash supports.
// Note that, unlike CharacterSetInfos, it has no entry for gbk.
var TiFlashSupportedCharsets = map[string]struct{}{
	CharsetUTF8:    {},
	CharsetUTF8MB4: {},
	CharsetASCII:   {},
	CharsetLatin1:  {},
	CharsetBin:     {},
}
    86  
    87  // GetSupportedCharsets gets descriptions for all charsets supported so far.
    88  func GetSupportedCharsets() []*Charset {
    89  	charsets := make([]*Charset, 0, len(CharacterSetInfos))
    90  	for _, ch := range CharacterSetInfos {
    91  		charsets = append(charsets, ch)
    92  	}
    93  
    94  	// sort charset by name.
    95  	slices.SortFunc(charsets, func(i, j *Charset) int {
    96  		return cmp.Compare(i.Name, j.Name)
    97  	})
    98  	return charsets
    99  }
   100  
// GetSupportedCollations gets information for all collations supported so far.
// It returns the package-level supportedCollations slice itself, not a copy,
// so callers share its backing array with this package.
func GetSupportedCollations() []*Collation {
	return supportedCollations
}
   105  
   106  // ValidCharsetAndCollation checks the charset and the collation validity
   107  // and returns a boolean.
   108  func ValidCharsetAndCollation(cs string, co string) bool {
   109  	// We will use utf8 as a default charset.
   110  	if cs == "" || cs == CharsetUTF8MB3 {
   111  		cs = CharsetUTF8
   112  	}
   113  	chs, err := GetCharsetInfo(cs)
   114  	if err != nil {
   115  		return false
   116  	}
   117  
   118  	if co == "" {
   119  		return true
   120  	}
   121  	co = utf8Alias(strings.ToLower(co))
   122  	_, ok := chs.Collations[co]
   123  	return ok
   124  }
   125  
   126  // GetDefaultCollationLegacy is compatible with the charset support in old version parser.
   127  func GetDefaultCollationLegacy(charset string) (string, error) {
   128  	switch strings.ToLower(charset) {
   129  	case CharsetUTF8MB3:
   130  		return GetDefaultCollation(CharsetUTF8)
   131  	case CharsetUTF8, CharsetUTF8MB4, CharsetASCII, CharsetLatin1, CharsetBin:
   132  		return GetDefaultCollation(charset)
   133  	default:
   134  		return "", errors.Errorf("Unknown charset %s", charset)
   135  	}
   136  }
   137  
   138  // GetDefaultCollation returns the default collation for charset.
   139  func GetDefaultCollation(charset string) (string, error) {
   140  	cs, err := GetCharsetInfo(charset)
   141  	if err != nil {
   142  		return "", err
   143  	}
   144  	return cs.DefaultCollation, nil
   145  }
   146  
   147  // GetDefaultCharsetAndCollate returns the default charset and collation.
   148  func GetDefaultCharsetAndCollate() (defaultCharset string, defaultCollationName string) {
   149  	return mysql.DefaultCharset, mysql.DefaultCollationName
   150  }
   151  
   152  // GetCharsetInfo returns charset and collation for cs as name.
   153  func GetCharsetInfo(cs string) (*Charset, error) {
   154  	if strings.ToLower(cs) == CharsetUTF8MB3 {
   155  		cs = CharsetUTF8
   156  	}
   157  
   158  	if c, ok := CharacterSetInfos[strings.ToLower(cs)]; ok {
   159  		return c, nil
   160  	}
   161  
   162  	if c, ok := charsets[strings.ToLower(cs)]; ok {
   163  		return c, errors.Errorf("Unsupported charset %s", cs)
   164  	}
   165  
   166  	return nil, errors.Errorf("Unknown charset %s", cs)
   167  }
   168  
   169  // GetCharsetInfoByID returns charset and collation for id as cs_number.
   170  func GetCharsetInfoByID(coID int) (charsetStr string, collateStr string, err error) {
   171  	if coID == mysql.DefaultCollationID {
   172  		return mysql.DefaultCharset, mysql.DefaultCollationName, nil
   173  	}
   174  	if collation, ok := collationsIDMap[coID]; ok {
   175  		return collation.CharsetName, collation.Name, nil
   176  	}
   177  
   178  	log.Warn(
   179  		"unable to get collation name from collation ID, return default charset and collation instead",
   180  		zap.Int("ID", coID),
   181  		zap.Stack("stack"))
   182  	return mysql.DefaultCharset, mysql.DefaultCollationName, errors.Errorf("Unknown collation id %d", coID)
   183  }
   184  
   185  func utf8Alias(csname string) string {
   186  	switch csname {
   187  	case "utf8mb3_bin":
   188  		csname = "utf8_bin"
   189  	case "utf8mb3_unicode_ci":
   190  		csname = "utf8_unicode_ci"
   191  	case "utf8mb3_general_ci":
   192  		csname = "utf8_general_ci"
   193  	default:
   194  	}
   195  	return csname
   196  }
   197  
   198  // GetCollationByName returns the collation by name.
   199  func GetCollationByName(name string) (*Collation, error) {
   200  	csname := utf8Alias(strings.ToLower(name))
   201  	collation, ok := collationsNameMap[csname]
   202  	if !ok {
   203  		return nil, ErrUnknownCollation.GenWithStackByArgs(name)
   204  	}
   205  	return collation, nil
   206  }
   207  
   208  // GetCollationByID returns collations by given id.
   209  func GetCollationByID(id int) (*Collation, error) {
   210  	collation, ok := collationsIDMap[id]
   211  	if !ok {
   212  		return nil, errors.Errorf("Unknown collation id %d", id)
   213  	}
   214  
   215  	return collation, nil
   216  }
   217  
// Names of the collations this package refers to by constant. All but
// CollationGBKChineseCI are keys of supportedCollationNames.
const (
	// CollationBin is the default collation for CharsetBin.
	CollationBin = "binary"
	// CollationUTF8 is the default collation for CharsetUTF8.
	CollationUTF8 = "utf8_bin"
	// CollationUTF8MB4 is the default collation for CharsetUTF8MB4.
	CollationUTF8MB4 = "utf8mb4_bin"
	// CollationASCII is the default collation for CharsetASCII.
	CollationASCII = "ascii_bin"
	// CollationLatin1 is the default collation for CharsetLatin1.
	CollationLatin1 = "latin1_bin"
	// CollationGBKBin is the default collation for CharsetGBK when new collation is disabled.
	CollationGBKBin = "gbk_bin"
	// CollationGBKChineseCI is the default collation for CharsetGBK when new collation is enabled.
	CollationGBKChineseCI = "gbk_chinese_ci"
)
   234  
// Charset names. The first six are documented individually; the remaining
// ones are the other keys of the charsets table.
const (
	// CharsetASCII is a subset of UTF8.
	CharsetASCII = "ascii"
	// CharsetBin is used for marking binary charset.
	CharsetBin = "binary"
	// CharsetLatin1 is a single byte charset.
	CharsetLatin1 = "latin1"
	// CharsetUTF8 is the default charset for string types.
	CharsetUTF8 = "utf8"
	// CharsetUTF8MB3 is 3 bytes utf8, a MySQL legacy encoding. "utf8" and "utf8mb3" are aliases.
	CharsetUTF8MB3 = "utf8mb3"
	// CharsetUTF8MB4 represents 4 bytes utf8, which works the same way as utf8 in Go.
	CharsetUTF8MB4 = "utf8mb4"

	// Of the names below, only gbk has an entry in the initial
	// CharacterSetInfos literal; GetCharsetInfo reports the others as
	// unsupported unless they are added with AddCharset.

	//revive:disable:exported
	CharsetARMSCII8 = "armscii8"
	CharsetBig5     = "big5"
	CharsetCP1250   = "cp1250"
	CharsetCP1251   = "cp1251"
	CharsetCP1256   = "cp1256"
	CharsetCP1257   = "cp1257"
	CharsetCP850    = "cp850"
	CharsetCP852    = "cp852"
	CharsetCP866    = "cp866"
	CharsetCP932    = "cp932"
	CharsetDEC8     = "dec8"
	CharsetEUCJPMS  = "eucjpms"
	CharsetEUCKR    = "euckr"
	CharsetGB18030  = "gb18030"
	CharsetGB2312   = "gb2312"
	CharsetGBK      = "gbk"
	CharsetGEOSTD8  = "geostd8"
	CharsetGreek    = "greek"
	CharsetHebrew   = "hebrew"
	CharsetHP8      = "hp8"
	CharsetKEYBCS2  = "keybcs2"
	CharsetKOI8R    = "koi8r"
	CharsetKOI8U    = "koi8u"
	CharsetLatin2   = "latin2"
	CharsetLatin5   = "latin5"
	CharsetLatin7   = "latin7"
	CharsetMacCE    = "macce"
	CharsetMacRoman = "macroman"
	CharsetSJIS     = "sjis"
	CharsetSWE7     = "swe7"
	CharsetTIS620   = "tis620"
	CharsetUCS2     = "ucs2"
	CharsetUJIS     = "ujis"
	CharsetUTF16    = "utf16"
	CharsetUTF16LE  = "utf16le"
	CharsetUTF32    = "utf32"
	//revive:enable:exported
)
   287  
   288  var charsets = map[string]*Charset{
   289  	CharsetARMSCII8: {Name: CharsetARMSCII8, Maxlen: 1, DefaultCollation: "armscii8_general_ci", Desc: "ARMSCII-8 Armenian", Collations: make(map[string]*Collation)},
   290  	CharsetASCII:    {Name: CharsetASCII, Maxlen: 1, DefaultCollation: "ascii_general_ci", Desc: "US ASCII", Collations: make(map[string]*Collation)},
   291  	CharsetBig5:     {Name: CharsetBig5, Maxlen: 2, DefaultCollation: "big5_chinese_ci", Desc: "Big5 Traditional Chinese", Collations: make(map[string]*Collation)},
   292  	CharsetBin:      {Name: CharsetBin, Maxlen: 1, DefaultCollation: "binary", Desc: "Binary pseudo charset", Collations: make(map[string]*Collation)},
   293  	CharsetLatin1:   {Name: CharsetLatin1, Maxlen: 1, DefaultCollation: "cp1250_general_ci", Desc: "Windows Central European", Collations: make(map[string]*Collation)},
   294  	CharsetCP1250:   {Name: CharsetCP1250, Maxlen: 1, DefaultCollation: "cp1251_general_ci", Desc: "Windows Cyrillic", Collations: make(map[string]*Collation)},
   295  	CharsetCP1251:   {Name: CharsetCP1251, Maxlen: 1, DefaultCollation: "cp1256_general_ci", Desc: "Windows Arabic", Collations: make(map[string]*Collation)},
   296  	CharsetCP1256:   {Name: CharsetCP1256, Maxlen: 1, DefaultCollation: "cp1257_general_ci", Desc: "Windows Baltic", Collations: make(map[string]*Collation)},
   297  	CharsetCP1257:   {Name: CharsetCP1257, Maxlen: 1, DefaultCollation: "cp850_general_ci", Desc: "DOS West European", Collations: make(map[string]*Collation)},
   298  	CharsetCP850:    {Name: CharsetCP850, Maxlen: 1, DefaultCollation: "cp852_general_ci", Desc: "DOS Central European", Collations: make(map[string]*Collation)},
   299  	CharsetCP852:    {Name: CharsetCP852, Maxlen: 1, DefaultCollation: "cp866_general_ci", Desc: "DOS Russian", Collations: make(map[string]*Collation)},
   300  	CharsetCP866:    {Name: CharsetCP866, Maxlen: 1, DefaultCollation: "cp932_japanese_ci", Desc: "SJIS for Windows Japanese", Collations: make(map[string]*Collation)},
   301  	CharsetCP932:    {Name: CharsetCP932, Maxlen: 2, DefaultCollation: "dec8_swedish_ci", Desc: "DEC West European", Collations: make(map[string]*Collation)},
   302  	CharsetDEC8:     {Name: CharsetDEC8, Maxlen: 1, DefaultCollation: "eucjpms_japanese_ci", Desc: "UJIS for Windows Japanese", Collations: make(map[string]*Collation)},
   303  	CharsetEUCJPMS:  {Name: CharsetEUCJPMS, Maxlen: 3, DefaultCollation: "euckr_korean_ci", Desc: "EUC-KR Korean", Collations: make(map[string]*Collation)},
   304  	CharsetEUCKR:    {Name: CharsetEUCKR, Maxlen: 2, DefaultCollation: "gb18030_chinese_ci", Desc: "China National Standard GB18030", Collations: make(map[string]*Collation)},
   305  	CharsetGB18030:  {Name: CharsetGB18030, Maxlen: 4, DefaultCollation: "gb2312_chinese_ci", Desc: "GB2312 Simplified Chinese", Collations: make(map[string]*Collation)},
   306  	CharsetGB2312:   {Name: CharsetGB2312, Maxlen: 2, DefaultCollation: "gbk_chinese_ci", Desc: "GBK Simplified Chinese", Collations: make(map[string]*Collation)},
   307  	CharsetGBK:      {Name: CharsetGBK, Maxlen: 2, DefaultCollation: "geostd8_general_ci", Desc: "GEOSTD8 Georgian", Collations: make(map[string]*Collation)},
   308  	CharsetGEOSTD8:  {Name: CharsetGEOSTD8, Maxlen: 1, DefaultCollation: "greek_general_ci", Desc: "ISO 8859-7 Greek", Collations: make(map[string]*Collation)},
   309  	CharsetGreek:    {Name: CharsetGreek, Maxlen: 1, DefaultCollation: "hebrew_general_ci", Desc: "ISO 8859-8 Hebrew", Collations: make(map[string]*Collation)},
   310  	CharsetHebrew:   {Name: CharsetHebrew, Maxlen: 1, DefaultCollation: "hp8_english_ci", Desc: "HP West European", Collations: make(map[string]*Collation)},
   311  	CharsetHP8:      {Name: CharsetHP8, Maxlen: 1, DefaultCollation: "keybcs2_general_ci", Desc: "DOS Kamenicky Czech-Slovak", Collations: make(map[string]*Collation)},
   312  	CharsetKEYBCS2:  {Name: CharsetKEYBCS2, Maxlen: 1, DefaultCollation: "koi8r_general_ci", Desc: "KOI8-R Relcom Russian", Collations: make(map[string]*Collation)},
   313  	CharsetKOI8R:    {Name: CharsetKOI8R, Maxlen: 1, DefaultCollation: "koi8u_general_ci", Desc: "KOI8-U Ukrainian", Collations: make(map[string]*Collation)},
   314  	CharsetKOI8U:    {Name: CharsetKOI8U, Maxlen: 1, DefaultCollation: "latin1_swedish_ci", Desc: "cp1252 West European", Collations: make(map[string]*Collation)},
   315  	CharsetLatin2:   {Name: CharsetLatin2, Maxlen: 1, DefaultCollation: "latin2_general_ci", Desc: "ISO 8859-2 Central European", Collations: make(map[string]*Collation)},
   316  	CharsetLatin5:   {Name: CharsetLatin5, Maxlen: 1, DefaultCollation: "latin5_turkish_ci", Desc: "ISO 8859-9 Turkish", Collations: make(map[string]*Collation)},
   317  	CharsetLatin7:   {Name: CharsetLatin7, Maxlen: 1, DefaultCollation: "latin7_general_ci", Desc: "ISO 8859-13 Baltic", Collations: make(map[string]*Collation)},
   318  	CharsetMacCE:    {Name: CharsetMacCE, Maxlen: 1, DefaultCollation: "macce_general_ci", Desc: "Mac Central European", Collations: make(map[string]*Collation)},
   319  	CharsetMacRoman: {Name: CharsetMacRoman, Maxlen: 1, DefaultCollation: "macroman_general_ci", Desc: "Mac West European", Collations: make(map[string]*Collation)},
   320  	CharsetSJIS:     {Name: CharsetSJIS, Maxlen: 2, DefaultCollation: "sjis_japanese_ci", Desc: "Shift-JIS Japanese", Collations: make(map[string]*Collation)},
   321  	CharsetSWE7:     {Name: CharsetSWE7, Maxlen: 1, DefaultCollation: "swe7_swedish_ci", Desc: "7bit Swedish", Collations: make(map[string]*Collation)},
   322  	CharsetTIS620:   {Name: CharsetTIS620, Maxlen: 1, DefaultCollation: "tis620_thai_ci", Desc: "TIS620 Thai", Collations: make(map[string]*Collation)},
   323  	CharsetUCS2:     {Name: CharsetUCS2, Maxlen: 2, DefaultCollation: "ucs2_general_ci", Desc: "UCS-2 Unicode", Collations: make(map[string]*Collation)},
   324  	CharsetUJIS:     {Name: CharsetUJIS, Maxlen: 3, DefaultCollation: "ujis_japanese_ci", Desc: "EUC-JP Japanese", Collations: make(map[string]*Collation)},
   325  	CharsetUTF16:    {Name: CharsetUTF16, Maxlen: 4, DefaultCollation: "utf16_general_ci", Desc: "UTF-16 Unicode", Collations: make(map[string]*Collation)},
   326  	CharsetUTF16LE:  {Name: CharsetUTF16LE, Maxlen: 4, DefaultCollation: "utf16le_general_ci", Desc: "UTF-16LE Unicode", Collations: make(map[string]*Collation)},
   327  	CharsetUTF32:    {Name: CharsetUTF32, Maxlen: 4, DefaultCollation: "utf32_general_ci", Desc: "UTF-32 Unicode", Collations: make(map[string]*Collation)},
   328  	CharsetUTF8:     {Name: CharsetUTF8, Maxlen: 3, DefaultCollation: "utf8_general_ci", Desc: "UTF-8 Unicode", Collations: make(map[string]*Collation)},
   329  	CharsetUTF8MB4:  {Name: CharsetUTF8MB4, Maxlen: 4, DefaultCollation: "utf8mb4_0900_ai_ci", Desc: "UTF-8 Unicode", Collations: make(map[string]*Collation)},
   330  }
   331  
// collations is the built-in collation table. Each entry lists, positionally,
// the Collation fields ID, CharsetName, Name and IsDefault. init passes every
// entry to AddCollation, which indexes it by ID and by name.
var collations = []*Collation{
	{1, "big5", "big5_chinese_ci", true},
	{2, "latin2", "latin2_czech_cs", false},
	{3, "dec8", "dec8_swedish_ci", true},
	{4, "cp850", "cp850_general_ci", true},
	{5, "latin1", "latin1_german1_ci", false},
	{6, "hp8", "hp8_english_ci", true},
	{7, "koi8r", "koi8r_general_ci", true},
	{8, "latin1", "latin1_swedish_ci", false},
	{9, "latin2", "latin2_general_ci", true},
	{10, "swe7", "swe7_swedish_ci", true},
	{11, "ascii", "ascii_general_ci", false},
	{12, "ujis", "ujis_japanese_ci", true},
	{13, "sjis", "sjis_japanese_ci", true},
	{14, "cp1251", "cp1251_bulgarian_ci", false},
	{15, "latin1", "latin1_danish_ci", false},
	{16, "hebrew", "hebrew_general_ci", true},
	{18, "tis620", "tis620_thai_ci", true},
	{19, "euckr", "euckr_korean_ci", true},
	{20, "latin7", "latin7_estonian_cs", false},
	{21, "latin2", "latin2_hungarian_ci", false},
	{22, "koi8u", "koi8u_general_ci", true},
	{23, "cp1251", "cp1251_ukrainian_ci", false},
	{24, "gb2312", "gb2312_chinese_ci", true},
	{25, "greek", "greek_general_ci", true},
	{26, "cp1250", "cp1250_general_ci", true},
	{27, "latin2", "latin2_croatian_ci", false},
	{28, "gbk", "gbk_chinese_ci", false},
	{29, "cp1257", "cp1257_lithuanian_ci", false},
	{30, "latin5", "latin5_turkish_ci", true},
	{31, "latin1", "latin1_german2_ci", false},
	{32, "armscii8", "armscii8_general_ci", true},
	{33, "utf8", "utf8_general_ci", false},
	{34, "cp1250", "cp1250_czech_cs", false},
	{35, "ucs2", "ucs2_general_ci", true},
	{36, "cp866", "cp866_general_ci", true},
	{37, "keybcs2", "keybcs2_general_ci", true},
	{38, "macce", "macce_general_ci", true},
	{39, "macroman", "macroman_general_ci", true},
	{40, "cp852", "cp852_general_ci", true},
	{41, "latin7", "latin7_general_ci", true},
	{42, "latin7", "latin7_general_cs", false},
	{43, "macce", "macce_bin", false},
	{44, "cp1250", "cp1250_croatian_ci", false},
	{45, "utf8mb4", "utf8mb4_general_ci", false},
	{46, "utf8mb4", "utf8mb4_bin", true},
	{47, "latin1", "latin1_bin", true},
	{48, "latin1", "latin1_general_ci", false},
	{49, "latin1", "latin1_general_cs", false},
	{50, "cp1251", "cp1251_bin", false},
	{51, "cp1251", "cp1251_general_ci", true},
	{52, "cp1251", "cp1251_general_cs", false},
	{53, "macroman", "macroman_bin", false},
	{54, "utf16", "utf16_general_ci", true},
	{55, "utf16", "utf16_bin", false},
	{56, "utf16le", "utf16le_general_ci", true},
	{57, "cp1256", "cp1256_general_ci", true},
	{58, "cp1257", "cp1257_bin", false},
	{59, "cp1257", "cp1257_general_ci", true},
	{60, "utf32", "utf32_general_ci", true},
	{61, "utf32", "utf32_bin", false},
	{62, "utf16le", "utf16le_bin", false},
	{63, "binary", "binary", true},
	{64, "armscii8", "armscii8_bin", false},
	{65, "ascii", "ascii_bin", true},
	{66, "cp1250", "cp1250_bin", false},
	{67, "cp1256", "cp1256_bin", false},
	{68, "cp866", "cp866_bin", false},
	{69, "dec8", "dec8_bin", false},
	{70, "greek", "greek_bin", false},
	{71, "hebrew", "hebrew_bin", false},
	{72, "hp8", "hp8_bin", false},
	{73, "keybcs2", "keybcs2_bin", false},
	{74, "koi8r", "koi8r_bin", false},
	{75, "koi8u", "koi8u_bin", false},
	{76, "utf8", "utf8_tolower_ci", false},
	{77, "latin2", "latin2_bin", false},
	{78, "latin5", "latin5_bin", false},
	{79, "latin7", "latin7_bin", false},
	{80, "cp850", "cp850_bin", false},
	{81, "cp852", "cp852_bin", false},
	{82, "swe7", "swe7_bin", false},
	{83, "utf8", "utf8_bin", true},
	{84, "big5", "big5_bin", false},
	{85, "euckr", "euckr_bin", false},
	{86, "gb2312", "gb2312_bin", false},
	{87, "gbk", "gbk_bin", true},
	{88, "sjis", "sjis_bin", false},
	{89, "tis620", "tis620_bin", false},
	{90, "ucs2", "ucs2_bin", false},
	{91, "ujis", "ujis_bin", false},
	{92, "geostd8", "geostd8_general_ci", true},
	{93, "geostd8", "geostd8_bin", false},
	{94, "latin1", "latin1_spanish_ci", false},
	{95, "cp932", "cp932_japanese_ci", true},
	{96, "cp932", "cp932_bin", false},
	{97, "eucjpms", "eucjpms_japanese_ci", true},
	{98, "eucjpms", "eucjpms_bin", false},
	{99, "cp1250", "cp1250_polish_ci", false},
	{101, "utf16", "utf16_unicode_ci", false},
	{102, "utf16", "utf16_icelandic_ci", false},
	{103, "utf16", "utf16_latvian_ci", false},
	{104, "utf16", "utf16_romanian_ci", false},
	{105, "utf16", "utf16_slovenian_ci", false},
	{106, "utf16", "utf16_polish_ci", false},
	{107, "utf16", "utf16_estonian_ci", false},
	{108, "utf16", "utf16_spanish_ci", false},
	{109, "utf16", "utf16_swedish_ci", false},
	{110, "utf16", "utf16_turkish_ci", false},
	{111, "utf16", "utf16_czech_ci", false},
	{112, "utf16", "utf16_danish_ci", false},
	{113, "utf16", "utf16_lithuanian_ci", false},
	{114, "utf16", "utf16_slovak_ci", false},
	{115, "utf16", "utf16_spanish2_ci", false},
	{116, "utf16", "utf16_roman_ci", false},
	{117, "utf16", "utf16_persian_ci", false},
	{118, "utf16", "utf16_esperanto_ci", false},
	{119, "utf16", "utf16_hungarian_ci", false},
	{120, "utf16", "utf16_sinhala_ci", false},
	{121, "utf16", "utf16_german2_ci", false},
	{122, "utf16", "utf16_croatian_ci", false},
	{123, "utf16", "utf16_unicode_520_ci", false},
	{124, "utf16", "utf16_vietnamese_ci", false},
	{128, "ucs2", "ucs2_unicode_ci", false},
	{129, "ucs2", "ucs2_icelandic_ci", false},
	{130, "ucs2", "ucs2_latvian_ci", false},
	{131, "ucs2", "ucs2_romanian_ci", false},
	{132, "ucs2", "ucs2_slovenian_ci", false},
	{133, "ucs2", "ucs2_polish_ci", false},
	{134, "ucs2", "ucs2_estonian_ci", false},
	{135, "ucs2", "ucs2_spanish_ci", false},
	{136, "ucs2", "ucs2_swedish_ci", false},
	{137, "ucs2", "ucs2_turkish_ci", false},
	{138, "ucs2", "ucs2_czech_ci", false},
	{139, "ucs2", "ucs2_danish_ci", false},
	{140, "ucs2", "ucs2_lithuanian_ci", false},
	{141, "ucs2", "ucs2_slovak_ci", false},
	{142, "ucs2", "ucs2_spanish2_ci", false},
	{143, "ucs2", "ucs2_roman_ci", false},
	{144, "ucs2", "ucs2_persian_ci", false},
	{145, "ucs2", "ucs2_esperanto_ci", false},
	{146, "ucs2", "ucs2_hungarian_ci", false},
	{147, "ucs2", "ucs2_sinhala_ci", false},
	{148, "ucs2", "ucs2_german2_ci", false},
	{149, "ucs2", "ucs2_croatian_ci", false},
	{150, "ucs2", "ucs2_unicode_520_ci", false},
	{151, "ucs2", "ucs2_vietnamese_ci", false},
	{159, "ucs2", "ucs2_general_mysql500_ci", false},
	{160, "utf32", "utf32_unicode_ci", false},
	{161, "utf32", "utf32_icelandic_ci", false},
	{162, "utf32", "utf32_latvian_ci", false},
	{163, "utf32", "utf32_romanian_ci", false},
	{164, "utf32", "utf32_slovenian_ci", false},
	{165, "utf32", "utf32_polish_ci", false},
	{166, "utf32", "utf32_estonian_ci", false},
	{167, "utf32", "utf32_spanish_ci", false},
	{168, "utf32", "utf32_swedish_ci", false},
	{169, "utf32", "utf32_turkish_ci", false},
	{170, "utf32", "utf32_czech_ci", false},
	{171, "utf32", "utf32_danish_ci", false},
	{172, "utf32", "utf32_lithuanian_ci", false},
	{173, "utf32", "utf32_slovak_ci", false},
	{174, "utf32", "utf32_spanish2_ci", false},
	{175, "utf32", "utf32_roman_ci", false},
	{176, "utf32", "utf32_persian_ci", false},
	{177, "utf32", "utf32_esperanto_ci", false},
	{178, "utf32", "utf32_hungarian_ci", false},
	{179, "utf32", "utf32_sinhala_ci", false},
	{180, "utf32", "utf32_german2_ci", false},
	{181, "utf32", "utf32_croatian_ci", false},
	{182, "utf32", "utf32_unicode_520_ci", false},
	{183, "utf32", "utf32_vietnamese_ci", false},
	{192, "utf8", "utf8_unicode_ci", false},
	{193, "utf8", "utf8_icelandic_ci", false},
	{194, "utf8", "utf8_latvian_ci", false},
	{195, "utf8", "utf8_romanian_ci", false},
	{196, "utf8", "utf8_slovenian_ci", false},
	{197, "utf8", "utf8_polish_ci", false},
	{198, "utf8", "utf8_estonian_ci", false},
	{199, "utf8", "utf8_spanish_ci", false},
	{200, "utf8", "utf8_swedish_ci", false},
	{201, "utf8", "utf8_turkish_ci", false},
	{202, "utf8", "utf8_czech_ci", false},
	{203, "utf8", "utf8_danish_ci", false},
	{204, "utf8", "utf8_lithuanian_ci", false},
	{205, "utf8", "utf8_slovak_ci", false},
	{206, "utf8", "utf8_spanish2_ci", false},
	{207, "utf8", "utf8_roman_ci", false},
	{208, "utf8", "utf8_persian_ci", false},
	{209, "utf8", "utf8_esperanto_ci", false},
	{210, "utf8", "utf8_hungarian_ci", false},
	{211, "utf8", "utf8_sinhala_ci", false},
	{212, "utf8", "utf8_german2_ci", false},
	{213, "utf8", "utf8_croatian_ci", false},
	{214, "utf8", "utf8_unicode_520_ci", false},
	{215, "utf8", "utf8_vietnamese_ci", false},
	{223, "utf8", "utf8_general_mysql500_ci", false},
	{224, "utf8mb4", "utf8mb4_unicode_ci", false},
	{225, "utf8mb4", "utf8mb4_icelandic_ci", false},
	{226, "utf8mb4", "utf8mb4_latvian_ci", false},
	{227, "utf8mb4", "utf8mb4_romanian_ci", false},
	{228, "utf8mb4", "utf8mb4_slovenian_ci", false},
	{229, "utf8mb4", "utf8mb4_polish_ci", false},
	{230, "utf8mb4", "utf8mb4_estonian_ci", false},
	{231, "utf8mb4", "utf8mb4_spanish_ci", false},
	{232, "utf8mb4", "utf8mb4_swedish_ci", false},
	{233, "utf8mb4", "utf8mb4_turkish_ci", false},
	{234, "utf8mb4", "utf8mb4_czech_ci", false},
	{235, "utf8mb4", "utf8mb4_danish_ci", false},
	{236, "utf8mb4", "utf8mb4_lithuanian_ci", false},
	{237, "utf8mb4", "utf8mb4_slovak_ci", false},
	{238, "utf8mb4", "utf8mb4_spanish2_ci", false},
	{239, "utf8mb4", "utf8mb4_roman_ci", false},
	{240, "utf8mb4", "utf8mb4_persian_ci", false},
	{241, "utf8mb4", "utf8mb4_esperanto_ci", false},
	{242, "utf8mb4", "utf8mb4_hungarian_ci", false},
	{243, "utf8mb4", "utf8mb4_sinhala_ci", false},
	{244, "utf8mb4", "utf8mb4_german2_ci", false},
	{245, "utf8mb4", "utf8mb4_croatian_ci", false},
	{246, "utf8mb4", "utf8mb4_unicode_520_ci", false},
	{247, "utf8mb4", "utf8mb4_vietnamese_ci", false},
	{248, "gb18030", "gb18030_chinese_ci", false},
	{249, "gb18030", "gb18030_bin", true},
	{250, "gb18030", "gb18030_unicode_520_ci", false},
	{255, "utf8mb4", "utf8mb4_0900_ai_ci", false},
	{256, "utf8mb4", "utf8mb4_de_pb_0900_ai_ci", false},
	{257, "utf8mb4", "utf8mb4_is_0900_ai_ci", false},
	{258, "utf8mb4", "utf8mb4_lv_0900_ai_ci", false},
	{259, "utf8mb4", "utf8mb4_ro_0900_ai_ci", false},
	{260, "utf8mb4", "utf8mb4_sl_0900_ai_ci", false},
	{261, "utf8mb4", "utf8mb4_pl_0900_ai_ci", false},
	{262, "utf8mb4", "utf8mb4_et_0900_ai_ci", false},
	{263, "utf8mb4", "utf8mb4_es_0900_ai_ci", false},
	{264, "utf8mb4", "utf8mb4_sv_0900_ai_ci", false},
	{265, "utf8mb4", "utf8mb4_tr_0900_ai_ci", false},
	{266, "utf8mb4", "utf8mb4_cs_0900_ai_ci", false},
	{267, "utf8mb4", "utf8mb4_da_0900_ai_ci", false},
	{268, "utf8mb4", "utf8mb4_lt_0900_ai_ci", false},
	{269, "utf8mb4", "utf8mb4_sk_0900_ai_ci", false},
	{270, "utf8mb4", "utf8mb4_es_trad_0900_ai_ci", false},
	{271, "utf8mb4", "utf8mb4_la_0900_ai_ci", false},
	{273, "utf8mb4", "utf8mb4_eo_0900_ai_ci", false},
	{274, "utf8mb4", "utf8mb4_hu_0900_ai_ci", false},
	{275, "utf8mb4", "utf8mb4_hr_0900_ai_ci", false},
	{277, "utf8mb4", "utf8mb4_vi_0900_ai_ci", false},
	{278, "utf8mb4", "utf8mb4_0900_as_cs", false},
	{279, "utf8mb4", "utf8mb4_de_pb_0900_as_cs", false},
	{280, "utf8mb4", "utf8mb4_is_0900_as_cs", false},
	{281, "utf8mb4", "utf8mb4_lv_0900_as_cs", false},
	{282, "utf8mb4", "utf8mb4_ro_0900_as_cs", false},
	{283, "utf8mb4", "utf8mb4_sl_0900_as_cs", false},
	{284, "utf8mb4", "utf8mb4_pl_0900_as_cs", false},
	{285, "utf8mb4", "utf8mb4_et_0900_as_cs", false},
	{286, "utf8mb4", "utf8mb4_es_0900_as_cs", false},
	{287, "utf8mb4", "utf8mb4_sv_0900_as_cs", false},
	{288, "utf8mb4", "utf8mb4_tr_0900_as_cs", false},
	{289, "utf8mb4", "utf8mb4_cs_0900_as_cs", false},
	{290, "utf8mb4", "utf8mb4_da_0900_as_cs", false},
	{291, "utf8mb4", "utf8mb4_lt_0900_as_cs", false},
	{292, "utf8mb4", "utf8mb4_sk_0900_as_cs", false},
	{293, "utf8mb4", "utf8mb4_es_trad_0900_as_cs", false},
	{294, "utf8mb4", "utf8mb4_la_0900_as_cs", false},
	{296, "utf8mb4", "utf8mb4_eo_0900_as_cs", false},
	{297, "utf8mb4", "utf8mb4_hu_0900_as_cs", false},
	{298, "utf8mb4", "utf8mb4_hr_0900_as_cs", false},
	{300, "utf8mb4", "utf8mb4_vi_0900_as_cs", false},
	{303, "utf8mb4", "utf8mb4_ja_0900_as_cs", false},
	{304, "utf8mb4", "utf8mb4_ja_0900_as_cs_ks", false},
	{305, "utf8mb4", "utf8mb4_0900_as_ci", false},
	{306, "utf8mb4", "utf8mb4_ru_0900_ai_ci", false},
	{307, "utf8mb4", "utf8mb4_ru_0900_as_cs", false},
	{308, "utf8mb4", "utf8mb4_zh_0900_as_cs", false},
	{309, "utf8mb4", "utf8mb4_0900_bin", false},
	{2048, "utf8mb4", "utf8mb4_zh_pinyin_tidb_as_cs", false},
}
   607  
// AddCharset adds a new charset.
// Use only when adding a custom charset to the parser.
//
// The charset is stored in CharacterSetInfos under c.Name exactly as given,
// replacing any entry already stored under that name. Lookups through
// GetCharsetInfo lowercase the requested name, so c.Name should be lowercase.
func AddCharset(c *Charset) {
	CharacterSetInfos[c.Name] = c
}
   613  
   614  // RemoveCharset remove a charset.
   615  // Use only when remove a custom charset to the parser.
   616  func RemoveCharset(c string) {
   617  	delete(CharacterSetInfos, c)
   618  	for i := range supportedCollations {
   619  		if supportedCollations[i].Name == c {
   620  			supportedCollations = append(supportedCollations[:i], supportedCollations[i+1:]...)
   621  		}
   622  	}
   623  }
   624  
   625  // AddCollation adds a new collation.
   626  // Use only when adding a custom collation to the parser.
   627  func AddCollation(c *Collation) {
   628  	collationsIDMap[c.ID] = c
   629  	collationsNameMap[c.Name] = c
   630  
   631  	if _, ok := supportedCollationNames[c.Name]; ok {
   632  		AddSupportedCollation(c)
   633  	}
   634  
   635  	if charset, ok := CharacterSetInfos[c.CharsetName]; ok {
   636  		charset.Collations[c.Name] = c
   637  	}
   638  
   639  	if charset, ok := charsets[c.CharsetName]; ok {
   640  		charset.Collations[c.Name] = c
   641  	}
   642  }
   643  
// AddSupportedCollation adds a new collation into supportedCollations.
// Use only when adding a custom collation to the parser.
//
// The collation is appended unconditionally: no check is made for an entry
// that is already present, so adding the same collation twice lists it twice.
func AddSupportedCollation(c *Collation) {
	supportedCollations = append(supportedCollations, c)
}
   649  
   650  // init method always puts to the end of file.
   651  func init() {
   652  	for _, c := range collations {
   653  		AddCollation(c)
   654  	}
   655  }