github.com/pingcap/tidb/parser@v0.0.0-20231013125129-93a834a6bf8d/charset/charset.go (about) 1 // Copyright 2015 PingCAP, Inc. 2 // 3 // Licensed under the Apache License, Version 2.0 (the "License"); 4 // you may not use this file except in compliance with the License. 5 // You may obtain a copy of the License at 6 // 7 // http://www.apache.org/licenses/LICENSE-2.0 8 // 9 // Unless required by applicable law or agreed to in writing, software 10 // distributed under the License is distributed on an "AS IS" BASIS, 11 // See the License for the specific language governing permissions and 12 // limitations under the License. 13 14 package charset 15 16 import ( 17 "cmp" 18 "slices" 19 "strings" 20 21 "github.com/pingcap/errors" 22 "github.com/pingcap/log" 23 "github.com/pingcap/tidb/parser/mysql" 24 "github.com/pingcap/tidb/parser/terror" 25 "go.uber.org/zap" 26 ) 27 28 var ( 29 // ErrUnknownCollation is unknown collation. 30 ErrUnknownCollation = terror.ClassDDL.NewStd(mysql.ErrUnknownCollation) 31 // ErrCollationCharsetMismatch is collation charset mismatch. 32 ErrCollationCharsetMismatch = terror.ClassDDL.NewStd(mysql.ErrCollationCharsetMismatch) 33 ) 34 35 // Charset is a charset. 36 // Now we only support MySQL. 37 type Charset struct { 38 Name string 39 DefaultCollation string 40 Collations map[string]*Collation 41 Desc string 42 Maxlen int 43 } 44 45 // Collation is a collation. 46 // Now we only support MySQL. 47 type Collation struct { 48 ID int 49 CharsetName string 50 Name string 51 IsDefault bool 52 } 53 54 var collationsIDMap = make(map[int]*Collation) 55 var collationsNameMap = make(map[string]*Collation) 56 var supportedCollations = make([]*Collation, 0, len(supportedCollationNames)) 57 58 // CharacterSetInfos contains all the supported charsets. 59 var CharacterSetInfos = map[string]*Charset{ 60 CharsetUTF8: {CharsetUTF8, CollationUTF8, make(map[string]*Collation), "UTF-8 Unicode", 3}, 61 CharsetUTF8MB4: {CharsetUTF8MB4, CollationUTF8MB4, make(map[string]*Collation), "UTF-8 Unicode", 4}, 62 CharsetASCII: {CharsetASCII, CollationASCII, make(map[string]*Collation), "US ASCII", 1}, 63 CharsetLatin1: {CharsetLatin1, CollationLatin1, make(map[string]*Collation), "Latin1", 1}, 64 CharsetBin: {CharsetBin, CollationBin, make(map[string]*Collation), "binary", 1}, 65 CharsetGBK: {CharsetGBK, CollationGBKBin, make(map[string]*Collation), "Chinese Internal Code Specification", 2}, 66 } 67 68 // All the names supported collations should be in the following table. 69 var supportedCollationNames = map[string]struct{}{ 70 CollationUTF8: {}, 71 CollationUTF8MB4: {}, 72 CollationASCII: {}, 73 CollationLatin1: {}, 74 CollationBin: {}, 75 CollationGBKBin: {}, 76 } 77 78 // TiFlashSupportedCharsets is a map which contains TiFlash supports charsets. 79 var TiFlashSupportedCharsets = map[string]struct{}{ 80 CharsetUTF8: {}, 81 CharsetUTF8MB4: {}, 82 CharsetASCII: {}, 83 CharsetLatin1: {}, 84 CharsetBin: {}, 85 } 86 87 // GetSupportedCharsets gets descriptions for all charsets supported so far. 88 func GetSupportedCharsets() []*Charset { 89 charsets := make([]*Charset, 0, len(CharacterSetInfos)) 90 for _, ch := range CharacterSetInfos { 91 charsets = append(charsets, ch) 92 } 93 94 // sort charset by name. 95 slices.SortFunc(charsets, func(i, j *Charset) int { 96 return cmp.Compare(i.Name, j.Name) 97 }) 98 return charsets 99 } 100 101 // GetSupportedCollations gets information for all collations supported so far. 102 func GetSupportedCollations() []*Collation { 103 return supportedCollations 104 } 105 106 // ValidCharsetAndCollation checks the charset and the collation validity 107 // and returns a boolean. 108 func ValidCharsetAndCollation(cs string, co string) bool { 109 // We will use utf8 as a default charset. 110 if cs == "" || cs == CharsetUTF8MB3 { 111 cs = CharsetUTF8 112 } 113 chs, err := GetCharsetInfo(cs) 114 if err != nil { 115 return false 116 } 117 118 if co == "" { 119 return true 120 } 121 co = utf8Alias(strings.ToLower(co)) 122 _, ok := chs.Collations[co] 123 return ok 124 } 125 126 // GetDefaultCollationLegacy is compatible with the charset support in old version parser. 127 func GetDefaultCollationLegacy(charset string) (string, error) { 128 switch strings.ToLower(charset) { 129 case CharsetUTF8MB3: 130 return GetDefaultCollation(CharsetUTF8) 131 case CharsetUTF8, CharsetUTF8MB4, CharsetASCII, CharsetLatin1, CharsetBin: 132 return GetDefaultCollation(charset) 133 default: 134 return "", errors.Errorf("Unknown charset %s", charset) 135 } 136 } 137 138 // GetDefaultCollation returns the default collation for charset. 139 func GetDefaultCollation(charset string) (string, error) { 140 cs, err := GetCharsetInfo(charset) 141 if err != nil { 142 return "", err 143 } 144 return cs.DefaultCollation, nil 145 } 146 147 // GetDefaultCharsetAndCollate returns the default charset and collation. 148 func GetDefaultCharsetAndCollate() (defaultCharset string, defaultCollationName string) { 149 return mysql.DefaultCharset, mysql.DefaultCollationName 150 } 151 152 // GetCharsetInfo returns charset and collation for cs as name. 153 func GetCharsetInfo(cs string) (*Charset, error) { 154 if strings.ToLower(cs) == CharsetUTF8MB3 { 155 cs = CharsetUTF8 156 } 157 158 if c, ok := CharacterSetInfos[strings.ToLower(cs)]; ok { 159 return c, nil 160 } 161 162 if c, ok := charsets[strings.ToLower(cs)]; ok { 163 return c, errors.Errorf("Unsupported charset %s", cs) 164 } 165 166 return nil, errors.Errorf("Unknown charset %s", cs) 167 } 168 169 // GetCharsetInfoByID returns charset and collation for id as cs_number. 170 func GetCharsetInfoByID(coID int) (charsetStr string, collateStr string, err error) { 171 if coID == mysql.DefaultCollationID { 172 return mysql.DefaultCharset, mysql.DefaultCollationName, nil 173 } 174 if collation, ok := collationsIDMap[coID]; ok { 175 return collation.CharsetName, collation.Name, nil 176 } 177 178 log.Warn( 179 "unable to get collation name from collation ID, return default charset and collation instead", 180 zap.Int("ID", coID), 181 zap.Stack("stack")) 182 return mysql.DefaultCharset, mysql.DefaultCollationName, errors.Errorf("Unknown collation id %d", coID) 183 } 184 185 func utf8Alias(csname string) string { 186 switch csname { 187 case "utf8mb3_bin": 188 csname = "utf8_bin" 189 case "utf8mb3_unicode_ci": 190 csname = "utf8_unicode_ci" 191 case "utf8mb3_general_ci": 192 csname = "utf8_general_ci" 193 default: 194 } 195 return csname 196 } 197 198 // GetCollationByName returns the collation by name. 199 func GetCollationByName(name string) (*Collation, error) { 200 csname := utf8Alias(strings.ToLower(name)) 201 collation, ok := collationsNameMap[csname] 202 if !ok { 203 return nil, ErrUnknownCollation.GenWithStackByArgs(name) 204 } 205 return collation, nil 206 } 207 208 // GetCollationByID returns collations by given id. 209 func GetCollationByID(id int) (*Collation, error) { 210 collation, ok := collationsIDMap[id] 211 if !ok { 212 return nil, errors.Errorf("Unknown collation id %d", id) 213 } 214 215 return collation, nil 216 } 217 218 const ( 219 // CollationBin is the default collation for CharsetBin. 220 CollationBin = "binary" 221 // CollationUTF8 is the default collation for CharsetUTF8. 222 CollationUTF8 = "utf8_bin" 223 // CollationUTF8MB4 is the default collation for CharsetUTF8MB4. 224 CollationUTF8MB4 = "utf8mb4_bin" 225 // CollationASCII is the default collation for CharsetACSII. 226 CollationASCII = "ascii_bin" 227 // CollationLatin1 is the default collation for CharsetLatin1. 228 CollationLatin1 = "latin1_bin" 229 // CollationGBKBin is the default collation for CharsetGBK when new collation is disabled. 230 CollationGBKBin = "gbk_bin" 231 // CollationGBKChineseCI is the default collation for CharsetGBK when new collation is enabled. 232 CollationGBKChineseCI = "gbk_chinese_ci" 233 ) 234 235 const ( 236 // CharsetASCII is a subset of UTF8. 237 CharsetASCII = "ascii" 238 // CharsetBin is used for marking binary charset. 239 CharsetBin = "binary" 240 // CharsetLatin1 is a single byte charset. 241 CharsetLatin1 = "latin1" 242 // CharsetUTF8 is the default charset for string types. 243 CharsetUTF8 = "utf8" 244 // CharsetUTF8MB3 is 3 bytes utf8, a MySQL legacy encoding. "utf8" and "utf8mb3" are aliases. 245 CharsetUTF8MB3 = "utf8mb3" 246 // CharsetUTF8MB4 represents 4 bytes utf8, which works the same way as utf8 in Go. 247 CharsetUTF8MB4 = "utf8mb4" 248 //revive:disable:exported 249 CharsetARMSCII8 = "armscii8" 250 CharsetBig5 = "big5" 251 CharsetCP1250 = "cp1250" 252 CharsetCP1251 = "cp1251" 253 CharsetCP1256 = "cp1256" 254 CharsetCP1257 = "cp1257" 255 CharsetCP850 = "cp850" 256 CharsetCP852 = "cp852" 257 CharsetCP866 = "cp866" 258 CharsetCP932 = "cp932" 259 CharsetDEC8 = "dec8" 260 CharsetEUCJPMS = "eucjpms" 261 CharsetEUCKR = "euckr" 262 CharsetGB18030 = "gb18030" 263 CharsetGB2312 = "gb2312" 264 CharsetGBK = "gbk" 265 CharsetGEOSTD8 = "geostd8" 266 CharsetGreek = "greek" 267 CharsetHebrew = "hebrew" 268 CharsetHP8 = "hp8" 269 CharsetKEYBCS2 = "keybcs2" 270 CharsetKOI8R = "koi8r" 271 CharsetKOI8U = "koi8u" 272 CharsetLatin2 = "latin2" 273 CharsetLatin5 = "latin5" 274 CharsetLatin7 = "latin7" 275 CharsetMacCE = "macce" 276 CharsetMacRoman = "macroman" 277 CharsetSJIS = "sjis" 278 CharsetSWE7 = "swe7" 279 CharsetTIS620 = "tis620" 280 CharsetUCS2 = "ucs2" 281 CharsetUJIS = "ujis" 282 CharsetUTF16 = "utf16" 283 CharsetUTF16LE = "utf16le" 284 CharsetUTF32 = "utf32" 285 //revive:enable:exported 286 ) 287 288 var charsets = map[string]*Charset{ 289 CharsetARMSCII8: {Name: CharsetARMSCII8, Maxlen: 1, DefaultCollation: "armscii8_general_ci", Desc: "ARMSCII-8 Armenian", Collations: make(map[string]*Collation)}, 290 CharsetASCII: {Name: CharsetASCII, Maxlen: 1, DefaultCollation: "ascii_general_ci", Desc: "US ASCII", Collations: make(map[string]*Collation)}, 291 CharsetBig5: {Name: CharsetBig5, Maxlen: 2, DefaultCollation: "big5_chinese_ci", Desc: "Big5 Traditional Chinese", Collations: make(map[string]*Collation)}, 292 CharsetBin: {Name: CharsetBin, Maxlen: 1, DefaultCollation: "binary", Desc: "Binary pseudo charset", Collations: make(map[string]*Collation)}, 293 CharsetLatin1: {Name: CharsetLatin1, Maxlen: 1, DefaultCollation: "cp1250_general_ci", Desc: "Windows Central European", Collations: make(map[string]*Collation)}, 294 CharsetCP1250: {Name: CharsetCP1250, Maxlen: 1, DefaultCollation: "cp1251_general_ci", Desc: "Windows Cyrillic", Collations: make(map[string]*Collation)}, 295 CharsetCP1251: {Name: CharsetCP1251, Maxlen: 1, DefaultCollation: "cp1256_general_ci", Desc: "Windows Arabic", Collations: make(map[string]*Collation)}, 296 CharsetCP1256: {Name: CharsetCP1256, Maxlen: 1, DefaultCollation: "cp1257_general_ci", Desc: "Windows Baltic", Collations: make(map[string]*Collation)}, 297 CharsetCP1257: {Name: CharsetCP1257, Maxlen: 1, DefaultCollation: "cp850_general_ci", Desc: "DOS West European", Collations: make(map[string]*Collation)}, 298 CharsetCP850: {Name: CharsetCP850, Maxlen: 1, DefaultCollation: "cp852_general_ci", Desc: "DOS Central European", Collations: make(map[string]*Collation)}, 299 CharsetCP852: {Name: CharsetCP852, Maxlen: 1, DefaultCollation: "cp866_general_ci", Desc: "DOS Russian", Collations: make(map[string]*Collation)}, 300 CharsetCP866: {Name: CharsetCP866, Maxlen: 1, DefaultCollation: "cp932_japanese_ci", Desc: "SJIS for Windows Japanese", Collations: make(map[string]*Collation)}, 301 CharsetCP932: {Name: CharsetCP932, Maxlen: 2, DefaultCollation: "dec8_swedish_ci", Desc: "DEC West European", Collations: make(map[string]*Collation)}, 302 CharsetDEC8: {Name: CharsetDEC8, Maxlen: 1, DefaultCollation: "eucjpms_japanese_ci", Desc: "UJIS for Windows Japanese", Collations: make(map[string]*Collation)}, 303 CharsetEUCJPMS: {Name: CharsetEUCJPMS, Maxlen: 3, DefaultCollation: "euckr_korean_ci", Desc: "EUC-KR Korean", Collations: make(map[string]*Collation)}, 304 CharsetEUCKR: {Name: CharsetEUCKR, Maxlen: 2, DefaultCollation: "gb18030_chinese_ci", Desc: "China National Standard GB18030", Collations: make(map[string]*Collation)}, 305 CharsetGB18030: {Name: CharsetGB18030, Maxlen: 4, DefaultCollation: "gb2312_chinese_ci", Desc: "GB2312 Simplified Chinese", Collations: make(map[string]*Collation)}, 306 CharsetGB2312: {Name: CharsetGB2312, Maxlen: 2, DefaultCollation: "gbk_chinese_ci", Desc: "GBK Simplified Chinese", Collations: make(map[string]*Collation)}, 307 CharsetGBK: {Name: CharsetGBK, Maxlen: 2, DefaultCollation: "geostd8_general_ci", Desc: "GEOSTD8 Georgian", Collations: make(map[string]*Collation)}, 308 CharsetGEOSTD8: {Name: CharsetGEOSTD8, Maxlen: 1, DefaultCollation: "greek_general_ci", Desc: "ISO 8859-7 Greek", Collations: make(map[string]*Collation)}, 309 CharsetGreek: {Name: CharsetGreek, Maxlen: 1, DefaultCollation: "hebrew_general_ci", Desc: "ISO 8859-8 Hebrew", Collations: make(map[string]*Collation)}, 310 CharsetHebrew: {Name: CharsetHebrew, Maxlen: 1, DefaultCollation: "hp8_english_ci", Desc: "HP West European", Collations: make(map[string]*Collation)}, 311 CharsetHP8: {Name: CharsetHP8, Maxlen: 1, DefaultCollation: "keybcs2_general_ci", Desc: "DOS Kamenicky Czech-Slovak", Collations: make(map[string]*Collation)}, 312 CharsetKEYBCS2: {Name: CharsetKEYBCS2, Maxlen: 1, DefaultCollation: "koi8r_general_ci", Desc: "KOI8-R Relcom Russian", Collations: make(map[string]*Collation)}, 313 CharsetKOI8R: {Name: CharsetKOI8R, Maxlen: 1, DefaultCollation: "koi8u_general_ci", Desc: "KOI8-U Ukrainian", Collations: make(map[string]*Collation)}, 314 CharsetKOI8U: {Name: CharsetKOI8U, Maxlen: 1, DefaultCollation: "latin1_swedish_ci", Desc: "cp1252 West European", Collations: make(map[string]*Collation)}, 315 CharsetLatin2: {Name: CharsetLatin2, Maxlen: 1, DefaultCollation: "latin2_general_ci", Desc: "ISO 8859-2 Central European", Collations: make(map[string]*Collation)}, 316 CharsetLatin5: {Name: CharsetLatin5, Maxlen: 1, DefaultCollation: "latin5_turkish_ci", Desc: "ISO 8859-9 Turkish", Collations: make(map[string]*Collation)}, 317 CharsetLatin7: {Name: CharsetLatin7, Maxlen: 1, DefaultCollation: "latin7_general_ci", Desc: "ISO 8859-13 Baltic", Collations: make(map[string]*Collation)}, 318 CharsetMacCE: {Name: CharsetMacCE, Maxlen: 1, DefaultCollation: "macce_general_ci", Desc: "Mac Central European", Collations: make(map[string]*Collation)}, 319 CharsetMacRoman: {Name: CharsetMacRoman, Maxlen: 1, DefaultCollation: "macroman_general_ci", Desc: "Mac West European", Collations: make(map[string]*Collation)}, 320 CharsetSJIS: {Name: CharsetSJIS, Maxlen: 2, DefaultCollation: "sjis_japanese_ci", Desc: "Shift-JIS Japanese", Collations: make(map[string]*Collation)}, 321 CharsetSWE7: {Name: CharsetSWE7, Maxlen: 1, DefaultCollation: "swe7_swedish_ci", Desc: "7bit Swedish", Collations: make(map[string]*Collation)}, 322 CharsetTIS620: {Name: CharsetTIS620, Maxlen: 1, DefaultCollation: "tis620_thai_ci", Desc: "TIS620 Thai", Collations: make(map[string]*Collation)}, 323 CharsetUCS2: {Name: CharsetUCS2, Maxlen: 2, DefaultCollation: "ucs2_general_ci", Desc: "UCS-2 Unicode", Collations: make(map[string]*Collation)}, 324 CharsetUJIS: {Name: CharsetUJIS, Maxlen: 3, DefaultCollation: "ujis_japanese_ci", Desc: "EUC-JP Japanese", Collations: make(map[string]*Collation)}, 325 CharsetUTF16: {Name: CharsetUTF16, Maxlen: 4, DefaultCollation: "utf16_general_ci", Desc: "UTF-16 Unicode", Collations: make(map[string]*Collation)}, 326 CharsetUTF16LE: {Name: CharsetUTF16LE, Maxlen: 4, DefaultCollation: "utf16le_general_ci", Desc: "UTF-16LE Unicode", Collations: make(map[string]*Collation)}, 327 CharsetUTF32: {Name: CharsetUTF32, Maxlen: 4, DefaultCollation: "utf32_general_ci", Desc: "UTF-32 Unicode", Collations: make(map[string]*Collation)}, 328 CharsetUTF8: {Name: CharsetUTF8, Maxlen: 3, DefaultCollation: "utf8_general_ci", Desc: "UTF-8 Unicode", Collations: make(map[string]*Collation)}, 329 CharsetUTF8MB4: {Name: CharsetUTF8MB4, Maxlen: 4, DefaultCollation: "utf8mb4_0900_ai_ci", Desc: "UTF-8 Unicode", Collations: make(map[string]*Collation)}, 330 } 331 332 var collations = []*Collation{ 333 {1, "big5", "big5_chinese_ci", true}, 334 {2, "latin2", "latin2_czech_cs", false}, 335 {3, "dec8", "dec8_swedish_ci", true}, 336 {4, "cp850", "cp850_general_ci", true}, 337 {5, "latin1", "latin1_german1_ci", false}, 338 {6, "hp8", "hp8_english_ci", true}, 339 {7, "koi8r", "koi8r_general_ci", true}, 340 {8, "latin1", "latin1_swedish_ci", false}, 341 {9, "latin2", "latin2_general_ci", true}, 342 {10, "swe7", "swe7_swedish_ci", true}, 343 {11, "ascii", "ascii_general_ci", false}, 344 {12, "ujis", "ujis_japanese_ci", true}, 345 {13, "sjis", "sjis_japanese_ci", true}, 346 {14, "cp1251", "cp1251_bulgarian_ci", false}, 347 {15, "latin1", "latin1_danish_ci", false}, 348 {16, "hebrew", "hebrew_general_ci", true}, 349 {18, "tis620", "tis620_thai_ci", true}, 350 {19, "euckr", "euckr_korean_ci", true}, 351 {20, "latin7", "latin7_estonian_cs", false}, 352 {21, "latin2", "latin2_hungarian_ci", false}, 353 {22, "koi8u", "koi8u_general_ci", true}, 354 {23, "cp1251", "cp1251_ukrainian_ci", false}, 355 {24, "gb2312", "gb2312_chinese_ci", true}, 356 {25, "greek", "greek_general_ci", true}, 357 {26, "cp1250", "cp1250_general_ci", true}, 358 {27, "latin2", "latin2_croatian_ci", false}, 359 {28, "gbk", "gbk_chinese_ci", false}, 360 {29, "cp1257", "cp1257_lithuanian_ci", false}, 361 {30, "latin5", "latin5_turkish_ci", true}, 362 {31, "latin1", "latin1_german2_ci", false}, 363 {32, "armscii8", "armscii8_general_ci", true}, 364 {33, "utf8", "utf8_general_ci", false}, 365 {34, "cp1250", "cp1250_czech_cs", false}, 366 {35, "ucs2", "ucs2_general_ci", true}, 367 {36, "cp866", "cp866_general_ci", true}, 368 {37, "keybcs2", "keybcs2_general_ci", true}, 369 {38, "macce", "macce_general_ci", true}, 370 {39, "macroman", "macroman_general_ci", true}, 371 {40, "cp852", "cp852_general_ci", true}, 372 {41, "latin7", "latin7_general_ci", true}, 373 {42, "latin7", "latin7_general_cs", false}, 374 {43, "macce", "macce_bin", false}, 375 {44, "cp1250", "cp1250_croatian_ci", false}, 376 {45, "utf8mb4", "utf8mb4_general_ci", false}, 377 {46, "utf8mb4", "utf8mb4_bin", true}, 378 {47, "latin1", "latin1_bin", true}, 379 {48, "latin1", "latin1_general_ci", false}, 380 {49, "latin1", "latin1_general_cs", false}, 381 {50, "cp1251", "cp1251_bin", false}, 382 {51, "cp1251", "cp1251_general_ci", true}, 383 {52, "cp1251", "cp1251_general_cs", false}, 384 {53, "macroman", "macroman_bin", false}, 385 {54, "utf16", "utf16_general_ci", true}, 386 {55, "utf16", "utf16_bin", false}, 387 {56, "utf16le", "utf16le_general_ci", true}, 388 {57, "cp1256", "cp1256_general_ci", true}, 389 {58, "cp1257", "cp1257_bin", false}, 390 {59, "cp1257", "cp1257_general_ci", true}, 391 {60, "utf32", "utf32_general_ci", true}, 392 {61, "utf32", "utf32_bin", false}, 393 {62, "utf16le", "utf16le_bin", false}, 394 {63, "binary", "binary", true}, 395 {64, "armscii8", "armscii8_bin", false}, 396 {65, "ascii", "ascii_bin", true}, 397 {66, "cp1250", "cp1250_bin", false}, 398 {67, "cp1256", "cp1256_bin", false}, 399 {68, "cp866", "cp866_bin", false}, 400 {69, "dec8", "dec8_bin", false}, 401 {70, "greek", "greek_bin", false}, 402 {71, "hebrew", "hebrew_bin", false}, 403 {72, "hp8", "hp8_bin", false}, 404 {73, "keybcs2", "keybcs2_bin", false}, 405 {74, "koi8r", "koi8r_bin", false}, 406 {75, "koi8u", "koi8u_bin", false}, 407 {76, "utf8", "utf8_tolower_ci", false}, 408 {77, "latin2", "latin2_bin", false}, 409 {78, "latin5", "latin5_bin", false}, 410 {79, "latin7", "latin7_bin", false}, 411 {80, "cp850", "cp850_bin", false}, 412 {81, "cp852", "cp852_bin", false}, 413 {82, "swe7", "swe7_bin", false}, 414 {83, "utf8", "utf8_bin", true}, 415 {84, "big5", "big5_bin", false}, 416 {85, "euckr", "euckr_bin", false}, 417 {86, "gb2312", "gb2312_bin", false}, 418 {87, "gbk", "gbk_bin", true}, 419 {88, "sjis", "sjis_bin", false}, 420 {89, "tis620", "tis620_bin", false}, 421 {90, "ucs2", "ucs2_bin", false}, 422 {91, "ujis", "ujis_bin", false}, 423 {92, "geostd8", "geostd8_general_ci", true}, 424 {93, "geostd8", "geostd8_bin", false}, 425 {94, "latin1", "latin1_spanish_ci", false}, 426 {95, "cp932", "cp932_japanese_ci", true}, 427 {96, "cp932", "cp932_bin", false}, 428 {97, "eucjpms", "eucjpms_japanese_ci", true}, 429 {98, "eucjpms", "eucjpms_bin", false}, 430 {99, "cp1250", "cp1250_polish_ci", false}, 431 {101, "utf16", "utf16_unicode_ci", false}, 432 {102, "utf16", "utf16_icelandic_ci", false}, 433 {103, "utf16", "utf16_latvian_ci", false}, 434 {104, "utf16", "utf16_romanian_ci", false}, 435 {105, "utf16", "utf16_slovenian_ci", false}, 436 {106, "utf16", "utf16_polish_ci", false}, 437 {107, "utf16", "utf16_estonian_ci", false}, 438 {108, "utf16", "utf16_spanish_ci", false}, 439 {109, "utf16", "utf16_swedish_ci", false}, 440 {110, "utf16", "utf16_turkish_ci", false}, 441 {111, "utf16", "utf16_czech_ci", false}, 442 {112, "utf16", "utf16_danish_ci", false}, 443 {113, "utf16", "utf16_lithuanian_ci", false}, 444 {114, "utf16", "utf16_slovak_ci", false}, 445 {115, "utf16", "utf16_spanish2_ci", false}, 446 {116, "utf16", "utf16_roman_ci", false}, 447 {117, "utf16", "utf16_persian_ci", false}, 448 {118, "utf16", "utf16_esperanto_ci", false}, 449 {119, "utf16", "utf16_hungarian_ci", false}, 450 {120, "utf16", "utf16_sinhala_ci", false}, 451 {121, "utf16", "utf16_german2_ci", false}, 452 {122, "utf16", "utf16_croatian_ci", false}, 453 {123, "utf16", "utf16_unicode_520_ci", false}, 454 {124, "utf16", "utf16_vietnamese_ci", false}, 455 {128, "ucs2", "ucs2_unicode_ci", false}, 456 {129, "ucs2", "ucs2_icelandic_ci", false}, 457 {130, "ucs2", "ucs2_latvian_ci", false}, 458 {131, "ucs2", "ucs2_romanian_ci", false}, 459 {132, "ucs2", "ucs2_slovenian_ci", false}, 460 {133, "ucs2", "ucs2_polish_ci", false}, 461 {134, "ucs2", "ucs2_estonian_ci", false}, 462 {135, "ucs2", "ucs2_spanish_ci", false}, 463 {136, "ucs2", "ucs2_swedish_ci", false}, 464 {137, "ucs2", "ucs2_turkish_ci", false}, 465 {138, "ucs2", "ucs2_czech_ci", false}, 466 {139, "ucs2", "ucs2_danish_ci", false}, 467 {140, "ucs2", "ucs2_lithuanian_ci", false}, 468 {141, "ucs2", "ucs2_slovak_ci", false}, 469 {142, "ucs2", "ucs2_spanish2_ci", false}, 470 {143, "ucs2", "ucs2_roman_ci", false}, 471 {144, "ucs2", "ucs2_persian_ci", false}, 472 {145, "ucs2", "ucs2_esperanto_ci", false}, 473 {146, "ucs2", "ucs2_hungarian_ci", false}, 474 {147, "ucs2", "ucs2_sinhala_ci", false}, 475 {148, "ucs2", "ucs2_german2_ci", false}, 476 {149, "ucs2", "ucs2_croatian_ci", false}, 477 {150, "ucs2", "ucs2_unicode_520_ci", false}, 478 {151, "ucs2", "ucs2_vietnamese_ci", false}, 479 {159, "ucs2", "ucs2_general_mysql500_ci", false}, 480 {160, "utf32", "utf32_unicode_ci", false}, 481 {161, "utf32", "utf32_icelandic_ci", false}, 482 {162, "utf32", "utf32_latvian_ci", false}, 483 {163, "utf32", "utf32_romanian_ci", false}, 484 {164, "utf32", "utf32_slovenian_ci", false}, 485 {165, "utf32", "utf32_polish_ci", false}, 486 {166, "utf32", "utf32_estonian_ci", false}, 487 {167, "utf32", "utf32_spanish_ci", false}, 488 {168, "utf32", "utf32_swedish_ci", false}, 489 {169, "utf32", "utf32_turkish_ci", false}, 490 {170, "utf32", "utf32_czech_ci", false}, 491 {171, "utf32", "utf32_danish_ci", false}, 492 {172, "utf32", "utf32_lithuanian_ci", false}, 493 {173, "utf32", "utf32_slovak_ci", false}, 494 {174, "utf32", "utf32_spanish2_ci", false}, 495 {175, "utf32", "utf32_roman_ci", false}, 496 {176, "utf32", "utf32_persian_ci", false}, 497 {177, "utf32", "utf32_esperanto_ci", false}, 498 {178, "utf32", "utf32_hungarian_ci", false}, 499 {179, "utf32", "utf32_sinhala_ci", false}, 500 {180, "utf32", "utf32_german2_ci", false}, 501 {181, "utf32", "utf32_croatian_ci", false}, 502 {182, "utf32", "utf32_unicode_520_ci", false}, 503 {183, "utf32", "utf32_vietnamese_ci", false}, 504 {192, "utf8", "utf8_unicode_ci", false}, 505 {193, "utf8", "utf8_icelandic_ci", false}, 506 {194, "utf8", "utf8_latvian_ci", false}, 507 {195, "utf8", "utf8_romanian_ci", false}, 508 {196, "utf8", "utf8_slovenian_ci", false}, 509 {197, "utf8", "utf8_polish_ci", false}, 510 {198, "utf8", "utf8_estonian_ci", false}, 511 {199, "utf8", "utf8_spanish_ci", false}, 512 {200, "utf8", "utf8_swedish_ci", false}, 513 {201, "utf8", "utf8_turkish_ci", false}, 514 {202, "utf8", "utf8_czech_ci", false}, 515 {203, "utf8", "utf8_danish_ci", false}, 516 {204, "utf8", "utf8_lithuanian_ci", false}, 517 {205, "utf8", "utf8_slovak_ci", false}, 518 {206, "utf8", "utf8_spanish2_ci", false}, 519 {207, "utf8", "utf8_roman_ci", false}, 520 {208, "utf8", "utf8_persian_ci", false}, 521 {209, "utf8", "utf8_esperanto_ci", false}, 522 {210, "utf8", "utf8_hungarian_ci", false}, 523 {211, "utf8", "utf8_sinhala_ci", false}, 524 {212, "utf8", "utf8_german2_ci", false}, 525 {213, "utf8", "utf8_croatian_ci", false}, 526 {214, "utf8", "utf8_unicode_520_ci", false}, 527 {215, "utf8", "utf8_vietnamese_ci", false}, 528 {223, "utf8", "utf8_general_mysql500_ci", false}, 529 {224, "utf8mb4", "utf8mb4_unicode_ci", false}, 530 {225, "utf8mb4", "utf8mb4_icelandic_ci", false}, 531 {226, "utf8mb4", "utf8mb4_latvian_ci", false}, 532 {227, "utf8mb4", "utf8mb4_romanian_ci", false}, 533 {228, "utf8mb4", "utf8mb4_slovenian_ci", false}, 534 {229, "utf8mb4", "utf8mb4_polish_ci", false}, 535 {230, "utf8mb4", "utf8mb4_estonian_ci", false}, 536 {231, "utf8mb4", "utf8mb4_spanish_ci", false}, 537 {232, "utf8mb4", "utf8mb4_swedish_ci", false}, 538 {233, "utf8mb4", "utf8mb4_turkish_ci", false}, 539 {234, "utf8mb4", "utf8mb4_czech_ci", false}, 540 {235, "utf8mb4", "utf8mb4_danish_ci", false}, 541 {236, "utf8mb4", "utf8mb4_lithuanian_ci", false}, 542 {237, "utf8mb4", "utf8mb4_slovak_ci", false}, 543 {238, "utf8mb4", "utf8mb4_spanish2_ci", false}, 544 {239, "utf8mb4", "utf8mb4_roman_ci", false}, 545 {240, "utf8mb4", "utf8mb4_persian_ci", false}, 546 {241, "utf8mb4", "utf8mb4_esperanto_ci", false}, 547 {242, "utf8mb4", "utf8mb4_hungarian_ci", false}, 548 {243, "utf8mb4", "utf8mb4_sinhala_ci", false}, 549 {244, "utf8mb4", "utf8mb4_german2_ci", false}, 550 {245, "utf8mb4", "utf8mb4_croatian_ci", false}, 551 {246, "utf8mb4", "utf8mb4_unicode_520_ci", false}, 552 {247, "utf8mb4", "utf8mb4_vietnamese_ci", false}, 553 {248, "gb18030", "gb18030_chinese_ci", false}, 554 {249, "gb18030", "gb18030_bin", true}, 555 {250, "gb18030", "gb18030_unicode_520_ci", false}, 556 {255, "utf8mb4", "utf8mb4_0900_ai_ci", false}, 557 {256, "utf8mb4", "utf8mb4_de_pb_0900_ai_ci", false}, 558 {257, "utf8mb4", "utf8mb4_is_0900_ai_ci", false}, 559 {258, "utf8mb4", "utf8mb4_lv_0900_ai_ci", false}, 560 {259, "utf8mb4", "utf8mb4_ro_0900_ai_ci", false}, 561 {260, "utf8mb4", "utf8mb4_sl_0900_ai_ci", false}, 562 {261, "utf8mb4", "utf8mb4_pl_0900_ai_ci", false}, 563 {262, "utf8mb4", "utf8mb4_et_0900_ai_ci", false}, 564 {263, "utf8mb4", "utf8mb4_es_0900_ai_ci", false}, 565 {264, "utf8mb4", "utf8mb4_sv_0900_ai_ci", false}, 566 {265, "utf8mb4", "utf8mb4_tr_0900_ai_ci", false}, 567 {266, "utf8mb4", "utf8mb4_cs_0900_ai_ci", false}, 568 {267, "utf8mb4", "utf8mb4_da_0900_ai_ci", false}, 569 {268, "utf8mb4", "utf8mb4_lt_0900_ai_ci", false}, 570 {269, "utf8mb4", "utf8mb4_sk_0900_ai_ci", false}, 571 {270, "utf8mb4", "utf8mb4_es_trad_0900_ai_ci", false}, 572 {271, "utf8mb4", "utf8mb4_la_0900_ai_ci", false}, 573 {273, "utf8mb4", "utf8mb4_eo_0900_ai_ci", false}, 574 {274, "utf8mb4", "utf8mb4_hu_0900_ai_ci", false}, 575 {275, "utf8mb4", "utf8mb4_hr_0900_ai_ci", false}, 576 {277, "utf8mb4", "utf8mb4_vi_0900_ai_ci", false}, 577 {278, "utf8mb4", "utf8mb4_0900_as_cs", false}, 578 {279, "utf8mb4", "utf8mb4_de_pb_0900_as_cs", false}, 579 {280, "utf8mb4", "utf8mb4_is_0900_as_cs", false}, 580 {281, "utf8mb4", "utf8mb4_lv_0900_as_cs", false}, 581 {282, "utf8mb4", "utf8mb4_ro_0900_as_cs", false}, 582 {283, "utf8mb4", "utf8mb4_sl_0900_as_cs", false}, 583 {284, "utf8mb4", "utf8mb4_pl_0900_as_cs", false}, 584 {285, "utf8mb4", "utf8mb4_et_0900_as_cs", false}, 585 {286, "utf8mb4", "utf8mb4_es_0900_as_cs", false}, 586 {287, "utf8mb4", "utf8mb4_sv_0900_as_cs", false}, 587 {288, "utf8mb4", "utf8mb4_tr_0900_as_cs", false}, 588 {289, "utf8mb4", "utf8mb4_cs_0900_as_cs", false}, 589 {290, "utf8mb4", "utf8mb4_da_0900_as_cs", false}, 590 {291, "utf8mb4", "utf8mb4_lt_0900_as_cs", false}, 591 {292, "utf8mb4", "utf8mb4_sk_0900_as_cs", false}, 592 {293, "utf8mb4", "utf8mb4_es_trad_0900_as_cs", false}, 593 {294, "utf8mb4", "utf8mb4_la_0900_as_cs", false}, 594 {296, "utf8mb4", "utf8mb4_eo_0900_as_cs", false}, 595 {297, "utf8mb4", "utf8mb4_hu_0900_as_cs", false}, 596 {298, "utf8mb4", "utf8mb4_hr_0900_as_cs", false}, 597 {300, "utf8mb4", "utf8mb4_vi_0900_as_cs", false}, 598 {303, "utf8mb4", "utf8mb4_ja_0900_as_cs", false}, 599 {304, "utf8mb4", "utf8mb4_ja_0900_as_cs_ks", false}, 600 {305, "utf8mb4", "utf8mb4_0900_as_ci", false}, 601 {306, "utf8mb4", "utf8mb4_ru_0900_ai_ci", false}, 602 {307, "utf8mb4", "utf8mb4_ru_0900_as_cs", false}, 603 {308, "utf8mb4", "utf8mb4_zh_0900_as_cs", false}, 604 {309, "utf8mb4", "utf8mb4_0900_bin", false}, 605 {2048, "utf8mb4", "utf8mb4_zh_pinyin_tidb_as_cs", false}, 606 } 607 608 // AddCharset adds a new charset. 609 // Use only when adding a custom charset to the parser. 610 func AddCharset(c *Charset) { 611 CharacterSetInfos[c.Name] = c 612 } 613 614 // RemoveCharset remove a charset. 615 // Use only when remove a custom charset to the parser. 616 func RemoveCharset(c string) { 617 delete(CharacterSetInfos, c) 618 for i := range supportedCollations { 619 if supportedCollations[i].Name == c { 620 supportedCollations = append(supportedCollations[:i], supportedCollations[i+1:]...) 621 } 622 } 623 } 624 625 // AddCollation adds a new collation. 626 // Use only when adding a custom collation to the parser. 627 func AddCollation(c *Collation) { 628 collationsIDMap[c.ID] = c 629 collationsNameMap[c.Name] = c 630 631 if _, ok := supportedCollationNames[c.Name]; ok { 632 AddSupportedCollation(c) 633 } 634 635 if charset, ok := CharacterSetInfos[c.CharsetName]; ok { 636 charset.Collations[c.Name] = c 637 } 638 639 if charset, ok := charsets[c.CharsetName]; ok { 640 charset.Collations[c.Name] = c 641 } 642 } 643 644 // AddSupportedCollation adds a new collation into supportedCollations. 645 // Use only when adding a custom collation to the parser. 646 func AddSupportedCollation(c *Collation) { 647 supportedCollations = append(supportedCollations, c) 648 } 649 650 // init method always puts to the end of file. 651 func init() { 652 for _, c := range collations { 653 AddCollation(c) 654 } 655 }