github.com/XiaoMi/Gaea@v1.2.5/mysql/charset_tidb.go (about) 1 // Copyright 2015 PingCAP, Inc. 2 // 3 // Licensed under the Apache License, Version 2.0 (the "License"); 4 // you may not use this file except in compliance with the License. 5 // You may obtain a copy of the License at 6 // 7 // http://www.apache.org/licenses/LICENSE-2.0 8 // 9 // Unless required by applicable law or agreed to in writing, software 10 // distributed under the License is distributed on an "AS IS" BASIS, 11 // See the License for the specific language governing permissions and 12 // limitations under the License. 13 14 package mysql 15 16 import ( 17 "strings" 18 19 "github.com/pingcap/errors" 20 ) 21 22 // Charset is a charset. 23 // Now we only support MySQL. 24 type Charset struct { 25 Name string 26 DefaultCollation string 27 Collations map[string]*Collation 28 Desc string 29 Maxlen int 30 } 31 32 // Collation is a collation. 33 // Now we only support MySQL. 34 type Collation struct { 35 ID int 36 CharsetName string 37 Name string 38 IsDefault bool 39 } 40 41 var charsets = make(map[string]*Charset) 42 43 // All the supported charsets should be in the following table. 44 var charsetInfos = []*Charset{ 45 {CharsetUTF8, CollationUTF8, make(map[string]*Collation), "UTF-8 Unicode", 3}, 46 {CharsetUTF8MB4, CollationUTF8MB4, make(map[string]*Collation), "UTF-8 Unicode", 4}, 47 {CharsetASCII, CollationASCII, make(map[string]*Collation), "US ASCII", 1}, 48 {CharsetLatin1, CollationLatin1, make(map[string]*Collation), "Latin1", 1}, 49 {CharsetBin, CollationBin, make(map[string]*Collation), "binary", 1}, 50 } 51 52 // Desc is a charset description. 53 type Desc struct { 54 Name string 55 Desc string 56 DefaultCollation string 57 Maxlen int 58 } 59 60 // GetAllCharsets gets all charset descriptions in the local charsets. 61 func GetAllCharsets() []*Desc { 62 descs := make([]*Desc, 0, len(charsets)) 63 // The charsetInfos is an array, so the iterate order will be stable. 64 for _, ci := range charsetInfos { 65 c, ok := charsets[ci.Name] 66 if !ok { 67 continue 68 } 69 desc := &Desc{ 70 Name: c.Name, 71 DefaultCollation: c.DefaultCollation, 72 Desc: c.Desc, 73 Maxlen: c.Maxlen, 74 } 75 descs = append(descs, desc) 76 } 77 return descs 78 } 79 80 // ValidCharsetAndCollation checks the charset and the collation validity 81 // and returns a boolean. 82 func ValidCharsetAndCollation(cs string, co string) bool { 83 // We will use utf8 as a default charset. 84 if cs == "" { 85 cs = "utf8" 86 } 87 cs = strings.ToLower(cs) 88 c, ok := charsets[cs] 89 if !ok { 90 return false 91 } 92 93 if co == "" { 94 return true 95 } 96 co = strings.ToLower(co) 97 _, ok = c.Collations[co] 98 if !ok { 99 return false 100 } 101 102 return true 103 } 104 105 // GetDefaultCollation returns the default collation for charset. 106 func GetDefaultCollation(charset string) (string, error) { 107 charset = strings.ToLower(charset) 108 if charset == CharsetBin { 109 return CollationBin, nil 110 } 111 c, ok := charsets[charset] 112 if !ok { 113 return "", errors.Errorf("Unknown charset %s", charset) 114 } 115 return c.DefaultCollation, nil 116 } 117 118 // GetDefaultCharsetAndCollate returns the default charset and collation. 119 func GetDefaultCharsetAndCollate() (string, string) { 120 return DefaultCharset, DefaultCollationName 121 } 122 123 // GetCharsetInfo returns charset and collation for cs as name. 124 func GetCharsetInfo(cs string) (string, string, error) { 125 c, ok := charsets[strings.ToLower(cs)] 126 if !ok { 127 return "", "", errors.Errorf("Unknown charset %s", cs) 128 } 129 return c.Name, c.DefaultCollation, nil 130 } 131 132 // GetCharsetDesc gets charset descriptions in the local charsets. 133 func GetCharsetDesc(cs string) (*Desc, error) { 134 c, ok := charsets[strings.ToLower(cs)] 135 if !ok { 136 return nil, errors.Errorf("Unknown charset %s", cs) 137 } 138 desc := &Desc{ 139 Name: c.Name, 140 DefaultCollation: c.DefaultCollation, 141 Desc: c.Desc, 142 Maxlen: c.Maxlen, 143 } 144 return desc, nil 145 } 146 147 // GetCollations returns a list for all collations. 148 func GetCollations() []*Collation { 149 return collations 150 } 151 152 const ( 153 // CharsetBin is used for marking binary charset. 154 CharsetBin = "binary" 155 // CollationBin is the default collation for CharsetBin. 156 CollationBin = "binary" 157 // CharsetUTF8 is the default charset for string types. 158 CharsetUTF8 = "utf8" 159 // CollationUTF8 is the default collation for CharsetUTF8. 160 CollationUTF8 = "utf8_bin" 161 // CharsetUTF8MB4 represents 4 bytes utf8, which works the same way as utf8 in Go. 162 CharsetUTF8MB4 = "utf8mb4" 163 // CollationUTF8MB4 is the default collation for CharsetUTF8MB4. 164 CollationUTF8MB4 = "utf8mb4_bin" 165 // CharsetASCII is a subset of UTF8. 166 CharsetASCII = "ascii" 167 // CollationASCII is the default collation for CharsetACSII. 168 CollationASCII = "ascii_bin" 169 // CharsetLatin1 is a single byte charset. 170 CharsetLatin1 = "latin1" 171 // CollationLatin1 is the default collation for CharsetLatin1. 172 CollationLatin1 = "latin1_bin" 173 ) 174 175 var collations = []*Collation{ 176 {1, "big5", "big5_chinese_ci", true}, 177 {2, "latin2", "latin2_czech_cs", false}, 178 {3, "dec8", "dec8_swedish_ci", true}, 179 {4, "cp850", "cp850_general_ci", true}, 180 {5, "latin1", "latin1_german1_ci", false}, 181 {6, "hp8", "hp8_english_ci", true}, 182 {7, "koi8r", "koi8r_general_ci", true}, 183 {8, "latin1", "latin1_swedish_ci", true}, 184 {9, "latin2", "latin2_general_ci", true}, 185 {10, "swe7", "swe7_swedish_ci", true}, 186 {11, "ascii", "ascii_general_ci", true}, 187 {12, "ujis", "ujis_japanese_ci", true}, 188 {13, "sjis", "sjis_japanese_ci", true}, 189 {14, "cp1251", "cp1251_bulgarian_ci", false}, 190 {15, "latin1", "latin1_danish_ci", false}, 191 {16, "hebrew", "hebrew_general_ci", true}, 192 {18, "tis620", "tis620_thai_ci", true}, 193 {19, "euckr", "euckr_korean_ci", true}, 194 {20, "latin7", "latin7_estonian_cs", false}, 195 {21, "latin2", "latin2_hungarian_ci", false}, 196 {22, "koi8u", "koi8u_general_ci", true}, 197 {23, "cp1251", "cp1251_ukrainian_ci", false}, 198 {24, "gb2312", "gb2312_chinese_ci", true}, 199 {25, "greek", "greek_general_ci", true}, 200 {26, "cp1250", "cp1250_general_ci", true}, 201 {27, "latin2", "latin2_croatian_ci", false}, 202 {28, "gbk", "gbk_chinese_ci", true}, 203 {29, "cp1257", "cp1257_lithuanian_ci", false}, 204 {30, "latin5", "latin5_turkish_ci", true}, 205 {31, "latin1", "latin1_german2_ci", false}, 206 {32, "armscii8", "armscii8_general_ci", true}, 207 {33, "utf8", "utf8_general_ci", true}, 208 {34, "cp1250", "cp1250_czech_cs", false}, 209 {35, "ucs2", "ucs2_general_ci", true}, 210 {36, "cp866", "cp866_general_ci", true}, 211 {37, "keybcs2", "keybcs2_general_ci", true}, 212 {38, "macce", "macce_general_ci", true}, 213 {39, "macroman", "macroman_general_ci", true}, 214 {40, "cp852", "cp852_general_ci", true}, 215 {41, "latin7", "latin7_general_ci", true}, 216 {42, "latin7", "latin7_general_cs", false}, 217 {43, "macce", "macce_bin", false}, 218 {44, "cp1250", "cp1250_croatian_ci", false}, 219 {45, "utf8mb4", "utf8mb4_general_ci", true}, 220 {46, "utf8mb4", "utf8mb4_bin", false}, 221 {47, "latin1", "latin1_bin", false}, 222 {48, "latin1", "latin1_general_ci", false}, 223 {49, "latin1", "latin1_general_cs", false}, 224 {50, "cp1251", "cp1251_bin", false}, 225 {51, "cp1251", "cp1251_general_ci", true}, 226 {52, "cp1251", "cp1251_general_cs", false}, 227 {53, "macroman", "macroman_bin", false}, 228 {54, "utf16", "utf16_general_ci", true}, 229 {55, "utf16", "utf16_bin", false}, 230 {56, "utf16le", "utf16le_general_ci", true}, 231 {57, "cp1256", "cp1256_general_ci", true}, 232 {58, "cp1257", "cp1257_bin", false}, 233 {59, "cp1257", "cp1257_general_ci", true}, 234 {60, "utf32", "utf32_general_ci", true}, 235 {61, "utf32", "utf32_bin", false}, 236 {62, "utf16le", "utf16le_bin", false}, 237 {63, "binary", "binary", true}, 238 {64, "armscii8", "armscii8_bin", false}, 239 {65, "ascii", "ascii_bin", false}, 240 {66, "cp1250", "cp1250_bin", false}, 241 {67, "cp1256", "cp1256_bin", false}, 242 {68, "cp866", "cp866_bin", false}, 243 {69, "dec8", "dec8_bin", false}, 244 {70, "greek", "greek_bin", false}, 245 {71, "hebrew", "hebrew_bin", false}, 246 {72, "hp8", "hp8_bin", false}, 247 {73, "keybcs2", "keybcs2_bin", false}, 248 {74, "koi8r", "koi8r_bin", false}, 249 {75, "koi8u", "koi8u_bin", false}, 250 {77, "latin2", "latin2_bin", false}, 251 {78, "latin5", "latin5_bin", false}, 252 {79, "latin7", "latin7_bin", false}, 253 {80, "cp850", "cp850_bin", false}, 254 {81, "cp852", "cp852_bin", false}, 255 {82, "swe7", "swe7_bin", false}, 256 {83, "utf8", "utf8_bin", false}, 257 {84, "big5", "big5_bin", false}, 258 {85, "euckr", "euckr_bin", false}, 259 {86, "gb2312", "gb2312_bin", false}, 260 {87, "gbk", "gbk_bin", false}, 261 {88, "sjis", "sjis_bin", false}, 262 {89, "tis620", "tis620_bin", false}, 263 {90, "ucs2", "ucs2_bin", false}, 264 {91, "ujis", "ujis_bin", false}, 265 {92, "geostd8", "geostd8_general_ci", true}, 266 {93, "geostd8", "geostd8_bin", false}, 267 {94, "latin1", "latin1_spanish_ci", false}, 268 {95, "cp932", "cp932_japanese_ci", true}, 269 {96, "cp932", "cp932_bin", false}, 270 {97, "eucjpms", "eucjpms_japanese_ci", true}, 271 {98, "eucjpms", "eucjpms_bin", false}, 272 {99, "cp1250", "cp1250_polish_ci", false}, 273 {101, "utf16", "utf16_unicode_ci", false}, 274 {102, "utf16", "utf16_icelandic_ci", false}, 275 {103, "utf16", "utf16_latvian_ci", false}, 276 {104, "utf16", "utf16_romanian_ci", false}, 277 {105, "utf16", "utf16_slovenian_ci", false}, 278 {106, "utf16", "utf16_polish_ci", false}, 279 {107, "utf16", "utf16_estonian_ci", false}, 280 {108, "utf16", "utf16_spanish_ci", false}, 281 {109, "utf16", "utf16_swedish_ci", false}, 282 {110, "utf16", "utf16_turkish_ci", false}, 283 {111, "utf16", "utf16_czech_ci", false}, 284 {112, "utf16", "utf16_danish_ci", false}, 285 {113, "utf16", "utf16_lithuanian_ci", false}, 286 {114, "utf16", "utf16_slovak_ci", false}, 287 {115, "utf16", "utf16_spanish2_ci", false}, 288 {116, "utf16", "utf16_roman_ci", false}, 289 {117, "utf16", "utf16_persian_ci", false}, 290 {118, "utf16", "utf16_esperanto_ci", false}, 291 {119, "utf16", "utf16_hungarian_ci", false}, 292 {120, "utf16", "utf16_sinhala_ci", false}, 293 {121, "utf16", "utf16_german2_ci", false}, 294 {122, "utf16", "utf16_croatian_ci", false}, 295 {123, "utf16", "utf16_unicode_520_ci", false}, 296 {124, "utf16", "utf16_vietnamese_ci", false}, 297 {128, "ucs2", "ucs2_unicode_ci", false}, 298 {129, "ucs2", "ucs2_icelandic_ci", false}, 299 {130, "ucs2", "ucs2_latvian_ci", false}, 300 {131, "ucs2", "ucs2_romanian_ci", false}, 301 {132, "ucs2", "ucs2_slovenian_ci", false}, 302 {133, "ucs2", "ucs2_polish_ci", false}, 303 {134, "ucs2", "ucs2_estonian_ci", false}, 304 {135, "ucs2", "ucs2_spanish_ci", false}, 305 {136, "ucs2", "ucs2_swedish_ci", false}, 306 {137, "ucs2", "ucs2_turkish_ci", false}, 307 {138, "ucs2", "ucs2_czech_ci", false}, 308 {139, "ucs2", "ucs2_danish_ci", false}, 309 {140, "ucs2", "ucs2_lithuanian_ci", false}, 310 {141, "ucs2", "ucs2_slovak_ci", false}, 311 {142, "ucs2", "ucs2_spanish2_ci", false}, 312 {143, "ucs2", "ucs2_roman_ci", false}, 313 {144, "ucs2", "ucs2_persian_ci", false}, 314 {145, "ucs2", "ucs2_esperanto_ci", false}, 315 {146, "ucs2", "ucs2_hungarian_ci", false}, 316 {147, "ucs2", "ucs2_sinhala_ci", false}, 317 {148, "ucs2", "ucs2_german2_ci", false}, 318 {149, "ucs2", "ucs2_croatian_ci", false}, 319 {150, "ucs2", "ucs2_unicode_520_ci", false}, 320 {151, "ucs2", "ucs2_vietnamese_ci", false}, 321 {159, "ucs2", "ucs2_general_mysql500_ci", false}, 322 {160, "utf32", "utf32_unicode_ci", false}, 323 {161, "utf32", "utf32_icelandic_ci", false}, 324 {162, "utf32", "utf32_latvian_ci", false}, 325 {163, "utf32", "utf32_romanian_ci", false}, 326 {164, "utf32", "utf32_slovenian_ci", false}, 327 {165, "utf32", "utf32_polish_ci", false}, 328 {166, "utf32", "utf32_estonian_ci", false}, 329 {167, "utf32", "utf32_spanish_ci", false}, 330 {168, "utf32", "utf32_swedish_ci", false}, 331 {169, "utf32", "utf32_turkish_ci", false}, 332 {170, "utf32", "utf32_czech_ci", false}, 333 {171, "utf32", "utf32_danish_ci", false}, 334 {172, "utf32", "utf32_lithuanian_ci", false}, 335 {173, "utf32", "utf32_slovak_ci", false}, 336 {174, "utf32", "utf32_spanish2_ci", false}, 337 {175, "utf32", "utf32_roman_ci", false}, 338 {176, "utf32", "utf32_persian_ci", false}, 339 {177, "utf32", "utf32_esperanto_ci", false}, 340 {178, "utf32", "utf32_hungarian_ci", false}, 341 {179, "utf32", "utf32_sinhala_ci", false}, 342 {180, "utf32", "utf32_german2_ci", false}, 343 {181, "utf32", "utf32_croatian_ci", false}, 344 {182, "utf32", "utf32_unicode_520_ci", false}, 345 {183, "utf32", "utf32_vietnamese_ci", false}, 346 {192, "utf8", "utf8_unicode_ci", false}, 347 {193, "utf8", "utf8_icelandic_ci", false}, 348 {194, "utf8", "utf8_latvian_ci", false}, 349 {195, "utf8", "utf8_romanian_ci", false}, 350 {196, "utf8", "utf8_slovenian_ci", false}, 351 {197, "utf8", "utf8_polish_ci", false}, 352 {198, "utf8", "utf8_estonian_ci", false}, 353 {199, "utf8", "utf8_spanish_ci", false}, 354 {200, "utf8", "utf8_swedish_ci", false}, 355 {201, "utf8", "utf8_turkish_ci", false}, 356 {202, "utf8", "utf8_czech_ci", false}, 357 {203, "utf8", "utf8_danish_ci", false}, 358 {204, "utf8", "utf8_lithuanian_ci", false}, 359 {205, "utf8", "utf8_slovak_ci", false}, 360 {206, "utf8", "utf8_spanish2_ci", false}, 361 {207, "utf8", "utf8_roman_ci", false}, 362 {208, "utf8", "utf8_persian_ci", false}, 363 {209, "utf8", "utf8_esperanto_ci", false}, 364 {210, "utf8", "utf8_hungarian_ci", false}, 365 {211, "utf8", "utf8_sinhala_ci", false}, 366 {212, "utf8", "utf8_german2_ci", false}, 367 {213, "utf8", "utf8_croatian_ci", false}, 368 {214, "utf8", "utf8_unicode_520_ci", false}, 369 {215, "utf8", "utf8_vietnamese_ci", false}, 370 {223, "utf8", "utf8_general_mysql500_ci", false}, 371 {224, "utf8mb4", "utf8mb4_unicode_ci", false}, 372 {225, "utf8mb4", "utf8mb4_icelandic_ci", false}, 373 {226, "utf8mb4", "utf8mb4_latvian_ci", false}, 374 {227, "utf8mb4", "utf8mb4_romanian_ci", false}, 375 {228, "utf8mb4", "utf8mb4_slovenian_ci", false}, 376 {229, "utf8mb4", "utf8mb4_polish_ci", false}, 377 {230, "utf8mb4", "utf8mb4_estonian_ci", false}, 378 {231, "utf8mb4", "utf8mb4_spanish_ci", false}, 379 {232, "utf8mb4", "utf8mb4_swedish_ci", false}, 380 {233, "utf8mb4", "utf8mb4_turkish_ci", false}, 381 {234, "utf8mb4", "utf8mb4_czech_ci", false}, 382 {235, "utf8mb4", "utf8mb4_danish_ci", false}, 383 {236, "utf8mb4", "utf8mb4_lithuanian_ci", false}, 384 {237, "utf8mb4", "utf8mb4_slovak_ci", false}, 385 {238, "utf8mb4", "utf8mb4_spanish2_ci", false}, 386 {239, "utf8mb4", "utf8mb4_roman_ci", false}, 387 {240, "utf8mb4", "utf8mb4_persian_ci", false}, 388 {241, "utf8mb4", "utf8mb4_esperanto_ci", false}, 389 {242, "utf8mb4", "utf8mb4_hungarian_ci", false}, 390 {243, "utf8mb4", "utf8mb4_sinhala_ci", false}, 391 {244, "utf8mb4", "utf8mb4_german2_ci", false}, 392 {245, "utf8mb4", "utf8mb4_croatian_ci", false}, 393 {246, "utf8mb4", "utf8mb4_unicode_520_ci", false}, 394 {247, "utf8mb4", "utf8mb4_vietnamese_ci", false}, 395 } 396 397 // init method always puts to the end of file. 398 func init() { 399 for _, c := range charsetInfos { 400 charsets[c.Name] = c 401 } 402 for _, c := range collations { 403 charset, ok := charsets[c.CharsetName] 404 if !ok { 405 continue 406 } 407 charset.Collations[c.Name] = c 408 } 409 }