github.com/bingoohuang/gg@v0.0.0-20240325092523-45da7dee9335/pkg/sqlparse/tidbparser/dependency/util/charset/charset.go (about) 1 // Copyright 2015 PingCAP, Inc. 2 // 3 // Licensed under the Apache License, Version 2.0 (the "License"); 4 // you may not use this file except in compliance with the License. 5 // You may obtain a copy of the License at 6 // 7 // http://www.apache.org/licenses/LICENSE-2.0 8 // 9 // Unless required by applicable law or agreed to in writing, software 10 // distributed under the License is distributed on an "AS IS" BASIS, 11 // See the License for the specific language governing permissions and 12 // limitations under the License. 13 14 package charset 15 16 import ( 17 "strings" 18 19 "github.com/bingoohuang/gg/pkg/sqlparse/tidbparser/dependency/mysql" 20 "github.com/juju/errors" 21 ) 22 23 // Charset is a charset. 24 // Now we only support MySQL. 25 type Charset struct { 26 Name string 27 DefaultCollation string 28 Collations map[string]*Collation 29 Desc string 30 Maxlen int 31 } 32 33 // Collation is a collation. 34 // Now we only support MySQL. 35 type Collation struct { 36 ID int 37 CharsetName string 38 Name string 39 IsDefault bool 40 } 41 42 var charsets = make(map[string]*Charset) 43 44 // All the supported charsets should be in the following table. 45 var charsetInfos = []*Charset{ 46 {CharsetUTF8, CollationUTF8, make(map[string]*Collation), "UTF-8 Unicode", 3}, 47 {CharsetUTF8MB4, CollationUTF8MB4, make(map[string]*Collation), "UTF-8 Unicode", 4}, 48 {CharsetASCII, CollationASCII, make(map[string]*Collation), "US ASCII", 1}, 49 {CharsetLatin1, CollationLatin1, make(map[string]*Collation), "Latin1", 1}, 50 {CharsetBin, CollationBin, make(map[string]*Collation), "binary", 1}, 51 } 52 53 func init() { 54 for _, c := range charsetInfos { 55 charsets[c.Name] = c 56 } 57 for _, c := range collations { 58 charset, ok := charsets[c.CharsetName] 59 if !ok { 60 continue 61 } 62 charset.Collations[c.Name] = c 63 } 64 } 65 66 // Desc is a charset description. 67 type Desc struct { 68 Name string 69 Desc string 70 DefaultCollation string 71 Maxlen int 72 } 73 74 // GetAllCharsets gets all charset descriptions in the local charsets. 75 func GetAllCharsets() []*Desc { 76 descs := make([]*Desc, 0, len(charsets)) 77 // The charsetInfos is an array, so the iterate order will be stable. 78 for _, ci := range charsetInfos { 79 c, ok := charsets[ci.Name] 80 if !ok { 81 continue 82 } 83 desc := &Desc{ 84 Name: c.Name, 85 DefaultCollation: c.DefaultCollation, 86 Desc: c.Desc, 87 Maxlen: c.Maxlen, 88 } 89 descs = append(descs, desc) 90 } 91 return descs 92 } 93 94 // ValidCharsetAndCollation checks the charset and the collation validity 95 // and returns a boolean. 96 func ValidCharsetAndCollation(cs string, co string) bool { 97 // We will use utf8 as a default charset. 98 if cs == "" { 99 cs = "utf8" 100 } 101 102 c, ok := charsets[cs] 103 if !ok { 104 return false 105 } 106 107 if co == "" { 108 return true 109 } 110 _, ok = c.Collations[co] 111 if !ok { 112 return false 113 } 114 115 return true 116 } 117 118 // GetDefaultCollation returns the default collation for charset. 119 func GetDefaultCollation(charset string) (string, error) { 120 charset = strings.ToLower(charset) 121 if charset == CharsetBin { 122 return CollationBin, nil 123 } 124 c, ok := charsets[charset] 125 if !ok { 126 return "", errors.Errorf("Unknown charset %s", charset) 127 } 128 return c.DefaultCollation, nil 129 } 130 131 // GetCharsetInfo returns charset and collation for cs as name. 132 func GetCharsetInfo(cs string) (string, string, error) { 133 c, ok := charsets[strings.ToLower(cs)] 134 if !ok { 135 return "", "", errors.Errorf("Unknown charset %s", cs) 136 } 137 return c.Name, c.DefaultCollation, nil 138 } 139 140 // GetCharsetDesc gets charset descriptions in the local charsets. 141 func GetCharsetDesc(cs string) (*Desc, error) { 142 c, ok := charsets[strings.ToLower(cs)] 143 if !ok { 144 return nil, errors.Errorf("Unknown charset %s", cs) 145 } 146 desc := &Desc{ 147 Name: c.Name, 148 DefaultCollation: c.DefaultCollation, 149 Desc: c.Desc, 150 Maxlen: c.Maxlen, 151 } 152 return desc, nil 153 } 154 155 // GetCharsetInfoByID returns charset and collation for id as cs_number. 156 func GetCharsetInfoByID(coID int) (string, string, error) { 157 if coID == mysql.DefaultCollationID { 158 return mysql.DefaultCharset, mysql.DefaultCollationName, nil 159 } 160 for _, collation := range collations { 161 if coID == collation.ID { 162 return collation.CharsetName, collation.Name, nil 163 } 164 } 165 return "", "", errors.Errorf("Unknown charset id %d", coID) 166 } 167 168 // GetCollations returns a list for all collations. 169 func GetCollations() []*Collation { 170 return collations 171 } 172 173 const ( 174 // CharsetBin is used for marking binary charset. 175 CharsetBin = "binary" 176 // CollationBin is the default collation for CharsetBin. 177 CollationBin = "binary" 178 // CharsetUTF8 is the default charset for string types. 179 CharsetUTF8 = "utf8" 180 // CollationUTF8 is the default collation for CharsetUTF8. 181 CollationUTF8 = "utf8_bin" 182 // CharsetUTF8MB4 represents 4 bytes utf8, which works the same way as utf8 in Go. 183 CharsetUTF8MB4 = "utf8mb4" 184 // CollationUTF8MB4 is the default collation for CharsetUTF8MB4. 185 CollationUTF8MB4 = "utf8mb4_bin" 186 // CharsetASCII is a subset of UTF8. 187 CharsetASCII = "ascii" 188 // CollationASCII is the default collation for CharsetACSII. 189 CollationASCII = "ascii_bin" 190 // CharsetLatin1 is a single byte charset. 191 CharsetLatin1 = "latin1" 192 // CollationLatin1 is the default collation for CharsetLatin1. 193 CollationLatin1 = "latin1_bin" 194 ) 195 196 var collations = []*Collation{ 197 {1, "big5", "big5_chinese_ci", true}, 198 {2, "latin2", "latin2_czech_cs", false}, 199 {3, "dec8", "dec8_swedish_ci", true}, 200 {4, "cp850", "cp850_general_ci", true}, 201 {5, "latin1", "latin1_german1_ci", false}, 202 {6, "hp8", "hp8_english_ci", true}, 203 {7, "koi8r", "koi8r_general_ci", true}, 204 {8, "latin1", "latin1_swedish_ci", true}, 205 {9, "latin2", "latin2_general_ci", true}, 206 {10, "swe7", "swe7_swedish_ci", true}, 207 {11, "ascii", "ascii_general_ci", true}, 208 {12, "ujis", "ujis_japanese_ci", true}, 209 {13, "sjis", "sjis_japanese_ci", true}, 210 {14, "cp1251", "cp1251_bulgarian_ci", false}, 211 {15, "latin1", "latin1_danish_ci", false}, 212 {16, "hebrew", "hebrew_general_ci", true}, 213 {18, "tis620", "tis620_thai_ci", true}, 214 {19, "euckr", "euckr_korean_ci", true}, 215 {20, "latin7", "latin7_estonian_cs", false}, 216 {21, "latin2", "latin2_hungarian_ci", false}, 217 {22, "koi8u", "koi8u_general_ci", true}, 218 {23, "cp1251", "cp1251_ukrainian_ci", false}, 219 {24, "gb2312", "gb2312_chinese_ci", true}, 220 {25, "greek", "greek_general_ci", true}, 221 {26, "cp1250", "cp1250_general_ci", true}, 222 {27, "latin2", "latin2_croatian_ci", false}, 223 {28, "gbk", "gbk_chinese_ci", true}, 224 {29, "cp1257", "cp1257_lithuanian_ci", false}, 225 {30, "latin5", "latin5_turkish_ci", true}, 226 {31, "latin1", "latin1_german2_ci", false}, 227 {32, "armscii8", "armscii8_general_ci", true}, 228 {33, "utf8", "utf8_general_ci", true}, 229 {34, "cp1250", "cp1250_czech_cs", false}, 230 {35, "ucs2", "ucs2_general_ci", true}, 231 {36, "cp866", "cp866_general_ci", true}, 232 {37, "keybcs2", "keybcs2_general_ci", true}, 233 {38, "macce", "macce_general_ci", true}, 234 {39, "macroman", "macroman_general_ci", true}, 235 {40, "cp852", "cp852_general_ci", true}, 236 {41, "latin7", "latin7_general_ci", true}, 237 {42, "latin7", "latin7_general_cs", false}, 238 {43, "macce", "macce_bin", false}, 239 {44, "cp1250", "cp1250_croatian_ci", false}, 240 {45, "utf8mb4", "utf8mb4_general_ci", true}, 241 {46, "utf8mb4", "utf8mb4_bin", false}, 242 {47, "latin1", "latin1_bin", false}, 243 {48, "latin1", "latin1_general_ci", false}, 244 {49, "latin1", "latin1_general_cs", false}, 245 {50, "cp1251", "cp1251_bin", false}, 246 {51, "cp1251", "cp1251_general_ci", true}, 247 {52, "cp1251", "cp1251_general_cs", false}, 248 {53, "macroman", "macroman_bin", false}, 249 {54, "utf16", "utf16_general_ci", true}, 250 {55, "utf16", "utf16_bin", false}, 251 {56, "utf16le", "utf16le_general_ci", true}, 252 {57, "cp1256", "cp1256_general_ci", true}, 253 {58, "cp1257", "cp1257_bin", false}, 254 {59, "cp1257", "cp1257_general_ci", true}, 255 {60, "utf32", "utf32_general_ci", true}, 256 {61, "utf32", "utf32_bin", false}, 257 {62, "utf16le", "utf16le_bin", false}, 258 {63, "binary", "binary", true}, 259 {64, "armscii8", "armscii8_bin", false}, 260 {65, "ascii", "ascii_bin", false}, 261 {66, "cp1250", "cp1250_bin", false}, 262 {67, "cp1256", "cp1256_bin", false}, 263 {68, "cp866", "cp866_bin", false}, 264 {69, "dec8", "dec8_bin", false}, 265 {70, "greek", "greek_bin", false}, 266 {71, "hebrew", "hebrew_bin", false}, 267 {72, "hp8", "hp8_bin", false}, 268 {73, "keybcs2", "keybcs2_bin", false}, 269 {74, "koi8r", "koi8r_bin", false}, 270 {75, "koi8u", "koi8u_bin", false}, 271 {77, "latin2", "latin2_bin", false}, 272 {78, "latin5", "latin5_bin", false}, 273 {79, "latin7", "latin7_bin", false}, 274 {80, "cp850", "cp850_bin", false}, 275 {81, "cp852", "cp852_bin", false}, 276 {82, "swe7", "swe7_bin", false}, 277 {83, "utf8", "utf8_bin", false}, 278 {84, "big5", "big5_bin", false}, 279 {85, "euckr", "euckr_bin", false}, 280 {86, "gb2312", "gb2312_bin", false}, 281 {87, "gbk", "gbk_bin", false}, 282 {88, "sjis", "sjis_bin", false}, 283 {89, "tis620", "tis620_bin", false}, 284 {90, "ucs2", "ucs2_bin", false}, 285 {91, "ujis", "ujis_bin", false}, 286 {92, "geostd8", "geostd8_general_ci", true}, 287 {93, "geostd8", "geostd8_bin", false}, 288 {94, "latin1", "latin1_spanish_ci", false}, 289 {95, "cp932", "cp932_japanese_ci", true}, 290 {96, "cp932", "cp932_bin", false}, 291 {97, "eucjpms", "eucjpms_japanese_ci", true}, 292 {98, "eucjpms", "eucjpms_bin", false}, 293 {99, "cp1250", "cp1250_polish_ci", false}, 294 {101, "utf16", "utf16_unicode_ci", false}, 295 {102, "utf16", "utf16_icelandic_ci", false}, 296 {103, "utf16", "utf16_latvian_ci", false}, 297 {104, "utf16", "utf16_romanian_ci", false}, 298 {105, "utf16", "utf16_slovenian_ci", false}, 299 {106, "utf16", "utf16_polish_ci", false}, 300 {107, "utf16", "utf16_estonian_ci", false}, 301 {108, "utf16", "utf16_spanish_ci", false}, 302 {109, "utf16", "utf16_swedish_ci", false}, 303 {110, "utf16", "utf16_turkish_ci", false}, 304 {111, "utf16", "utf16_czech_ci", false}, 305 {112, "utf16", "utf16_danish_ci", false}, 306 {113, "utf16", "utf16_lithuanian_ci", false}, 307 {114, "utf16", "utf16_slovak_ci", false}, 308 {115, "utf16", "utf16_spanish2_ci", false}, 309 {116, "utf16", "utf16_roman_ci", false}, 310 {117, "utf16", "utf16_persian_ci", false}, 311 {118, "utf16", "utf16_esperanto_ci", false}, 312 {119, "utf16", "utf16_hungarian_ci", false}, 313 {120, "utf16", "utf16_sinhala_ci", false}, 314 {121, "utf16", "utf16_german2_ci", false}, 315 {122, "utf16", "utf16_croatian_ci", false}, 316 {123, "utf16", "utf16_unicode_520_ci", false}, 317 {124, "utf16", "utf16_vietnamese_ci", false}, 318 {128, "ucs2", "ucs2_unicode_ci", false}, 319 {129, "ucs2", "ucs2_icelandic_ci", false}, 320 {130, "ucs2", "ucs2_latvian_ci", false}, 321 {131, "ucs2", "ucs2_romanian_ci", false}, 322 {132, "ucs2", "ucs2_slovenian_ci", false}, 323 {133, "ucs2", "ucs2_polish_ci", false}, 324 {134, "ucs2", "ucs2_estonian_ci", false}, 325 {135, "ucs2", "ucs2_spanish_ci", false}, 326 {136, "ucs2", "ucs2_swedish_ci", false}, 327 {137, "ucs2", "ucs2_turkish_ci", false}, 328 {138, "ucs2", "ucs2_czech_ci", false}, 329 {139, "ucs2", "ucs2_danish_ci", false}, 330 {140, "ucs2", "ucs2_lithuanian_ci", false}, 331 {141, "ucs2", "ucs2_slovak_ci", false}, 332 {142, "ucs2", "ucs2_spanish2_ci", false}, 333 {143, "ucs2", "ucs2_roman_ci", false}, 334 {144, "ucs2", "ucs2_persian_ci", false}, 335 {145, "ucs2", "ucs2_esperanto_ci", false}, 336 {146, "ucs2", "ucs2_hungarian_ci", false}, 337 {147, "ucs2", "ucs2_sinhala_ci", false}, 338 {148, "ucs2", "ucs2_german2_ci", false}, 339 {149, "ucs2", "ucs2_croatian_ci", false}, 340 {150, "ucs2", "ucs2_unicode_520_ci", false}, 341 {151, "ucs2", "ucs2_vietnamese_ci", false}, 342 {159, "ucs2", "ucs2_general_mysql500_ci", false}, 343 {160, "utf32", "utf32_unicode_ci", false}, 344 {161, "utf32", "utf32_icelandic_ci", false}, 345 {162, "utf32", "utf32_latvian_ci", false}, 346 {163, "utf32", "utf32_romanian_ci", false}, 347 {164, "utf32", "utf32_slovenian_ci", false}, 348 {165, "utf32", "utf32_polish_ci", false}, 349 {166, "utf32", "utf32_estonian_ci", false}, 350 {167, "utf32", "utf32_spanish_ci", false}, 351 {168, "utf32", "utf32_swedish_ci", false}, 352 {169, "utf32", "utf32_turkish_ci", false}, 353 {170, "utf32", "utf32_czech_ci", false}, 354 {171, "utf32", "utf32_danish_ci", false}, 355 {172, "utf32", "utf32_lithuanian_ci", false}, 356 {173, "utf32", "utf32_slovak_ci", false}, 357 {174, "utf32", "utf32_spanish2_ci", false}, 358 {175, "utf32", "utf32_roman_ci", false}, 359 {176, "utf32", "utf32_persian_ci", false}, 360 {177, "utf32", "utf32_esperanto_ci", false}, 361 {178, "utf32", "utf32_hungarian_ci", false}, 362 {179, "utf32", "utf32_sinhala_ci", false}, 363 {180, "utf32", "utf32_german2_ci", false}, 364 {181, "utf32", "utf32_croatian_ci", false}, 365 {182, "utf32", "utf32_unicode_520_ci", false}, 366 {183, "utf32", "utf32_vietnamese_ci", false}, 367 {192, "utf8", "utf8_unicode_ci", false}, 368 {193, "utf8", "utf8_icelandic_ci", false}, 369 {194, "utf8", "utf8_latvian_ci", false}, 370 {195, "utf8", "utf8_romanian_ci", false}, 371 {196, "utf8", "utf8_slovenian_ci", false}, 372 {197, "utf8", "utf8_polish_ci", false}, 373 {198, "utf8", "utf8_estonian_ci", false}, 374 {199, "utf8", "utf8_spanish_ci", false}, 375 {200, "utf8", "utf8_swedish_ci", false}, 376 {201, "utf8", "utf8_turkish_ci", false}, 377 {202, "utf8", "utf8_czech_ci", false}, 378 {203, "utf8", "utf8_danish_ci", false}, 379 {204, "utf8", "utf8_lithuanian_ci", false}, 380 {205, "utf8", "utf8_slovak_ci", false}, 381 {206, "utf8", "utf8_spanish2_ci", false}, 382 {207, "utf8", "utf8_roman_ci", false}, 383 {208, "utf8", "utf8_persian_ci", false}, 384 {209, "utf8", "utf8_esperanto_ci", false}, 385 {210, "utf8", "utf8_hungarian_ci", false}, 386 {211, "utf8", "utf8_sinhala_ci", false}, 387 {212, "utf8", "utf8_german2_ci", false}, 388 {213, "utf8", "utf8_croatian_ci", false}, 389 {214, "utf8", "utf8_unicode_520_ci", false}, 390 {215, "utf8", "utf8_vietnamese_ci", false}, 391 {223, "utf8", "utf8_general_mysql500_ci", false}, 392 {224, "utf8mb4", "utf8mb4_unicode_ci", false}, 393 {225, "utf8mb4", "utf8mb4_icelandic_ci", false}, 394 {226, "utf8mb4", "utf8mb4_latvian_ci", false}, 395 {227, "utf8mb4", "utf8mb4_romanian_ci", false}, 396 {228, "utf8mb4", "utf8mb4_slovenian_ci", false}, 397 {229, "utf8mb4", "utf8mb4_polish_ci", false}, 398 {230, "utf8mb4", "utf8mb4_estonian_ci", false}, 399 {231, "utf8mb4", "utf8mb4_spanish_ci", false}, 400 {232, "utf8mb4", "utf8mb4_swedish_ci", false}, 401 {233, "utf8mb4", "utf8mb4_turkish_ci", false}, 402 {234, "utf8mb4", "utf8mb4_czech_ci", false}, 403 {235, "utf8mb4", "utf8mb4_danish_ci", false}, 404 {236, "utf8mb4", "utf8mb4_lithuanian_ci", false}, 405 {237, "utf8mb4", "utf8mb4_slovak_ci", false}, 406 {238, "utf8mb4", "utf8mb4_spanish2_ci", false}, 407 {239, "utf8mb4", "utf8mb4_roman_ci", false}, 408 {240, "utf8mb4", "utf8mb4_persian_ci", false}, 409 {241, "utf8mb4", "utf8mb4_esperanto_ci", false}, 410 {242, "utf8mb4", "utf8mb4_hungarian_ci", false}, 411 {243, "utf8mb4", "utf8mb4_sinhala_ci", false}, 412 {244, "utf8mb4", "utf8mb4_german2_ci", false}, 413 {245, "utf8mb4", "utf8mb4_croatian_ci", false}, 414 {246, "utf8mb4", "utf8mb4_unicode_520_ci", false}, 415 {247, "utf8mb4", "utf8mb4_vietnamese_ci", false}, 416 }