github.com/pingcap/tidb/parser@v0.0.0-20231013125129-93a834a6bf8d/mysql/charset.go (about) 1 // Copyright 2015 PingCAP, Inc. 2 // 3 // Licensed under the Apache License, Version 2.0 (the "License"); 4 // you may not use this file except in compliance with the License. 5 // You may obtain a copy of the License at 6 // 7 // http://www.apache.org/licenses/LICENSE-2.0 8 // 9 // Unless required by applicable law or agreed to in writing, software 10 // distributed under the License is distributed on an "AS IS" BASIS, 11 // See the License for the specific language governing permissions and 12 // limitations under the License. 13 14 package mysql 15 16 import "unicode" 17 18 // CharsetNameToID maps charset name to its default collation ID. 19 func CharsetNameToID(charset string) uint8 { 20 // Use quick path for TiDB to avoid access CharsetIDs map 21 // "SHOW CHARACTER SET;" to see all the supported character sets. 22 if charset == "utf8mb4" { 23 return UTF8MB4DefaultCollationID 24 } else if charset == "binary" { 25 return BinaryDefaultCollationID 26 } else if charset == "utf8" { 27 return UTF8DefaultCollationID 28 } else if charset == "ascii" { 29 return ASCIIDefaultCollationID 30 } else if charset == "latin1" { 31 return Latin1DefaultCollationID 32 } else { 33 return CharsetIDs[charset] 34 } 35 } 36 37 // CharsetIDs maps charset name to its default collation ID. 
38 var CharsetIDs = map[string]uint8{ 39 "big5": 1, 40 "dec8": 3, 41 "cp850": 4, 42 "hp8": 6, 43 "koi8r": 7, 44 "latin1": Latin1DefaultCollationID, 45 "latin2": 9, 46 "swe7": 10, 47 "ascii": ASCIIDefaultCollationID, 48 "ujis": 12, 49 "sjis": 13, 50 "hebrew": 16, 51 "tis620": 18, 52 "euckr": 19, 53 "koi8u": 22, 54 "gb2312": 24, 55 "greek": 25, 56 "cp1250": 26, 57 "gbk": 28, 58 "latin5": 30, 59 "armscii8": 32, 60 "utf8": UTF8DefaultCollationID, 61 "ucs2": 35, 62 "cp866": 36, 63 "keybcs2": 37, 64 "macce": 38, 65 "macroman": 39, 66 "cp852": 40, 67 "latin7": 41, 68 "utf8mb4": UTF8MB4DefaultCollationID, 69 "cp1251": 51, 70 "utf16": 54, 71 "utf16le": 56, 72 "cp1256": 57, 73 "cp1257": 59, 74 "utf32": 60, 75 "binary": BinaryDefaultCollationID, 76 "geostd8": 92, 77 "cp932": 95, 78 "eucjpms": 97, 79 } 80 81 // Collations maps MySQL collation ID to its name. 82 var Collations = map[uint16]string{ 83 1: "big5_chinese_ci", 84 2: "latin2_czech_cs", 85 3: "dec8_swedish_ci", 86 4: "cp850_general_ci", 87 5: "latin1_german1_ci", 88 6: "hp8_english_ci", 89 7: "koi8r_general_ci", 90 8: "latin1_swedish_ci", 91 9: "latin2_general_ci", 92 10: "swe7_swedish_ci", 93 11: "ascii_general_ci", 94 12: "ujis_japanese_ci", 95 13: "sjis_japanese_ci", 96 14: "cp1251_bulgarian_ci", 97 15: "latin1_danish_ci", 98 16: "hebrew_general_ci", 99 18: "tis620_thai_ci", 100 19: "euckr_korean_ci", 101 20: "latin7_estonian_cs", 102 21: "latin2_hungarian_ci", 103 22: "koi8u_general_ci", 104 23: "cp1251_ukrainian_ci", 105 24: "gb2312_chinese_ci", 106 25: "greek_general_ci", 107 26: "cp1250_general_ci", 108 27: "latin2_croatian_ci", 109 28: "gbk_chinese_ci", 110 29: "cp1257_lithuanian_ci", 111 30: "latin5_turkish_ci", 112 31: "latin1_german2_ci", 113 32: "armscii8_general_ci", 114 33: "utf8_general_ci", 115 34: "cp1250_czech_cs", 116 35: "ucs2_general_ci", 117 36: "cp866_general_ci", 118 37: "keybcs2_general_ci", 119 38: "macce_general_ci", 120 39: "macroman_general_ci", 121 40: "cp852_general_ci", 122 41: 
"latin7_general_ci", 123 42: "latin7_general_cs", 124 43: "macce_bin", 125 44: "cp1250_croatian_ci", 126 45: "utf8mb4_general_ci", 127 46: "utf8mb4_bin", 128 47: "latin1_bin", 129 48: "latin1_general_ci", 130 49: "latin1_general_cs", 131 50: "cp1251_bin", 132 51: "cp1251_general_ci", 133 52: "cp1251_general_cs", 134 53: "macroman_bin", 135 54: "utf16_general_ci", 136 55: "utf16_bin", 137 56: "utf16le_general_ci", 138 57: "cp1256_general_ci", 139 58: "cp1257_bin", 140 59: "cp1257_general_ci", 141 60: "utf32_general_ci", 142 61: "utf32_bin", 143 62: "utf16le_bin", 144 63: "binary", 145 64: "armscii8_bin", 146 65: "ascii_bin", 147 66: "cp1250_bin", 148 67: "cp1256_bin", 149 68: "cp866_bin", 150 69: "dec8_bin", 151 70: "greek_bin", 152 71: "hebrew_bin", 153 72: "hp8_bin", 154 73: "keybcs2_bin", 155 74: "koi8r_bin", 156 75: "koi8u_bin", 157 77: "latin2_bin", 158 78: "latin5_bin", 159 79: "latin7_bin", 160 80: "cp850_bin", 161 81: "cp852_bin", 162 82: "swe7_bin", 163 83: "utf8_bin", 164 84: "big5_bin", 165 85: "euckr_bin", 166 86: "gb2312_bin", 167 87: "gbk_bin", 168 88: "sjis_bin", 169 89: "tis620_bin", 170 90: "ucs2_bin", 171 91: "ujis_bin", 172 92: "geostd8_general_ci", 173 93: "geostd8_bin", 174 94: "latin1_spanish_ci", 175 95: "cp932_japanese_ci", 176 96: "cp932_bin", 177 97: "eucjpms_japanese_ci", 178 98: "eucjpms_bin", 179 99: "cp1250_polish_ci", 180 101: "utf16_unicode_ci", 181 102: "utf16_icelandic_ci", 182 103: "utf16_latvian_ci", 183 104: "utf16_romanian_ci", 184 105: "utf16_slovenian_ci", 185 106: "utf16_polish_ci", 186 107: "utf16_estonian_ci", 187 108: "utf16_spanish_ci", 188 109: "utf16_swedish_ci", 189 110: "utf16_turkish_ci", 190 111: "utf16_czech_ci", 191 112: "utf16_danish_ci", 192 113: "utf16_lithuanian_ci", 193 114: "utf16_slovak_ci", 194 115: "utf16_spanish2_ci", 195 116: "utf16_roman_ci", 196 117: "utf16_persian_ci", 197 118: "utf16_esperanto_ci", 198 119: "utf16_hungarian_ci", 199 120: "utf16_sinhala_ci", 200 121: "utf16_german2_ci", 201 122: 
"utf16_croatian_ci", 202 123: "utf16_unicode_520_ci", 203 124: "utf16_vietnamese_ci", 204 128: "ucs2_unicode_ci", 205 129: "ucs2_icelandic_ci", 206 130: "ucs2_latvian_ci", 207 131: "ucs2_romanian_ci", 208 132: "ucs2_slovenian_ci", 209 133: "ucs2_polish_ci", 210 134: "ucs2_estonian_ci", 211 135: "ucs2_spanish_ci", 212 136: "ucs2_swedish_ci", 213 137: "ucs2_turkish_ci", 214 138: "ucs2_czech_ci", 215 139: "ucs2_danish_ci", 216 140: "ucs2_lithuanian_ci", 217 141: "ucs2_slovak_ci", 218 142: "ucs2_spanish2_ci", 219 143: "ucs2_roman_ci", 220 144: "ucs2_persian_ci", 221 145: "ucs2_esperanto_ci", 222 146: "ucs2_hungarian_ci", 223 147: "ucs2_sinhala_ci", 224 148: "ucs2_german2_ci", 225 149: "ucs2_croatian_ci", 226 150: "ucs2_unicode_520_ci", 227 151: "ucs2_vietnamese_ci", 228 159: "ucs2_general_mysql500_ci", 229 160: "utf32_unicode_ci", 230 161: "utf32_icelandic_ci", 231 162: "utf32_latvian_ci", 232 163: "utf32_romanian_ci", 233 164: "utf32_slovenian_ci", 234 165: "utf32_polish_ci", 235 166: "utf32_estonian_ci", 236 167: "utf32_spanish_ci", 237 168: "utf32_swedish_ci", 238 169: "utf32_turkish_ci", 239 170: "utf32_czech_ci", 240 171: "utf32_danish_ci", 241 172: "utf32_lithuanian_ci", 242 173: "utf32_slovak_ci", 243 174: "utf32_spanish2_ci", 244 175: "utf32_roman_ci", 245 176: "utf32_persian_ci", 246 177: "utf32_esperanto_ci", 247 178: "utf32_hungarian_ci", 248 179: "utf32_sinhala_ci", 249 180: "utf32_german2_ci", 250 181: "utf32_croatian_ci", 251 182: "utf32_unicode_520_ci", 252 183: "utf32_vietnamese_ci", 253 192: "utf8_unicode_ci", 254 193: "utf8_icelandic_ci", 255 194: "utf8_latvian_ci", 256 195: "utf8_romanian_ci", 257 196: "utf8_slovenian_ci", 258 197: "utf8_polish_ci", 259 198: "utf8_estonian_ci", 260 199: "utf8_spanish_ci", 261 200: "utf8_swedish_ci", 262 201: "utf8_turkish_ci", 263 202: "utf8_czech_ci", 264 203: "utf8_danish_ci", 265 204: "utf8_lithuanian_ci", 266 205: "utf8_slovak_ci", 267 206: "utf8_spanish2_ci", 268 207: "utf8_roman_ci", 269 208: "utf8_persian_ci", 
270 209: "utf8_esperanto_ci", 271 210: "utf8_hungarian_ci", 272 211: "utf8_sinhala_ci", 273 212: "utf8_german2_ci", 274 213: "utf8_croatian_ci", 275 214: "utf8_unicode_520_ci", 276 215: "utf8_vietnamese_ci", 277 223: "utf8_general_mysql500_ci", 278 224: "utf8mb4_unicode_ci", 279 225: "utf8mb4_icelandic_ci", 280 226: "utf8mb4_latvian_ci", 281 227: "utf8mb4_romanian_ci", 282 228: "utf8mb4_slovenian_ci", 283 229: "utf8mb4_polish_ci", 284 230: "utf8mb4_estonian_ci", 285 231: "utf8mb4_spanish_ci", 286 232: "utf8mb4_swedish_ci", 287 233: "utf8mb4_turkish_ci", 288 234: "utf8mb4_czech_ci", 289 235: "utf8mb4_danish_ci", 290 236: "utf8mb4_lithuanian_ci", 291 237: "utf8mb4_slovak_ci", 292 238: "utf8mb4_spanish2_ci", 293 239: "utf8mb4_roman_ci", 294 240: "utf8mb4_persian_ci", 295 241: "utf8mb4_esperanto_ci", 296 242: "utf8mb4_hungarian_ci", 297 243: "utf8mb4_sinhala_ci", 298 244: "utf8mb4_german2_ci", 299 245: "utf8mb4_croatian_ci", 300 246: "utf8mb4_unicode_520_ci", 301 247: "utf8mb4_vietnamese_ci", 302 255: "utf8mb4_0900_ai_ci", 303 309: "utf8mb4_0900_bin", 304 } 305 306 // CollationNames maps MySQL collation name to its ID 307 var CollationNames = map[string]uint16{ 308 "big5_chinese_ci": 1, 309 "latin2_czech_cs": 2, 310 "dec8_swedish_ci": 3, 311 "cp850_general_ci": 4, 312 "latin1_german1_ci": 5, 313 "hp8_english_ci": 6, 314 "koi8r_general_ci": 7, 315 "latin1_swedish_ci": 8, 316 "latin2_general_ci": 9, 317 "swe7_swedish_ci": 10, 318 "ascii_general_ci": 11, 319 "ujis_japanese_ci": 12, 320 "sjis_japanese_ci": 13, 321 "cp1251_bulgarian_ci": 14, 322 "latin1_danish_ci": 15, 323 "hebrew_general_ci": 16, 324 "tis620_thai_ci": 18, 325 "euckr_korean_ci": 19, 326 "latin7_estonian_cs": 20, 327 "latin2_hungarian_ci": 21, 328 "koi8u_general_ci": 22, 329 "cp1251_ukrainian_ci": 23, 330 "gb2312_chinese_ci": 24, 331 "greek_general_ci": 25, 332 "cp1250_general_ci": 26, 333 "latin2_croatian_ci": 27, 334 "gbk_chinese_ci": 28, 335 "cp1257_lithuanian_ci": 29, 336 "latin5_turkish_ci": 30, 337 
"latin1_german2_ci": 31, 338 "armscii8_general_ci": 32, 339 "utf8_general_ci": 33, 340 "cp1250_czech_cs": 34, 341 "ucs2_general_ci": 35, 342 "cp866_general_ci": 36, 343 "keybcs2_general_ci": 37, 344 "macce_general_ci": 38, 345 "macroman_general_ci": 39, 346 "cp852_general_ci": 40, 347 "latin7_general_ci": 41, 348 "latin7_general_cs": 42, 349 "macce_bin": 43, 350 "cp1250_croatian_ci": 44, 351 "utf8mb4_general_ci": 45, 352 "utf8mb4_bin": 46, 353 "latin1_bin": 47, 354 "latin1_general_ci": 48, 355 "latin1_general_cs": 49, 356 "cp1251_bin": 50, 357 "cp1251_general_ci": 51, 358 "cp1251_general_cs": 52, 359 "macroman_bin": 53, 360 "utf16_general_ci": 54, 361 "utf16_bin": 55, 362 "utf16le_general_ci": 56, 363 "cp1256_general_ci": 57, 364 "cp1257_bin": 58, 365 "cp1257_general_ci": 59, 366 "utf32_general_ci": 60, 367 "utf32_bin": 61, 368 "utf16le_bin": 62, 369 "binary": 63, 370 "armscii8_bin": 64, 371 "ascii_bin": 65, 372 "cp1250_bin": 66, 373 "cp1256_bin": 67, 374 "cp866_bin": 68, 375 "dec8_bin": 69, 376 "greek_bin": 70, 377 "hebrew_bin": 71, 378 "hp8_bin": 72, 379 "keybcs2_bin": 73, 380 "koi8r_bin": 74, 381 "koi8u_bin": 75, 382 "latin2_bin": 77, 383 "latin5_bin": 78, 384 "latin7_bin": 79, 385 "cp850_bin": 80, 386 "cp852_bin": 81, 387 "swe7_bin": 82, 388 "utf8_bin": 83, 389 "big5_bin": 84, 390 "euckr_bin": 85, 391 "gb2312_bin": 86, 392 "gbk_bin": 87, 393 "sjis_bin": 88, 394 "tis620_bin": 89, 395 "ucs2_bin": 90, 396 "ujis_bin": 91, 397 "geostd8_general_ci": 92, 398 "geostd8_bin": 93, 399 "latin1_spanish_ci": 94, 400 "cp932_japanese_ci": 95, 401 "cp932_bin": 96, 402 "eucjpms_japanese_ci": 97, 403 "eucjpms_bin": 98, 404 "cp1250_polish_ci": 99, 405 "utf16_unicode_ci": 101, 406 "utf16_icelandic_ci": 102, 407 "utf16_latvian_ci": 103, 408 "utf16_romanian_ci": 104, 409 "utf16_slovenian_ci": 105, 410 "utf16_polish_ci": 106, 411 "utf16_estonian_ci": 107, 412 "utf16_spanish_ci": 108, 413 "utf16_swedish_ci": 109, 414 "utf16_turkish_ci": 110, 415 "utf16_czech_ci": 111, 416 
"utf16_danish_ci": 112, 417 "utf16_lithuanian_ci": 113, 418 "utf16_slovak_ci": 114, 419 "utf16_spanish2_ci": 115, 420 "utf16_roman_ci": 116, 421 "utf16_persian_ci": 117, 422 "utf16_esperanto_ci": 118, 423 "utf16_hungarian_ci": 119, 424 "utf16_sinhala_ci": 120, 425 "utf16_german2_ci": 121, 426 "utf16_croatian_ci": 122, 427 "utf16_unicode_520_ci": 123, 428 "utf16_vietnamese_ci": 124, 429 "ucs2_unicode_ci": 128, 430 "ucs2_icelandic_ci": 129, 431 "ucs2_latvian_ci": 130, 432 "ucs2_romanian_ci": 131, 433 "ucs2_slovenian_ci": 132, 434 "ucs2_polish_ci": 133, 435 "ucs2_estonian_ci": 134, 436 "ucs2_spanish_ci": 135, 437 "ucs2_swedish_ci": 136, 438 "ucs2_turkish_ci": 137, 439 "ucs2_czech_ci": 138, 440 "ucs2_danish_ci": 139, 441 "ucs2_lithuanian_ci": 140, 442 "ucs2_slovak_ci": 141, 443 "ucs2_spanish2_ci": 142, 444 "ucs2_roman_ci": 143, 445 "ucs2_persian_ci": 144, 446 "ucs2_esperanto_ci": 145, 447 "ucs2_hungarian_ci": 146, 448 "ucs2_sinhala_ci": 147, 449 "ucs2_german2_ci": 148, 450 "ucs2_croatian_ci": 149, 451 "ucs2_unicode_520_ci": 150, 452 "ucs2_vietnamese_ci": 151, 453 "ucs2_general_mysql500_ci": 159, 454 "utf32_unicode_ci": 160, 455 "utf32_icelandic_ci": 161, 456 "utf32_latvian_ci": 162, 457 "utf32_romanian_ci": 163, 458 "utf32_slovenian_ci": 164, 459 "utf32_polish_ci": 165, 460 "utf32_estonian_ci": 166, 461 "utf32_spanish_ci": 167, 462 "utf32_swedish_ci": 168, 463 "utf32_turkish_ci": 169, 464 "utf32_czech_ci": 170, 465 "utf32_danish_ci": 171, 466 "utf32_lithuanian_ci": 172, 467 "utf32_slovak_ci": 173, 468 "utf32_spanish2_ci": 174, 469 "utf32_roman_ci": 175, 470 "utf32_persian_ci": 176, 471 "utf32_esperanto_ci": 177, 472 "utf32_hungarian_ci": 178, 473 "utf32_sinhala_ci": 179, 474 "utf32_german2_ci": 180, 475 "utf32_croatian_ci": 181, 476 "utf32_unicode_520_ci": 182, 477 "utf32_vietnamese_ci": 183, 478 "utf8_unicode_ci": 192, 479 "utf8_icelandic_ci": 193, 480 "utf8_latvian_ci": 194, 481 "utf8_romanian_ci": 195, 482 "utf8_slovenian_ci": 196, 483 "utf8_polish_ci": 197, 484 
"utf8_estonian_ci": 198, 485 "utf8_spanish_ci": 199, 486 "utf8_swedish_ci": 200, 487 "utf8_turkish_ci": 201, 488 "utf8_czech_ci": 202, 489 "utf8_danish_ci": 203, 490 "utf8_lithuanian_ci": 204, 491 "utf8_slovak_ci": 205, 492 "utf8_spanish2_ci": 206, 493 "utf8_roman_ci": 207, 494 "utf8_persian_ci": 208, 495 "utf8_esperanto_ci": 209, 496 "utf8_hungarian_ci": 210, 497 "utf8_sinhala_ci": 211, 498 "utf8_german2_ci": 212, 499 "utf8_croatian_ci": 213, 500 "utf8_unicode_520_ci": 214, 501 "utf8_vietnamese_ci": 215, 502 "utf8_general_mysql500_ci": 223, 503 "utf8mb4_unicode_ci": 224, 504 "utf8mb4_icelandic_ci": 225, 505 "utf8mb4_latvian_ci": 226, 506 "utf8mb4_romanian_ci": 227, 507 "utf8mb4_slovenian_ci": 228, 508 "utf8mb4_polish_ci": 229, 509 "utf8mb4_estonian_ci": 230, 510 "utf8mb4_spanish_ci": 231, 511 "utf8mb4_swedish_ci": 232, 512 "utf8mb4_turkish_ci": 233, 513 "utf8mb4_czech_ci": 234, 514 "utf8mb4_danish_ci": 235, 515 "utf8mb4_lithuanian_ci": 236, 516 "utf8mb4_slovak_ci": 237, 517 "utf8mb4_spanish2_ci": 238, 518 "utf8mb4_roman_ci": 239, 519 "utf8mb4_persian_ci": 240, 520 "utf8mb4_esperanto_ci": 241, 521 "utf8mb4_hungarian_ci": 242, 522 "utf8mb4_sinhala_ci": 243, 523 "utf8mb4_german2_ci": 244, 524 "utf8mb4_croatian_ci": 245, 525 "utf8mb4_unicode_520_ci": 246, 526 "utf8mb4_vietnamese_ci": 247, 527 "utf8mb4_0900_ai_ci": 255, 528 "utf8mb4_0900_bin": 309, 529 } 530 531 // MySQL collation information. 
const (
	UTF8Charset    = "utf8"
	UTF8MB4Charset = "utf8mb4"
	Latin1Charset  = "latin1"
	DefaultCharset = UTF8MB4Charset

	// Default collation IDs for the charsets that have a dedicated constant.
	// The trailing comments give the name each ID has in the Collations table.
	Latin1DefaultCollationID  = 47 // latin1_bin
	ASCIIDefaultCollationID   = 65 // ascii_bin
	UTF8DefaultCollationID    = 83 // utf8_bin
	UTF8MB4DefaultCollationID = 46 // utf8mb4_bin
	BinaryDefaultCollationID  = 63 // binary
	// DefaultCollationID is utf8mb4_bin(46), the default collation of DefaultCharset.
	DefaultCollationID = UTF8MB4DefaultCollationID

	UTF8DefaultCollation    = "utf8_bin"
	UTF8MB4DefaultCollation = "utf8mb4_bin"
	DefaultCollationName    = UTF8MB4DefaultCollation

	// MaxBytesOfCharacter is the maximum byte length of a single character.
	// Per RFC 3629, UTF-8 encodes characters from the U+0000..U+10FFFF range
	// (the UTF-16 accessible range) using sequences of 1 to 4 octets.
	MaxBytesOfCharacter = 4
)

// IsUTF8Charset reports whether charset is exactly "utf8" or "utf8mb4".
// The comparison is case-sensitive.
func IsUTF8Charset(charset string) bool {
	switch charset {
	case UTF8Charset, UTF8MB4Charset:
		return true
	}
	return false
}

// RangeGraph defines valid unicode characters to use in column names. It strictly follows MySQL's definition.
// See #3994.
//
// The tables are grouped under the MySQL class names given in the comments
// (presumably MySQL's ctype flags; confirm against the MySQL sources). A
// table may appear under more than one group; that repetition is kept to
// mirror the grouping and does not affect membership tests such as
// unicode.In(r, RangeGraph...).
var RangeGraph = []*unicode.RangeTable{
	// _MY_PNT
	unicode.No,
	unicode.Mn,
	unicode.Me,
	unicode.Pc,
	unicode.Pd, // previously listed twice in a row; the duplicate was redundant
	unicode.Ps,
	unicode.Pe,
	unicode.Pi,
	unicode.Pf,
	unicode.Po,
	unicode.Sm,
	unicode.Sc,
	unicode.Sk,
	unicode.So,
	// _MY_U
	unicode.Lu,
	unicode.Lt,
	unicode.Nl,
	// _MY_L
	unicode.Ll,
	unicode.Lm,
	unicode.Lo,
	unicode.Nl,
	unicode.Mn,
	unicode.Mc,
	unicode.Me,
	// _MY_NMR
	unicode.Nd,
	unicode.Nl,
	unicode.No,
}