// CharsetIDs gives, for every supported charset name, the numeric ID of
// that charset's default collation.
var CharsetIDs = map[string]uint8{
	"big5":     1,
	"dec8":     3,
	"cp850":    4,
	"hp8":      6,
	"koi8r":    7,
	"latin1":   8,
	"latin2":   9,
	"swe7":     10,
	"ascii":    11,
	"ujis":     12,
	"sjis":     13,
	"hebrew":   16,
	"tis620":   18,
	"euckr":    19,
	"koi8u":    22,
	"gb2312":   24,
	"greek":    25,
	"cp1250":   26,
	"gbk":      28,
	"latin5":   30,
	"armscii8": 32,
	"utf8":     33,
	"ucs2":     35,
	"cp866":    36,
	"keybcs2":  37,
	"macce":    38,
	"macroman": 39,
	"cp852":    40,
	"latin7":   41,
	"utf8mb4":  45,
	"cp1251":   51,
	"utf16":    54,
	"utf16le":  56,
	"cp1256":   57,
	"cp1257":   59,
	"utf32":    60,
	"binary":   63,
	"geostd8":  92,
	"cp932":    95,
	"eucjpms":  97,
}
// Charsets gives, for every supported charset name, the name of that
// charset's default collation.
var Charsets = map[string]string{
	"big5":     "big5_chinese_ci",
	"dec8":     "dec8_swedish_ci",
	"cp850":    "cp850_general_ci",
	"hp8":      "hp8_english_ci",
	"koi8r":    "koi8r_general_ci",
	"latin1":   "latin1_swedish_ci",
	"latin2":   "latin2_general_ci",
	"swe7":     "swe7_swedish_ci",
	"ascii":    "ascii_general_ci",
	"ujis":     "ujis_japanese_ci",
	"sjis":     "sjis_japanese_ci",
	"hebrew":   "hebrew_general_ci",
	"tis620":   "tis620_thai_ci",
	"euckr":    "euckr_korean_ci",
	"koi8u":    "koi8u_general_ci",
	"gb2312":   "gb2312_chinese_ci",
	"greek":    "greek_general_ci",
	"cp1250":   "cp1250_general_ci",
	"gbk":      "gbk_chinese_ci",
	"latin5":   "latin5_turkish_ci",
	"armscii8": "armscii8_general_ci",
	"utf8":     "utf8_general_ci",
	"ucs2":     "ucs2_general_ci",
	"cp866":    "cp866_general_ci",
	"keybcs2":  "keybcs2_general_ci",
	"macce":    "macce_general_ci",
	"macroman": "macroman_general_ci",
	"cp852":    "cp852_general_ci",
	"latin7":   "latin7_general_ci",
	"utf8mb4":  "utf8mb4_general_ci",
	"cp1251":   "cp1251_general_ci",
	"utf16":    "utf16_general_ci",
	"utf16le":  "utf16le_general_ci",
	"cp1256":   "cp1256_general_ci",
	"cp1257":   "cp1257_general_ci",
	"utf32":    "utf32_general_ci",
	"binary":   "binary",
	"geostd8":  "geostd8_general_ci",
	"cp932":    "cp932_japanese_ci",
	"eucjpms":  "eucjpms_japanese_ci",
}
// Collations maps a MySQL collation ID to its collation name.
//
// It is the single source of truth for the ID/name pairing: CollationNames
// is derived from it, so every name in this table must be unique.
var Collations = map[uint8]string{
	1:   "big5_chinese_ci",
	2:   "latin2_czech_cs",
	3:   "dec8_swedish_ci",
	4:   "cp850_general_ci",
	5:   "latin1_german1_ci",
	6:   "hp8_english_ci",
	7:   "koi8r_general_ci",
	8:   "latin1_swedish_ci",
	9:   "latin2_general_ci",
	10:  "swe7_swedish_ci",
	11:  "ascii_general_ci",
	12:  "ujis_japanese_ci",
	13:  "sjis_japanese_ci",
	14:  "cp1251_bulgarian_ci",
	15:  "latin1_danish_ci",
	16:  "hebrew_general_ci",
	18:  "tis620_thai_ci",
	19:  "euckr_korean_ci",
	20:  "latin7_estonian_cs",
	21:  "latin2_hungarian_ci",
	22:  "koi8u_general_ci",
	23:  "cp1251_ukrainian_ci",
	24:  "gb2312_chinese_ci",
	25:  "greek_general_ci",
	26:  "cp1250_general_ci",
	27:  "latin2_croatian_ci",
	28:  "gbk_chinese_ci",
	29:  "cp1257_lithuanian_ci",
	30:  "latin5_turkish_ci",
	31:  "latin1_german2_ci",
	32:  "armscii8_general_ci",
	33:  "utf8_general_ci",
	34:  "cp1250_czech_cs",
	35:  "ucs2_general_ci",
	36:  "cp866_general_ci",
	37:  "keybcs2_general_ci",
	38:  "macce_general_ci",
	39:  "macroman_general_ci",
	40:  "cp852_general_ci",
	41:  "latin7_general_ci",
	42:  "latin7_general_cs",
	43:  "macce_bin",
	44:  "cp1250_croatian_ci",
	45:  "utf8mb4_general_ci",
	46:  "utf8mb4_bin",
	47:  "latin1_bin",
	48:  "latin1_general_ci",
	49:  "latin1_general_cs",
	50:  "cp1251_bin",
	51:  "cp1251_general_ci",
	52:  "cp1251_general_cs",
	53:  "macroman_bin",
	54:  "utf16_general_ci",
	55:  "utf16_bin",
	56:  "utf16le_general_ci",
	57:  "cp1256_general_ci",
	58:  "cp1257_bin",
	59:  "cp1257_general_ci",
	60:  "utf32_general_ci",
	61:  "utf32_bin",
	62:  "utf16le_bin",
	63:  "binary",
	64:  "armscii8_bin",
	65:  "ascii_bin",
	66:  "cp1250_bin",
	67:  "cp1256_bin",
	68:  "cp866_bin",
	69:  "dec8_bin",
	70:  "greek_bin",
	71:  "hebrew_bin",
	72:  "hp8_bin",
	73:  "keybcs2_bin",
	74:  "koi8r_bin",
	75:  "koi8u_bin",
	77:  "latin2_bin",
	78:  "latin5_bin",
	79:  "latin7_bin",
	80:  "cp850_bin",
	81:  "cp852_bin",
	82:  "swe7_bin",
	83:  "utf8_bin",
	84:  "big5_bin",
	85:  "euckr_bin",
	86:  "gb2312_bin",
	87:  "gbk_bin",
	88:  "sjis_bin",
	89:  "tis620_bin",
	90:  "ucs2_bin",
	91:  "ujis_bin",
	92:  "geostd8_general_ci",
	93:  "geostd8_bin",
	94:  "latin1_spanish_ci",
	95:  "cp932_japanese_ci",
	96:  "cp932_bin",
	97:  "eucjpms_japanese_ci",
	98:  "eucjpms_bin",
	99:  "cp1250_polish_ci",
	101: "utf16_unicode_ci",
	102: "utf16_icelandic_ci",
	103: "utf16_latvian_ci",
	104: "utf16_romanian_ci",
	105: "utf16_slovenian_ci",
	106: "utf16_polish_ci",
	107: "utf16_estonian_ci",
	108: "utf16_spanish_ci",
	109: "utf16_swedish_ci",
	110: "utf16_turkish_ci",
	111: "utf16_czech_ci",
	112: "utf16_danish_ci",
	113: "utf16_lithuanian_ci",
	114: "utf16_slovak_ci",
	115: "utf16_spanish2_ci",
	116: "utf16_roman_ci",
	117: "utf16_persian_ci",
	118: "utf16_esperanto_ci",
	119: "utf16_hungarian_ci",
	120: "utf16_sinhala_ci",
	121: "utf16_german2_ci",
	122: "utf16_croatian_ci",
	123: "utf16_unicode_520_ci",
	124: "utf16_vietnamese_ci",
	128: "ucs2_unicode_ci",
	129: "ucs2_icelandic_ci",
	130: "ucs2_latvian_ci",
	131: "ucs2_romanian_ci",
	132: "ucs2_slovenian_ci",
	133: "ucs2_polish_ci",
	134: "ucs2_estonian_ci",
	135: "ucs2_spanish_ci",
	136: "ucs2_swedish_ci",
	137: "ucs2_turkish_ci",
	138: "ucs2_czech_ci",
	139: "ucs2_danish_ci",
	140: "ucs2_lithuanian_ci",
	141: "ucs2_slovak_ci",
	142: "ucs2_spanish2_ci",
	143: "ucs2_roman_ci",
	144: "ucs2_persian_ci",
	145: "ucs2_esperanto_ci",
	146: "ucs2_hungarian_ci",
	147: "ucs2_sinhala_ci",
	148: "ucs2_german2_ci",
	149: "ucs2_croatian_ci",
	150: "ucs2_unicode_520_ci",
	151: "ucs2_vietnamese_ci",
	159: "ucs2_general_mysql500_ci",
	160: "utf32_unicode_ci",
	161: "utf32_icelandic_ci",
	162: "utf32_latvian_ci",
	163: "utf32_romanian_ci",
	164: "utf32_slovenian_ci",
	165: "utf32_polish_ci",
	166: "utf32_estonian_ci",
	167: "utf32_spanish_ci",
	168: "utf32_swedish_ci",
	169: "utf32_turkish_ci",
	170: "utf32_czech_ci",
	171: "utf32_danish_ci",
	172: "utf32_lithuanian_ci",
	173: "utf32_slovak_ci",
	174: "utf32_spanish2_ci",
	175: "utf32_roman_ci",
	176: "utf32_persian_ci",
	177: "utf32_esperanto_ci",
	178: "utf32_hungarian_ci",
	179: "utf32_sinhala_ci",
	180: "utf32_german2_ci",
	181: "utf32_croatian_ci",
	182: "utf32_unicode_520_ci",
	183: "utf32_vietnamese_ci",
	192: "utf8_unicode_ci",
	193: "utf8_icelandic_ci",
	194: "utf8_latvian_ci",
	195: "utf8_romanian_ci",
	196: "utf8_slovenian_ci",
	197: "utf8_polish_ci",
	198: "utf8_estonian_ci",
	199: "utf8_spanish_ci",
	200: "utf8_swedish_ci",
	201: "utf8_turkish_ci",
	202: "utf8_czech_ci",
	203: "utf8_danish_ci",
	204: "utf8_lithuanian_ci",
	205: "utf8_slovak_ci",
	206: "utf8_spanish2_ci",
	207: "utf8_roman_ci",
	208: "utf8_persian_ci",
	209: "utf8_esperanto_ci",
	210: "utf8_hungarian_ci",
	211: "utf8_sinhala_ci",
	212: "utf8_german2_ci",
	213: "utf8_croatian_ci",
	214: "utf8_unicode_520_ci",
	215: "utf8_vietnamese_ci",
	223: "utf8_general_mysql500_ci",
	224: "utf8mb4_unicode_ci",
	225: "utf8mb4_icelandic_ci",
	226: "utf8mb4_latvian_ci",
	227: "utf8mb4_romanian_ci",
	228: "utf8mb4_slovenian_ci",
	229: "utf8mb4_polish_ci",
	230: "utf8mb4_estonian_ci",
	231: "utf8mb4_spanish_ci",
	232: "utf8mb4_swedish_ci",
	233: "utf8mb4_turkish_ci",
	234: "utf8mb4_czech_ci",
	235: "utf8mb4_danish_ci",
	236: "utf8mb4_lithuanian_ci",
	237: "utf8mb4_slovak_ci",
	238: "utf8mb4_spanish2_ci",
	239: "utf8mb4_roman_ci",
	240: "utf8mb4_persian_ci",
	241: "utf8mb4_esperanto_ci",
	242: "utf8mb4_hungarian_ci",
	243: "utf8mb4_sinhala_ci",
	244: "utf8mb4_german2_ci",
	245: "utf8mb4_croatian_ci",
	246: "utf8mb4_unicode_520_ci",
	247: "utf8mb4_vietnamese_ci",
}

// CollationNames maps a MySQL collation name to its ID.
//
// It is the exact inverse of Collations and is built from that table during
// package initialization rather than being maintained by hand, so the two
// lookups cannot drift apart.
var CollationNames = func() map[string]uint8 {
	names := make(map[string]uint8, len(Collations))
	for id, name := range Collations {
		names[name] = id
	}
	return names
}()

// MySQL collation information.
//
// DefaultCollationID (83) and BinaryCollationID (63) are keys of Collations;
// they correspond to "utf8_bin" and "binary" respectively.
const (
	UTF8Charset          = "utf8"
	UTF8MB4Charset       = "utf8mb4"
	DefaultCharset       = UTF8Charset
	DefaultCollationID   = 83
	BinaryCollationID    = 63
	UTF8DefaultCollation = "utf8_bin"
	DefaultCollationName = UTF8DefaultCollation

	// MaxBytesOfCharacter is the maximum byte length of a single character.
	// Per RFC 3629, UTF-8 encodes characters from the U+0000..U+10FFFF range
	// (the UTF-16 accessible range) using sequences of 1 to 4 octets.
	MaxBytesOfCharacter = 4
)

// IsUTF8Charset reports whether charset is exactly "utf8" or "utf8mb4".
// The comparison is case-sensitive.
func IsUTF8Charset(charset string) bool {
	return charset == UTF8Charset || charset == UTF8MB4Charset
}

// RangeGraph defines valid unicode characters to use in column names. It strictly follows MySQL's definition.
// See #3994.
//
// NOTE(review): some tables appear under more than one group, and unicode.Pd
// is listed twice in a row. The repeats are redundant for membership tests
// and are kept only so the exported slice keeps its existing contents.
var RangeGraph = []*unicode.RangeTable{
	// _MY_PNT
	unicode.No,
	unicode.Mn,
	unicode.Me,
	unicode.Pc,
	unicode.Pd,
	unicode.Pd,
	unicode.Ps,
	unicode.Pe,
	unicode.Pi,
	unicode.Pf,
	unicode.Po,
	unicode.Sm,
	unicode.Sc,
	unicode.Sk,
	unicode.So,
	// _MY_U
	unicode.Lu,
	unicode.Lt,
	unicode.Nl,
	// _MY_L
	unicode.Ll,
	unicode.Lm,
	unicode.Lo,
	unicode.Nl,
	unicode.Mn,
	unicode.Mc,
	unicode.Me,
	// _MY_NMR
	unicode.Nd,
	unicode.Nl,
	unicode.No,
}