github.com/pingcap/tidb/parser@v0.0.0-20231013125129-93a834a6bf8d/mysql/charset.go (about)

     1  // Copyright 2015 PingCAP, Inc.
     2  //
     3  // Licensed under the Apache License, Version 2.0 (the "License");
     4  // you may not use this file except in compliance with the License.
     5  // You may obtain a copy of the License at
     6  //
     7  //     http://www.apache.org/licenses/LICENSE-2.0
     8  //
     9  // Unless required by applicable law or agreed to in writing, software
    10  // distributed under the License is distributed on an "AS IS" BASIS,
    11  // See the License for the specific language governing permissions and
    12  // limitations under the License.
    13  
    14  package mysql
    15  
    16  import "unicode"
    17  
    18  // CharsetNameToID maps charset name to its default collation ID.
    19  func CharsetNameToID(charset string) uint8 {
    20  	// Use quick path for TiDB to avoid access CharsetIDs map
    21  	// "SHOW CHARACTER SET;" to see all the supported character sets.
    22  	if charset == "utf8mb4" {
    23  		return UTF8MB4DefaultCollationID
    24  	} else if charset == "binary" {
    25  		return BinaryDefaultCollationID
    26  	} else if charset == "utf8" {
    27  		return UTF8DefaultCollationID
    28  	} else if charset == "ascii" {
    29  		return ASCIIDefaultCollationID
    30  	} else if charset == "latin1" {
    31  		return Latin1DefaultCollationID
    32  	} else {
    33  		return CharsetIDs[charset]
    34  	}
    35  }
    36  
// CharsetIDs maps charset name to its default collation ID.
//
// The entries for latin1, ascii, utf8, utf8mb4 and binary use the named
// *DefaultCollationID constants declared in this package; every other entry is
// a literal collation ID. Names are matched exactly as spelled here, and a
// name with no entry yields 0 on lookup. Values are uint8, so only collation
// IDs up to 255 can be stored in this table.
var CharsetIDs = map[string]uint8{
	"big5":     1,
	"dec8":     3,
	"cp850":    4,
	"hp8":      6,
	"koi8r":    7,
	"latin1":   Latin1DefaultCollationID,
	"latin2":   9,
	"swe7":     10,
	"ascii":    ASCIIDefaultCollationID,
	"ujis":     12,
	"sjis":     13,
	"hebrew":   16,
	"tis620":   18,
	"euckr":    19,
	"koi8u":    22,
	"gb2312":   24,
	"greek":    25,
	"cp1250":   26,
	"gbk":      28,
	"latin5":   30,
	"armscii8": 32,
	"utf8":     UTF8DefaultCollationID,
	"ucs2":     35,
	"cp866":    36,
	"keybcs2":  37,
	"macce":    38,
	"macroman": 39,
	"cp852":    40,
	"latin7":   41,
	"utf8mb4":  UTF8MB4DefaultCollationID,
	"cp1251":   51,
	"utf16":    54,
	"utf16le":  56,
	"cp1256":   57,
	"cp1257":   59,
	"utf32":    60,
	"binary":   BinaryDefaultCollationID,
	"geostd8":  92,
	"cp932":    95,
	"eucjpms":  97,
}
    80  
// Collations maps MySQL collation ID to its name.
//
// The ID space is sparse: an ID with no entry here (for example 17, 76 and
// 100) yields the empty string on lookup. Keys are uint16 because some IDs do
// not fit in a byte (utf8mb4_0900_bin is 309).
// CollationNames holds the name-to-ID direction.
var Collations = map[uint16]string{
	1:   "big5_chinese_ci",
	2:   "latin2_czech_cs",
	3:   "dec8_swedish_ci",
	4:   "cp850_general_ci",
	5:   "latin1_german1_ci",
	6:   "hp8_english_ci",
	7:   "koi8r_general_ci",
	8:   "latin1_swedish_ci",
	9:   "latin2_general_ci",
	10:  "swe7_swedish_ci",
	11:  "ascii_general_ci",
	12:  "ujis_japanese_ci",
	13:  "sjis_japanese_ci",
	14:  "cp1251_bulgarian_ci",
	15:  "latin1_danish_ci",
	16:  "hebrew_general_ci",
	18:  "tis620_thai_ci",
	19:  "euckr_korean_ci",
	20:  "latin7_estonian_cs",
	21:  "latin2_hungarian_ci",
	22:  "koi8u_general_ci",
	23:  "cp1251_ukrainian_ci",
	24:  "gb2312_chinese_ci",
	25:  "greek_general_ci",
	26:  "cp1250_general_ci",
	27:  "latin2_croatian_ci",
	28:  "gbk_chinese_ci",
	29:  "cp1257_lithuanian_ci",
	30:  "latin5_turkish_ci",
	31:  "latin1_german2_ci",
	32:  "armscii8_general_ci",
	33:  "utf8_general_ci",
	34:  "cp1250_czech_cs",
	35:  "ucs2_general_ci",
	36:  "cp866_general_ci",
	37:  "keybcs2_general_ci",
	38:  "macce_general_ci",
	39:  "macroman_general_ci",
	40:  "cp852_general_ci",
	41:  "latin7_general_ci",
	42:  "latin7_general_cs",
	43:  "macce_bin",
	44:  "cp1250_croatian_ci",
	45:  "utf8mb4_general_ci",
	46:  "utf8mb4_bin",
	47:  "latin1_bin",
	48:  "latin1_general_ci",
	49:  "latin1_general_cs",
	50:  "cp1251_bin",
	51:  "cp1251_general_ci",
	52:  "cp1251_general_cs",
	53:  "macroman_bin",
	54:  "utf16_general_ci",
	55:  "utf16_bin",
	56:  "utf16le_general_ci",
	57:  "cp1256_general_ci",
	58:  "cp1257_bin",
	59:  "cp1257_general_ci",
	60:  "utf32_general_ci",
	61:  "utf32_bin",
	62:  "utf16le_bin",
	63:  "binary",
	64:  "armscii8_bin",
	65:  "ascii_bin",
	66:  "cp1250_bin",
	67:  "cp1256_bin",
	68:  "cp866_bin",
	69:  "dec8_bin",
	70:  "greek_bin",
	71:  "hebrew_bin",
	72:  "hp8_bin",
	73:  "keybcs2_bin",
	74:  "koi8r_bin",
	75:  "koi8u_bin",
	77:  "latin2_bin",
	78:  "latin5_bin",
	79:  "latin7_bin",
	80:  "cp850_bin",
	81:  "cp852_bin",
	82:  "swe7_bin",
	83:  "utf8_bin",
	84:  "big5_bin",
	85:  "euckr_bin",
	86:  "gb2312_bin",
	87:  "gbk_bin",
	88:  "sjis_bin",
	89:  "tis620_bin",
	90:  "ucs2_bin",
	91:  "ujis_bin",
	92:  "geostd8_general_ci",
	93:  "geostd8_bin",
	94:  "latin1_spanish_ci",
	95:  "cp932_japanese_ci",
	96:  "cp932_bin",
	97:  "eucjpms_japanese_ci",
	98:  "eucjpms_bin",
	99:  "cp1250_polish_ci",
	101: "utf16_unicode_ci",
	102: "utf16_icelandic_ci",
	103: "utf16_latvian_ci",
	104: "utf16_romanian_ci",
	105: "utf16_slovenian_ci",
	106: "utf16_polish_ci",
	107: "utf16_estonian_ci",
	108: "utf16_spanish_ci",
	109: "utf16_swedish_ci",
	110: "utf16_turkish_ci",
	111: "utf16_czech_ci",
	112: "utf16_danish_ci",
	113: "utf16_lithuanian_ci",
	114: "utf16_slovak_ci",
	115: "utf16_spanish2_ci",
	116: "utf16_roman_ci",
	117: "utf16_persian_ci",
	118: "utf16_esperanto_ci",
	119: "utf16_hungarian_ci",
	120: "utf16_sinhala_ci",
	121: "utf16_german2_ci",
	122: "utf16_croatian_ci",
	123: "utf16_unicode_520_ci",
	124: "utf16_vietnamese_ci",
	128: "ucs2_unicode_ci",
	129: "ucs2_icelandic_ci",
	130: "ucs2_latvian_ci",
	131: "ucs2_romanian_ci",
	132: "ucs2_slovenian_ci",
	133: "ucs2_polish_ci",
	134: "ucs2_estonian_ci",
	135: "ucs2_spanish_ci",
	136: "ucs2_swedish_ci",
	137: "ucs2_turkish_ci",
	138: "ucs2_czech_ci",
	139: "ucs2_danish_ci",
	140: "ucs2_lithuanian_ci",
	141: "ucs2_slovak_ci",
	142: "ucs2_spanish2_ci",
	143: "ucs2_roman_ci",
	144: "ucs2_persian_ci",
	145: "ucs2_esperanto_ci",
	146: "ucs2_hungarian_ci",
	147: "ucs2_sinhala_ci",
	148: "ucs2_german2_ci",
	149: "ucs2_croatian_ci",
	150: "ucs2_unicode_520_ci",
	151: "ucs2_vietnamese_ci",
	159: "ucs2_general_mysql500_ci",
	160: "utf32_unicode_ci",
	161: "utf32_icelandic_ci",
	162: "utf32_latvian_ci",
	163: "utf32_romanian_ci",
	164: "utf32_slovenian_ci",
	165: "utf32_polish_ci",
	166: "utf32_estonian_ci",
	167: "utf32_spanish_ci",
	168: "utf32_swedish_ci",
	169: "utf32_turkish_ci",
	170: "utf32_czech_ci",
	171: "utf32_danish_ci",
	172: "utf32_lithuanian_ci",
	173: "utf32_slovak_ci",
	174: "utf32_spanish2_ci",
	175: "utf32_roman_ci",
	176: "utf32_persian_ci",
	177: "utf32_esperanto_ci",
	178: "utf32_hungarian_ci",
	179: "utf32_sinhala_ci",
	180: "utf32_german2_ci",
	181: "utf32_croatian_ci",
	182: "utf32_unicode_520_ci",
	183: "utf32_vietnamese_ci",
	192: "utf8_unicode_ci",
	193: "utf8_icelandic_ci",
	194: "utf8_latvian_ci",
	195: "utf8_romanian_ci",
	196: "utf8_slovenian_ci",
	197: "utf8_polish_ci",
	198: "utf8_estonian_ci",
	199: "utf8_spanish_ci",
	200: "utf8_swedish_ci",
	201: "utf8_turkish_ci",
	202: "utf8_czech_ci",
	203: "utf8_danish_ci",
	204: "utf8_lithuanian_ci",
	205: "utf8_slovak_ci",
	206: "utf8_spanish2_ci",
	207: "utf8_roman_ci",
	208: "utf8_persian_ci",
	209: "utf8_esperanto_ci",
	210: "utf8_hungarian_ci",
	211: "utf8_sinhala_ci",
	212: "utf8_german2_ci",
	213: "utf8_croatian_ci",
	214: "utf8_unicode_520_ci",
	215: "utf8_vietnamese_ci",
	223: "utf8_general_mysql500_ci",
	224: "utf8mb4_unicode_ci",
	225: "utf8mb4_icelandic_ci",
	226: "utf8mb4_latvian_ci",
	227: "utf8mb4_romanian_ci",
	228: "utf8mb4_slovenian_ci",
	229: "utf8mb4_polish_ci",
	230: "utf8mb4_estonian_ci",
	231: "utf8mb4_spanish_ci",
	232: "utf8mb4_swedish_ci",
	233: "utf8mb4_turkish_ci",
	234: "utf8mb4_czech_ci",
	235: "utf8mb4_danish_ci",
	236: "utf8mb4_lithuanian_ci",
	237: "utf8mb4_slovak_ci",
	238: "utf8mb4_spanish2_ci",
	239: "utf8mb4_roman_ci",
	240: "utf8mb4_persian_ci",
	241: "utf8mb4_esperanto_ci",
	242: "utf8mb4_hungarian_ci",
	243: "utf8mb4_sinhala_ci",
	244: "utf8mb4_german2_ci",
	245: "utf8mb4_croatian_ci",
	246: "utf8mb4_unicode_520_ci",
	247: "utf8mb4_vietnamese_ci",
	255: "utf8mb4_0900_ai_ci",
	309: "utf8mb4_0900_bin",
}
   305  
// CollationNames maps MySQL collation name to its ID.
//
// Names are matched exactly as spelled here (all lower case); a name with no
// entry yields 0 on lookup. Values are uint16 because some IDs do not fit in a
// byte (utf8mb4_0900_bin is 309).
// NOTE(review): this table appears to mirror Collations entry for entry — keep
// the two in sync when adding or changing a collation.
var CollationNames = map[string]uint16{
	"big5_chinese_ci":          1,
	"latin2_czech_cs":          2,
	"dec8_swedish_ci":          3,
	"cp850_general_ci":         4,
	"latin1_german1_ci":        5,
	"hp8_english_ci":           6,
	"koi8r_general_ci":         7,
	"latin1_swedish_ci":        8,
	"latin2_general_ci":        9,
	"swe7_swedish_ci":          10,
	"ascii_general_ci":         11,
	"ujis_japanese_ci":         12,
	"sjis_japanese_ci":         13,
	"cp1251_bulgarian_ci":      14,
	"latin1_danish_ci":         15,
	"hebrew_general_ci":        16,
	"tis620_thai_ci":           18,
	"euckr_korean_ci":          19,
	"latin7_estonian_cs":       20,
	"latin2_hungarian_ci":      21,
	"koi8u_general_ci":         22,
	"cp1251_ukrainian_ci":      23,
	"gb2312_chinese_ci":        24,
	"greek_general_ci":         25,
	"cp1250_general_ci":        26,
	"latin2_croatian_ci":       27,
	"gbk_chinese_ci":           28,
	"cp1257_lithuanian_ci":     29,
	"latin5_turkish_ci":        30,
	"latin1_german2_ci":        31,
	"armscii8_general_ci":      32,
	"utf8_general_ci":          33,
	"cp1250_czech_cs":          34,
	"ucs2_general_ci":          35,
	"cp866_general_ci":         36,
	"keybcs2_general_ci":       37,
	"macce_general_ci":         38,
	"macroman_general_ci":      39,
	"cp852_general_ci":         40,
	"latin7_general_ci":        41,
	"latin7_general_cs":        42,
	"macce_bin":                43,
	"cp1250_croatian_ci":       44,
	"utf8mb4_general_ci":       45,
	"utf8mb4_bin":              46,
	"latin1_bin":               47,
	"latin1_general_ci":        48,
	"latin1_general_cs":        49,
	"cp1251_bin":               50,
	"cp1251_general_ci":        51,
	"cp1251_general_cs":        52,
	"macroman_bin":             53,
	"utf16_general_ci":         54,
	"utf16_bin":                55,
	"utf16le_general_ci":       56,
	"cp1256_general_ci":        57,
	"cp1257_bin":               58,
	"cp1257_general_ci":        59,
	"utf32_general_ci":         60,
	"utf32_bin":                61,
	"utf16le_bin":              62,
	"binary":                   63,
	"armscii8_bin":             64,
	"ascii_bin":                65,
	"cp1250_bin":               66,
	"cp1256_bin":               67,
	"cp866_bin":                68,
	"dec8_bin":                 69,
	"greek_bin":                70,
	"hebrew_bin":               71,
	"hp8_bin":                  72,
	"keybcs2_bin":              73,
	"koi8r_bin":                74,
	"koi8u_bin":                75,
	"latin2_bin":               77,
	"latin5_bin":               78,
	"latin7_bin":               79,
	"cp850_bin":                80,
	"cp852_bin":                81,
	"swe7_bin":                 82,
	"utf8_bin":                 83,
	"big5_bin":                 84,
	"euckr_bin":                85,
	"gb2312_bin":               86,
	"gbk_bin":                  87,
	"sjis_bin":                 88,
	"tis620_bin":               89,
	"ucs2_bin":                 90,
	"ujis_bin":                 91,
	"geostd8_general_ci":       92,
	"geostd8_bin":              93,
	"latin1_spanish_ci":        94,
	"cp932_japanese_ci":        95,
	"cp932_bin":                96,
	"eucjpms_japanese_ci":      97,
	"eucjpms_bin":              98,
	"cp1250_polish_ci":         99,
	"utf16_unicode_ci":         101,
	"utf16_icelandic_ci":       102,
	"utf16_latvian_ci":         103,
	"utf16_romanian_ci":        104,
	"utf16_slovenian_ci":       105,
	"utf16_polish_ci":          106,
	"utf16_estonian_ci":        107,
	"utf16_spanish_ci":         108,
	"utf16_swedish_ci":         109,
	"utf16_turkish_ci":         110,
	"utf16_czech_ci":           111,
	"utf16_danish_ci":          112,
	"utf16_lithuanian_ci":      113,
	"utf16_slovak_ci":          114,
	"utf16_spanish2_ci":        115,
	"utf16_roman_ci":           116,
	"utf16_persian_ci":         117,
	"utf16_esperanto_ci":       118,
	"utf16_hungarian_ci":       119,
	"utf16_sinhala_ci":         120,
	"utf16_german2_ci":         121,
	"utf16_croatian_ci":        122,
	"utf16_unicode_520_ci":     123,
	"utf16_vietnamese_ci":      124,
	"ucs2_unicode_ci":          128,
	"ucs2_icelandic_ci":        129,
	"ucs2_latvian_ci":          130,
	"ucs2_romanian_ci":         131,
	"ucs2_slovenian_ci":        132,
	"ucs2_polish_ci":           133,
	"ucs2_estonian_ci":         134,
	"ucs2_spanish_ci":          135,
	"ucs2_swedish_ci":          136,
	"ucs2_turkish_ci":          137,
	"ucs2_czech_ci":            138,
	"ucs2_danish_ci":           139,
	"ucs2_lithuanian_ci":       140,
	"ucs2_slovak_ci":           141,
	"ucs2_spanish2_ci":         142,
	"ucs2_roman_ci":            143,
	"ucs2_persian_ci":          144,
	"ucs2_esperanto_ci":        145,
	"ucs2_hungarian_ci":        146,
	"ucs2_sinhala_ci":          147,
	"ucs2_german2_ci":          148,
	"ucs2_croatian_ci":         149,
	"ucs2_unicode_520_ci":      150,
	"ucs2_vietnamese_ci":       151,
	"ucs2_general_mysql500_ci": 159,
	"utf32_unicode_ci":         160,
	"utf32_icelandic_ci":       161,
	"utf32_latvian_ci":         162,
	"utf32_romanian_ci":        163,
	"utf32_slovenian_ci":       164,
	"utf32_polish_ci":          165,
	"utf32_estonian_ci":        166,
	"utf32_spanish_ci":         167,
	"utf32_swedish_ci":         168,
	"utf32_turkish_ci":         169,
	"utf32_czech_ci":           170,
	"utf32_danish_ci":          171,
	"utf32_lithuanian_ci":      172,
	"utf32_slovak_ci":          173,
	"utf32_spanish2_ci":        174,
	"utf32_roman_ci":           175,
	"utf32_persian_ci":         176,
	"utf32_esperanto_ci":       177,
	"utf32_hungarian_ci":       178,
	"utf32_sinhala_ci":         179,
	"utf32_german2_ci":         180,
	"utf32_croatian_ci":        181,
	"utf32_unicode_520_ci":     182,
	"utf32_vietnamese_ci":      183,
	"utf8_unicode_ci":          192,
	"utf8_icelandic_ci":        193,
	"utf8_latvian_ci":          194,
	"utf8_romanian_ci":         195,
	"utf8_slovenian_ci":        196,
	"utf8_polish_ci":           197,
	"utf8_estonian_ci":         198,
	"utf8_spanish_ci":          199,
	"utf8_swedish_ci":          200,
	"utf8_turkish_ci":          201,
	"utf8_czech_ci":            202,
	"utf8_danish_ci":           203,
	"utf8_lithuanian_ci":       204,
	"utf8_slovak_ci":           205,
	"utf8_spanish2_ci":         206,
	"utf8_roman_ci":            207,
	"utf8_persian_ci":          208,
	"utf8_esperanto_ci":        209,
	"utf8_hungarian_ci":        210,
	"utf8_sinhala_ci":          211,
	"utf8_german2_ci":          212,
	"utf8_croatian_ci":         213,
	"utf8_unicode_520_ci":      214,
	"utf8_vietnamese_ci":       215,
	"utf8_general_mysql500_ci": 223,
	"utf8mb4_unicode_ci":       224,
	"utf8mb4_icelandic_ci":     225,
	"utf8mb4_latvian_ci":       226,
	"utf8mb4_romanian_ci":      227,
	"utf8mb4_slovenian_ci":     228,
	"utf8mb4_polish_ci":        229,
	"utf8mb4_estonian_ci":      230,
	"utf8mb4_spanish_ci":       231,
	"utf8mb4_swedish_ci":       232,
	"utf8mb4_turkish_ci":       233,
	"utf8mb4_czech_ci":         234,
	"utf8mb4_danish_ci":        235,
	"utf8mb4_lithuanian_ci":    236,
	"utf8mb4_slovak_ci":        237,
	"utf8mb4_spanish2_ci":      238,
	"utf8mb4_roman_ci":         239,
	"utf8mb4_persian_ci":       240,
	"utf8mb4_esperanto_ci":     241,
	"utf8mb4_hungarian_ci":     242,
	"utf8mb4_sinhala_ci":       243,
	"utf8mb4_german2_ci":       244,
	"utf8mb4_croatian_ci":      245,
	"utf8mb4_unicode_520_ci":   246,
	"utf8mb4_vietnamese_ci":    247,
	"utf8mb4_0900_ai_ci":       255,
	"utf8mb4_0900_bin":         309,
}
   530  
   531  // MySQL collation information.
   532  const (
   533  	UTF8Charset    = "utf8"
   534  	UTF8MB4Charset = "utf8mb4"
   535  	Latin1Charset  = "latin1"
   536  	DefaultCharset = UTF8MB4Charset
   537  	// DefaultCollationID is utf8mb4_bin(46)
   538  	DefaultCollationID        = 46
   539  	Latin1DefaultCollationID  = 47
   540  	ASCIIDefaultCollationID   = 65
   541  	UTF8DefaultCollationID    = 83
   542  	UTF8MB4DefaultCollationID = 46
   543  	BinaryDefaultCollationID  = 63
   544  	UTF8DefaultCollation      = "utf8_bin"
   545  	UTF8MB4DefaultCollation   = "utf8mb4_bin"
   546  	DefaultCollationName      = UTF8MB4DefaultCollation
   547  
   548  	// MaxBytesOfCharacter, is the max bytes length of a character,
   549  	// refer to RFC3629, in UTF-8, characters from the U+0000..U+10FFFF range
   550  	// (the UTF-16 accessible range) are encoded using sequences of 1 to 4 octets.
   551  	MaxBytesOfCharacter = 4
   552  )
   553  
   554  // IsUTF8Charset checks if charset is utf8, utf8mb4.
   555  func IsUTF8Charset(charset string) bool {
   556  	return charset == UTF8Charset || charset == UTF8MB4Charset
   557  }
   558  
   559  // RangeGraph defines valid unicode characters to use in column names. It strictly follows MySQL's definition.
   560  // See #3994.
   561  var RangeGraph = []*unicode.RangeTable{
   562  	// _MY_PNT
   563  	unicode.No,
   564  	unicode.Mn,
   565  	unicode.Me,
   566  	unicode.Pc,
   567  	unicode.Pd,
   568  	unicode.Pd,
   569  	unicode.Ps,
   570  	unicode.Pe,
   571  	unicode.Pi,
   572  	unicode.Pf,
   573  	unicode.Po,
   574  	unicode.Sm,
   575  	unicode.Sc,
   576  	unicode.Sk,
   577  	unicode.So,
   578  	// _MY_U
   579  	unicode.Lu,
   580  	unicode.Lt,
   581  	unicode.Nl,
   582  	// _MY_L
   583  	unicode.Ll,
   584  	unicode.Lm,
   585  	unicode.Lo,
   586  	unicode.Nl,
   587  	unicode.Mn,
   588  	unicode.Mc,
   589  	unicode.Me,
   590  	// _MY_NMR
   591  	unicode.Nd,
   592  	unicode.Nl,
   593  	unicode.No,
   594  }