github.com/bingoohuang/gg@v0.0.0-20240325092523-45da7dee9335/pkg/sqlparse/tidbparser/dependency/util/charset/encoding_table.go (about)

     1  // Copyright 2015 PingCAP, Inc.
     2  //
     3  // Licensed under the Apache License, Version 2.0 (the "License");
     4  // you may not use this file except in compliance with the License.
     5  // You may obtain a copy of the License at
     6  //
     7  //     http://www.apache.org/licenses/LICENSE-2.0
     8  //
     9  // Unless required by applicable law or agreed to in writing, software
    10  // distributed under the License is distributed on an "AS IS" BASIS,
    11  // See the License for the specific language governing permissions and
    12  // limitations under the License.
    13  
    14  package charset
    15  
    16  import (
    17  	"strings"
    18  
    19  	"golang.org/x/text/encoding"
    20  	"golang.org/x/text/encoding/charmap"
    21  	"golang.org/x/text/encoding/japanese"
    22  	"golang.org/x/text/encoding/korean"
    23  	"golang.org/x/text/encoding/simplifiedchinese"
    24  	"golang.org/x/text/encoding/traditionalchinese"
    25  	"golang.org/x/text/encoding/unicode"
    26  )
    27  
    28  // Lookup returns the encoding with the specified label, and its canonical
    29  // name. It returns nil and the empty string if label is not one of the
    30  // standard encodings for HTML. Matching is case-insensitive and ignores
    31  // leading and trailing whitespace.
    32  func Lookup(label string) (e encoding.Encoding, name string) {
    33  	label = strings.ToLower(strings.Trim(label, "\t\n\r\f "))
    34  	enc := encodings[label]
    35  	return enc.e, enc.name
    36  }
    37  
    38  var encodings = map[string]struct {
    39  	e    encoding.Encoding
    40  	name string
    41  }{
    42  	"unicode-1-1-utf-8":   {encoding.Nop, "utf-8"},
    43  	"utf-8":               {encoding.Nop, "utf-8"},
    44  	"utf8":                {encoding.Nop, "utf-8"},
    45  	"binary":              {encoding.Nop, "binary"},
    46  	"866":                 {charmap.CodePage866, "ibm866"},
    47  	"cp866":               {charmap.CodePage866, "ibm866"},
    48  	"csibm866":            {charmap.CodePage866, "ibm866"},
    49  	"ibm866":              {charmap.CodePage866, "ibm866"},
    50  	"csisolatin2":         {charmap.ISO8859_2, "iso-8859-2"},
    51  	"iso-8859-2":          {charmap.ISO8859_2, "iso-8859-2"},
    52  	"iso-ir-101":          {charmap.ISO8859_2, "iso-8859-2"},
    53  	"iso8859-2":           {charmap.ISO8859_2, "iso-8859-2"},
    54  	"iso88592":            {charmap.ISO8859_2, "iso-8859-2"},
    55  	"iso_8859-2":          {charmap.ISO8859_2, "iso-8859-2"},
    56  	"iso_8859-2:1987":     {charmap.ISO8859_2, "iso-8859-2"},
    57  	"l2":                  {charmap.ISO8859_2, "iso-8859-2"},
    58  	"latin2":              {charmap.ISO8859_2, "iso-8859-2"},
    59  	"csisolatin3":         {charmap.ISO8859_3, "iso-8859-3"},
    60  	"iso-8859-3":          {charmap.ISO8859_3, "iso-8859-3"},
    61  	"iso-ir-109":          {charmap.ISO8859_3, "iso-8859-3"},
    62  	"iso8859-3":           {charmap.ISO8859_3, "iso-8859-3"},
    63  	"iso88593":            {charmap.ISO8859_3, "iso-8859-3"},
    64  	"iso_8859-3":          {charmap.ISO8859_3, "iso-8859-3"},
    65  	"iso_8859-3:1988":     {charmap.ISO8859_3, "iso-8859-3"},
    66  	"l3":                  {charmap.ISO8859_3, "iso-8859-3"},
    67  	"latin3":              {charmap.ISO8859_3, "iso-8859-3"},
    68  	"csisolatin4":         {charmap.ISO8859_4, "iso-8859-4"},
    69  	"iso-8859-4":          {charmap.ISO8859_4, "iso-8859-4"},
    70  	"iso-ir-110":          {charmap.ISO8859_4, "iso-8859-4"},
    71  	"iso8859-4":           {charmap.ISO8859_4, "iso-8859-4"},
    72  	"iso88594":            {charmap.ISO8859_4, "iso-8859-4"},
    73  	"iso_8859-4":          {charmap.ISO8859_4, "iso-8859-4"},
    74  	"iso_8859-4:1988":     {charmap.ISO8859_4, "iso-8859-4"},
    75  	"l4":                  {charmap.ISO8859_4, "iso-8859-4"},
    76  	"latin4":              {charmap.ISO8859_4, "iso-8859-4"},
    77  	"csisolatincyrillic":  {charmap.ISO8859_5, "iso-8859-5"},
    78  	"cyrillic":            {charmap.ISO8859_5, "iso-8859-5"},
    79  	"iso-8859-5":          {charmap.ISO8859_5, "iso-8859-5"},
    80  	"iso-ir-144":          {charmap.ISO8859_5, "iso-8859-5"},
    81  	"iso8859-5":           {charmap.ISO8859_5, "iso-8859-5"},
    82  	"iso88595":            {charmap.ISO8859_5, "iso-8859-5"},
    83  	"iso_8859-5":          {charmap.ISO8859_5, "iso-8859-5"},
    84  	"iso_8859-5:1988":     {charmap.ISO8859_5, "iso-8859-5"},
    85  	"arabic":              {charmap.ISO8859_6, "iso-8859-6"},
    86  	"asmo-708":            {charmap.ISO8859_6, "iso-8859-6"},
    87  	"csiso88596e":         {charmap.ISO8859_6, "iso-8859-6"},
    88  	"csiso88596i":         {charmap.ISO8859_6, "iso-8859-6"},
    89  	"csisolatinarabic":    {charmap.ISO8859_6, "iso-8859-6"},
    90  	"ecma-114":            {charmap.ISO8859_6, "iso-8859-6"},
    91  	"iso-8859-6":          {charmap.ISO8859_6, "iso-8859-6"},
    92  	"iso-8859-6-e":        {charmap.ISO8859_6, "iso-8859-6"},
    93  	"iso-8859-6-i":        {charmap.ISO8859_6, "iso-8859-6"},
    94  	"iso-ir-127":          {charmap.ISO8859_6, "iso-8859-6"},
    95  	"iso8859-6":           {charmap.ISO8859_6, "iso-8859-6"},
    96  	"iso88596":            {charmap.ISO8859_6, "iso-8859-6"},
    97  	"iso_8859-6":          {charmap.ISO8859_6, "iso-8859-6"},
    98  	"iso_8859-6:1987":     {charmap.ISO8859_6, "iso-8859-6"},
    99  	"csisolatingreek":     {charmap.ISO8859_7, "iso-8859-7"},
   100  	"ecma-118":            {charmap.ISO8859_7, "iso-8859-7"},
   101  	"elot_928":            {charmap.ISO8859_7, "iso-8859-7"},
   102  	"greek":               {charmap.ISO8859_7, "iso-8859-7"},
   103  	"greek8":              {charmap.ISO8859_7, "iso-8859-7"},
   104  	"iso-8859-7":          {charmap.ISO8859_7, "iso-8859-7"},
   105  	"iso-ir-126":          {charmap.ISO8859_7, "iso-8859-7"},
   106  	"iso8859-7":           {charmap.ISO8859_7, "iso-8859-7"},
   107  	"iso88597":            {charmap.ISO8859_7, "iso-8859-7"},
   108  	"iso_8859-7":          {charmap.ISO8859_7, "iso-8859-7"},
   109  	"iso_8859-7:1987":     {charmap.ISO8859_7, "iso-8859-7"},
   110  	"sun_eu_greek":        {charmap.ISO8859_7, "iso-8859-7"},
   111  	"csiso88598e":         {charmap.ISO8859_8, "iso-8859-8"},
   112  	"csisolatinhebrew":    {charmap.ISO8859_8, "iso-8859-8"},
   113  	"hebrew":              {charmap.ISO8859_8, "iso-8859-8"},
   114  	"iso-8859-8":          {charmap.ISO8859_8, "iso-8859-8"},
   115  	"iso-8859-8-e":        {charmap.ISO8859_8, "iso-8859-8"},
   116  	"iso-ir-138":          {charmap.ISO8859_8, "iso-8859-8"},
   117  	"iso8859-8":           {charmap.ISO8859_8, "iso-8859-8"},
   118  	"iso88598":            {charmap.ISO8859_8, "iso-8859-8"},
   119  	"iso_8859-8":          {charmap.ISO8859_8, "iso-8859-8"},
   120  	"iso_8859-8:1988":     {charmap.ISO8859_8, "iso-8859-8"},
   121  	"visual":              {charmap.ISO8859_8, "iso-8859-8"},
   122  	"csiso88598i":         {charmap.ISO8859_8, "iso-8859-8-i"},
   123  	"iso-8859-8-i":        {charmap.ISO8859_8, "iso-8859-8-i"},
   124  	"logical":             {charmap.ISO8859_8, "iso-8859-8-i"},
   125  	"csisolatin6":         {charmap.ISO8859_10, "iso-8859-10"},
   126  	"iso-8859-10":         {charmap.ISO8859_10, "iso-8859-10"},
   127  	"iso-ir-157":          {charmap.ISO8859_10, "iso-8859-10"},
   128  	"iso8859-10":          {charmap.ISO8859_10, "iso-8859-10"},
   129  	"iso885910":           {charmap.ISO8859_10, "iso-8859-10"},
   130  	"l6":                  {charmap.ISO8859_10, "iso-8859-10"},
   131  	"latin6":              {charmap.ISO8859_10, "iso-8859-10"},
   132  	"iso-8859-13":         {charmap.ISO8859_13, "iso-8859-13"},
   133  	"iso8859-13":          {charmap.ISO8859_13, "iso-8859-13"},
   134  	"iso885913":           {charmap.ISO8859_13, "iso-8859-13"},
   135  	"iso-8859-14":         {charmap.ISO8859_14, "iso-8859-14"},
   136  	"iso8859-14":          {charmap.ISO8859_14, "iso-8859-14"},
   137  	"iso885914":           {charmap.ISO8859_14, "iso-8859-14"},
   138  	"csisolatin9":         {charmap.ISO8859_15, "iso-8859-15"},
   139  	"iso-8859-15":         {charmap.ISO8859_15, "iso-8859-15"},
   140  	"iso8859-15":          {charmap.ISO8859_15, "iso-8859-15"},
   141  	"iso885915":           {charmap.ISO8859_15, "iso-8859-15"},
   142  	"iso_8859-15":         {charmap.ISO8859_15, "iso-8859-15"},
   143  	"l9":                  {charmap.ISO8859_15, "iso-8859-15"},
   144  	"iso-8859-16":         {charmap.ISO8859_16, "iso-8859-16"},
   145  	"cskoi8r":             {charmap.KOI8R, "koi8-r"},
   146  	"koi":                 {charmap.KOI8R, "koi8-r"},
   147  	"koi8":                {charmap.KOI8R, "koi8-r"},
   148  	"koi8-r":              {charmap.KOI8R, "koi8-r"},
   149  	"koi8_r":              {charmap.KOI8R, "koi8-r"},
   150  	"koi8-u":              {charmap.KOI8U, "koi8-u"},
   151  	"csmacintosh":         {charmap.Macintosh, "macintosh"},
   152  	"mac":                 {charmap.Macintosh, "macintosh"},
   153  	"macintosh":           {charmap.Macintosh, "macintosh"},
   154  	"x-mac-roman":         {charmap.Macintosh, "macintosh"},
   155  	"dos-874":             {charmap.Windows874, "windows-874"},
   156  	"iso-8859-11":         {charmap.Windows874, "windows-874"},
   157  	"iso8859-11":          {charmap.Windows874, "windows-874"},
   158  	"iso885911":           {charmap.Windows874, "windows-874"},
   159  	"tis-620":             {charmap.Windows874, "windows-874"},
   160  	"windows-874":         {charmap.Windows874, "windows-874"},
   161  	"cp1250":              {charmap.Windows1250, "windows-1250"},
   162  	"windows-1250":        {charmap.Windows1250, "windows-1250"},
   163  	"x-cp1250":            {charmap.Windows1250, "windows-1250"},
   164  	"cp1251":              {charmap.Windows1251, "windows-1251"},
   165  	"windows-1251":        {charmap.Windows1251, "windows-1251"},
   166  	"x-cp1251":            {charmap.Windows1251, "windows-1251"},
   167  	"ansi_x3.4-1968":      {charmap.Windows1252, "windows-1252"},
   168  	"ascii":               {charmap.Windows1252, "windows-1252"},
   169  	"cp1252":              {charmap.Windows1252, "windows-1252"},
   170  	"cp819":               {charmap.Windows1252, "windows-1252"},
   171  	"csisolatin1":         {charmap.Windows1252, "windows-1252"},
   172  	"ibm819":              {charmap.Windows1252, "windows-1252"},
   173  	"iso-8859-1":          {charmap.Windows1252, "windows-1252"},
   174  	"iso-ir-100":          {charmap.Windows1252, "windows-1252"},
   175  	"iso8859-1":           {charmap.Windows1252, "windows-1252"},
   176  	"iso88591":            {charmap.Windows1252, "windows-1252"},
   177  	"iso_8859-1":          {charmap.Windows1252, "windows-1252"},
   178  	"iso_8859-1:1987":     {charmap.Windows1252, "windows-1252"},
   179  	"l1":                  {charmap.Windows1252, "windows-1252"},
   180  	"latin1":              {charmap.Windows1252, "windows-1252"},
   181  	"us-ascii":            {charmap.Windows1252, "windows-1252"},
   182  	"windows-1252":        {charmap.Windows1252, "windows-1252"},
   183  	"x-cp1252":            {charmap.Windows1252, "windows-1252"},
   184  	"cp1253":              {charmap.Windows1253, "windows-1253"},
   185  	"windows-1253":        {charmap.Windows1253, "windows-1253"},
   186  	"x-cp1253":            {charmap.Windows1253, "windows-1253"},
   187  	"cp1254":              {charmap.Windows1254, "windows-1254"},
   188  	"csisolatin5":         {charmap.Windows1254, "windows-1254"},
   189  	"iso-8859-9":          {charmap.Windows1254, "windows-1254"},
   190  	"iso-ir-148":          {charmap.Windows1254, "windows-1254"},
   191  	"iso8859-9":           {charmap.Windows1254, "windows-1254"},
   192  	"iso88599":            {charmap.Windows1254, "windows-1254"},
   193  	"iso_8859-9":          {charmap.Windows1254, "windows-1254"},
   194  	"iso_8859-9:1989":     {charmap.Windows1254, "windows-1254"},
   195  	"l5":                  {charmap.Windows1254, "windows-1254"},
   196  	"latin5":              {charmap.Windows1254, "windows-1254"},
   197  	"windows-1254":        {charmap.Windows1254, "windows-1254"},
   198  	"x-cp1254":            {charmap.Windows1254, "windows-1254"},
   199  	"cp1255":              {charmap.Windows1255, "windows-1255"},
   200  	"windows-1255":        {charmap.Windows1255, "windows-1255"},
   201  	"x-cp1255":            {charmap.Windows1255, "windows-1255"},
   202  	"cp1256":              {charmap.Windows1256, "windows-1256"},
   203  	"windows-1256":        {charmap.Windows1256, "windows-1256"},
   204  	"x-cp1256":            {charmap.Windows1256, "windows-1256"},
   205  	"cp1257":              {charmap.Windows1257, "windows-1257"},
   206  	"windows-1257":        {charmap.Windows1257, "windows-1257"},
   207  	"x-cp1257":            {charmap.Windows1257, "windows-1257"},
   208  	"cp1258":              {charmap.Windows1258, "windows-1258"},
   209  	"windows-1258":        {charmap.Windows1258, "windows-1258"},
   210  	"x-cp1258":            {charmap.Windows1258, "windows-1258"},
   211  	"x-mac-cyrillic":      {charmap.MacintoshCyrillic, "x-mac-cyrillic"},
   212  	"x-mac-ukrainian":     {charmap.MacintoshCyrillic, "x-mac-cyrillic"},
   213  	"chinese":             {simplifiedchinese.GBK, "gbk"},
   214  	"csgb2312":            {simplifiedchinese.GBK, "gbk"},
   215  	"csiso58gb231280":     {simplifiedchinese.GBK, "gbk"},
   216  	"gb2312":              {simplifiedchinese.GBK, "gbk"},
   217  	"gb_2312":             {simplifiedchinese.GBK, "gbk"},
   218  	"gb_2312-80":          {simplifiedchinese.GBK, "gbk"},
   219  	"gbk":                 {simplifiedchinese.GBK, "gbk"},
   220  	"iso-ir-58":           {simplifiedchinese.GBK, "gbk"},
   221  	"x-gbk":               {simplifiedchinese.GBK, "gbk"},
   222  	"gb18030":             {simplifiedchinese.GB18030, "gb18030"},
   223  	"hz-gb-2312":          {simplifiedchinese.HZGB2312, "hz-gb-2312"},
   224  	"big5":                {traditionalchinese.Big5, "big5"},
   225  	"big5-hkscs":          {traditionalchinese.Big5, "big5"},
   226  	"cn-big5":             {traditionalchinese.Big5, "big5"},
   227  	"csbig5":              {traditionalchinese.Big5, "big5"},
   228  	"x-x-big5":            {traditionalchinese.Big5, "big5"},
   229  	"cseucpkdfmtjapanese": {japanese.EUCJP, "euc-jp"},
   230  	"euc-jp":              {japanese.EUCJP, "euc-jp"},
   231  	"x-euc-jp":            {japanese.EUCJP, "euc-jp"},
   232  	"csiso2022jp":         {japanese.ISO2022JP, "iso-2022-jp"},
   233  	"iso-2022-jp":         {japanese.ISO2022JP, "iso-2022-jp"},
   234  	"csshiftjis":          {japanese.ShiftJIS, "shift_jis"},
   235  	"ms_kanji":            {japanese.ShiftJIS, "shift_jis"},
   236  	"shift-jis":           {japanese.ShiftJIS, "shift_jis"},
   237  	"shift_jis":           {japanese.ShiftJIS, "shift_jis"},
   238  	"sjis":                {japanese.ShiftJIS, "shift_jis"},
   239  	"windows-31j":         {japanese.ShiftJIS, "shift_jis"},
   240  	"x-sjis":              {japanese.ShiftJIS, "shift_jis"},
   241  	"cseuckr":             {korean.EUCKR, "euc-kr"},
   242  	"csksc56011987":       {korean.EUCKR, "euc-kr"},
   243  	"euc-kr":              {korean.EUCKR, "euc-kr"},
   244  	"iso-ir-149":          {korean.EUCKR, "euc-kr"},
   245  	"korean":              {korean.EUCKR, "euc-kr"},
   246  	"ks_c_5601-1987":      {korean.EUCKR, "euc-kr"},
   247  	"ks_c_5601-1989":      {korean.EUCKR, "euc-kr"},
   248  	"ksc5601":             {korean.EUCKR, "euc-kr"},
   249  	"ksc_5601":            {korean.EUCKR, "euc-kr"},
   250  	"windows-949":         {korean.EUCKR, "euc-kr"},
   251  	"csiso2022kr":         {encoding.Replacement, "replacement"},
   252  	"iso-2022-kr":         {encoding.Replacement, "replacement"},
   253  	"iso-2022-cn":         {encoding.Replacement, "replacement"},
   254  	"iso-2022-cn-ext":     {encoding.Replacement, "replacement"},
   255  	"utf-16be":            {unicode.UTF16(unicode.BigEndian, unicode.IgnoreBOM), "utf-16be"},
   256  	"utf-16":              {unicode.UTF16(unicode.LittleEndian, unicode.IgnoreBOM), "utf-16le"},
   257  	"utf-16le":            {unicode.UTF16(unicode.LittleEndian, unicode.IgnoreBOM), "utf-16le"},
   258  	"x-user-defined":      {charmap.XUserDefined, "x-user-defined"},
   259  }