github.com/Aoi-hosizora/ahlib-more@v1.5.1-0.20230404072844-256112befaf6/xcharset/xcharset_encoding.go (about)

     1  package xcharset
     2  
     3  import (
     4  	"golang.org/x/text/encoding"
     5  	"golang.org/x/text/encoding/charmap"
     6  	"golang.org/x/text/encoding/japanese"
     7  	"golang.org/x/text/encoding/korean"
     8  	"golang.org/x/text/encoding/simplifiedchinese"
     9  	"golang.org/x/text/encoding/traditionalchinese"
    10  	"golang.org/x/text/encoding/unicode"
    11  	"golang.org/x/text/encoding/unicode/utf32"
    12  	"golang.org/x/text/transform"
    13  )
    14  
    15  // EncodeString encodes a string to given encoding.
    16  func EncodeString(encoding encoding.Encoding, s string) (string, error) {
    17  	result, _, err := transform.String(encoding.NewEncoder(), s)
    18  	return result, err
    19  }
    20  
    21  // DecodeString decodes a string to given encoding.
    22  func DecodeString(encoding encoding.Encoding, s string) (string, error) {
    23  	result, _, err := transform.String(encoding.NewDecoder(), s)
    24  	return result, err
    25  }
    26  
    27  // EncodeBytes encodes a bytes to given encoding.
    28  func EncodeBytes(encoding encoding.Encoding, bs []byte) ([]byte, error) {
    29  	result, _, err := transform.Bytes(encoding.NewEncoder(), bs)
    30  	return result, err
    31  }
    32  
    33  // DecodeBytes decodes a bytes to given encoding.
    34  func DecodeBytes(encoding encoding.Encoding, bs []byte) ([]byte, error) {
    35  	result, _, err := transform.Bytes(encoding.NewDecoder(), bs)
    36  	return result, err
    37  }
    38  
    39  // See https://github.com/saintfish/chardet/blob/3af4cd4741/detector.go and https://www.iana.org/assignments/charset-reg/charset-reg.xhtml.
    40  const (
    41  	IANA_UTF8    = "UTF-8"    // *
    42  	IANA_UTF16BE = "UTF-16BE" // *
    43  	IANA_UTF16LE = "UTF-16LE" // *
    44  	IANA_UTF32BE = "UTF-32BE" // *
    45  	IANA_UTF32LE = "UTF-32LE" // *
    46  
    47  	IANA_ISO8859_1   = "ISO-8859-1"   // en, da, de, es, fr, it, nl, no, pt, sv
    48  	IANA_ISO8859_2   = "ISO-8859-2"   // cs, hu, pl, ro
    49  	IANA_ISO8859_5   = "ISO-8859-5"   // ru
    50  	IANA_ISO8859_6   = "ISO-8859-6"   // ar
    51  	IANA_ISO8859_7   = "ISO-8859-7"   // el
    52  	IANA_ISO8859_8   = "ISO-8859-8"   // he
    53  	IANA_ISO8859_8I  = "ISO-8859-8-I" // he
    54  	IANA_ISO8859_9   = "ISO-8859-9"   // tr
    55  	IANA_KOI8R       = "KOI8-R"       // ru
    56  	IANA_WINDOWS1251 = "windows-1251" // ar
    57  	IANA_WINDOWS1256 = "windows-1256" // ar
    58  	IANA_IBM424RTL   = "IBM424_rtl"   // he
    59  	IANA_IBM424LTR   = "IBM424_ltr"   // he
    60  	IANA_IBM420RTL   = "IBM420_rtl"   // ar
    61  	IANA_IBM420LTR   = "IBM420_ltr"   // ar
    62  
    63  	IANA_SHIFTJIS  = "Shift_JIS"   // ja
    64  	IANA_GBK       = "GBK"         // zh
    65  	IANA_GB18030   = "GB18030"     // zh
    66  	IANA_BIG5      = "Big5"        // zh
    67  	IANA_EUCJP     = "EUC-JP"      // ja
    68  	IANA_EUCKR     = "EUC-KR"      // ko
    69  	IANA_ISO2022JP = "ISO-2022-JP" // jp
    70  	IANA_ISO2022KR = "ISO-2022-KR" // kr
    71  	IANA_ISO2022CN = "ISO-2022-CN" // cn
    72  )
    73  
    74  // GetEncoding returns an encoding.Encoding from some IANA or MIME names.
    75  func GetEncoding(iana string) (encode encoding.Encoding, exist bool) {
    76  	// Note: These names must be matched from chardet's detector.go, including utf8.go,
    77  	// unicode.go, single_byte.go, multi_byte.go, etc.
    78  	switch iana {
    79  	// utf8, utf16, utf32
    80  	case IANA_UTF8:
    81  		return unicode.UTF8, true
    82  	case IANA_UTF16BE:
    83  		return unicode.UTF16(unicode.BigEndian, unicode.IgnoreBOM), true
    84  	case IANA_UTF16LE:
    85  		return unicode.UTF16(unicode.LittleEndian, unicode.IgnoreBOM), true
    86  	case IANA_UTF32BE:
    87  		return utf32.UTF32(utf32.BigEndian, utf32.IgnoreBOM), true
    88  	case IANA_UTF32LE:
    89  		return utf32.UTF32(utf32.LittleEndian, utf32.IgnoreBOM), true
    90  
    91  	// single_byte
    92  	case IANA_ISO8859_1:
    93  		return charmap.ISO8859_1, true
    94  	case IANA_ISO8859_2:
    95  		return charmap.ISO8859_2, true
    96  	case IANA_ISO8859_5:
    97  		return charmap.ISO8859_5, true
    98  	case IANA_ISO8859_6:
    99  		return charmap.ISO8859_6, true
   100  	case IANA_ISO8859_7:
   101  		return charmap.ISO8859_7, true
   102  	case IANA_ISO8859_8:
   103  		return charmap.ISO8859_8, true
   104  	case IANA_ISO8859_8I:
   105  		return charmap.ISO8859_8I, true
   106  	case IANA_ISO8859_9:
   107  		return charmap.ISO8859_9, true
   108  	case IANA_KOI8R:
   109  		return charmap.KOI8R, true
   110  	case IANA_WINDOWS1251:
   111  		return charmap.Windows1251, true
   112  	case IANA_WINDOWS1256:
   113  		return charmap.Windows1256, true
   114  	case IANA_IBM424RTL, IANA_IBM424LTR, IANA_IBM420RTL, IANA_IBM420LTR:
   115  		// not found
   116  
   117  	// multi_byte
   118  	case IANA_SHIFTJIS:
   119  		return japanese.ShiftJIS, true
   120  	case IANA_GBK:
   121  		return simplifiedchinese.GBK, true
   122  	case IANA_GB18030:
   123  		return simplifiedchinese.GB18030, true
   124  	case IANA_BIG5:
   125  		return traditionalchinese.Big5, true
   126  	case IANA_EUCJP:
   127  		return japanese.EUCJP, true
   128  	case IANA_EUCKR:
   129  		return korean.EUCKR, true
   130  	case IANA_ISO2022JP:
   131  		return japanese.ISO2022JP, true
   132  	case IANA_ISO2022KR, IANA_ISO2022CN:
   133  		// not found
   134  	}
   135  
   136  	// not found
   137  	return nil, false
   138  }