github.com/Aoi-hosizora/ahlib-more@v1.5.1-0.20230404072844-256112befaf6/xcharset/xcharset_encoding.go (about) 1 package xcharset 2 3 import ( 4 "golang.org/x/text/encoding" 5 "golang.org/x/text/encoding/charmap" 6 "golang.org/x/text/encoding/japanese" 7 "golang.org/x/text/encoding/korean" 8 "golang.org/x/text/encoding/simplifiedchinese" 9 "golang.org/x/text/encoding/traditionalchinese" 10 "golang.org/x/text/encoding/unicode" 11 "golang.org/x/text/encoding/unicode/utf32" 12 "golang.org/x/text/transform" 13 ) 14 15 // EncodeString encodes a string to given encoding. 16 func EncodeString(encoding encoding.Encoding, s string) (string, error) { 17 result, _, err := transform.String(encoding.NewEncoder(), s) 18 return result, err 19 } 20 21 // DecodeString decodes a string to given encoding. 22 func DecodeString(encoding encoding.Encoding, s string) (string, error) { 23 result, _, err := transform.String(encoding.NewDecoder(), s) 24 return result, err 25 } 26 27 // EncodeBytes encodes a bytes to given encoding. 28 func EncodeBytes(encoding encoding.Encoding, bs []byte) ([]byte, error) { 29 result, _, err := transform.Bytes(encoding.NewEncoder(), bs) 30 return result, err 31 } 32 33 // DecodeBytes decodes a bytes to given encoding. 34 func DecodeBytes(encoding encoding.Encoding, bs []byte) ([]byte, error) { 35 result, _, err := transform.Bytes(encoding.NewDecoder(), bs) 36 return result, err 37 } 38 39 // See https://github.com/saintfish/chardet/blob/3af4cd4741/detector.go and https://www.iana.org/assignments/charset-reg/charset-reg.xhtml. 40 const ( 41 IANA_UTF8 = "UTF-8" // * 42 IANA_UTF16BE = "UTF-16BE" // * 43 IANA_UTF16LE = "UTF-16LE" // * 44 IANA_UTF32BE = "UTF-32BE" // * 45 IANA_UTF32LE = "UTF-32LE" // * 46 47 IANA_ISO8859_1 = "ISO-8859-1" // en, da, de, es, fr, it, nl, no, pt, sv 48 IANA_ISO8859_2 = "ISO-8859-2" // cs, hu, pl, ro 49 IANA_ISO8859_5 = "ISO-8859-5" // ru 50 IANA_ISO8859_6 = "ISO-8859-6" // ar 51 IANA_ISO8859_7 = "ISO-8859-7" // el 52 IANA_ISO8859_8 = "ISO-8859-8" // he 53 IANA_ISO8859_8I = "ISO-8859-8-I" // he 54 IANA_ISO8859_9 = "ISO-8859-9" // tr 55 IANA_KOI8R = "KOI8-R" // ru 56 IANA_WINDOWS1251 = "windows-1251" // ar 57 IANA_WINDOWS1256 = "windows-1256" // ar 58 IANA_IBM424RTL = "IBM424_rtl" // he 59 IANA_IBM424LTR = "IBM424_ltr" // he 60 IANA_IBM420RTL = "IBM420_rtl" // ar 61 IANA_IBM420LTR = "IBM420_ltr" // ar 62 63 IANA_SHIFTJIS = "Shift_JIS" // ja 64 IANA_GBK = "GBK" // zh 65 IANA_GB18030 = "GB18030" // zh 66 IANA_BIG5 = "Big5" // zh 67 IANA_EUCJP = "EUC-JP" // ja 68 IANA_EUCKR = "EUC-KR" // ko 69 IANA_ISO2022JP = "ISO-2022-JP" // jp 70 IANA_ISO2022KR = "ISO-2022-KR" // kr 71 IANA_ISO2022CN = "ISO-2022-CN" // cn 72 ) 73 74 // GetEncoding returns an encoding.Encoding from some IANA or MIME names. 75 func GetEncoding(iana string) (encode encoding.Encoding, exist bool) { 76 // Note: These names must be matched from chardet's detector.go, including utf8.go, 77 // unicode.go, single_byte.go, multi_byte.go, etc. 78 switch iana { 79 // utf8, utf16, utf32 80 case IANA_UTF8: 81 return unicode.UTF8, true 82 case IANA_UTF16BE: 83 return unicode.UTF16(unicode.BigEndian, unicode.IgnoreBOM), true 84 case IANA_UTF16LE: 85 return unicode.UTF16(unicode.LittleEndian, unicode.IgnoreBOM), true 86 case IANA_UTF32BE: 87 return utf32.UTF32(utf32.BigEndian, utf32.IgnoreBOM), true 88 case IANA_UTF32LE: 89 return utf32.UTF32(utf32.LittleEndian, utf32.IgnoreBOM), true 90 91 // single_byte 92 case IANA_ISO8859_1: 93 return charmap.ISO8859_1, true 94 case IANA_ISO8859_2: 95 return charmap.ISO8859_2, true 96 case IANA_ISO8859_5: 97 return charmap.ISO8859_5, true 98 case IANA_ISO8859_6: 99 return charmap.ISO8859_6, true 100 case IANA_ISO8859_7: 101 return charmap.ISO8859_7, true 102 case IANA_ISO8859_8: 103 return charmap.ISO8859_8, true 104 case IANA_ISO8859_8I: 105 return charmap.ISO8859_8I, true 106 case IANA_ISO8859_9: 107 return charmap.ISO8859_9, true 108 case IANA_KOI8R: 109 return charmap.KOI8R, true 110 case IANA_WINDOWS1251: 111 return charmap.Windows1251, true 112 case IANA_WINDOWS1256: 113 return charmap.Windows1256, true 114 case IANA_IBM424RTL, IANA_IBM424LTR, IANA_IBM420RTL, IANA_IBM420LTR: 115 // not found 116 117 // multi_byte 118 case IANA_SHIFTJIS: 119 return japanese.ShiftJIS, true 120 case IANA_GBK: 121 return simplifiedchinese.GBK, true 122 case IANA_GB18030: 123 return simplifiedchinese.GB18030, true 124 case IANA_BIG5: 125 return traditionalchinese.Big5, true 126 case IANA_EUCJP: 127 return japanese.EUCJP, true 128 case IANA_EUCKR: 129 return korean.EUCKR, true 130 case IANA_ISO2022JP: 131 return japanese.ISO2022JP, true 132 case IANA_ISO2022KR, IANA_ISO2022CN: 133 // not found 134 } 135 136 // not found 137 return nil, false 138 }