github.com/isyscore/isc-gobase@v1.5.3-0.20231218061332-cbc7451899e9/encoding/charset.go (about) 1 package encoding 2 3 import ( 4 "bytes" 5 "fmt" 6 "golang.org/x/text/encoding" 7 "golang.org/x/text/encoding/ianaindex" 8 "golang.org/x/text/transform" 9 "io" 10 "log" 11 ) 12 13 const ( 14 UTF8 = "UTF-8" 15 UTF16 = "UTF-16" 16 UTF16LE = "UTF-16LE" 17 UTF16BE = "UTF-16BE" 18 GBK = "gbk" 19 GB2312 = "GB2312" 20 BIG5 = "Big5" 21 GB18030 = "gb18030" 22 EUCJP = "EUC-JP" 23 ISO2022JP = "ISO-2022-JP" 24 SHIFTJIS = "Shift_JIS" 25 EUCKR = "EUC-KR" 26 ISO8859_2 = "ISO-8859-2" 27 ISO8859_3 = "ISO-8859-3" 28 ISO8859_4 = "ISO-8859-4" 29 ISO8859_5 = "ISO-8859-5" 30 ISO8859_7 = "ISO-8859-7" 31 ISO8859_9 = "ISO-8859-9" 32 ISO8859_10 = "ISO-8859-10" 33 ISO8859_13 = "ISO-8859-13" 34 ISO8859_14 = "ISO-8859-14" 35 ISO8859_15 = "ISO-8859-15" 36 ISO8859_16 = "ISO-8859-16" 37 WINDOWS1250 = "windows-1250" 38 WINDOWS1251 = "windows-1251" 39 WINDOWS1252 = "windows-1252" 40 WINDOWS1253 = "windows-1253" 41 WINDOWS1254 = "windows-1254" 42 WINDOWS1255 = "windows-1255" 43 WINDOWS1256 = "windows-1256" 44 WINDOWS1257 = "windows-1257" 45 WINDOWS1258 = "windows-1258" 46 WINDOWS874 = "windows-874" 47 MACINTOSH = "macintosh" 48 KOI8R = "KOI8-R" 49 KOI8U = "KOI8-U" 50 ) 51 52 // 别名 53 var charsetAlias = map[string]string{"HZGB2312": "HZ-GB-2312", "hzgb2312": "HZ-GB-2312", "GB2312": "HZ-GB-2312", "gb2312": "HZ-GB-2312"} 54 55 // Supported 判断指定的编码是否被支持 56 func Supported(charset string) bool { 57 return getEncoding(charset) != nil 58 } 59 60 func Convert(src string, srcCharset string, dstCharset string) (string, error) { 61 if dstCharset == srcCharset { 62 return src, nil 63 } 64 dst := src 65 if srcCharset != "UTF-8" { 66 if e := getEncoding(srcCharset); e != nil { 67 tmp, err := io.ReadAll( 68 transform.NewReader(bytes.NewReader([]byte(src)), e.NewDecoder()), 69 ) 70 if err != nil { 71 return "", fmt.Errorf(`convert string "%s" to utf8 failed`, srcCharset) 72 } 73 src = string(tmp) 74 } else { 75 return dst, fmt.Errorf(`unsupported charset "%s"`, srcCharset) 76 } 77 } 78 if dstCharset != "UTF-8" { 79 if e := getEncoding(dstCharset); e != nil { 80 tmp, err := io.ReadAll( 81 transform.NewReader(bytes.NewReader([]byte(src)), e.NewEncoder()), 82 ) 83 if err != nil { 84 return "", fmt.Errorf(`convert string from utf8 to "%s" failed`, dstCharset) 85 } 86 dst = string(tmp) 87 } else { 88 return dst, fmt.Errorf(`unsupported charset "%s"`, dstCharset) 89 } 90 } else { 91 dst = src 92 } 93 return dst, nil 94 } 95 96 func StringToUTF8(src string, srcCharset string) (string, error) { 97 return Convert(src, srcCharset, "UTF-8") 98 } 99 100 func UTF8ToString(src string, dstCharset string) (string, error) { 101 return Convert(src, "UTF-8", dstCharset) 102 } 103 104 func getEncoding(charset string) encoding.Encoding { 105 if c, ok := charsetAlias[charset]; ok { 106 charset = c 107 } 108 enc, err := ianaindex.MIB.Encoding(charset) 109 if err != nil { 110 log.Printf("[WARN] charset %s not supported", charset) 111 } 112 return enc 113 }