github.com/wangyougui/gf/v2@v2.6.5/encoding/gcharset/gcharset.go (about) 1 // Copyright GoFrame Author(https://goframe.org). All Rights Reserved. 2 // 3 // This Source Code Form is subject to the terms of the MIT License. 4 // If a copy of the MIT was not distributed with this file, 5 // You can obtain one at https://github.com/wangyougui/gf. 6 7 // Package gcharset implements character-set conversion functionality. 8 // 9 // Supported Character Set: 10 // 11 // Chinese : GBK/GB18030/GB2312/Big5 12 // 13 // Japanese: EUCJP/ISO2022JP/ShiftJIS 14 // 15 // Korean : EUCKR 16 // 17 // Unicode : UTF-8/UTF-16/UTF-16BE/UTF-16LE 18 // 19 // Other : macintosh/IBM*/Windows*/ISO-* 20 package gcharset 21 22 import ( 23 "bytes" 24 "context" 25 "io" 26 27 "golang.org/x/text/encoding" 28 "golang.org/x/text/encoding/ianaindex" 29 "golang.org/x/text/transform" 30 31 "github.com/wangyougui/gf/v2/errors/gcode" 32 "github.com/wangyougui/gf/v2/errors/gerror" 33 "github.com/wangyougui/gf/v2/internal/intlog" 34 ) 35 36 var ( 37 // Alias for charsets. 38 charsetAlias = map[string]string{ 39 "HZGB2312": "HZ-GB-2312", 40 "hzgb2312": "HZ-GB-2312", 41 "GB2312": "HZ-GB-2312", 42 "gb2312": "HZ-GB-2312", 43 } 44 ) 45 46 // Supported returns whether charset `charset` is supported. 47 func Supported(charset string) bool { 48 return getEncoding(charset) != nil 49 } 50 51 // Convert converts `src` charset encoding from `srcCharset` to `dstCharset`, 52 // and returns the converted string. 53 // It returns `src` as `dst` if it fails converting. 54 func Convert(dstCharset string, srcCharset string, src string) (dst string, err error) { 55 if dstCharset == srcCharset { 56 return src, nil 57 } 58 dst = src 59 // Converting `src` to UTF-8. 60 if srcCharset != "UTF-8" { 61 if e := getEncoding(srcCharset); e != nil { 62 tmp, err := io.ReadAll( 63 transform.NewReader(bytes.NewReader([]byte(src)), e.NewDecoder()), 64 ) 65 if err != nil { 66 return "", gerror.Wrapf(err, `convert string "%s" to utf8 failed`, srcCharset) 67 } 68 src = string(tmp) 69 } else { 70 return dst, gerror.NewCodef(gcode.CodeInvalidParameter, `unsupported srcCharset "%s"`, srcCharset) 71 } 72 } 73 // Do the converting from UTF-8 to `dstCharset`. 74 if dstCharset != "UTF-8" { 75 if e := getEncoding(dstCharset); e != nil { 76 tmp, err := io.ReadAll( 77 transform.NewReader(bytes.NewReader([]byte(src)), e.NewEncoder()), 78 ) 79 if err != nil { 80 return "", gerror.Wrapf(err, `convert string from utf8 to "%s" failed`, dstCharset) 81 } 82 dst = string(tmp) 83 } else { 84 return dst, gerror.NewCodef(gcode.CodeInvalidParameter, `unsupported dstCharset "%s"`, dstCharset) 85 } 86 } else { 87 dst = src 88 } 89 return dst, nil 90 } 91 92 // ToUTF8 converts `src` charset encoding from `srcCharset` to UTF-8 , 93 // and returns the converted string. 94 func ToUTF8(srcCharset string, src string) (dst string, err error) { 95 return Convert("UTF-8", srcCharset, src) 96 } 97 98 // UTF8To converts `src` charset encoding from UTF-8 to `dstCharset`, 99 // and returns the converted string. 100 func UTF8To(dstCharset string, src string) (dst string, err error) { 101 return Convert(dstCharset, "UTF-8", src) 102 } 103 104 // getEncoding returns the encoding.Encoding interface object for `charset`. 105 // It returns nil if `charset` is not supported. 106 func getEncoding(charset string) encoding.Encoding { 107 if c, ok := charsetAlias[charset]; ok { 108 charset = c 109 } 110 enc, err := ianaindex.MIB.Encoding(charset) 111 if err != nil { 112 intlog.Errorf(context.TODO(), `%+v`, err) 113 } 114 return enc 115 }