github.com/gogf/gf@v1.16.9/encoding/gcharset/gcharset.go (about) 1 // Copyright GoFrame Author(https://goframe.org). All Rights Reserved. 2 // 3 // This Source Code Form is subject to the terms of the MIT License. 4 // If a copy of the MIT was not distributed with this file, 5 // You can obtain one at https://github.com/gogf/gf. 6 7 // Package charset implements character-set conversion functionality. 8 // 9 // Supported Character Set: 10 // 11 // Chinese : GBK/GB18030/GB2312/Big5 12 // 13 // Japanese: EUCJP/ISO2022JP/ShiftJIS 14 // 15 // Korean : EUCKR 16 // 17 // Unicode : UTF-8/UTF-16/UTF-16BE/UTF-16LE 18 // 19 // Other : macintosh/IBM*/Windows*/ISO-* 20 package gcharset 21 22 import ( 23 "bytes" 24 "github.com/gogf/gf/errors/gcode" 25 "github.com/gogf/gf/errors/gerror" 26 "io/ioutil" 27 28 "golang.org/x/text/encoding" 29 "golang.org/x/text/encoding/ianaindex" 30 "golang.org/x/text/transform" 31 ) 32 33 var ( 34 // Alias for charsets. 35 charsetAlias = map[string]string{ 36 "HZGB2312": "HZ-GB-2312", 37 "hzgb2312": "HZ-GB-2312", 38 "GB2312": "HZ-GB-2312", 39 "gb2312": "HZ-GB-2312", 40 } 41 ) 42 43 // Supported returns whether charset <charset> is supported. 44 func Supported(charset string) bool { 45 return getEncoding(charset) != nil 46 } 47 48 // Convert converts <src> charset encoding from <srcCharset> to <dstCharset>, 49 // and returns the converted string. 50 // It returns <src> as <dst> if it fails converting. 51 func Convert(dstCharset string, srcCharset string, src string) (dst string, err error) { 52 if dstCharset == srcCharset { 53 return src, nil 54 } 55 dst = src 56 // Converting <src> to UTF-8. 57 if srcCharset != "UTF-8" { 58 if e := getEncoding(srcCharset); e != nil { 59 tmp, err := ioutil.ReadAll( 60 transform.NewReader(bytes.NewReader([]byte(src)), e.NewDecoder()), 61 ) 62 if err != nil { 63 return "", gerror.WrapCodef(gcode.CodeInternalError, err, "%s to utf8 failed", srcCharset) 64 } 65 src = string(tmp) 66 } else { 67 return dst, gerror.NewCodef(gcode.CodeInvalidParameter, "unsupported srcCharset: %s", srcCharset) 68 } 69 } 70 // Do the converting from UTF-8 to <dstCharset>. 71 if dstCharset != "UTF-8" { 72 if e := getEncoding(dstCharset); e != nil { 73 tmp, err := ioutil.ReadAll( 74 transform.NewReader(bytes.NewReader([]byte(src)), e.NewEncoder()), 75 ) 76 if err != nil { 77 return "", gerror.WrapCodef(gcode.CodeInternalError, err, "utf to %s failed", dstCharset) 78 } 79 dst = string(tmp) 80 } else { 81 return dst, gerror.NewCodef(gcode.CodeInvalidParameter, "unsupported dstCharset: %s", dstCharset) 82 } 83 } else { 84 dst = src 85 } 86 return dst, nil 87 } 88 89 // ToUTF8 converts <src> charset encoding from <srcCharset> to UTF-8 , 90 // and returns the converted string. 91 func ToUTF8(srcCharset string, src string) (dst string, err error) { 92 return Convert("UTF-8", srcCharset, src) 93 } 94 95 // UTF8To converts <src> charset encoding from UTF-8 to <dstCharset>, 96 // and returns the converted string. 97 func UTF8To(dstCharset string, src string) (dst string, err error) { 98 return Convert(dstCharset, "UTF-8", src) 99 } 100 101 // getEncoding returns the encoding.Encoding interface object for <charset>. 102 // It returns nil if <charset> is not supported. 103 func getEncoding(charset string) encoding.Encoding { 104 if c, ok := charsetAlias[charset]; ok { 105 charset = c 106 } 107 if e, err := ianaindex.MIB.Encoding(charset); err == nil && e != nil { 108 return e 109 } 110 return nil 111 }