github.com/zhongdalu/gf@v1.0.0/g/encoding/gcharset/gcharset.go (about) 1 // Copyright 2018-2019 gf Author(https://github.com/zhongdalu/gf). All Rights Reserved. 2 // 3 // This Source Code Form is subject to the terms of the MIT License. 4 // If a copy of the MIT was not distributed with this file, 5 // You can obtain one at https://github.com/zhongdalu/gf. 6 7 // Package charset implements character-set conversion functionality. 8 // 9 // Supported Character Set: 10 // 11 // Chinese : GBK/GB18030/GB2312/Big5 12 // 13 // Japanese: EUCJP/ISO2022JP/ShiftJIS 14 // 15 // Korean : EUCKR 16 // 17 // Unicode : UTF-8/UTF-16/UTF-16BE/UTF-16LE 18 // 19 // Other : macintosh/IBM*/Windows*/ISO-* 20 package gcharset 21 22 import ( 23 "bytes" 24 "errors" 25 "fmt" 26 "github.com/zhongdalu/gf/third/golang.org/x/text/encoding" 27 "github.com/zhongdalu/gf/third/golang.org/x/text/encoding/ianaindex" 28 "github.com/zhongdalu/gf/third/golang.org/x/text/transform" 29 "io/ioutil" 30 ) 31 32 var ( 33 // Alias for charsets. 34 charsetAlias = map[string]string{ 35 "HZGB2312": "HZ-GB-2312", 36 "hzgb2312": "HZ-GB-2312", 37 "GB2312": "HZ-GB-2312", 38 "gb2312": "HZ-GB-2312", 39 } 40 ) 41 42 // Supported returns whether charset <charset> is supported. 43 func Supported(charset string) bool { 44 return getEncoding(charset) != nil 45 } 46 47 // Convert converts <src> charset encoding from <srcCharset> to <dstCharset>, 48 // and returns the converted string. 49 // It returns <src> as <dst> if it fails converting. 50 func Convert(dstCharset string, srcCharset string, src string) (dst string, err error) { 51 if dstCharset == srcCharset { 52 return src, nil 53 } 54 dst = src 55 // Converting <src> to UTF-8. 56 if srcCharset != "UTF-8" { 57 if e := getEncoding(srcCharset); e != nil { 58 tmp, err := ioutil.ReadAll( 59 transform.NewReader(bytes.NewReader([]byte(src)), e.NewDecoder()), 60 ) 61 if err != nil { 62 return "", fmt.Errorf("%s to utf8 failed. %v", srcCharset, err) 63 } 64 src = string(tmp) 65 } else { 66 return dst, errors.New(fmt.Sprintf("unsupport srcCharset: %s", srcCharset)) 67 } 68 } 69 // Do the converting from UTF-8 to <dstCharset>. 70 if dstCharset != "UTF-8" { 71 if e := getEncoding(dstCharset); e != nil { 72 tmp, err := ioutil.ReadAll( 73 transform.NewReader(bytes.NewReader([]byte(src)), e.NewEncoder()), 74 ) 75 if err != nil { 76 return "", fmt.Errorf("utf to %s failed. %v", dstCharset, err) 77 } 78 dst = string(tmp) 79 } else { 80 return dst, errors.New(fmt.Sprintf("unsupport dstCharset: %s", dstCharset)) 81 } 82 } else { 83 dst = src 84 } 85 return dst, nil 86 } 87 88 // ToUTF8 converts <src> charset encoding from <srcCharset> to UTF-8 , 89 // and returns the converted string. 90 func ToUTF8(srcCharset string, src string) (dst string, err error) { 91 return Convert("UTF-8", srcCharset, src) 92 } 93 94 // UTF8To converts <src> charset encoding from UTF-8 to <dstCharset>, 95 // and returns the converted string. 96 func UTF8To(dstCharset string, src string) (dst string, err error) { 97 return Convert(dstCharset, "UTF-8", src) 98 } 99 100 // getEncoding returns the encoding.Encoding interface object for <charset>. 101 // It returns nil if <charset> is not supported. 102 func getEncoding(charset string) encoding.Encoding { 103 if c, ok := charsetAlias[charset]; ok { 104 charset = c 105 } 106 if e, err := ianaindex.MIB.Encoding(charset); err == nil && e != nil { 107 return e 108 } 109 return nil 110 }