github.com/gogf/gf@v1.16.9/encoding/gcharset/gcharset.go (about)

     1  // Copyright GoFrame Author(https://goframe.org). All Rights Reserved.
     2  //
     3  // This Source Code Form is subject to the terms of the MIT License.
     4  // If a copy of the MIT was not distributed with this file,
     5  // You can obtain one at https://github.com/gogf/gf.
     6  
     7  // Package gcharset implements character-set conversion functionality.
     8  //
     9  // Supported Character Set:
    10  //
    11  // Chinese : GBK/GB18030/GB2312/Big5
    12  //
    13  // Japanese: EUCJP/ISO2022JP/ShiftJIS
    14  //
    15  // Korean  : EUCKR
    16  //
    17  // Unicode : UTF-8/UTF-16/UTF-16BE/UTF-16LE
    18  //
    19  // Other   : macintosh/IBM*/Windows*/ISO-*
    20  package gcharset
    21  
    22  import (
    23  	"bytes"
    24  	"github.com/gogf/gf/errors/gcode"
    25  	"github.com/gogf/gf/errors/gerror"
    26  	"io/ioutil"
    27  
    28  	"golang.org/x/text/encoding"
    29  	"golang.org/x/text/encoding/ianaindex"
    30  	"golang.org/x/text/transform"
    31  )
    32  
    33  var (
    34  	// Alias for charsets.
    35  	charsetAlias = map[string]string{
    36  		"HZGB2312": "HZ-GB-2312",
    37  		"hzgb2312": "HZ-GB-2312",
    38  		"GB2312":   "HZ-GB-2312",
    39  		"gb2312":   "HZ-GB-2312",
    40  	}
    41  )
    42  
    43  // Supported returns whether charset <charset> is supported.
    44  func Supported(charset string) bool {
    45  	return getEncoding(charset) != nil
    46  }
    47  
    48  // Convert converts <src> charset encoding from <srcCharset> to <dstCharset>,
    49  // and returns the converted string.
    50  // It returns <src> as <dst> if it fails converting.
    51  func Convert(dstCharset string, srcCharset string, src string) (dst string, err error) {
    52  	if dstCharset == srcCharset {
    53  		return src, nil
    54  	}
    55  	dst = src
    56  	// Converting <src> to UTF-8.
    57  	if srcCharset != "UTF-8" {
    58  		if e := getEncoding(srcCharset); e != nil {
    59  			tmp, err := ioutil.ReadAll(
    60  				transform.NewReader(bytes.NewReader([]byte(src)), e.NewDecoder()),
    61  			)
    62  			if err != nil {
    63  				return "", gerror.WrapCodef(gcode.CodeInternalError, err, "%s to utf8 failed", srcCharset)
    64  			}
    65  			src = string(tmp)
    66  		} else {
    67  			return dst, gerror.NewCodef(gcode.CodeInvalidParameter, "unsupported srcCharset: %s", srcCharset)
    68  		}
    69  	}
    70  	// Do the converting from UTF-8 to <dstCharset>.
    71  	if dstCharset != "UTF-8" {
    72  		if e := getEncoding(dstCharset); e != nil {
    73  			tmp, err := ioutil.ReadAll(
    74  				transform.NewReader(bytes.NewReader([]byte(src)), e.NewEncoder()),
    75  			)
    76  			if err != nil {
    77  				return "", gerror.WrapCodef(gcode.CodeInternalError, err, "utf to %s failed", dstCharset)
    78  			}
    79  			dst = string(tmp)
    80  		} else {
    81  			return dst, gerror.NewCodef(gcode.CodeInvalidParameter, "unsupported dstCharset: %s", dstCharset)
    82  		}
    83  	} else {
    84  		dst = src
    85  	}
    86  	return dst, nil
    87  }
    88  
    89  // ToUTF8 converts <src> charset encoding from <srcCharset> to UTF-8 ,
    90  // and returns the converted string.
    91  func ToUTF8(srcCharset string, src string) (dst string, err error) {
    92  	return Convert("UTF-8", srcCharset, src)
    93  }
    94  
    95  // UTF8To converts <src> charset encoding from UTF-8 to <dstCharset>,
    96  // and returns the converted string.
    97  func UTF8To(dstCharset string, src string) (dst string, err error) {
    98  	return Convert(dstCharset, "UTF-8", src)
    99  }
   100  
   101  // getEncoding returns the encoding.Encoding interface object for <charset>.
   102  // It returns nil if <charset> is not supported.
   103  func getEncoding(charset string) encoding.Encoding {
   104  	if c, ok := charsetAlias[charset]; ok {
   105  		charset = c
   106  	}
   107  	if e, err := ianaindex.MIB.Encoding(charset); err == nil && e != nil {
   108  		return e
   109  	}
   110  	return nil
   111  }