github.com/wangyougui/gf/v2@v2.6.5/encoding/gcharset/gcharset.go (about)

     1  // Copyright GoFrame Author(https://goframe.org). All Rights Reserved.
     2  //
     3  // This Source Code Form is subject to the terms of the MIT License.
     4  // If a copy of the MIT was not distributed with this file,
     5  // You can obtain one at https://github.com/wangyougui/gf.
     6  
     7  // Package gcharset implements character-set conversion functionality.
     8  //
     9  // Supported Character Set:
    10  //
    11  // Chinese : GBK/GB18030/GB2312/Big5
    12  //
    13  // Japanese: EUCJP/ISO2022JP/ShiftJIS
    14  //
    15  // Korean  : EUCKR
    16  //
    17  // Unicode : UTF-8/UTF-16/UTF-16BE/UTF-16LE
    18  //
    19  // Other   : macintosh/IBM*/Windows*/ISO-*
    20  package gcharset
    21  
    22  import (
    23  	"bytes"
    24  	"context"
    25  	"io"
    26  
    27  	"golang.org/x/text/encoding"
    28  	"golang.org/x/text/encoding/ianaindex"
    29  	"golang.org/x/text/transform"
    30  
    31  	"github.com/wangyougui/gf/v2/errors/gcode"
    32  	"github.com/wangyougui/gf/v2/errors/gerror"
    33  	"github.com/wangyougui/gf/v2/internal/intlog"
    34  )
    35  
    36  var (
    37  	// Alias for charsets.
    38  	charsetAlias = map[string]string{
    39  		"HZGB2312": "HZ-GB-2312",
    40  		"hzgb2312": "HZ-GB-2312",
    41  		"GB2312":   "HZ-GB-2312",
    42  		"gb2312":   "HZ-GB-2312",
    43  	}
    44  )
    45  
    46  // Supported returns whether charset `charset` is supported.
    47  func Supported(charset string) bool {
    48  	return getEncoding(charset) != nil
    49  }
    50  
    51  // Convert converts `src` charset encoding from `srcCharset` to `dstCharset`,
    52  // and returns the converted string.
    53  // It returns `src` as `dst` if it fails converting.
    54  func Convert(dstCharset string, srcCharset string, src string) (dst string, err error) {
    55  	if dstCharset == srcCharset {
    56  		return src, nil
    57  	}
    58  	dst = src
    59  	// Converting `src` to UTF-8.
    60  	if srcCharset != "UTF-8" {
    61  		if e := getEncoding(srcCharset); e != nil {
    62  			tmp, err := io.ReadAll(
    63  				transform.NewReader(bytes.NewReader([]byte(src)), e.NewDecoder()),
    64  			)
    65  			if err != nil {
    66  				return "", gerror.Wrapf(err, `convert string "%s" to utf8 failed`, srcCharset)
    67  			}
    68  			src = string(tmp)
    69  		} else {
    70  			return dst, gerror.NewCodef(gcode.CodeInvalidParameter, `unsupported srcCharset "%s"`, srcCharset)
    71  		}
    72  	}
    73  	// Do the converting from UTF-8 to `dstCharset`.
    74  	if dstCharset != "UTF-8" {
    75  		if e := getEncoding(dstCharset); e != nil {
    76  			tmp, err := io.ReadAll(
    77  				transform.NewReader(bytes.NewReader([]byte(src)), e.NewEncoder()),
    78  			)
    79  			if err != nil {
    80  				return "", gerror.Wrapf(err, `convert string from utf8 to "%s" failed`, dstCharset)
    81  			}
    82  			dst = string(tmp)
    83  		} else {
    84  			return dst, gerror.NewCodef(gcode.CodeInvalidParameter, `unsupported dstCharset "%s"`, dstCharset)
    85  		}
    86  	} else {
    87  		dst = src
    88  	}
    89  	return dst, nil
    90  }
    91  
    92  // ToUTF8 converts `src` charset encoding from `srcCharset` to UTF-8 ,
    93  // and returns the converted string.
    94  func ToUTF8(srcCharset string, src string) (dst string, err error) {
    95  	return Convert("UTF-8", srcCharset, src)
    96  }
    97  
    98  // UTF8To converts `src` charset encoding from UTF-8 to `dstCharset`,
    99  // and returns the converted string.
   100  func UTF8To(dstCharset string, src string) (dst string, err error) {
   101  	return Convert(dstCharset, "UTF-8", src)
   102  }
   103  
   104  // getEncoding returns the encoding.Encoding interface object for `charset`.
   105  // It returns nil if `charset` is not supported.
   106  func getEncoding(charset string) encoding.Encoding {
   107  	if c, ok := charsetAlias[charset]; ok {
   108  		charset = c
   109  	}
   110  	enc, err := ianaindex.MIB.Encoding(charset)
   111  	if err != nil {
   112  		intlog.Errorf(context.TODO(), `%+v`, err)
   113  	}
   114  	return enc
   115  }