github.com/zhongdalu/gf@v1.0.0/g/encoding/gcharset/gcharset.go (about)

     1  // Copyright 2018-2019 gf Author(https://github.com/zhongdalu/gf). All Rights Reserved.
     2  //
     3  // This Source Code Form is subject to the terms of the MIT License.
     4  // If a copy of the MIT was not distributed with this file,
     5  // You can obtain one at https://github.com/zhongdalu/gf.
     6  
// Package gcharset implements character-set conversion functionality.
     8  //
     9  // Supported Character Set:
    10  //
    11  // Chinese : GBK/GB18030/GB2312/Big5
    12  //
    13  // Japanese: EUCJP/ISO2022JP/ShiftJIS
    14  //
    15  // Korean  : EUCKR
    16  //
    17  // Unicode : UTF-8/UTF-16/UTF-16BE/UTF-16LE
    18  //
    19  // Other   : macintosh/IBM*/Windows*/ISO-*
    20  package gcharset
    21  
    22  import (
    23  	"bytes"
    24  	"errors"
    25  	"fmt"
    26  	"github.com/zhongdalu/gf/third/golang.org/x/text/encoding"
    27  	"github.com/zhongdalu/gf/third/golang.org/x/text/encoding/ianaindex"
    28  	"github.com/zhongdalu/gf/third/golang.org/x/text/transform"
    29  	"io/ioutil"
    30  )
    31  
    32  var (
    33  	// Alias for charsets.
    34  	charsetAlias = map[string]string{
    35  		"HZGB2312": "HZ-GB-2312",
    36  		"hzgb2312": "HZ-GB-2312",
    37  		"GB2312":   "HZ-GB-2312",
    38  		"gb2312":   "HZ-GB-2312",
    39  	}
    40  )
    41  
    42  // Supported returns whether charset <charset> is supported.
    43  func Supported(charset string) bool {
    44  	return getEncoding(charset) != nil
    45  }
    46  
    47  // Convert converts <src> charset encoding from <srcCharset> to <dstCharset>,
    48  // and returns the converted string.
    49  // It returns <src> as <dst> if it fails converting.
    50  func Convert(dstCharset string, srcCharset string, src string) (dst string, err error) {
    51  	if dstCharset == srcCharset {
    52  		return src, nil
    53  	}
    54  	dst = src
    55  	// Converting <src> to UTF-8.
    56  	if srcCharset != "UTF-8" {
    57  		if e := getEncoding(srcCharset); e != nil {
    58  			tmp, err := ioutil.ReadAll(
    59  				transform.NewReader(bytes.NewReader([]byte(src)), e.NewDecoder()),
    60  			)
    61  			if err != nil {
    62  				return "", fmt.Errorf("%s to utf8 failed. %v", srcCharset, err)
    63  			}
    64  			src = string(tmp)
    65  		} else {
    66  			return dst, errors.New(fmt.Sprintf("unsupport srcCharset: %s", srcCharset))
    67  		}
    68  	}
    69  	// Do the converting from UTF-8 to <dstCharset>.
    70  	if dstCharset != "UTF-8" {
    71  		if e := getEncoding(dstCharset); e != nil {
    72  			tmp, err := ioutil.ReadAll(
    73  				transform.NewReader(bytes.NewReader([]byte(src)), e.NewEncoder()),
    74  			)
    75  			if err != nil {
    76  				return "", fmt.Errorf("utf to %s failed. %v", dstCharset, err)
    77  			}
    78  			dst = string(tmp)
    79  		} else {
    80  			return dst, errors.New(fmt.Sprintf("unsupport dstCharset: %s", dstCharset))
    81  		}
    82  	} else {
    83  		dst = src
    84  	}
    85  	return dst, nil
    86  }
    87  
    88  // ToUTF8 converts <src> charset encoding from <srcCharset> to UTF-8 ,
    89  // and returns the converted string.
    90  func ToUTF8(srcCharset string, src string) (dst string, err error) {
    91  	return Convert("UTF-8", srcCharset, src)
    92  }
    93  
    94  // UTF8To converts <src> charset encoding from UTF-8 to <dstCharset>,
    95  // and returns the converted string.
    96  func UTF8To(dstCharset string, src string) (dst string, err error) {
    97  	return Convert(dstCharset, "UTF-8", src)
    98  }
    99  
   100  // getEncoding returns the encoding.Encoding interface object for <charset>.
   101  // It returns nil if <charset> is not supported.
   102  func getEncoding(charset string) encoding.Encoding {
   103  	if c, ok := charsetAlias[charset]; ok {
   104  		charset = c
   105  	}
   106  	if e, err := ianaindex.MIB.Encoding(charset); err == nil && e != nil {
   107  		return e
   108  	}
   109  	return nil
   110  }