github.com/isyscore/isc-gobase@v1.5.3-0.20231218061332-cbc7451899e9/encoding/charset.go (about)

     1  package encoding
     2  
     3  import (
     4  	"bytes"
     5  	"fmt"
     6  	"golang.org/x/text/encoding"
     7  	"golang.org/x/text/encoding/ianaindex"
     8  	"golang.org/x/text/transform"
     9  	"io"
    10  	"log"
    11  )
    12  
    13  const (
    14  	UTF8        = "UTF-8"
    15  	UTF16       = "UTF-16"
    16  	UTF16LE     = "UTF-16LE"
    17  	UTF16BE     = "UTF-16BE"
    18  	GBK         = "gbk"
    19  	GB2312      = "GB2312"
    20  	BIG5        = "Big5"
    21  	GB18030     = "gb18030"
    22  	EUCJP       = "EUC-JP"
    23  	ISO2022JP   = "ISO-2022-JP"
    24  	SHIFTJIS    = "Shift_JIS"
    25  	EUCKR       = "EUC-KR"
    26  	ISO8859_2   = "ISO-8859-2"
    27  	ISO8859_3   = "ISO-8859-3"
    28  	ISO8859_4   = "ISO-8859-4"
    29  	ISO8859_5   = "ISO-8859-5"
    30  	ISO8859_7   = "ISO-8859-7"
    31  	ISO8859_9   = "ISO-8859-9"
    32  	ISO8859_10  = "ISO-8859-10"
    33  	ISO8859_13  = "ISO-8859-13"
    34  	ISO8859_14  = "ISO-8859-14"
    35  	ISO8859_15  = "ISO-8859-15"
    36  	ISO8859_16  = "ISO-8859-16"
    37  	WINDOWS1250 = "windows-1250"
    38  	WINDOWS1251 = "windows-1251"
    39  	WINDOWS1252 = "windows-1252"
    40  	WINDOWS1253 = "windows-1253"
    41  	WINDOWS1254 = "windows-1254"
    42  	WINDOWS1255 = "windows-1255"
    43  	WINDOWS1256 = "windows-1256"
    44  	WINDOWS1257 = "windows-1257"
    45  	WINDOWS1258 = "windows-1258"
    46  	WINDOWS874  = "windows-874"
    47  	MACINTOSH   = "macintosh"
    48  	KOI8R       = "KOI8-R"
    49  	KOI8U       = "KOI8-U"
    50  )
    51  
    52  // 别名
    53  var charsetAlias = map[string]string{"HZGB2312": "HZ-GB-2312", "hzgb2312": "HZ-GB-2312", "GB2312": "HZ-GB-2312", "gb2312": "HZ-GB-2312"}
    54  
    55  // Supported 判断指定的编码是否被支持
    56  func Supported(charset string) bool {
    57  	return getEncoding(charset) != nil
    58  }
    59  
    60  func Convert(src string, srcCharset string, dstCharset string) (string, error) {
    61  	if dstCharset == srcCharset {
    62  		return src, nil
    63  	}
    64  	dst := src
    65  	if srcCharset != "UTF-8" {
    66  		if e := getEncoding(srcCharset); e != nil {
    67  			tmp, err := io.ReadAll(
    68  				transform.NewReader(bytes.NewReader([]byte(src)), e.NewDecoder()),
    69  			)
    70  			if err != nil {
    71  				return "", fmt.Errorf(`convert string "%s" to utf8 failed`, srcCharset)
    72  			}
    73  			src = string(tmp)
    74  		} else {
    75  			return dst, fmt.Errorf(`unsupported charset "%s"`, srcCharset)
    76  		}
    77  	}
    78  	if dstCharset != "UTF-8" {
    79  		if e := getEncoding(dstCharset); e != nil {
    80  			tmp, err := io.ReadAll(
    81  				transform.NewReader(bytes.NewReader([]byte(src)), e.NewEncoder()),
    82  			)
    83  			if err != nil {
    84  				return "", fmt.Errorf(`convert string from utf8 to "%s" failed`, dstCharset)
    85  			}
    86  			dst = string(tmp)
    87  		} else {
    88  			return dst, fmt.Errorf(`unsupported charset "%s"`, dstCharset)
    89  		}
    90  	} else {
    91  		dst = src
    92  	}
    93  	return dst, nil
    94  }
    95  
    96  func StringToUTF8(src string, srcCharset string) (string, error) {
    97  	return Convert(src, srcCharset, "UTF-8")
    98  }
    99  
   100  func UTF8ToString(src string, dstCharset string) (string, error) {
   101  	return Convert(src, "UTF-8", dstCharset)
   102  }
   103  
   104  func getEncoding(charset string) encoding.Encoding {
   105  	if c, ok := charsetAlias[charset]; ok {
   106  		charset = c
   107  	}
   108  	enc, err := ianaindex.MIB.Encoding(charset)
   109  	if err != nil {
   110  		log.Printf("[WARN] charset %s not supported", charset)
   111  	}
   112  	return enc
   113  }