github.com/Aoi-hosizora/ahlib-more@v1.5.1-0.20230404072844-256112befaf6/xcharset/xcharset_test.go (about)

     1  package xcharset
     2  
     3  import (
     4  	"github.com/Aoi-hosizora/ahlib/xtesting"
     5  	"golang.org/x/text/encoding/japanese"
     6  	"golang.org/x/text/encoding/unicode"
     7  	"testing"
     8  )
     9  
    10  func TestDetect(t *testing.T) {
    11  	for _, tc := range []struct {
    12  		name         string
    13  		give         []byte
    14  		wantCharset  string
    15  		wantLanguage string
    16  		wantOk       bool
    17  	}{
    18  		{"empty", []byte{}, "UTF-8", "", true},
    19  		{"FF", []byte{0xff}, "", "", false},
    20  		{"EF_BB_BF", []byte{0xef, 0xbb, 0xbf}, "UTF-8", "", true},
    21  		{"EF_BF_BD", []byte{0xef, 0xbf, 0xbd}, "UTF-8", "", true},
    22  		// 英语 Latin1 编码:Go is an ...
    23  		{"English_ISO8859", []byte(`Go is an open source programming language that makes it easy to build simple, reliable, and efficient software. Build fast, reliable, and efficient software at scale.`), "ISO-8859-1", "en", true},
    24  		// 简体中文编码:测试文本。
    25  		{"Chinese_UTF8", []byte{0xe7, 0xae, 0x80, 0xe4, 0xbd, 0x93, 0xe4, 0xb8, 0xad, 0xe6, 0x96, 0x87, 0xe7, 0xbc, 0x96, 0xe7, 0xa0, 0x81, 0xef, 0xbc, 0x9a, 0xe6, 0xb5, 0x8b, 0xe8, 0xaf, 0x95, 0xe6, 0x96, 0x87, 0xe6, 0x9c, 0xac, 0xe3, 0x80, 0x82}, "UTF-8", "", true},
    26  		{"Chinese_GB18030", []byte{0xbc, 0xf2, 0xcc, 0xe5, 0xd6, 0xd0, 0xce, 0xc4, 0xb1, 0xe0, 0xc2, 0xeb, 0xa3, 0xba, 0xb2, 0xe2, 0xca, 0xd4, 0xce, 0xc4, 0xb1, 0xbe, 0xa1, 0xa3}, "GB18030", "zh", true},
    27  		// 繁體中文編碼:測試文本。
    28  		{"Chinese_Hant_UTF8", []byte{0xe7, 0xb9, 0x81, 0xe9, 0xab, 0x94, 0xe4, 0xb8, 0xad, 0xe6, 0x96, 0x87, 0xe7, 0xb7, 0xa8, 0xe7, 0xa2, 0xbc, 0xef, 0xbc, 0x9a, 0xe6, 0xb8, 0xac, 0xe8, 0xa9, 0xa6, 0xe6, 0x96, 0x87, 0xe6, 0x9c, 0xac, 0xe3, 0x80, 0x82}, "UTF-8", "", true},
    29  		{"Chinese_Hant_BIG5", []byte{0xc1, 0x63, 0xc5, 0xe9, 0xa4, 0xa4, 0xa4, 0xe5, 0xbd, 0x73, 0xbd, 0x58, 0xa1, 0x47, 0xb4, 0xfa, 0xb8, 0xd5, 0xa4, 0xe5, 0xa5, 0xbb, 0xa1, 0x43}, "Big5", "zh", true},
    30  		{"Chinese_Hant_GB18030", []byte{0xb7, 0xb1, 0xf3, 0x77, 0xd6, 0xd0, 0xce, 0xc4, 0xbe, 0x8e, 0xb4, 0x61, 0xa3, 0xba, 0x9c, 0x79, 0xd4, 0x87, 0xce, 0xc4, 0xb1, 0xbe, 0xa1, 0xa3}, "GB18030", "zh", true},
    31  		// 日本語コーディング:テス
    32  		{"Japanese_UTF8", []byte{0xe6, 0x97, 0xa5, 0xe6, 0x9c, 0xac, 0xe8, 0xaa, 0x9e, 0xe3, 0x82, 0xb3, 0xe3, 0x83, 0xbc, 0xe3, 0x83, 0x87, 0xe3, 0x82, 0xa3, 0xe3, 0x83, 0xb3, 0xe3, 0x82, 0xb0, 0xef, 0xbc, 0x9a, 0xe3, 0x83, 0x86, 0xe3, 0x82, 0xb9}, "UTF-8", "", true},
    33  		{"Japanese_ShiftJIS", []byte{0x93, 0xFA, 0x96, 0x7B, 0x8C, 0xEA, 0x83, 0x52, 0x81, 0x5B, 0x83, 0x66, 0x83, 0x42, 0x83, 0x93, 0x83, 0x4F, 0x81, 0x46, 0x83, 0x65, 0x83, 0x58}, "Shift_JIS", "ja", true},
    34  		{"Japanese_EUCJP", []byte{0xc6, 0xfc, 0xcb, 0xdc, 0xb8, 0xec, 0xa5, 0xb3, 0xa1, 0xbc, 0xa5, 0xc7, 0xa5, 0xa3, 0xa5, 0xf3, 0xa5, 0xb0, 0xa1, 0xa7, 0xa5, 0xc6, 0xa5, 0xb9}, "EUC-JP", "ja", true},
    35  		{"Japanese_ISO2022", []byte{0x1b, 0x24, 0x42, 0x46, 0x7c, 0x4b, 0x5c, 0x38, 0x6c, 0x25, 0x33, 0x21, 0x3c, 0x25, 0x47, 0x25, 0x23, 0x25, 0x73, 0x25, 0x30, 0x21, 0x27, 0x25, 0x46, 0x25, 0x39, 0x1b, 0x28, 0x42}, "ISO-2022-JP", "ja", true},
    36  	} {
    37  		t.Run(tc.name, func(t *testing.T) {
    38  			result, ok := DetectBestCharset(tc.give)
    39  			xtesting.Equal(t, ok, tc.wantOk)
    40  			if ok {
    41  				xtesting.Equal(t, result.Charset, tc.wantCharset)
    42  				xtesting.Equal(t, result.Language, tc.wantLanguage)
    43  			}
    44  
    45  			results, ok := DetectAllCharsets(tc.give)
    46  			xtesting.Equal(t, ok, tc.wantOk)
    47  			if ok {
    48  				xtesting.Equal(t, results[0].Charset, tc.wantCharset)
    49  				xtesting.Equal(t, results[0].Language, tc.wantLanguage)
    50  			}
    51  		})
    52  	}
    53  }
    54  
    55  func TestEncode(t *testing.T) {
    56  	dest, err := EncodeString(unicode.UTF8, "test")
    57  	xtesting.Nil(t, err)
    58  	xtesting.Equal(t, dest, "test")
    59  
    60  	dest, err = EncodeString(japanese.ShiftJIS, "测试")
    61  	xtesting.NotNil(t, err)
    62  
    63  	dest2, err := EncodeBytes(unicode.UTF8, []byte("test"))
    64  	xtesting.Nil(t, err)
    65  	xtesting.Equal(t, dest2, []byte("test"))
    66  
    67  	dest2, err = EncodeBytes(japanese.ShiftJIS, []byte("测试"))
    68  	xtesting.NotNil(t, err)
    69  }
    70  
    71  func TestDecode(t *testing.T) {
    72  	dest, err := DecodeString(unicode.UTF8, "test")
    73  	xtesting.Nil(t, err)
    74  	xtesting.Equal(t, dest, "test")
    75  
    76  	dest2, err := DecodeBytes(unicode.UTF8, []byte("test"))
    77  	xtesting.Nil(t, err)
    78  	xtesting.Equal(t, dest2, []byte("test"))
    79  }
    80  
    81  func TestGetEncoding(t *testing.T) {
    82  	for _, tc := range []struct {
    83  		give   string
    84  		wantOk bool
    85  	}{
    86  		{"", false},
    87  		{IANA_UTF8, true},
    88  		{IANA_UTF16BE, true},
    89  		{IANA_UTF16LE, true},
    90  		{IANA_UTF32BE, true},
    91  		{IANA_UTF32LE, true},
    92  		{IANA_ISO8859_1, true},
    93  		{IANA_ISO8859_2, true},
    94  		{IANA_ISO8859_5, true},
    95  		{IANA_ISO8859_6, true},
    96  		{IANA_ISO8859_7, true},
    97  		{IANA_ISO8859_8, true},
    98  		{IANA_ISO8859_8I, true},
    99  		{IANA_ISO8859_9, true},
   100  		{IANA_KOI8R, true},
   101  		{IANA_WINDOWS1251, true},
   102  		{IANA_WINDOWS1256, true},
   103  		{IANA_IBM424RTL, false},
   104  		{IANA_IBM424LTR, false},
   105  		{IANA_IBM420RTL, false},
   106  		{IANA_IBM420LTR, false},
   107  		{IANA_SHIFTJIS, true},
   108  		{IANA_GBK, true},
   109  		{IANA_GB18030, true},
   110  		{IANA_BIG5, true},
   111  		{IANA_EUCJP, true},
   112  		{IANA_EUCKR, true},
   113  		{IANA_ISO2022JP, true},
   114  		{IANA_ISO2022KR, false},
   115  		{IANA_ISO2022CN, false},
   116  	} {
   117  		_, ok := GetEncoding(tc.give)
   118  		xtesting.Equal(t, ok, tc.wantOk)
   119  	}
   120  }