github.com/Aoi-hosizora/ahlib-more@v1.5.1-0.20230404072844-256112befaf6/xcharset/xcharset_test.go (about) 1 package xcharset 2 3 import ( 4 "github.com/Aoi-hosizora/ahlib/xtesting" 5 "golang.org/x/text/encoding/japanese" 6 "golang.org/x/text/encoding/unicode" 7 "testing" 8 ) 9 10 func TestDetect(t *testing.T) { 11 for _, tc := range []struct { 12 name string 13 give []byte 14 wantCharset string 15 wantLanguage string 16 wantOk bool 17 }{ 18 {"empty", []byte{}, "UTF-8", "", true}, 19 {"FF", []byte{0xff}, "", "", false}, 20 {"EF_BB_BF", []byte{0xef, 0xbb, 0xbf}, "UTF-8", "", true}, 21 {"EF_BF_BD", []byte{0xef, 0xbf, 0xbd}, "UTF-8", "", true}, 22 // 英语 Latin1 编码:Go is an ... 23 {"English_ISO8859", []byte(`Go is an open source programming language that makes it easy to build simple, reliable, and efficient software. Build fast, reliable, and efficient software at scale.`), "ISO-8859-1", "en", true}, 24 // 简体中文编码:测试文本。 25 {"Chinese_UTF8", []byte{0xe7, 0xae, 0x80, 0xe4, 0xbd, 0x93, 0xe4, 0xb8, 0xad, 0xe6, 0x96, 0x87, 0xe7, 0xbc, 0x96, 0xe7, 0xa0, 0x81, 0xef, 0xbc, 0x9a, 0xe6, 0xb5, 0x8b, 0xe8, 0xaf, 0x95, 0xe6, 0x96, 0x87, 0xe6, 0x9c, 0xac, 0xe3, 0x80, 0x82}, "UTF-8", "", true}, 26 {"Chinese_GB18030", []byte{0xbc, 0xf2, 0xcc, 0xe5, 0xd6, 0xd0, 0xce, 0xc4, 0xb1, 0xe0, 0xc2, 0xeb, 0xa3, 0xba, 0xb2, 0xe2, 0xca, 0xd4, 0xce, 0xc4, 0xb1, 0xbe, 0xa1, 0xa3}, "GB18030", "zh", true}, 27 // 繁體中文編碼:測試文本。 28 {"Chinese_Hant_UTF8", []byte{0xe7, 0xb9, 0x81, 0xe9, 0xab, 0x94, 0xe4, 0xb8, 0xad, 0xe6, 0x96, 0x87, 0xe7, 0xb7, 0xa8, 0xe7, 0xa2, 0xbc, 0xef, 0xbc, 0x9a, 0xe6, 0xb8, 0xac, 0xe8, 0xa9, 0xa6, 0xe6, 0x96, 0x87, 0xe6, 0x9c, 0xac, 0xe3, 0x80, 0x82}, "UTF-8", "", true}, 29 {"Chinese_Hant_BIG5", []byte{0xc1, 0x63, 0xc5, 0xe9, 0xa4, 0xa4, 0xa4, 0xe5, 0xbd, 0x73, 0xbd, 0x58, 0xa1, 0x47, 0xb4, 0xfa, 0xb8, 0xd5, 0xa4, 0xe5, 0xa5, 0xbb, 0xa1, 0x43}, "Big5", "zh", true}, 30 {"Chinese_Hant_GB18030", []byte{0xb7, 0xb1, 0xf3, 0x77, 0xd6, 0xd0, 0xce, 0xc4, 0xbe, 0x8e, 0xb4, 0x61, 0xa3, 0xba, 0x9c, 0x79, 0xd4, 0x87, 0xce, 0xc4, 0xb1, 0xbe, 0xa1, 0xa3}, "GB18030", "zh", true}, 31 // 日本語コーディング:テス 32 {"Japanese_UTF8", []byte{0xe6, 0x97, 0xa5, 0xe6, 0x9c, 0xac, 0xe8, 0xaa, 0x9e, 0xe3, 0x82, 0xb3, 0xe3, 0x83, 0xbc, 0xe3, 0x83, 0x87, 0xe3, 0x82, 0xa3, 0xe3, 0x83, 0xb3, 0xe3, 0x82, 0xb0, 0xef, 0xbc, 0x9a, 0xe3, 0x83, 0x86, 0xe3, 0x82, 0xb9}, "UTF-8", "", true}, 33 {"Japanese_ShiftJIS", []byte{0x93, 0xFA, 0x96, 0x7B, 0x8C, 0xEA, 0x83, 0x52, 0x81, 0x5B, 0x83, 0x66, 0x83, 0x42, 0x83, 0x93, 0x83, 0x4F, 0x81, 0x46, 0x83, 0x65, 0x83, 0x58}, "Shift_JIS", "ja", true}, 34 {"Japanese_EUCJP", []byte{0xc6, 0xfc, 0xcb, 0xdc, 0xb8, 0xec, 0xa5, 0xb3, 0xa1, 0xbc, 0xa5, 0xc7, 0xa5, 0xa3, 0xa5, 0xf3, 0xa5, 0xb0, 0xa1, 0xa7, 0xa5, 0xc6, 0xa5, 0xb9}, "EUC-JP", "ja", true}, 35 {"Japanese_ISO2022", []byte{0x1b, 0x24, 0x42, 0x46, 0x7c, 0x4b, 0x5c, 0x38, 0x6c, 0x25, 0x33, 0x21, 0x3c, 0x25, 0x47, 0x25, 0x23, 0x25, 0x73, 0x25, 0x30, 0x21, 0x27, 0x25, 0x46, 0x25, 0x39, 0x1b, 0x28, 0x42}, "ISO-2022-JP", "ja", true}, 36 } { 37 t.Run(tc.name, func(t *testing.T) { 38 result, ok := DetectBestCharset(tc.give) 39 xtesting.Equal(t, ok, tc.wantOk) 40 if ok { 41 xtesting.Equal(t, result.Charset, tc.wantCharset) 42 xtesting.Equal(t, result.Language, tc.wantLanguage) 43 } 44 45 results, ok := DetectAllCharsets(tc.give) 46 xtesting.Equal(t, ok, tc.wantOk) 47 if ok { 48 xtesting.Equal(t, results[0].Charset, tc.wantCharset) 49 xtesting.Equal(t, results[0].Language, tc.wantLanguage) 50 } 51 }) 52 } 53 } 54 55 func TestEncode(t *testing.T) { 56 dest, err := EncodeString(unicode.UTF8, "test") 57 xtesting.Nil(t, err) 58 xtesting.Equal(t, dest, "test") 59 60 dest, err = EncodeString(japanese.ShiftJIS, "测试") 61 xtesting.NotNil(t, err) 62 63 dest2, err := EncodeBytes(unicode.UTF8, []byte("test")) 64 xtesting.Nil(t, err) 65 xtesting.Equal(t, dest2, []byte("test")) 66 67 dest2, err = EncodeBytes(japanese.ShiftJIS, []byte("测试")) 68 xtesting.NotNil(t, err) 69 } 70 71 func TestDecode(t *testing.T) { 72 dest, err := DecodeString(unicode.UTF8, "test") 73 xtesting.Nil(t, err) 74 xtesting.Equal(t, dest, "test") 75 76 dest2, err := DecodeBytes(unicode.UTF8, []byte("test")) 77 xtesting.Nil(t, err) 78 xtesting.Equal(t, dest2, []byte("test")) 79 } 80 81 func TestGetEncoding(t *testing.T) { 82 for _, tc := range []struct { 83 give string 84 wantOk bool 85 }{ 86 {"", false}, 87 {IANA_UTF8, true}, 88 {IANA_UTF16BE, true}, 89 {IANA_UTF16LE, true}, 90 {IANA_UTF32BE, true}, 91 {IANA_UTF32LE, true}, 92 {IANA_ISO8859_1, true}, 93 {IANA_ISO8859_2, true}, 94 {IANA_ISO8859_5, true}, 95 {IANA_ISO8859_6, true}, 96 {IANA_ISO8859_7, true}, 97 {IANA_ISO8859_8, true}, 98 {IANA_ISO8859_8I, true}, 99 {IANA_ISO8859_9, true}, 100 {IANA_KOI8R, true}, 101 {IANA_WINDOWS1251, true}, 102 {IANA_WINDOWS1256, true}, 103 {IANA_IBM424RTL, false}, 104 {IANA_IBM424LTR, false}, 105 {IANA_IBM420RTL, false}, 106 {IANA_IBM420LTR, false}, 107 {IANA_SHIFTJIS, true}, 108 {IANA_GBK, true}, 109 {IANA_GB18030, true}, 110 {IANA_BIG5, true}, 111 {IANA_EUCJP, true}, 112 {IANA_EUCKR, true}, 113 {IANA_ISO2022JP, true}, 114 {IANA_ISO2022KR, false}, 115 {IANA_ISO2022CN, false}, 116 } { 117 _, ok := GetEncoding(tc.give) 118 xtesting.Equal(t, ok, tc.wantOk) 119 } 120 }