github.com/isyscore/isc-gobase@v1.5.3-0.20231218061332-cbc7451899e9/encoding/test/encoding_test.go (about) 1 package test 2 3 import ( 4 "strings" 5 "testing" 6 7 "github.com/isyscore/isc-gobase/encoding" 8 "github.com/isyscore/isc-gobase/isc" 9 ) 10 11 var testData = []struct{ utf8, other, otherEncoding string }{ 12 {"Résumé", "Résumé", "utf-8"}, 13 {"これは漢字です。", "S0\x8c0o0\"oW[g0Y0\x020", "UTF-16LE"}, 14 {"これは漢字です。", "0S0\x8c0oo\"[W0g0Y0\x02", "UTF-16BE"}, 15 {"これは漢字です。", "\xfe\xff0S0\x8c0oo\"[W0g0Y0\x02", "UTF-16"}, 16 {"𝄢𝄞𝄪𝄫", "\xfe\xff\xd8\x34\xdd\x22\xd8\x34\xdd\x1e\xd8\x34\xdd\x2a\xd8\x34\xdd\x2b", "UTF-16"}, 17 {"Gdańsk", "Gda\xf1sk", "ISO-8859-2"}, 18 {"Ââ Čč Đđ Ŋŋ Õõ Šš Žž Åå Ää", "\xc2\xe2 \xc8\xe8 \xa9\xb9 \xaf\xbf \xd5\xf5 \xaa\xba \xac\xbc \xc5\xe5 \xc4\xe4", "ISO-8859-10"}, 19 {"latviešu", "latvie\xf0u", "ISO-8859-13"}, 20 {"Seònaid", "Se\xf2naid", "ISO-8859-14"}, 21 {"€1 is cheap", "\xa41 is cheap", "ISO-8859-15"}, 22 {"românește", "rom\xe2ne\xbate", "ISO-8859-16"}, 23 {"nutraĵo", "nutra\xbco", "ISO-8859-3"}, 24 {"Kalâdlit", "Kal\xe2dlit", "ISO-8859-4"}, 25 {"русский", "\xe0\xe3\xe1\xe1\xda\xd8\xd9", "ISO-8859-5"}, 26 {"ελληνικά", "\xe5\xeb\xeb\xe7\xed\xe9\xea\xdc", "ISO-8859-7"}, 27 {"Kağan", "Ka\xf0an", "ISO-8859-9"}, 28 {"Résumé", "R\x8esum\x8e", "macintosh"}, 29 {"Gdańsk", "Gda\xf1sk", "windows-1250"}, 30 {"русский", "\xf0\xf3\xf1\xf1\xea\xe8\xe9", "windows-1251"}, 31 {"Résumé", "R\xe9sum\xe9", "windows-1252"}, 32 {"ελληνικά", "\xe5\xeb\xeb\xe7\xed\xe9\xea\xdc", "windows-1253"}, 33 {"Kağan", "Ka\xf0an", "windows-1254"}, 34 {"עִבְרִית", "\xf2\xc4\xe1\xc0\xf8\xc4\xe9\xfa", "windows-1255"}, 35 {"العربية", "\xc7\xe1\xda\xd1\xc8\xed\xc9", "windows-1256"}, 36 {"latviešu", "latvie\xf0u", "windows-1257"}, 37 {"Việt", "Vi\xea\xf2t", "windows-1258"}, 38 {"สำหรับ", "\xca\xd3\xcb\xc3\u047a", "windows-874"}, 39 {"русский", "\xd2\xd5\xd3\xd3\xcb\xc9\xca", "KOI8-R"}, 40 {"українська", "\xd5\xcb\xd2\xc1\xa7\xce\xd3\xd8\xcb\xc1", "KOI8-U"}, 41 {"Hello 常用國字標準字體表", "Hello \xb1`\xa5\u03b0\xea\xa6r\xbc\u0437\u01e6r\xc5\xe9\xaa\xed", "big5"}, 42 {"Hello 常用國字標準字體表", "Hello \xb3\xa3\xd3\xc3\x87\xf8\xd7\xd6\x98\xcb\x9c\xca\xd7\xd6\xf3\x77\xb1\xed", "gbk"}, 43 {"Hello 常用國字標準字體表", "Hello \xb3\xa3\xd3\xc3\x87\xf8\xd7\xd6\x98\xcb\x9c\xca\xd7\xd6\xf3\x77\xb1\xed", "gb18030"}, 44 {"花间一壶酒,独酌无相亲。", "~{;(<dR;:x>F#,6@WCN^O`GW!#", "GB2312"}, 45 {"花间一壶酒,独酌无相亲。", "~{;(<dR;:x>F#,6@WCN^O`GW!#", "HZGB2312"}, 46 {"עִבְרִית", "\x81\x30\xfb\x30\x81\x30\xf6\x34\x81\x30\xf9\x33\x81\x30\xf6\x30\x81\x30\xfb\x36\x81\x30\xf6\x34\x81\x30\xfa\x31\x81\x30\xfb\x38", "gb18030"}, 47 {"㧯", "\x82\x31\x89\x38", "gb18030"}, 48 {"㧯", "㧯", "UTF-8"}, 49 {"これは漢字です。", "\xa4\xb3\xa4\xec\xa4\u03f4\xc1\xbb\xfa\xa4\u01e4\xb9\xa1\xa3", "EUC-JP"}, 50 } 51 52 func TestDecode(t *testing.T) { 53 for _, data := range testData { 54 str := "" 55 str, err := encoding.Convert(data.other, data.otherEncoding, "UTF-8") 56 if err != nil { 57 t.Errorf("Could not create decoder for %v", err) 58 continue 59 } 60 61 if str != data.utf8 { 62 t.Errorf("Unexpected value: %#v (expected %#v) %v", str, data.utf8, data.otherEncoding) 63 } 64 } 65 } 66 67 func TestUTF8To(t *testing.T) { 68 for _, data := range testData { 69 str := "" 70 str, err := encoding.UTF8ToString(data.utf8, data.otherEncoding) 71 if err != nil { 72 t.Errorf("Could not create decoder for %v", err) 73 continue 74 } 75 76 if str != data.other { 77 t.Errorf("Unexpected value: %#v (expected %#v) %v", str, data.other, data.otherEncoding) 78 } 79 } 80 } 81 82 func TestToUTF8(t *testing.T) { 83 for _, data := range testData { 84 str := "" 85 str, err := encoding.StringToUTF8(data.other, data.otherEncoding) 86 if err != nil { 87 t.Errorf("Could not create decoder for %v", err) 88 continue 89 } 90 91 if str != data.utf8 { 92 t.Errorf("Unexpected value: %#v (expected %#v)", str, data.utf8) 93 } 94 } 95 } 96 97 func TestEncode(t *testing.T) { 98 for _, data := range testData { 99 str := "" 100 str, err := encoding.Convert(data.utf8, "UTF-8", data.otherEncoding) 101 if err != nil { 102 t.Errorf("Could not create decoder for %v", err) 103 continue 104 } 105 106 if str != data.other { 107 t.Errorf("Unexpected value: %#v (expected %#v)", str, data.other) 108 } 109 } 110 } 111 112 func TestConvert(t *testing.T) { 113 srcCharset := "big5" 114 src := "Hello \xb1`\xa5\u03b0\xea\xa6r\xbc\u0437\u01e6r\xc5\xe9\xaa\xed" 115 dstCharset := "gbk" 116 dst := "Hello \xb3\xa3\xd3\xc3\x87\xf8\xd7\xd6\x98\xcb\x9c\xca\xd7\xd6\xf3\x77\xb1\xed" 117 118 str, err := encoding.Convert(src, srcCharset, dstCharset) 119 if err != nil { 120 t.Errorf("convert error. %v", err) 121 return 122 } 123 124 if str != dst { 125 t.Errorf("unexpected value:%#v (expected %#v)", str, dst) 126 } 127 } 128 129 func TestUrlEncode(t *testing.T) { 130 str1 := "《青眼の白龍》" 131 e1, _ := encoding.UrlEncoding(str1, encoding.EUCJP) 132 t.Logf("%v\n", e1) 133 134 str2 := "%A1%D4%C0%C4%B4%E3%A4%CE%C7%F2%CE%B6%A1%D5" 135 e2, _ := encoding.UrlDecoding(str2, encoding.EUCJP) 136 t.Logf("%v\n", e2) 137 } 138 139 func TestCode(t *testing.T) { 140 str := "指令集" 141 t.Logf("len = %d\n", len(str)) 142 str1 := isc.NewListWithList([]rune(str)) 143 t.Logf("len = %d\n", len(str1)) 144 145 idx1 := strings.Index(str, "令") 146 t.Logf("idx1 = %d\n", idx1) 147 idx2 := str1.IndexOf('令') 148 t.Logf("idx2 = %d\n", idx2) 149 }