github.com/insionng/yougam@v0.0.0-20170714101924-2bc18d833463/libraries/chardet/detector_test.go (about) 1 package chardet_test 2 3 import ( 4 "io" 5 "os" 6 "path/filepath" 7 "testing" 8 "github.com/insionng/yougam/plugins/chardet" 9 ) 10 11 func TestDetector(t *testing.T) { 12 type file_charset_language struct { 13 File string 14 IsHtml bool 15 Charset string 16 Language string 17 } 18 var data = []file_charset_language{ 19 {"utf8.html", true, "UTF-8", ""}, 20 {"utf8_bom.html", true, "UTF-8", ""}, 21 {"8859_1_en.html", true, "ISO-8859-1", "en"}, 22 {"8859_1_da.html", true, "ISO-8859-1", "da"}, 23 {"8859_1_de.html", true, "ISO-8859-1", "de"}, 24 {"8859_1_es.html", true, "ISO-8859-1", "es"}, 25 {"8859_1_fr.html", true, "ISO-8859-1", "fr"}, 26 {"8859_1_pt.html", true, "ISO-8859-1", "pt"}, 27 {"shift_jis.html", true, "Shift_JIS", "ja"}, 28 {"gb18030.html", true, "GB-18030", "zh"}, 29 {"euc_jp.html", true, "EUC-JP", "ja"}, 30 {"euc_kr.html", true, "EUC-KR", "ko"}, 31 {"big5.html", true, "Big5", "zh"}, 32 } 33 34 textDetector := chardet.NewTextDetector() 35 htmlDetector := chardet.NewHtmlDetector() 36 buffer := make([]byte, 32<<10) 37 for _, d := range data { 38 f, err := os.Open(filepath.Join("testdata", d.File)) 39 if err != nil { 40 t.Fatal(err) 41 } 42 defer f.Close() 43 size, _ := io.ReadFull(f, buffer) 44 input := buffer[:size] 45 var detector = textDetector 46 if d.IsHtml { 47 detector = htmlDetector 48 } 49 result, err := detector.DetectBest(input) 50 if err != nil { 51 t.Fatal(err) 52 } 53 if result.Charset != d.Charset { 54 t.Errorf("Expected charset %s, actual %s", d.Charset, result.Charset) 55 } 56 if result.Language != d.Language { 57 t.Errorf("Expected language %s, actual %s", d.Language, result.Language) 58 } 59 } 60 }