github.com/abadojack/whatlanggo@v1.0.1/detect_test.go (about) 1 package whatlanggo 2 3 import ( 4 "encoding/json" 5 "io/ioutil" 6 "os" 7 "testing" 8 "unicode" 9 ) 10 11 func TestDetect(t *testing.T) { 12 tests := map[string]Info{ 13 "Además de todo lo anteriormente dicho, también encontramos...": {Spa, unicode.Latin, 1}, 14 "बहुत बहुत (धन्यवाद / शुक्रिया)!": {Hin, unicode.Devanagari, 1}, 15 "अनुच्छेद १: सबहि लोकानि आजादे जम्मेला आओर ओखिनियो के बराबर सम्मान आओर अघ्कार प्राप्त हवे। ओखिनियो के पास समझ-बूझ आओर अंत:करण के आवाज होखता आओर हुनको के दोसरा के साथ भाईचारे के बेवहार करे के होखला": {Bho, unicode.Devanagari, 1}, 16 "ኢትዮጵያ አፍሪቃ ውስጥ ናት": {Amh, unicode.Ethiopic, 1}, 17 "لغتي العربية ليست كما يجب": {Arb, unicode.Arabic, 1}, 18 "我爱你": {Cmn, unicode.Han, 1}, 19 "আমি তোমাকে ভালোবাস ": {Ben, unicode.Bengali, 1}, 20 "울란바토르": {Kor, unicode.Hangul, 1}, 21 "ყველა ადამიანი იბადება თავისუფალი და თანასწორი თავისი ღირსებითა და უფლებებით": {Kat, unicode.Georgian, 1}, 22 "Όλοι οι άνθρωποι γεννιούνται ελεύθεροι και ίσοι στην αξιοπρέπεια και τα δικαιώματα.": {Ell, unicode.Greek, 1}, 23 "ಎಲ್ಲಾ ಮಾನವರ ಉಚಿತ ಮತ್ತು ಘನತೆ ಮತ್ತು ಹಕ್ಕುಗಳಲ್ಲಿ ಸಮಾನ ಹುಟ್ಟಿದ.": {Kan, unicode.Kannada, 1}, 24 "நீங்கள் ஆங்கிலம் பேசுவீர்களா?": {Tam, unicode.Tamil, 1}, 25 "มนุษย์ทุกคนเกิดมามีอิสระและเสมอภาคกันในศักดิ์ศรีและสิทธิ": {Tha, unicode.Thai, 1}, 26 "નાણાં મારા લોહીમાં છે": {Guj, unicode.Gujarati, 1}, 27 " ਗੁਰੂ ਗ੍ਰੰਥ ਸਾਹਿਬ ਜੀ": {Pan, unicode.Gurmukhi, 1}, 28 "నన్ను ఒంటరిగా వదిలేయ్": {Tel, unicode.Telugu, 1}, 29 "എന്താണ് നിങ്ങളുടെ പേര് ?": {Mal, unicode.Malayalam, 1}, 30 "ମୁ ତୁମକୁ ଭଲ ପାଏ |": {Ori, unicode.Oriya, 1}, 31 "အားလုံးလူသားတွေအခမဲ့နှင့်ဂုဏ်သိက္ခာနှင့်လူ့အခွင့်အရေးအတွက်တန်းတူဖွားမြင်ကြသည်။": {Mya, unicode.Myanmar, 1}, 32 "වෙලාව කියද?": {Sin, unicode.Sinhala, 1}, 33 "ពួកម៉ាកខ្ញុំពីរនាក់នេះ": {Khm, unicode.Khmer, 1}, 34 "其疾如風、其徐如林、侵掠如火、不動如山、難知如陰、動如雷震。": {Cmn, unicode.Han, 1}, 35 "知彼知己、百戰不殆。不知彼而知己、一勝一負。不知彼不知己、毎戰必殆。": {Cmn, unicode.Han, 1}, 36 "支那の上海の或町です。": {Jpn, _HiraganaKatakana, 1}, 37 "或日の暮方の事である。": {Jpn, _HiraganaKatakana, 1}, 38 "今日は": {Jpn, _HiraganaKatakana, 1}, 39 "コンニチハ": {Jpn, _HiraganaKatakana, 1}, 40 "タナカ タロウ": {Jpn, _HiraganaKatakana, 1}, 41 "どうもありがとう": {Jpn, _HiraganaKatakana, 1}, 42 } 43 44 for key, value := range tests { 45 got := Detect(key) 46 47 if value.Lang != got.Lang || value.Script != got.Script { 48 t.Fatalf("%s want %v %v got %v %v", key, LangToString(value.Lang), Scripts[value.Script], LangToString(got.Lang), Scripts[got.Script]) 49 } 50 } 51 } 52 53 func TestDetectLang(t *testing.T) { 54 tests := map[string]Lang{ 55 "Та нічого, все нормально. А в тебе як?": Ukr, 56 "Vouloir, c'est pouvoir": Fra, 57 "Where there is a will there is a way": Eng, 58 "Mi ŝategas la japanan kaj studas ĝin kelkajn jarojn 😊": Epo, 59 "Te echo de menos": Spa, 60 "Buona notte e sogni d'oro!": Ita, 61 } 62 63 for text, want := range tests { 64 got := DetectLang(text) 65 if got != want { 66 t.Fatalf("%s want %v got %v", text, LangToString(want), LangToString(got)) 67 } 68 } 69 } 70 71 // Test detect with empty options and supported language and script 72 func TestDetectWithOptionsEmptySupportedLang(t *testing.T) { 73 want := Info{Epo, unicode.Latin, 1} 74 got := DetectWithOptions("La viro amas hundojn. Hundo estas la plej bona amiko de viro", Options{}) 75 if want.Lang != got.Lang && want.Script != got.Script { 76 t.Fatalf("want %v %v got %v %v", want.Lang, want.Script, got.Lang, got.Script) 77 } 78 } 79 80 // Test detect with empty options and nonsupported script(Balinese) 81 func TestDetectWithOptionsEmptyNonSupportedLang(t *testing.T) { 82 want := Info{-1, nil, 0} 83 got := DetectWithOptions("ᬅᬓ᭄ᬱᬭᬯ᭄ᬬᬜ᭄ᬚᬦ", Options{}) 84 if want.Lang != got.Lang && want.Script != got.Script { 85 t.Fatalf("want %v %v got %v %v", want.Lang, want.Script, got.Lang, got.Script) 86 } 87 } 88 89 func TestDetectWithOptionsWithBlacklist(t *testing.T) { 90 text := "האקדמיה ללשון העברית" 91 //All languages with Hebrew text blacklisted ... returns correct script but invalid language 92 options1 := Options{ 93 Blacklist: map[Lang]bool{ 94 Heb: true, 95 Ydd: true, 96 }, 97 } 98 want := Info{-1, unicode.Hebrew, 1} 99 got := DetectWithOptions(text, options1) 100 if got.Lang != want.Lang && want.Script != got.Script { 101 t.Fatalf("Want %s %s got %s %s", LangToString(want.Lang), Scripts[want.Script], LangToString(got.Lang), Scripts[got.Script]) 102 } 103 104 text = "Tu me manques" 105 want = Info{Fra, unicode.Latin, 1} 106 options3 := Options{ 107 Blacklist: map[Lang]bool{ 108 Kur: true, 109 }, 110 } 111 got = DetectWithOptions(text, options3) 112 if got.Lang != want.Lang && want.Script != got.Script { 113 t.Fatalf("Want %s %s got %s %s", LangToString(want.Lang), Scripts[want.Script], LangToString(got.Lang), Scripts[got.Script]) 114 } 115 } 116 117 func TestWithOptionsWithWhitelist(t *testing.T) { 118 text := "Mi ne scias!" 119 want := Info{Epo, unicode.Latin, 1} 120 options2 := Options{ 121 Whitelist: map[Lang]bool{ 122 Epo: true, 123 Ukr: true, 124 }, 125 } 126 got := DetectWithOptions(text, options2) 127 if got.Lang != want.Lang && want.Script != got.Script { 128 t.Fatalf("Want %s %s got %s %s", LangToString(want.Lang), Scripts[want.Script], LangToString(got.Lang), Scripts[got.Script]) 129 } 130 } 131 132 func TestDetectLangWithOptions(t *testing.T) { 133 text := "All evil come from a single cause ... man's inability to sit still in a room" 134 want := Eng 135 //without blacklist 136 got := DetectLangWithOptions(text, Options{}) 137 if want != got { 138 t.Fatalf("want %s got %s", LangToString(want), LangToString(got)) 139 } 140 141 //with blacklist 142 options := Options{ 143 Blacklist: map[Lang]bool{ 144 Jav: true, 145 Tgl: true, 146 Nld: true, 147 Uzb: true, 148 Swe: true, 149 Nob: true, 150 Ceb: true, 151 Ilo: true, 152 }, 153 } 154 got = DetectLangWithOptions(text, options) 155 if want != got { 156 t.Fatalf("want %s got %s", LangToString(want), LangToString(got)) 157 } 158 } 159 160 func Test_detectLangBaseOnScriptUnsupportedScript(t *testing.T) { 161 want := Info{-1, nil, 0} 162 gotLang, gotConfidence := detectLangBaseOnScript("ᬅᬓ᭄ᬱᬭᬯ᭄ᬬᬜ᭄ᬚᬦ", Options{}, unicode.Balinese) 163 if want.Lang != gotLang && want.Confidence != gotConfidence { 164 t.Fatalf("want %v %v got %v %v", want.Lang, want.Script, gotLang, gotConfidence) 165 } 166 } 167 168 func TestWithMultipleExamples(t *testing.T) { 169 examplesFile, err := os.Open("testdata/examples.json") 170 if err != nil { 171 t.Fatal("Error opening testdata/examples.json") 172 } 173 174 defer examplesFile.Close() 175 176 byteValue, err := ioutil.ReadAll(examplesFile) 177 if err != nil { 178 t.Fatal("Error reading testdata/examples.json") 179 } 180 181 var examples map[string]string 182 err = json.Unmarshal(byteValue, &examples) 183 if err != nil { 184 t.Fatal("Error Unmarshalling json") 185 } 186 187 for lang, text := range examples { 188 want := CodeToLang(lang) 189 info := Detect(text) 190 if info.Lang != want && !info.IsReliable() { 191 t.Fatalf("want %v, got %v", Langs[want], Langs[info.Lang]) 192 } 193 } 194 }