github.com/abadojack/whatlanggo@v1.0.1/detect_test.go (about)

     1  package whatlanggo
     2  
     3  import (
     4  	"encoding/json"
     5  	"io/ioutil"
     6  	"os"
     7  	"testing"
     8  	"unicode"
     9  )
    10  
    11  func TestDetect(t *testing.T) {
    12  	tests := map[string]Info{
    13  		"Además de todo lo anteriormente dicho, también encontramos...": {Spa, unicode.Latin, 1},
    14  		"बहुत बहुत (धन्यवाद / शुक्रिया)!":                               {Hin, unicode.Devanagari, 1},
    15  		"अनुच्छेद १: सबहि लोकानि आजादे जम्मेला आओर ओखिनियो के बराबर सम्मान आओर अघ्कार प्राप्त हवे। ओखिनियो के पास समझ-बूझ आओर अंत:करण के आवाज होखता आओर हुनको के दोसरा के साथ भाईचारे के बेवहार करे के होखला": {Bho, unicode.Devanagari, 1},
    16  		"ኢትዮጵያ አፍሪቃ ውስጥ ናት":         {Amh, unicode.Ethiopic, 1},
    17  		"لغتي العربية ليست كما يجب": {Arb, unicode.Arabic, 1},
    18  		"我爱你": {Cmn, unicode.Han, 1},
    19  		"আমি তোমাকে ভালোবাস ": {Ben, unicode.Bengali, 1},
    20  		"울란바토르": {Kor, unicode.Hangul, 1},
    21  		"ყველა ადამიანი იბადება თავისუფალი და თანასწორი თავისი ღირსებითა და უფლებებით":        {Kat, unicode.Georgian, 1},
    22  		"Όλοι οι άνθρωποι γεννιούνται ελεύθεροι και ίσοι στην αξιοπρέπεια και τα δικαιώματα.": {Ell, unicode.Greek, 1},
    23  		"ಎಲ್ಲಾ ಮಾನವರ ಉಚಿತ ಮತ್ತು ಘನತೆ ಮತ್ತು ಹಕ್ಕುಗಳಲ್ಲಿ ಸಮಾನ ಹುಟ್ಟಿದ.":                         {Kan, unicode.Kannada, 1},
    24  		"நீங்கள் ஆங்கிலம் பேசுவீர்களா?":                                                       {Tam, unicode.Tamil, 1},
    25  		"มนุษย์ทุกคนเกิดมามีอิสระและเสมอภาคกันในศักดิ์ศรีและสิทธิ":                            {Tha, unicode.Thai, 1},
    26  		"નાણાં મારા લોહીમાં છે":    {Guj, unicode.Gujarati, 1},
    27  		" ਗੁਰੂ ਗ੍ਰੰਥ ਸਾਹਿਬ ਜੀ":     {Pan, unicode.Gurmukhi, 1},
    28  		"నన్ను ఒంటరిగా వదిలేయ్":    {Tel, unicode.Telugu, 1},
    29  		"എന്താണ് നിങ്ങളുടെ പേര് ?": {Mal, unicode.Malayalam, 1},
    30  		"ମୁ ତୁମକୁ ଭଲ ପାଏ |":        {Ori, unicode.Oriya, 1},
    31  		"အားလုံးလူသားတွေအခမဲ့နှင့်ဂုဏ်သိက္ခာနှင့်လူ့အခွင့်အရေးအတွက်တန်းတူဖွားမြင်ကြသည်။": {Mya, unicode.Myanmar, 1},
    32  		"වෙලාව කියද?":                        {Sin, unicode.Sinhala, 1},
    33  		"ពួកម៉ាកខ្ញុំពីរនាក់នេះ":             {Khm, unicode.Khmer, 1},
    34  		"其疾如風、其徐如林、侵掠如火、不動如山、難知如陰、動如雷震。":     {Cmn, unicode.Han, 1},
    35  		"知彼知己、百戰不殆。不知彼而知己、一勝一負。不知彼不知己、毎戰必殆。": {Cmn, unicode.Han, 1},
    36  		"支那の上海の或町です。":                        {Jpn, _HiraganaKatakana, 1},
    37  		"或日の暮方の事である。":                        {Jpn, _HiraganaKatakana, 1},
    38  		"今日は":                                {Jpn, _HiraganaKatakana, 1},
    39  		"コンニチハ":                              {Jpn, _HiraganaKatakana, 1},
    40  		"タナカ タロウ":                            {Jpn, _HiraganaKatakana, 1},
    41  		"どうもありがとう":                           {Jpn, _HiraganaKatakana, 1},
    42  	}
    43  
    44  	for key, value := range tests {
    45  		got := Detect(key)
    46  
    47  		if value.Lang != got.Lang || value.Script != got.Script {
    48  			t.Fatalf("%s want %v %v got %v %v", key, LangToString(value.Lang), Scripts[value.Script], LangToString(got.Lang), Scripts[got.Script])
    49  		}
    50  	}
    51  }
    52  
    53  func TestDetectLang(t *testing.T) {
    54  	tests := map[string]Lang{
    55  		"Та нічого, все нормально. А в тебе як?": Ukr,
    56  		"Vouloir, c'est pouvoir":                                Fra,
    57  		"Where there is a will there is a way":                  Eng,
    58  		"Mi ŝategas la japanan kaj studas ĝin kelkajn jarojn 😊": Epo,
    59  		"Te echo de menos":                                      Spa,
    60  		"Buona notte e sogni d'oro!":                            Ita,
    61  	}
    62  
    63  	for text, want := range tests {
    64  		got := DetectLang(text)
    65  		if got != want {
    66  			t.Fatalf("%s want %v got %v", text, LangToString(want), LangToString(got))
    67  		}
    68  	}
    69  }
    70  
    71  // Test detect with empty options and supported language and script
    72  func TestDetectWithOptionsEmptySupportedLang(t *testing.T) {
    73  	want := Info{Epo, unicode.Latin, 1}
    74  	got := DetectWithOptions("La viro amas hundojn. Hundo estas la plej bona amiko de viro", Options{})
    75  	if want.Lang != got.Lang && want.Script != got.Script {
    76  		t.Fatalf("want %v %v got %v %v", want.Lang, want.Script, got.Lang, got.Script)
    77  	}
    78  }
    79  
    80  // Test detect with empty options and nonsupported script(Balinese)
    81  func TestDetectWithOptionsEmptyNonSupportedLang(t *testing.T) {
    82  	want := Info{-1, nil, 0}
    83  	got := DetectWithOptions("ᬅᬓ᭄ᬱᬭᬯ᭄ᬬᬜ᭄ᬚᬦ", Options{})
    84  	if want.Lang != got.Lang && want.Script != got.Script {
    85  		t.Fatalf("want %v %v got %v %v", want.Lang, want.Script, got.Lang, got.Script)
    86  	}
    87  }
    88  
    89  func TestDetectWithOptionsWithBlacklist(t *testing.T) {
    90  	text := "האקדמיה ללשון העברית"
    91  	//All languages with Hebrew text blacklisted ... returns correct script but invalid language
    92  	options1 := Options{
    93  		Blacklist: map[Lang]bool{
    94  			Heb: true,
    95  			Ydd: true,
    96  		},
    97  	}
    98  	want := Info{-1, unicode.Hebrew, 1}
    99  	got := DetectWithOptions(text, options1)
   100  	if got.Lang != want.Lang && want.Script != got.Script {
   101  		t.Fatalf("Want %s %s got %s %s", LangToString(want.Lang), Scripts[want.Script], LangToString(got.Lang), Scripts[got.Script])
   102  	}
   103  
   104  	text = "Tu me manques"
   105  	want = Info{Fra, unicode.Latin, 1}
   106  	options3 := Options{
   107  		Blacklist: map[Lang]bool{
   108  			Kur: true,
   109  		},
   110  	}
   111  	got = DetectWithOptions(text, options3)
   112  	if got.Lang != want.Lang && want.Script != got.Script {
   113  		t.Fatalf("Want %s %s got %s %s", LangToString(want.Lang), Scripts[want.Script], LangToString(got.Lang), Scripts[got.Script])
   114  	}
   115  }
   116  
   117  func TestWithOptionsWithWhitelist(t *testing.T) {
   118  	text := "Mi ne scias!"
   119  	want := Info{Epo, unicode.Latin, 1}
   120  	options2 := Options{
   121  		Whitelist: map[Lang]bool{
   122  			Epo: true,
   123  			Ukr: true,
   124  		},
   125  	}
   126  	got := DetectWithOptions(text, options2)
   127  	if got.Lang != want.Lang && want.Script != got.Script {
   128  		t.Fatalf("Want %s %s got %s %s", LangToString(want.Lang), Scripts[want.Script], LangToString(got.Lang), Scripts[got.Script])
   129  	}
   130  }
   131  
   132  func TestDetectLangWithOptions(t *testing.T) {
   133  	text := "All evil come from a single cause ... man's inability to sit still in a room"
   134  	want := Eng
   135  	//without blacklist
   136  	got := DetectLangWithOptions(text, Options{})
   137  	if want != got {
   138  		t.Fatalf("want %s got %s", LangToString(want), LangToString(got))
   139  	}
   140  
   141  	//with blacklist
   142  	options := Options{
   143  		Blacklist: map[Lang]bool{
   144  			Jav: true,
   145  			Tgl: true,
   146  			Nld: true,
   147  			Uzb: true,
   148  			Swe: true,
   149  			Nob: true,
   150  			Ceb: true,
   151  			Ilo: true,
   152  		},
   153  	}
   154  	got = DetectLangWithOptions(text, options)
   155  	if want != got {
   156  		t.Fatalf("want %s got %s", LangToString(want), LangToString(got))
   157  	}
   158  }
   159  
   160  func Test_detectLangBaseOnScriptUnsupportedScript(t *testing.T) {
   161  	want := Info{-1, nil, 0}
   162  	gotLang, gotConfidence := detectLangBaseOnScript("ᬅᬓ᭄ᬱᬭᬯ᭄ᬬᬜ᭄ᬚᬦ", Options{}, unicode.Balinese)
   163  	if want.Lang != gotLang && want.Confidence != gotConfidence {
   164  		t.Fatalf("want %v %v got %v %v", want.Lang, want.Script, gotLang, gotConfidence)
   165  	}
   166  }
   167  
   168  func TestWithMultipleExamples(t *testing.T) {
   169  	examplesFile, err := os.Open("testdata/examples.json")
   170  	if err != nil {
   171  		t.Fatal("Error opening testdata/examples.json")
   172  	}
   173  
   174  	defer examplesFile.Close()
   175  
   176  	byteValue, err := ioutil.ReadAll(examplesFile)
   177  	if err != nil {
   178  		t.Fatal("Error reading testdata/examples.json")
   179  	}
   180  
   181  	var examples map[string]string
   182  	err = json.Unmarshal(byteValue, &examples)
   183  	if err != nil {
   184  		t.Fatal("Error Unmarshalling json")
   185  	}
   186  
   187  	for lang, text := range examples {
   188  		want := CodeToLang(lang)
   189  		info := Detect(text)
   190  		if info.Lang != want && !info.IsReliable() {
   191  			t.Fatalf("want %v, got %v", Langs[want], Langs[info.Lang])
   192  		}
   193  	}
   194  }