gitlab.com/thomasboni/go-enry/v2@v2.8.3-0.20220418031202-30b0d7a3de98/common_test.go (about) 1 package enry 2 3 import ( 4 "fmt" 5 "io/ioutil" 6 "os" 7 "os/exec" 8 "path/filepath" 9 "strings" 10 "testing" 11 12 "gitlab.com/thomasboni/go-enry/v2/data" 13 14 "github.com/stretchr/testify/assert" 15 "github.com/stretchr/testify/require" 16 "github.com/stretchr/testify/suite" 17 ) 18 19 const linguistURL = "https://github.com/github/linguist.git" 20 const linguistClonedEnvVar = "ENRY_TEST_REPO" 21 22 type EnryTestSuite struct { 23 suite.Suite 24 tmpLinguist string 25 needToClone bool 26 samplesDir string 27 testFixturesDir string 28 } 29 30 func (s *EnryTestSuite) TestRegexpEdgeCases() { 31 var regexpEdgeCases = []struct { 32 lang string 33 filename string 34 }{ 35 {lang: "ActionScript", filename: "FooBar.as"}, 36 {lang: "Forth", filename: "asm.fr"}, 37 {lang: "X PixMap", filename: "cc-public_domain_mark_white.pm"}, 38 //{lang: "SQL", filename: "drop_stuff.sql"}, // https://github.com/src-d/enry/issues/194 39 {lang: "Fstar", filename: "Hacl.Spec.Bignum.Fmul.fst"}, 40 {lang: "C++", filename: "Types.h"}, 41 } 42 43 for _, r := range regexpEdgeCases { 44 filename := filepath.Join(s.tmpLinguist, "samples", r.lang, r.filename) 45 46 content, err := ioutil.ReadFile(filename) 47 require.NoError(s.T(), err) 48 49 lang := GetLanguage(r.filename, content) 50 s.T().Logf("File:%s, lang:%s", filename, lang) 51 52 expLang, _ := data.LanguageByAlias(r.lang) 53 require.EqualValues(s.T(), expLang, lang) 54 } 55 } 56 57 func Test_EnryTestSuite(t *testing.T) { 58 suite.Run(t, new(EnryTestSuite)) 59 } 60 61 func (s *EnryTestSuite) SetupSuite() { 62 var err error 63 s.tmpLinguist = os.Getenv(linguistClonedEnvVar) 64 s.needToClone = s.tmpLinguist == "" 65 if s.needToClone { 66 s.tmpLinguist, err = ioutil.TempDir("", "linguist-") 67 require.NoError(s.T(), err) 68 s.T().Logf("Cloning Linguist repo to '%s' as %s was not set\n", 69 s.tmpLinguist, linguistClonedEnvVar) 70 cmd := exec.Command("git", "clone", linguistURL, s.tmpLinguist) 71 err = cmd.Run() 72 require.NoError(s.T(), err) 73 } 74 s.samplesDir = filepath.Join(s.tmpLinguist, "samples") 75 s.T().Logf("using samples from %s", s.samplesDir) 76 77 s.testFixturesDir = filepath.Join(s.tmpLinguist, "test", "fixtures") 78 s.T().Logf("using test fixtures from %s", s.samplesDir) 79 80 cwd, err := os.Getwd() 81 assert.NoError(s.T(), err) 82 83 err = os.Chdir(s.tmpLinguist) 84 assert.NoError(s.T(), err) 85 86 cmd := exec.Command("git", "checkout", data.LinguistCommit) 87 err = cmd.Run() 88 assert.NoError(s.T(), err) 89 90 err = os.Chdir(cwd) 91 assert.NoError(s.T(), err) 92 } 93 94 func (s *EnryTestSuite) TearDownSuite() { 95 if s.needToClone { 96 err := os.RemoveAll(s.tmpLinguist) 97 assert.NoError(s.T(), err) 98 } 99 } 100 101 func (s *EnryTestSuite) TestGetLanguage() { 102 tests := []struct { 103 name string 104 filename string 105 content []byte 106 expected string 107 safe bool 108 }{ 109 {name: "TestGetLanguage_0", filename: "foo.h", content: []byte{}, expected: "C"}, 110 {name: "TestGetLanguage_1", filename: "foo.py", content: []byte{}, expected: "Python"}, 111 {name: "TestGetLanguage_2", filename: "foo.m", content: []byte(":- module"), expected: "Mercury"}, 112 {name: "TestGetLanguage_3", filename: "foo.m", content: nil, expected: "MATLAB"}, 113 {name: "TestGetLanguage_4", filename: "foo.mo", content: []byte{0xDE, 0x12, 0x04, 0x95, 0x00, 0x00, 0x00, 0x00}, expected: OtherLanguage}, 114 {name: "TestGetLanguage_5", filename: "", content: nil, expected: OtherLanguage}, 115 } 116 117 for _, test := range tests { 118 language := GetLanguage(test.filename, test.content) 119 assert.Equal(s.T(), test.expected, language, fmt.Sprintf("%v: %v, expected: %v", test.name, language, test.expected)) 120 } 121 } 122 123 func (s *EnryTestSuite) TestGetLanguages() { 124 tests := []struct { 125 name string 126 filename string 127 content []byte 128 expected []string 129 }{ 130 // With no content or filename, no language can be detected 131 {name: "TestGetLanguages_0", filename: "", content: []byte{}, expected: nil}, 132 // The strategy that will match is GetLanguagesByExtension. Lacking content, it will return those results. 133 {name: "TestGetLanguages_1", filename: "foo.h", content: []byte{}, expected: []string{"C"}}, 134 // GetLanguagesByExtension will return an unambiguous match when there is a single result. 135 {name: "TestGetLanguages_2", filename: "foo.groovy", content: []byte{}, expected: []string{"Groovy"}}, 136 // GetLanguagesByExtension will return "Rust", "RenderScript" for .rs, 137 // then GetLanguagesByContent will take the first rule that matches (in this case Rust) 138 {name: "TestGetLanguages_3", filename: "foo.rs", content: []byte("use \n#include"), expected: []string{"Rust"}}, 139 // .. and in this case, RenderScript (no content that matches a Rust regex can be included, because it runs first.) 140 {name: "TestGetLanguages_4", filename: "foo.rs", content: []byte("#include"), expected: []string{"RenderScript"}}, 141 // GetLanguagesByExtension will return "AMPL", "Linux Kernel Module", "Modula-2", "XML", 142 // then GetLanguagesByContent will ALWAYS return Linux Kernel Module and AMPL when there is no content, 143 // and no further classifier can do anything without content 144 {name: "TestGetLanguages_5", filename: "foo.mod", content: []byte{}, expected: []string{"Linux Kernel Module", "AMPL"}}, 145 // ...with some AMPL tokens, the DefaultClassifier will pick AMPL as the most likely language. 146 {name: "TestGetLanguages_6", filename: "foo.mod", content: []byte("BEAMS ROWS - TotalWeight"), expected: []string{"AMPL", "Linux Kernel Module"}}, 147 } 148 149 for _, test := range tests { 150 languages := GetLanguages(test.filename, test.content) 151 assert.Equal(s.T(), test.expected, languages, fmt.Sprintf("%v: %v, expected: %v", test.name, languages, test.expected)) 152 } 153 } 154 155 func (s *EnryTestSuite) TestGetLanguagesByModelineLinguist() { 156 var modelinesDir = filepath.Join(s.tmpLinguist, "test", "fixtures", "Data", "Modelines") 157 158 tests := []struct { 159 name string 160 filename string 161 candidates []string 162 expected []string 163 }{ 164 // Emacs 165 {name: "TestGetLanguagesByModelineLinguist_1", filename: filepath.Join(modelinesDir, "example_smalltalk.md"), expected: []string{"Smalltalk"}}, 166 {name: "TestGetLanguagesByModelineLinguist_2", filename: filepath.Join(modelinesDir, "fundamentalEmacs.c"), expected: []string{"Text"}}, 167 {name: "TestGetLanguagesByModelineLinguist_3", filename: filepath.Join(modelinesDir, "iamphp.inc"), expected: []string{"PHP"}}, 168 {name: "TestGetLanguagesByModelineLinguist_4", filename: filepath.Join(modelinesDir, "seeplusplusEmacs1"), expected: []string{"C++"}}, 169 {name: "TestGetLanguagesByModelineLinguist_5", filename: filepath.Join(modelinesDir, "seeplusplusEmacs2"), expected: []string{"C++"}}, 170 {name: "TestGetLanguagesByModelineLinguist_6", filename: filepath.Join(modelinesDir, "seeplusplusEmacs3"), expected: []string{"C++"}}, 171 {name: "TestGetLanguagesByModelineLinguist_7", filename: filepath.Join(modelinesDir, "seeplusplusEmacs4"), expected: []string{"C++"}}, 172 {name: "TestGetLanguagesByModelineLinguist_8", filename: filepath.Join(modelinesDir, "seeplusplusEmacs5"), expected: []string{"C++"}}, 173 {name: "TestGetLanguagesByModelineLinguist_9", filename: filepath.Join(modelinesDir, "seeplusplusEmacs6"), expected: []string{"C++"}}, 174 {name: "TestGetLanguagesByModelineLinguist_10", filename: filepath.Join(modelinesDir, "seeplusplusEmacs7"), expected: []string{"C++"}}, 175 {name: "TestGetLanguagesByModelineLinguist_11", filename: filepath.Join(modelinesDir, "seeplusplusEmacs9"), expected: []string{"C++"}}, 176 {name: "TestGetLanguagesByModelineLinguist_12", filename: filepath.Join(modelinesDir, "seeplusplusEmacs10"), expected: []string{"C++"}}, 177 {name: "TestGetLanguagesByModelineLinguist_13", filename: filepath.Join(modelinesDir, "seeplusplusEmacs11"), expected: []string{"C++"}}, 178 {name: "TestGetLanguagesByModelineLinguist_14", filename: filepath.Join(modelinesDir, "seeplusplusEmacs12"), expected: []string{"C++"}}, 179 180 // Vim 181 {name: "TestGetLanguagesByModelineLinguist_15", filename: filepath.Join(modelinesDir, "seeplusplus"), expected: []string{"C++"}}, 182 {name: "TestGetLanguagesByModelineLinguist_16", filename: filepath.Join(modelinesDir, "iamjs.pl"), expected: []string{"JavaScript"}}, 183 {name: "TestGetLanguagesByModelineLinguist_17", filename: filepath.Join(modelinesDir, "iamjs2.pl"), expected: []string{"JavaScript"}}, 184 {name: "TestGetLanguagesByModelineLinguist_18", filename: filepath.Join(modelinesDir, "not_perl.pl"), expected: []string{"Prolog"}}, 185 {name: "TestGetLanguagesByModelineLinguist_19", filename: filepath.Join(modelinesDir, "ruby"), expected: []string{"Ruby"}}, 186 {name: "TestGetLanguagesByModelineLinguist_20", filename: filepath.Join(modelinesDir, "ruby2"), expected: []string{"Ruby"}}, 187 {name: "TestGetLanguagesByModelineLinguist_21", filename: filepath.Join(modelinesDir, "ruby3"), expected: []string{"Ruby"}}, 188 {name: "TestGetLanguagesByModelineLinguist_22", filename: filepath.Join(modelinesDir, "ruby4"), expected: []string{"Ruby"}}, 189 {name: "TestGetLanguagesByModelineLinguist_23", filename: filepath.Join(modelinesDir, "ruby5"), expected: []string{"Ruby"}}, 190 {name: "TestGetLanguagesByModelineLinguist_24", filename: filepath.Join(modelinesDir, "ruby6"), expected: []string{"Ruby"}}, 191 {name: "TestGetLanguagesByModelineLinguist_25", filename: filepath.Join(modelinesDir, "ruby7"), expected: []string{"Ruby"}}, 192 {name: "TestGetLanguagesByModelineLinguist_26", filename: filepath.Join(modelinesDir, "ruby8"), expected: []string{"Ruby"}}, 193 {name: "TestGetLanguagesByModelineLinguist_27", filename: filepath.Join(modelinesDir, "ruby9"), expected: []string{"Ruby"}}, 194 {name: "TestGetLanguagesByModelineLinguist_28", filename: filepath.Join(modelinesDir, "ruby10"), expected: []string{"Ruby"}}, 195 {name: "TestGetLanguagesByModelineLinguist_29", filename: filepath.Join(modelinesDir, "ruby11"), expected: []string{"Ruby"}}, 196 {name: "TestGetLanguagesByModelineLinguist_30", filename: filepath.Join(modelinesDir, "ruby12"), expected: []string{"Ruby"}}, 197 {name: "TestGetLanguagesByModelineLinguist_31", filename: filepath.Join(s.samplesDir, "C++/runtime-compiler.cc"), expected: nil}, 198 {name: "TestGetLanguagesByModelineLinguist_32", filename: "", expected: nil}, 199 } 200 201 for _, test := range tests { 202 var content []byte 203 var err error 204 205 if test.filename != "" { 206 content, err = ioutil.ReadFile(test.filename) 207 assert.NoError(s.T(), err) 208 } 209 210 languages := GetLanguagesByModeline(test.filename, content, test.candidates) 211 assert.Equal(s.T(), test.expected, languages, fmt.Sprintf("%v: languages = %v, expected: %v", test.name, languages, test.expected)) 212 } 213 } 214 215 func (s *EnryTestSuite) TestGetLanguagesByModeline() { 216 const ( 217 wrongVim = `# vim: set syntax=ruby ft =python filetype=perl :` 218 rightVim = `/* vim: set syntax=python ft =python filetype=python */` 219 noLangVim = `/* vim: set shiftwidth=4 softtabstop=0 cindent cinoptions={1s: */` 220 ) 221 222 tests := []struct { 223 name string 224 filename string 225 content []byte 226 candidates []string 227 expected []string 228 }{ 229 {name: "TestGetLanguagesByModeline_1", content: []byte(wrongVim), expected: nil}, 230 {name: "TestGetLanguagesByModeline_2", content: []byte(rightVim), expected: []string{"Python"}}, 231 {name: "TestGetLanguagesByModeline_3", content: []byte(noLangVim), expected: nil}, 232 {name: "TestGetLanguagesByModeline_4", content: nil, expected: nil}, 233 {name: "TestGetLanguagesByModeline_5", content: []byte{}, expected: nil}, 234 } 235 236 for _, test := range tests { 237 languages := GetLanguagesByModeline(test.filename, test.content, test.candidates) 238 assert.Equal(s.T(), test.expected, languages, fmt.Sprintf("%v: languages = %v, expected: %v", test.name, languages, test.expected)) 239 } 240 } 241 242 func (s *EnryTestSuite) TestGetLanguagesByFilename() { 243 tests := []struct { 244 name string 245 filename string 246 content []byte 247 candidates []string 248 expected []string 249 }{ 250 {name: "TestGetLanguagesByFilename_1", filename: "unknown.interpreter", expected: nil}, 251 {name: "TestGetLanguagesByFilename_2", filename: ".bashrc", expected: []string{"Shell"}}, 252 {name: "TestGetLanguagesByFilename_3", filename: "Dockerfile", expected: []string{"Dockerfile"}}, 253 {name: "TestGetLanguagesByFilename_4", filename: "Makefile.frag", expected: []string{"Makefile"}}, 254 {name: "TestGetLanguagesByFilename_5", filename: "makefile", expected: []string{"Makefile"}}, 255 {name: "TestGetLanguagesByFilename_6", filename: "Vagrantfile", expected: []string{"Ruby"}}, 256 {name: "TestGetLanguagesByFilename_7", filename: "_vimrc", expected: []string{"Vim Script"}}, 257 {name: "TestGetLanguagesByFilename_8", filename: "pom.xml", expected: []string{"Maven POM"}}, 258 {name: "TestGetLanguagesByFilename_9", filename: "", expected: nil}, 259 } 260 261 for _, test := range tests { 262 languages := GetLanguagesByFilename(test.filename, test.content, test.candidates) 263 assert.Equal(s.T(), len(test.expected), len(languages), fmt.Sprintf("%v: number of languages = %v, expected: %v", test.name, len(languages), len(test.expected))) 264 for i := range languages { // case-insensitive name comparison 265 assert.True(s.T(), strings.EqualFold(test.expected[i], languages[i]), fmt.Sprintf("%v: languages = %v, expected: %v", test.name, languages, test.expected)) 266 } 267 } 268 } 269 270 func (s *EnryTestSuite) TestGetLanguagesByShebang() { 271 const ( 272 multilineExecHack = `#!/bin/sh 273 # Next line is comment in Tcl, but not in sh... \ 274 exec tclsh "$0" ${1+"$@"}` 275 276 multilineNoExecHack = `#!/bin/sh 277 #<<<# 278 echo "A shell script in a zkl program ($0)" 279 echo "Now run zkl <this file> with Hello World as args" 280 zkl $0 Hello World! 281 exit 282 #<<<# 283 println("The shell script says ",vm.arglist.concat(" "));` 284 ) 285 286 tests := []struct { 287 name string 288 filename string 289 content []byte 290 candidates []string 291 expected []string 292 }{ 293 {name: "TestGetLanguagesByShebang_1", content: []byte(`#!/unknown/interpreter`), expected: nil}, 294 {name: "TestGetLanguagesByShebang_2", content: []byte(`no shebang`), expected: nil}, 295 {name: "TestGetLanguagesByShebang_3", content: []byte(`#!/usr/bin/env`), expected: nil}, 296 {name: "TestGetLanguagesByShebang_4", content: []byte(`#!/usr/bin/python -tt`), expected: []string{"Python"}}, 297 {name: "TestGetLanguagesByShebang_5", content: []byte(`#!/usr/bin/env python2.6`), expected: []string{"Python"}}, 298 {name: "TestGetLanguagesByShebang_6", content: []byte(`#!/usr/bin/env perl`), expected: []string{"Perl", "Pod"}}, 299 {name: "TestGetLanguagesByShebang_7", content: []byte(`#! /bin/sh`), expected: []string{"Shell"}}, 300 {name: "TestGetLanguagesByShebang_8", content: []byte(`#!bash`), expected: []string{"Shell"}}, 301 {name: "TestGetLanguagesByShebang_9", content: []byte(multilineExecHack), expected: []string{"Tcl"}}, 302 {name: "TestGetLanguagesByShebang_10", content: []byte(multilineNoExecHack), expected: []string{"Shell"}}, 303 {name: "TestGetLanguagesByShebang_11", content: []byte(`#!/envinpath/python`), expected: []string{"Python"}}, 304 305 {name: "TestGetLanguagesByShebang_12", content: []byte(""), expected: nil}, 306 {name: "TestGetLanguagesByShebang_13", content: []byte("foo"), expected: nil}, 307 {name: "TestGetLanguagesByShebang_14", content: []byte("#bar"), expected: nil}, 308 {name: "TestGetLanguagesByShebang_15", content: []byte("#baz"), expected: nil}, 309 {name: "TestGetLanguagesByShebang_16", content: []byte("///"), expected: nil}, 310 {name: "TestGetLanguagesByShebang_17", content: []byte("\n\n\n\n\n"), expected: nil}, 311 {name: "TestGetLanguagesByShebang_18", content: []byte(" #!/usr/sbin/ruby"), expected: nil}, 312 {name: "TestGetLanguagesByShebang_19", content: []byte("\n#!/usr/sbin/ruby"), expected: nil}, 313 {name: "TestGetLanguagesByShebang_20", content: []byte("#!"), expected: nil}, 314 {name: "TestGetLanguagesByShebang_21", content: []byte("#! "), expected: nil}, 315 {name: "TestGetLanguagesByShebang_22", content: []byte("#!/usr/bin/env"), expected: nil}, 316 {name: "TestGetLanguagesByShebang_23", content: []byte("#!/usr/bin/env osascript -l JavaScript"), expected: nil}, 317 {name: "TestGetLanguagesByShebang_24", content: []byte("#!/usr/bin/env osascript -l AppleScript"), expected: nil}, 318 {name: "TestGetLanguagesByShebang_25", content: []byte("#!/usr/bin/env osascript -l foobar"), expected: nil}, 319 {name: "TestGetLanguagesByShebang_26", content: []byte("#!/usr/bin/osascript -l JavaScript"), expected: nil}, 320 {name: "TestGetLanguagesByShebang_27", content: []byte("#!/usr/bin/osascript -l foobar"), expected: nil}, 321 322 {name: "TestGetLanguagesByShebang_28", content: []byte("#!/usr/sbin/ruby\n# bar"), expected: []string{"Ruby"}}, 323 {name: "TestGetLanguagesByShebang_29", content: []byte("#!/usr/bin/ruby\n# foo"), expected: []string{"Ruby"}}, 324 {name: "TestGetLanguagesByShebang_30", content: []byte("#!/usr/sbin/ruby"), expected: []string{"Ruby"}}, 325 {name: "TestGetLanguagesByShebang_31", content: []byte("#!/usr/sbin/ruby foo bar baz\n"), expected: []string{"Ruby"}}, 326 327 {name: "TestGetLanguagesByShebang_32", content: []byte("#!/usr/bin/env Rscript\n# example R script\n#\n"), expected: []string{"R"}}, 328 {name: "TestGetLanguagesByShebang_33", content: []byte("#!/usr/bin/env ruby\n# baz"), expected: []string{"Ruby"}}, 329 330 {name: "TestGetLanguagesByShebang_34", content: []byte("#!/usr/bin/bash\n"), expected: []string{"Shell"}}, 331 {name: "TestGetLanguagesByShebang_35", content: []byte("#!/bin/sh"), expected: []string{"Shell"}}, 332 {name: "TestGetLanguagesByShebang_36", content: []byte("#!/bin/python\n# foo\n# bar\n# baz"), expected: []string{"Python"}}, 333 {name: "TestGetLanguagesByShebang_37", content: []byte("#!/usr/bin/python2.7\n\n\n\n"), expected: []string{"Python"}}, 334 {name: "TestGetLanguagesByShebang_38", content: []byte("#!/usr/bin/python3\n\n\n\n"), expected: []string{"Python"}}, 335 {name: "TestGetLanguagesByShebang_39", content: []byte("#!/usr/bin/sbcl --script\n\n"), expected: []string{"Common Lisp"}}, 336 {name: "TestGetLanguagesByShebang_40", content: []byte("#! perl"), expected: []string{"Perl", "Pod"}}, 337 338 {name: "TestGetLanguagesByShebang_41", content: []byte("#!/bin/sh\n\n\nexec ruby $0 $@"), expected: []string{"Ruby"}}, 339 {name: "TestGetLanguagesByShebang_42", content: []byte("#! /usr/bin/env A=003 B=149 C=150 D=xzd E=base64 F=tar G=gz H=head I=tail sh"), expected: []string{"Shell"}}, 340 {name: "TestGetLanguagesByShebang_43", content: []byte("#!/usr/bin/env foo=bar bar=foo python -cos=__import__(\"os\");"), expected: []string{"Python"}}, 341 {name: "TestGetLanguagesByShebang_44", content: []byte("#!/usr/bin/env osascript"), expected: []string{"AppleScript"}}, 342 {name: "TestGetLanguagesByShebang_45", content: []byte("#!/usr/bin/osascript"), expected: []string{"AppleScript"}}, 343 344 {name: "TestGetLanguagesByShebang_46", content: []byte("#!/usr/bin/env -vS ruby -wKU\nputs ?t+?e+?s+?t"), expected: []string{"Ruby"}}, 345 {name: "TestGetLanguagesByShebang_47", content: []byte("#!/usr/bin/env --split-string sed -f\ny/a/A/"), expected: []string{"sed"}}, 346 {name: "TestGetLanguagesByShebang_48", content: []byte("#!/usr/bin/env -S GH_TOKEN=ghp_*** deno run --allow-net\nconsole.log(1);"), expected: []string{"TypeScript"}}, 347 } 348 349 for _, test := range tests { 350 languages := GetLanguagesByShebang(test.filename, test.content, test.candidates) 351 assert.Equal(s.T(), test.expected, languages, fmt.Sprintf("%v: languages = %v, expected: %v", test.name, languages, test.expected)) 352 } 353 } 354 355 func (s *EnryTestSuite) TestGetLanguagesByExtension() { 356 tests := []struct { 357 name string 358 filename string 359 content []byte 360 candidates []string 361 expected []string 362 }{ 363 {name: "TestGetLanguagesByExtension_0", filename: "foo.h", expected: []string{"C", "C++", "Objective-C"}}, 364 {name: "TestGetLanguagesByExtension_1", filename: "foo.foo", expected: nil}, 365 {name: "TestGetLanguagesByExtension_2", filename: "foo.go", expected: []string{"Go"}}, 366 {name: "TestGetLanguagesByExtension_3", filename: "foo.go.php", expected: []string{"Hack", "PHP"}}, 367 {name: "TestGetLanguagesByExtension_4", filename: "", expected: nil}, 368 } 369 370 for _, test := range tests { 371 languages := GetLanguagesByExtension(test.filename, test.content, test.candidates) 372 assert.Equal(s.T(), test.expected, languages, fmt.Sprintf("%v: languages = %v, expected: %v", test.name, languages, test.expected)) 373 } 374 } 375 376 func (s *EnryTestSuite) TestGetLanguagesByManpage() { 377 tests := []struct { 378 name string 379 filename string 380 content []byte 381 candidates []string 382 expected []string 383 }{ 384 {name: "TestGetLanguagesByManpage_1", filename: "bsdmalloc.3malloc", expected: []string{"Roff Manpage", "Roff"}}, 385 {name: "TestGetLanguagesByManpage_2", filename: "dirent.h.0p", expected: []string{"Roff Manpage", "Roff"}}, 386 {name: "TestGetLanguagesByManpage_3", filename: "linguist.1gh", expected: []string{"Roff Manpage", "Roff"}}, 387 {name: "TestGetLanguagesByManpage_4", filename: "test.1.in", expected: []string{"Roff Manpage", "Roff"}}, 388 {name: "TestGetLanguagesByManpage_5", filename: "test.man.in", expected: []string{"Roff Manpage", "Roff"}}, 389 {name: "TestGetLanguagesByManpage_6", filename: "test.mdoc.in", expected: []string{"Roff Manpage", "Roff"}}, 390 {name: "TestGetLanguagesByManpage_7", filename: "foo.h", expected: nil}, 391 {name: "TestGetLanguagesByManpage_8", filename: "", expected: nil}, 392 } 393 394 for _, test := range tests { 395 languages := GetLanguagesByManpage(test.filename, test.content, test.candidates) 396 assert.Equal(s.T(), test.expected, languages, fmt.Sprintf("%v: languages = %v, expected: %v", test.name, languages, test.expected)) 397 } 398 } 399 400 func (s *EnryTestSuite) TestGetLanguagesByXML() { 401 tests := []struct { 402 name string 403 filename string 404 candidates []string 405 expected []string 406 }{ 407 {name: "TestGetLanguagesByXML_1", filename: filepath.Join(s.testFixturesDir, "XML/app.config"), expected: []string{"XML"}}, 408 {name: "TestGetLanguagesByXML_2", filename: filepath.Join(s.testFixturesDir, "XML/AssertionIDRequestOptionalAttributes.xml.svn-base"), expected: []string{"XML"}}, 409 // no XML header so should not be identified by this strategy 410 {name: "TestGetLanguagesByXML_3", filename: filepath.Join(s.samplesDir, "XML/libsomething.dll.config"), expected: nil}, 411 {name: "TestGetLanguagesByXML_4", filename: filepath.Join(s.samplesDir, "Eagle/Eagle.sch"), candidates: []string{"Eagle"}, expected: []string{"Eagle"}}, 412 } 413 414 for _, test := range tests { 415 content, err := ioutil.ReadFile(test.filename) 416 assert.NoError(s.T(), err) 417 418 languages := GetLanguagesByXML(test.filename, content, test.candidates) 419 assert.Equal(s.T(), test.expected, languages, fmt.Sprintf("%v: languages = %v, expected: %v", test.name, languages, test.expected)) 420 } 421 } 422 423 func (s *EnryTestSuite) TestGetLanguagesByClassifier() { 424 test := []struct { 425 name string 426 filename string 427 candidates []string 428 expected string 429 }{ 430 {name: "TestGetLanguagesByClassifier_1", filename: filepath.Join(s.samplesDir, "C/blob.c"), candidates: []string{"python", "ruby", "c", "c++"}, expected: "C"}, 431 {name: "TestGetLanguagesByClassifier_2", filename: filepath.Join(s.samplesDir, "C/blob.c"), candidates: nil, expected: OtherLanguage}, 432 {name: "TestGetLanguagesByClassifier_3", filename: filepath.Join(s.samplesDir, "C++/runtime-compiler.cc"), candidates: []string{}, expected: OtherLanguage}, 433 {name: "TestGetLanguagesByClassifier_4", filename: filepath.Join(s.samplesDir, "C/blob.c"), candidates: []string{"python", "ruby", "c++"}, expected: "C++"}, 434 {name: "TestGetLanguagesByClassifier_5", filename: filepath.Join(s.samplesDir, "C/blob.c"), candidates: []string{"ruby"}, expected: "Ruby"}, 435 {name: "TestGetLanguagesByClassifier_6", filename: filepath.Join(s.samplesDir, "Python/django-models-base.py"), candidates: []string{"python", "ruby", "c", "c++"}, expected: "Python"}, 436 {name: "TestGetLanguagesByClassifier_7", filename: "", candidates: []string{"python"}, expected: "Python"}, 437 } 438 439 for _, test := range test { 440 var content []byte 441 var err error 442 443 if test.filename != "" { 444 content, err = ioutil.ReadFile(test.filename) 445 assert.NoError(s.T(), err) 446 } 447 448 languages := GetLanguagesByClassifier(test.filename, content, test.candidates) 449 var language string 450 if len(languages) == 0 { 451 language = OtherLanguage 452 } else { 453 language = languages[0] 454 } 455 456 assert.Equal(s.T(), test.expected, language, fmt.Sprintf("%v: language = %v, expected: %v", test.name, language, test.expected)) 457 } 458 } 459 460 func (s *EnryTestSuite) TestGetLanguagesBySpecificClassifier() { 461 test := []struct { 462 name string 463 filename string 464 candidates []string 465 classifier classifier 466 expected string 467 }{ 468 {name: "TestGetLanguagesByClassifier_1", filename: filepath.Join(s.samplesDir, "C/blob.c"), candidates: []string{"python", "ruby", "c", "c++"}, classifier: defaultClassifier, expected: "C"}, 469 {name: "TestGetLanguagesByClassifier_2", filename: filepath.Join(s.samplesDir, "C/blob.c"), candidates: nil, classifier: defaultClassifier, expected: "C"}, 470 {name: "TestGetLanguagesByClassifier_3", filename: filepath.Join(s.samplesDir, "C++/runtime-compiler.cc"), candidates: []string{}, classifier: defaultClassifier, expected: "C++"}, 471 {name: "TestGetLanguagesByClassifier_4", filename: filepath.Join(s.samplesDir, "C/blob.c"), candidates: []string{"python", "ruby", "c++"}, classifier: defaultClassifier, expected: "C++"}, 472 {name: "TestGetLanguagesByClassifier_5", filename: filepath.Join(s.samplesDir, "C/blob.c"), candidates: []string{"ruby"}, classifier: defaultClassifier, expected: "Ruby"}, 473 {name: "TestGetLanguagesByClassifier_6", filename: filepath.Join(s.samplesDir, "Python/django-models-base.py"), candidates: []string{"python", "ruby", "c", "c++"}, classifier: defaultClassifier, expected: "Python"}, 474 {name: "TestGetLanguagesByClassifier_7", filename: os.DevNull, candidates: nil, classifier: defaultClassifier, expected: "XML"}, 475 } 476 477 for _, test := range test { 478 content, err := ioutil.ReadFile(test.filename) 479 assert.NoError(s.T(), err) 480 481 languages := getLanguagesBySpecificClassifier(content, test.candidates, test.classifier) 482 var language string 483 if len(languages) == 0 { 484 language = OtherLanguage 485 } else { 486 language = languages[0] 487 } 488 489 assert.Equal(s.T(), test.expected, language, fmt.Sprintf("%v: language = %v, expected: %v", test.name, language, test.expected)) 490 } 491 } 492 493 func (s *EnryTestSuite) TestGetLanguageExtensions() { 494 tests := []struct { 495 name string 496 language string 497 expected []string 498 }{ 499 {name: "TestGetLanguageExtensions_1", language: "foo", expected: nil}, 500 {name: "TestGetLanguageExtensions_2", language: "COBOL", expected: []string{".cob", ".cbl", ".ccp", ".cobol", ".cpy"}}, 501 {name: "TestGetLanguageExtensions_3", language: "Maven POM", expected: nil}, 502 } 503 504 for _, test := range tests { 505 extensions := GetLanguageExtensions(test.language) 506 assert.EqualValues(s.T(), test.expected, extensions, fmt.Sprintf("%v: extensions = %v, expected: %v", test.name, extensions, test.expected)) 507 } 508 } 509 510 func (s *EnryTestSuite) TestGetLanguageType() { 511 tests := []struct { 512 name string 513 language string 514 expected Type 515 }{ 516 {name: "TestGetLanguageType_1", language: "BestLanguageEver", expected: Unknown}, 517 {name: "TestGetLanguageType_2", language: "JSON", expected: Data}, 518 {name: "TestGetLanguageType_3", language: "COLLADA", expected: Data}, 519 {name: "TestGetLanguageType_4", language: "Go", expected: Programming}, 520 {name: "TestGetLanguageType_5", language: "Brainfuck", expected: Programming}, 521 {name: "TestGetLanguageType_6", language: "HTML", expected: Markup}, 522 {name: "TestGetLanguageType_7", language: "Sass", expected: Markup}, 523 {name: "TestGetLanguageType_8", language: "AsciiDoc", expected: Prose}, 524 {name: "TestGetLanguageType_9", language: "Textile", expected: Prose}, 525 } 526 527 for _, test := range tests { 528 langType := GetLanguageType(test.language) 529 assert.Equal(s.T(), test.expected, langType, fmt.Sprintf("%v: langType = %v, expected: %v", test.name, langType, test.expected)) 530 } 531 } 532 533 func (s *EnryTestSuite) TestGetLanguageGroup() { 534 tests := []struct { 535 name string 536 language string 537 expected string 538 }{ 539 {name: "TestGetLanguageGroup_1", language: "BestLanguageEver", expected: ""}, 540 {name: "TestGetLanguageGroup_2", language: "Bison", expected: "Yacc"}, 541 {name: "TestGetLanguageGroup_3", language: "HTML+PHP", expected: "HTML"}, 542 {name: "TestGetLanguageGroup_4", language: "HTML", expected: ""}, 543 } 544 545 for _, test := range tests { 546 langGroup := GetLanguageGroup(test.language) 547 assert.Equal(s.T(), test.expected, langGroup, fmt.Sprintf("%v: langGroup = %v, expected: %v", test.name, langGroup, test.expected)) 548 } 549 } 550 551 func (s *EnryTestSuite) TestGetLanguageByAlias() { 552 tests := []struct { 553 name string 554 alias string 555 expectedLang string 556 expectedOk bool 557 }{ 558 {name: "TestGetLanguageByAlias_1", alias: "BestLanguageEver", expectedLang: OtherLanguage, expectedOk: false}, 559 {name: "TestGetLanguageByAlias_2", alias: "aspx-vb", expectedLang: "ASP.NET", expectedOk: true}, 560 {name: "TestGetLanguageByAlias_3", alias: "C++", expectedLang: "C++", expectedOk: true}, 561 {name: "TestGetLanguageByAlias_4", alias: "c++", expectedLang: "C++", expectedOk: true}, 562 {name: "TestGetLanguageByAlias_5", alias: "objc", expectedLang: "Objective-C", expectedOk: true}, 563 {name: "TestGetLanguageByAlias_6", alias: "golang", expectedLang: "Go", expectedOk: true}, 564 {name: "TestGetLanguageByAlias_7", alias: "GOLANG", expectedLang: "Go", expectedOk: true}, 565 {name: "TestGetLanguageByAlias_8", alias: "bsdmake", expectedLang: "Makefile", expectedOk: true}, 566 {name: "TestGetLanguageByAlias_9", alias: "xhTmL", expectedLang: "HTML", expectedOk: true}, 567 {name: "TestGetLanguageByAlias_10", alias: "python", expectedLang: "Python", expectedOk: true}, 568 } 569 570 for _, test := range tests { 571 lang, ok := GetLanguageByAlias(test.alias) 572 assert.Equal(s.T(), test.expectedLang, lang, fmt.Sprintf("%v: lang = %v, expected: %v", test.name, lang, test.expectedLang)) 573 assert.Equal(s.T(), test.expectedOk, ok, fmt.Sprintf("%v: ok = %v, expected: %v", test.name, ok, test.expectedOk)) 574 } 575 } 576 577 func (s *EnryTestSuite) TestLinguistCorpus() { 578 const filenamesDir = "filenames" 579 var cornerCases = map[string]bool{ 580 "drop_stuff.sql": true, // https://github.com/src-d/enry/issues/194 581 "textobj-rubyblock.vba": true, // Because of unsupported negative lookahead RE syntax (https://github.com/github/linguist/blob/8083cb5a89cee2d99f5a988f165994d0243f0d1e/lib/linguist/heuristics.yml#L521) 582 // .es and .ice fail heuristics parsing, but do not fail any tests 583 } 584 585 var total, failed, ok, other int 586 var expected string 587 filepath.Walk(s.samplesDir, func(path string, f os.FileInfo, err error) error { 588 if f.IsDir() { 589 if f.Name() != filenamesDir { 590 expected, _ = data.LanguageByAlias(f.Name()) 591 } 592 593 return nil 594 } 595 596 filename := filepath.Base(path) 597 content, _ := ioutil.ReadFile(path) 598 599 total++ 600 obtained := GetLanguage(filename, content) 601 if obtained == OtherLanguage { 602 obtained = "Other" 603 other++ 604 } 605 606 var status string 607 if expected == obtained { 608 status = "ok" 609 ok++ 610 } else { 611 status = "failed" 612 failed++ 613 } 614 615 if _, ok := cornerCases[filename]; ok { 616 s.T().Logf("\t\t[considered corner case] %s\texpected: %s\tobtained: %s\tstatus: %s\n", filename, expected, obtained, status) 617 } else { 618 assert.Equal(s.T(), expected, obtained, fmt.Sprintf("%s\texpected: %s\tobtained: %s\tstatus: %s\n", filename, expected, obtained, status)) 619 } 620 621 return nil 622 }) 623 624 s.T().Logf("\t\ttotal files: %d, ok: %d, failed: %d, other: %d\n", total, ok, failed, other) 625 } 626 627 func (s *EnryTestSuite) TestGetLanguageID() { 628 tests := []struct { 629 name string 630 language string 631 expectedID int 632 found bool 633 }{ 634 {name: "TestGetLanguageID_1", language: "1C Enterprise", expectedID: 0, found: true}, 635 {name: "TestGetLanguageID_2", language: "BestLanguageEver", expectedID: 0, found: false}, 636 {name: "TestGetLanguageID_3", language: "C++", expectedID: 43, found: true}, 637 {name: "TestGetLanguageID_5", language: "Objective-C", expectedID: 257, found: true}, 638 {name: "TestGetLanguageID_6", language: "golang", expectedID: 0, found: false}, // Aliases are not supported 639 {name: "TestGetLanguageID_7", language: "Go", expectedID: 132, found: true}, 640 {name: "TestGetLanguageID_8", language: "Makefile", expectedID: 220, found: true}, 641 } 642 643 for _, test := range tests { 644 id, found := GetLanguageID(test.language) 645 assert.Equal(s.T(), test.expectedID, id, fmt.Sprintf("%v: id = %v, expected: %v", test.name, id, test.expectedID)) 646 assert.Equal(s.T(), test.found, found, fmt.Sprintf("%v: found = %t, expected: %t", test.name, found, test.found)) 647 } 648 } 649 650 func (s *EnryTestSuite) TestGetLanguageInfo() { 651 tests := []struct { 652 name string 653 language string 654 expectedID int 655 error bool 656 }{ 657 {name: "TestGetLanguageID_1", language: "1C Enterprise", expectedID: 0}, 658 {name: "TestGetLanguageID_2", language: "BestLanguageEver", error: true}, 659 {name: "TestGetLanguageID_3", language: "C++", expectedID: 43}, 660 {name: "TestGetLanguageID_5", language: "Objective-C", expectedID: 257}, 661 {name: "TestGetLanguageID_6", language: "golang", error: true}, // Aliases are not supported 662 {name: "TestGetLanguageID_7", language: "Go", expectedID: 132}, 663 {name: "TestGetLanguageID_8", language: "Makefile", expectedID: 220}, 664 } 665 666 for _, test := range tests { 667 info, err := GetLanguageInfo(test.language) 668 if test.error { 669 assert.Error(s.T(), err, "%v: expected error for %q", test.name, test.language) 670 } else { 671 assert.NoError(s.T(), err) 672 assert.Equal(s.T(), test.expectedID, info.LanguageID, fmt.Sprintf("%v: id = %v, expected: %v", test.name, info.LanguageID, test.expectedID)) 673 } 674 } 675 } 676 677 func (s *EnryTestSuite) TestGetLanguageInfoByID() { 678 tests := []struct { 679 name string 680 id int 681 expectedName string 682 error bool 683 }{ 684 {name: "TestGetLanguageID_1", id: 0, expectedName: "1C Enterprise"}, 685 {name: "TestGetLanguageID_2", id: -1, error: true}, 686 {name: "TestGetLanguageID_3", id: 43, expectedName: "C++"}, 687 {name: "TestGetLanguageID_5", id: 257, expectedName: "Objective-C"}, 688 {name: "TestGetLanguageID_7", id: 132, expectedName: "Go"}, 689 {name: "TestGetLanguageID_8", id: 220, expectedName: "Makefile"}, 690 } 691 692 for _, test := range tests { 693 info, err := GetLanguageInfoByID(test.id) 694 if test.error { 695 assert.Error(s.T(), err, "%v: expected error for %q", test.name, test.id) 696 } else { 697 assert.NoError(s.T(), err) 698 assert.Equal(s.T(), test.expectedName, info.Name, fmt.Sprintf("%v: id = %v, expected: %v", test.name, test.id, test.expectedName)) 699 } 700 } 701 }