git.templeos.me/xultist/go-enry/v2@v2.0.0-20230215093429-6ef3e87f47c0/common_test.go (about) 1 package enry 2 3 import ( 4 "fmt" 5 "io/ioutil" 6 "os" 7 "os/exec" 8 "path/filepath" 9 "strings" 10 "testing" 11 12 "github.com/go-enry/go-enry/v2/data" 13 14 "github.com/stretchr/testify/assert" 15 "github.com/stretchr/testify/require" 16 "github.com/stretchr/testify/suite" 17 ) 18 19 const linguistURL = "https://github.com/github/linguist.git" 20 const linguistClonedEnvVar = "ENRY_TEST_REPO" 21 22 // not a part of the test Suite as benchmark does not use testify 23 func maybeCloneLinguist() (string, bool, error) { 24 var err error 25 linguistTmpDir := os.Getenv(linguistClonedEnvVar) 26 isCleanupNeeded := false 27 isLinguistCloned := linguistTmpDir != "" 28 if !isLinguistCloned { 29 linguistTmpDir, err = ioutil.TempDir("", "linguist-") 30 if err != nil { 31 return "", false, err 32 } 33 34 isCleanupNeeded = true 35 cmd := exec.Command("git", "clone", "--depth", "100", linguistURL, linguistTmpDir) 36 if err := cmd.Run(); err != nil { 37 return linguistTmpDir, isCleanupNeeded, err 38 } 39 } 40 41 cwd, err := os.Getwd() 42 if err != nil { 43 return linguistTmpDir, isCleanupNeeded, err 44 } 45 46 if err = os.Chdir(linguistTmpDir); err != nil { 47 return linguistTmpDir, isCleanupNeeded, err 48 } 49 50 cmd := exec.Command("git", "checkout", data.LinguistCommit) 51 if err := cmd.Run(); err != nil { 52 return linguistTmpDir, isCleanupNeeded, err 53 } 54 55 if err = os.Chdir(cwd); err != nil { 56 return linguistTmpDir, isCleanupNeeded, err 57 } 58 return linguistTmpDir, isCleanupNeeded, nil 59 } 60 61 type enryBaseTestSuite struct { 62 suite.Suite 63 tmpLinguistDir string 64 isCleanupNeeded bool 65 samplesDir string 66 testFixturesDir string 67 } 68 69 func (s *enryBaseTestSuite) SetupSuite() { 70 var err error 71 s.tmpLinguistDir, s.isCleanupNeeded, err = maybeCloneLinguist() 72 require.NoError(s.T(), err) 73 74 s.samplesDir = filepath.Join(s.tmpLinguistDir, "samples") 75 s.testFixturesDir = filepath.Join(s.tmpLinguistDir, "test", "fixtures") 76 } 77 78 func (s *enryBaseTestSuite) TearDownSuite() { 79 if s.isCleanupNeeded { 80 err := os.RemoveAll(s.tmpLinguistDir) 81 require.NoError(s.T(), err) 82 } 83 } 84 85 type enryTestSuite struct { 86 enryBaseTestSuite 87 } 88 89 func Test_EnryTestSuite(t *testing.T) { 90 suite.Run(t, new(enryTestSuite)) 91 } 92 93 func (s *enryTestSuite) TestRegexpEdgeCases() { 94 var regexpEdgeCases = []struct { 95 lang string 96 filename string 97 }{ 98 {lang: "ActionScript", filename: "FooBar.as"}, 99 {lang: "Forth", filename: "asm.fr"}, 100 {lang: "X PixMap", filename: "cc-public_domain_mark_white.pm"}, 101 //{lang: "SQL", filename: "drop_stuff.sql"}, // https://github.com/src-d/enry/issues/194 102 {lang: "Fstar", filename: "Hacl.Spec.Bignum.Fmul.fst"}, 103 {lang: "C++", filename: "Types.h"}, 104 } 105 106 for _, r := range regexpEdgeCases { 107 filename := filepath.Join(s.tmpLinguistDir, "samples", r.lang, r.filename) 108 109 content, err := ioutil.ReadFile(filename) 110 require.NoError(s.T(), err) 111 112 lang := GetLanguage(r.filename, content) 113 s.T().Logf("File:%s, lang:%s", filename, lang) 114 115 expLang, _ := data.LanguageByAlias(r.lang) 116 require.EqualValues(s.T(), expLang, lang) 117 } 118 } 119 120 func (s *enryTestSuite) TestGetLanguage() { 121 tests := []struct { 122 name string 123 filename string 124 content []byte 125 expected string 126 safe bool 127 }{ 128 {name: "TestGetLanguage_0", filename: "foo.h", content: []byte{}, expected: "C"}, 129 {name: "TestGetLanguage_1", filename: "foo.py", content: []byte{}, expected: "Python"}, 130 {name: "TestGetLanguage_2", filename: "foo.m", content: []byte(":- module"), expected: "Mercury"}, 131 {name: "TestGetLanguage_3", filename: "foo.m", content: nil, expected: "MATLAB"}, 132 {name: "TestGetLanguage_4", filename: "foo.mo", content: []byte{0xDE, 0x12, 0x04, 0x95, 0x00, 0x00, 0x00, 0x00}, expected: OtherLanguage}, 133 {name: "TestGetLanguage_5", filename: "", content: nil, expected: OtherLanguage}, 134 } 135 136 for _, test := range tests { 137 language := GetLanguage(test.filename, test.content) 138 assert.Equal(s.T(), test.expected, language, fmt.Sprintf("%v: %v, expected: %v", test.name, language, test.expected)) 139 } 140 } 141 142 func (s *enryTestSuite) TestGetLanguages() { 143 tests := []struct { 144 name string 145 filename string 146 content []byte 147 expected []string 148 }{ 149 // With no content or filename, no language can be detected 150 {name: "TestGetLanguages_0", filename: "", content: []byte{}, expected: nil}, 151 // The strategy that will match is GetLanguagesByExtension. Lacking content, it will return those results. 152 {name: "TestGetLanguages_1", filename: "foo.h", content: []byte{}, expected: []string{"C"}}, 153 // GetLanguagesByExtension will return an unambiguous match when there is a single result. 154 {name: "TestGetLanguages_2", filename: "foo.groovy", content: []byte{}, expected: []string{"Groovy"}}, 155 // GetLanguagesByExtension will return "Rust", "RenderScript" for .rs, 156 // then GetLanguagesByContent will take the first rule that matches (in this case Rust) 157 {name: "TestGetLanguages_3", filename: "foo.rs", content: []byte("use \n#include"), expected: []string{"Rust"}}, 158 // .. and in this case, RenderScript (no content that matches a Rust regex can be included, because it runs first.) 159 {name: "TestGetLanguages_4", filename: "foo.rs", content: []byte("#include"), expected: []string{"RenderScript"}}, 160 // GetLanguagesByExtension will return "AMPL", "Linux Kernel Module", "Modula-2", "XML", 161 // then GetLanguagesByContent will ALWAYS return Linux Kernel Module and AMPL when there is no content, 162 // and no further classifier can do anything without content 163 {name: "TestGetLanguages_5", filename: "foo.mod", content: []byte{}, expected: []string{"Linux Kernel Module", "AMPL"}}, 164 // ...with some AMPL tokens, the DefaultClassifier will pick AMPL as the most likely language. 165 {name: "TestGetLanguages_6", filename: "foo.mod", content: []byte("BEAMS ROWS - TotalWeight"), expected: []string{"AMPL", "Linux Kernel Module"}}, 166 } 167 168 for _, test := range tests { 169 languages := GetLanguages(test.filename, test.content) 170 assert.Equal(s.T(), test.expected, languages, fmt.Sprintf("%v: %v, expected: %v", test.name, languages, test.expected)) 171 } 172 } 173 174 func (s *enryTestSuite) TestGetLanguagesByModelineLinguist() { 175 var modelinesDir = filepath.Join(s.tmpLinguistDir, "test", "fixtures", "Data", "Modelines") 176 177 tests := []struct { 178 name string 179 filename string 180 candidates []string 181 expected []string 182 }{ 183 // Emacs 184 {name: "TestGetLanguagesByModelineLinguist_1", filename: filepath.Join(modelinesDir, "example_smalltalk.md"), expected: []string{"Smalltalk"}}, 185 {name: "TestGetLanguagesByModelineLinguist_2", filename: filepath.Join(modelinesDir, "fundamentalEmacs.c"), expected: []string{"Text"}}, 186 {name: "TestGetLanguagesByModelineLinguist_3", filename: filepath.Join(modelinesDir, "iamphp.inc"), expected: []string{"PHP"}}, 187 {name: "TestGetLanguagesByModelineLinguist_4", filename: filepath.Join(modelinesDir, "seeplusplusEmacs1"), expected: []string{"C++"}}, 188 {name: "TestGetLanguagesByModelineLinguist_5", filename: filepath.Join(modelinesDir, "seeplusplusEmacs2"), expected: []string{"C++"}}, 189 {name: "TestGetLanguagesByModelineLinguist_6", filename: filepath.Join(modelinesDir, "seeplusplusEmacs3"), expected: []string{"C++"}}, 190 {name: "TestGetLanguagesByModelineLinguist_7", filename: filepath.Join(modelinesDir, "seeplusplusEmacs4"), expected: []string{"C++"}}, 191 {name: "TestGetLanguagesByModelineLinguist_8", filename: filepath.Join(modelinesDir, "seeplusplusEmacs5"), expected: []string{"C++"}}, 192 {name: "TestGetLanguagesByModelineLinguist_9", filename: filepath.Join(modelinesDir, "seeplusplusEmacs6"), expected: []string{"C++"}}, 193 {name: "TestGetLanguagesByModelineLinguist_10", filename: filepath.Join(modelinesDir, "seeplusplusEmacs7"), expected: []string{"C++"}}, 194 {name: "TestGetLanguagesByModelineLinguist_11", filename: filepath.Join(modelinesDir, "seeplusplusEmacs9"), expected: []string{"C++"}}, 195 {name: "TestGetLanguagesByModelineLinguist_12", filename: filepath.Join(modelinesDir, "seeplusplusEmacs10"), expected: []string{"C++"}}, 196 {name: "TestGetLanguagesByModelineLinguist_13", filename: filepath.Join(modelinesDir, "seeplusplusEmacs11"), expected: []string{"C++"}}, 197 {name: "TestGetLanguagesByModelineLinguist_14", filename: filepath.Join(modelinesDir, "seeplusplusEmacs12"), expected: []string{"C++"}}, 198 199 // Vim 200 {name: "TestGetLanguagesByModelineLinguist_15", filename: filepath.Join(modelinesDir, "seeplusplus"), expected: []string{"C++"}}, 201 {name: "TestGetLanguagesByModelineLinguist_16", filename: filepath.Join(modelinesDir, "iamjs.pl"), expected: []string{"JavaScript"}}, 202 {name: "TestGetLanguagesByModelineLinguist_17", filename: filepath.Join(modelinesDir, "iamjs2.pl"), expected: []string{"JavaScript"}}, 203 {name: "TestGetLanguagesByModelineLinguist_18", filename: filepath.Join(modelinesDir, "not_perl.pl"), expected: []string{"Prolog"}}, 204 {name: "TestGetLanguagesByModelineLinguist_19", filename: filepath.Join(modelinesDir, "ruby"), expected: []string{"Ruby"}}, 205 {name: "TestGetLanguagesByModelineLinguist_20", filename: filepath.Join(modelinesDir, "ruby2"), expected: []string{"Ruby"}}, 206 {name: "TestGetLanguagesByModelineLinguist_21", filename: filepath.Join(modelinesDir, "ruby3"), expected: []string{"Ruby"}}, 207 {name: "TestGetLanguagesByModelineLinguist_22", filename: filepath.Join(modelinesDir, "ruby4"), expected: []string{"Ruby"}}, 208 {name: "TestGetLanguagesByModelineLinguist_23", filename: filepath.Join(modelinesDir, "ruby5"), expected: []string{"Ruby"}}, 209 {name: "TestGetLanguagesByModelineLinguist_24", filename: filepath.Join(modelinesDir, "ruby6"), expected: []string{"Ruby"}}, 210 {name: "TestGetLanguagesByModelineLinguist_25", filename: filepath.Join(modelinesDir, "ruby7"), expected: []string{"Ruby"}}, 211 {name: "TestGetLanguagesByModelineLinguist_26", filename: filepath.Join(modelinesDir, "ruby8"), expected: []string{"Ruby"}}, 212 {name: "TestGetLanguagesByModelineLinguist_27", filename: filepath.Join(modelinesDir, "ruby9"), expected: []string{"Ruby"}}, 213 {name: "TestGetLanguagesByModelineLinguist_28", filename: filepath.Join(modelinesDir, "ruby10"), expected: []string{"Ruby"}}, 214 {name: "TestGetLanguagesByModelineLinguist_29", filename: filepath.Join(modelinesDir, "ruby11"), expected: []string{"Ruby"}}, 215 {name: "TestGetLanguagesByModelineLinguist_30", filename: filepath.Join(modelinesDir, "ruby12"), expected: []string{"Ruby"}}, 216 {name: "TestGetLanguagesByModelineLinguist_31", filename: filepath.Join(s.samplesDir, "C++/runtime-compiler.cc"), expected: nil}, 217 {name: "TestGetLanguagesByModelineLinguist_32", filename: "", expected: nil}, 218 } 219 220 for _, test := range tests { 221 var content []byte 222 var err error 223 224 if test.filename != "" { 225 content, err = ioutil.ReadFile(test.filename) 226 assert.NoError(s.T(), err) 227 } 228 229 languages := GetLanguagesByModeline(test.filename, content, test.candidates) 230 assert.Equal(s.T(), test.expected, languages, fmt.Sprintf("%v: languages = %v, expected: %v", test.name, languages, test.expected)) 231 } 232 } 233 234 func (s *enryTestSuite) TestGetLanguagesByModeline() { 235 const ( 236 wrongVim = `# vim: set syntax=ruby ft =python filetype=perl :` 237 rightVim = `/* vim: set syntax=python ft =python filetype=python */` 238 noLangVim = `/* vim: set shiftwidth=4 softtabstop=0 cindent cinoptions={1s: */` 239 ) 240 241 tests := []struct { 242 name string 243 filename string 244 content []byte 245 candidates []string 246 expected []string 247 }{ 248 {name: "TestGetLanguagesByModeline_1", content: []byte(wrongVim), expected: nil}, 249 {name: "TestGetLanguagesByModeline_2", content: []byte(rightVim), expected: []string{"Python"}}, 250 {name: "TestGetLanguagesByModeline_3", content: []byte(noLangVim), expected: nil}, 251 {name: "TestGetLanguagesByModeline_4", content: nil, expected: nil}, 252 {name: "TestGetLanguagesByModeline_5", content: []byte{}, expected: nil}, 253 } 254 255 for _, test := range tests { 256 languages := GetLanguagesByModeline(test.filename, test.content, test.candidates) 257 assert.Equal(s.T(), test.expected, languages, fmt.Sprintf("%v: languages = %v, expected: %v", test.name, languages, test.expected)) 258 } 259 } 260 261 func (s *enryTestSuite) TestGetLanguagesByFilename() { 262 tests := []struct { 263 name string 264 filename string 265 content []byte 266 candidates []string 267 expected []string 268 }{ 269 {name: "TestGetLanguagesByFilename_1", filename: "unknown.interpreter", expected: nil}, 270 {name: "TestGetLanguagesByFilename_2", filename: ".bashrc", expected: []string{"Shell"}}, 271 {name: "TestGetLanguagesByFilename_3", filename: "Dockerfile", expected: []string{"Dockerfile"}}, 272 {name: "TestGetLanguagesByFilename_4", filename: "Makefile.frag", expected: []string{"Makefile"}}, 273 {name: "TestGetLanguagesByFilename_5", filename: "makefile", expected: []string{"Makefile"}}, 274 {name: "TestGetLanguagesByFilename_6", filename: "Vagrantfile", expected: []string{"Ruby"}}, 275 {name: "TestGetLanguagesByFilename_7", filename: "_vimrc", expected: []string{"Vim Script"}}, 276 {name: "TestGetLanguagesByFilename_8", filename: "pom.xml", expected: []string{"Maven POM"}}, 277 {name: "TestGetLanguagesByFilename_9", filename: "", expected: nil}, 278 } 279 280 for _, test := range tests { 281 languages := GetLanguagesByFilename(test.filename, test.content, test.candidates) 282 assert.Equal(s.T(), len(test.expected), len(languages), fmt.Sprintf("%v: number of languages = %v, expected: %v", test.name, len(languages), len(test.expected))) 283 for i := range languages { // case-insensitive name comparison 284 assert.True(s.T(), strings.EqualFold(test.expected[i], languages[i]), fmt.Sprintf("%v: languages = %v, expected: %v", test.name, languages, test.expected)) 285 } 286 } 287 } 288 289 func (s *enryTestSuite) TestGetLanguagesByShebang() { 290 const ( 291 multilineExecHack = `#!/bin/sh 292 # Next line is comment in Tcl, but not in sh... \ 293 exec tclsh "$0" ${1+"$@"}` 294 295 multilineNoExecHack = `#!/bin/sh 296 #<<<# 297 echo "A shell script in a zkl program ($0)" 298 echo "Now run zkl <this file> with Hello World as args" 299 zkl $0 Hello World! 300 exit 301 #<<<# 302 println("The shell script says ",vm.arglist.concat(" "));` 303 ) 304 305 tests := []struct { 306 name string 307 filename string 308 content []byte 309 candidates []string 310 expected []string 311 }{ 312 {name: "TestGetLanguagesByShebang_1", content: []byte(`#!/unknown/interpreter`), expected: nil}, 313 {name: "TestGetLanguagesByShebang_2", content: []byte(`no shebang`), expected: nil}, 314 {name: "TestGetLanguagesByShebang_3", content: []byte(`#!/usr/bin/env`), expected: nil}, 315 {name: "TestGetLanguagesByShebang_4", content: []byte(`#!/usr/bin/python -tt`), expected: []string{"Python"}}, 316 {name: "TestGetLanguagesByShebang_5", content: []byte(`#!/usr/bin/env python2.6`), expected: []string{"Python"}}, 317 {name: "TestGetLanguagesByShebang_6", content: []byte(`#!/usr/bin/env perl`), expected: []string{"Perl", "Pod"}}, 318 {name: "TestGetLanguagesByShebang_7", content: []byte(`#! /bin/sh`), expected: []string{"Shell"}}, 319 {name: "TestGetLanguagesByShebang_8", content: []byte(`#!bash`), expected: []string{"Shell"}}, 320 {name: "TestGetLanguagesByShebang_9", content: []byte(multilineExecHack), expected: []string{"Tcl"}}, 321 {name: "TestGetLanguagesByShebang_10", content: []byte(multilineNoExecHack), expected: []string{"Shell"}}, 322 {name: "TestGetLanguagesByShebang_11", content: []byte(`#!/envinpath/python`), expected: []string{"Python"}}, 323 324 {name: "TestGetLanguagesByShebang_12", content: []byte(""), expected: nil}, 325 {name: "TestGetLanguagesByShebang_13", content: []byte("foo"), expected: nil}, 326 {name: "TestGetLanguagesByShebang_14", content: []byte("#bar"), expected: nil}, 327 {name: "TestGetLanguagesByShebang_15", content: []byte("#baz"), expected: nil}, 328 {name: "TestGetLanguagesByShebang_16", content: []byte("///"), expected: nil}, 329 {name: "TestGetLanguagesByShebang_17", content: []byte("\n\n\n\n\n"), expected: nil}, 330 {name: "TestGetLanguagesByShebang_18", content: []byte(" #!/usr/sbin/ruby"), expected: nil}, 331 {name: "TestGetLanguagesByShebang_19", content: []byte("\n#!/usr/sbin/ruby"), expected: nil}, 332 {name: "TestGetLanguagesByShebang_20", content: []byte("#!"), expected: nil}, 333 {name: "TestGetLanguagesByShebang_21", content: []byte("#! "), expected: nil}, 334 {name: "TestGetLanguagesByShebang_22", content: []byte("#!/usr/bin/env"), expected: nil}, 335 {name: "TestGetLanguagesByShebang_23", content: []byte("#!/usr/bin/env osascript -l JavaScript"), expected: nil}, 336 {name: "TestGetLanguagesByShebang_24", content: []byte("#!/usr/bin/env osascript -l AppleScript"), expected: nil}, 337 {name: "TestGetLanguagesByShebang_25", content: []byte("#!/usr/bin/env osascript -l foobar"), expected: nil}, 338 {name: "TestGetLanguagesByShebang_26", content: []byte("#!/usr/bin/osascript -l JavaScript"), expected: nil}, 339 {name: "TestGetLanguagesByShebang_27", content: []byte("#!/usr/bin/osascript -l foobar"), expected: nil}, 340 341 {name: "TestGetLanguagesByShebang_28", content: []byte("#!/usr/sbin/ruby\n# bar"), expected: []string{"Ruby"}}, 342 {name: "TestGetLanguagesByShebang_29", content: []byte("#!/usr/bin/ruby\n# foo"), expected: []string{"Ruby"}}, 343 {name: "TestGetLanguagesByShebang_30", content: []byte("#!/usr/sbin/ruby"), expected: []string{"Ruby"}}, 344 {name: "TestGetLanguagesByShebang_31", content: []byte("#!/usr/sbin/ruby foo bar baz\n"), expected: []string{"Ruby"}}, 345 346 {name: "TestGetLanguagesByShebang_32", content: []byte("#!/usr/bin/env Rscript\n# example R script\n#\n"), expected: []string{"R"}}, 347 {name: "TestGetLanguagesByShebang_33", content: []byte("#!/usr/bin/env ruby\n# baz"), expected: []string{"Ruby"}}, 348 349 {name: "TestGetLanguagesByShebang_34", content: []byte("#!/usr/bin/bash\n"), expected: []string{"Shell"}}, 350 {name: "TestGetLanguagesByShebang_35", content: []byte("#!/bin/sh"), expected: []string{"Shell"}}, 351 {name: "TestGetLanguagesByShebang_36", content: []byte("#!/bin/python\n# foo\n# bar\n# baz"), expected: []string{"Python"}}, 352 {name: "TestGetLanguagesByShebang_37", content: []byte("#!/usr/bin/python2.7\n\n\n\n"), expected: []string{"Python"}}, 353 {name: "TestGetLanguagesByShebang_38", content: []byte("#!/usr/bin/python3\n\n\n\n"), expected: []string{"Python"}}, 354 {name: "TestGetLanguagesByShebang_39", content: []byte("#!/usr/bin/sbcl --script\n\n"), expected: []string{"Common Lisp"}}, 355 {name: "TestGetLanguagesByShebang_40", content: []byte("#! perl"), expected: []string{"Perl", "Pod"}}, 356 357 {name: "TestGetLanguagesByShebang_41", content: []byte("#!/bin/sh\n\n\nexec ruby $0 $@"), expected: []string{"Ruby"}}, 358 {name: "TestGetLanguagesByShebang_42", content: []byte("#! /usr/bin/env A=003 B=149 C=150 D=xzd E=base64 F=tar G=gz H=head I=tail sh"), expected: []string{"Shell"}}, 359 {name: "TestGetLanguagesByShebang_43", content: []byte("#!/usr/bin/env foo=bar bar=foo python -cos=__import__(\"os\");"), expected: []string{"Python"}}, 360 {name: "TestGetLanguagesByShebang_44", content: []byte("#!/usr/bin/env osascript"), expected: []string{"AppleScript"}}, 361 {name: "TestGetLanguagesByShebang_45", content: []byte("#!/usr/bin/osascript"), expected: []string{"AppleScript"}}, 362 363 {name: "TestGetLanguagesByShebang_46", content: []byte("#!/usr/bin/env -vS ruby -wKU\nputs ?t+?e+?s+?t"), expected: []string{"Ruby"}}, 364 {name: "TestGetLanguagesByShebang_47", content: []byte("#!/usr/bin/env --split-string sed -f\ny/a/A/"), expected: []string{"sed"}}, 365 {name: "TestGetLanguagesByShebang_48", content: []byte("#!/usr/bin/env -S GH_TOKEN=ghp_*** deno run --allow-net\nconsole.log(1);"), expected: []string{"TypeScript"}}, 366 } 367 368 for _, test := range tests { 369 languages := GetLanguagesByShebang(test.filename, test.content, test.candidates) 370 assert.Equal(s.T(), test.expected, languages, fmt.Sprintf("%v: languages = %v, expected: %v", test.name, languages, test.expected)) 371 } 372 } 373 374 func (s *enryTestSuite) TestGetLanguageByContent() { 375 tests := []struct { 376 name string 377 filename string 378 content []byte 379 expected string 380 }{ 381 {name: "TestGetLanguageByContent_0", filename: "", expected: ""}, 382 {name: "TestGetLanguageByContent_1", filename: "foo.cpp", content: []byte("int main() { return 0; }"), expected: ""}, // as .cpp is unambiguous ¯\_(ツ)_/¯ 383 {name: "TestGetLanguageByContent_2", filename: "foo.h", content: []byte("int main() { return 0; }"), expected: "C"}, // C, as it does not match any of the heuristics for C++ or Objective-C 384 {name: "TestGetLanguageByContent_3", filename: "foo.h", content: []byte("#include <string>\n int main() { return 0; }"), expected: "C++"}, // '#include <string>' matches regex heuristic 385 } 386 387 for _, test := range tests { 388 languages, _ := GetLanguageByContent(test.filename, test.content) 389 assert.Equal(s.T(), test.expected, languages, fmt.Sprintf("%v: languages = %v, expected: %v", test.name, languages, test.expected)) 390 } 391 } 392 393 func (s *enryTestSuite) TestGetLanguagesByExtension() { 394 tests := []struct { 395 name string 396 filename string 397 content []byte 398 candidates []string 399 expected []string 400 }{ 401 {name: "TestGetLanguagesByExtension_0", filename: "foo.h", expected: []string{"C", "C++", "Objective-C"}}, 402 {name: "TestGetLanguagesByExtension_1", filename: "foo.foo", expected: nil}, 403 {name: "TestGetLanguagesByExtension_2", filename: "foo.go", expected: []string{"Go"}}, 404 {name: "TestGetLanguagesByExtension_3", filename: "foo.go.php", expected: []string{"Hack", "PHP"}}, 405 {name: "TestGetLanguagesByExtension_4", filename: "", expected: nil}, 406 } 407 408 for _, test := range tests { 409 languages := GetLanguagesByExtension(test.filename, test.content, test.candidates) 410 assert.Equal(s.T(), test.expected, languages, fmt.Sprintf("%v: languages = %v, expected: %v", test.name, languages, test.expected)) 411 } 412 } 413 414 func (s *enryTestSuite) TestGetLanguagesByManpage() { 415 tests := []struct { 416 name string 417 filename string 418 content []byte 419 candidates []string 420 expected []string 421 }{ 422 {name: "TestGetLanguagesByManpage_1", filename: "bsdmalloc.3malloc", expected: []string{"Roff Manpage", "Roff"}}, 423 {name: "TestGetLanguagesByManpage_2", filename: "dirent.h.0p", expected: []string{"Roff Manpage", "Roff"}}, 424 {name: "TestGetLanguagesByManpage_3", filename: "linguist.1gh", expected: []string{"Roff Manpage", "Roff"}}, 425 {name: "TestGetLanguagesByManpage_4", filename: "test.1.in", expected: []string{"Roff Manpage", "Roff"}}, 426 {name: "TestGetLanguagesByManpage_5", filename: "test.man.in", expected: []string{"Roff Manpage", "Roff"}}, 427 {name: "TestGetLanguagesByManpage_6", filename: "test.mdoc.in", expected: []string{"Roff Manpage", "Roff"}}, 428 {name: "TestGetLanguagesByManpage_7", filename: "foo.h", expected: nil}, 429 {name: "TestGetLanguagesByManpage_8", filename: "", expected: nil}, 430 } 431 432 for _, test := range tests { 433 languages := GetLanguagesByManpage(test.filename, test.content, test.candidates) 434 assert.Equal(s.T(), test.expected, languages, fmt.Sprintf("%v: languages = %v, expected: %v", test.name, languages, test.expected)) 435 } 436 } 437 438 func (s *enryTestSuite) TestGetLanguagesByXML() { 439 tests := []struct { 440 name string 441 filename string 442 candidates []string 443 expected []string 444 }{ 445 {name: "TestGetLanguagesByXML_1", filename: filepath.Join(s.testFixturesDir, "XML/app.config"), expected: []string{"XML"}}, 446 {name: "TestGetLanguagesByXML_2", filename: filepath.Join(s.testFixturesDir, "XML/AssertionIDRequestOptionalAttributes.xml.svn-base"), expected: []string{"XML"}}, 447 // no XML header so should not be identified by this strategy 448 {name: "TestGetLanguagesByXML_3", filename: filepath.Join(s.samplesDir, "XML/libsomething.dll.config"), expected: nil}, 449 {name: "TestGetLanguagesByXML_4", filename: filepath.Join(s.samplesDir, "Eagle/Eagle.sch"), candidates: []string{"Eagle"}, expected: []string{"Eagle"}}, 450 } 451 452 for _, test := range tests { 453 content, err := ioutil.ReadFile(test.filename) 454 assert.NoError(s.T(), err) 455 456 languages := GetLanguagesByXML(test.filename, content, test.candidates) 457 assert.Equal(s.T(), test.expected, languages, fmt.Sprintf("%v: languages = %v, expected: %v", test.name, languages, test.expected)) 458 } 459 } 460 461 func (s *enryTestSuite) TestGetLanguagesByClassifier() { 462 test := []struct { 463 name string 464 filename string 465 candidates []string 466 expected string 467 }{ 468 {name: "TestGetLanguagesByClassifier_1", filename: filepath.Join(s.samplesDir, "C/blob.c"), candidates: []string{"python", "ruby", "c", "c++"}, expected: "C"}, 469 {name: "TestGetLanguagesByClassifier_2", filename: filepath.Join(s.samplesDir, "C/blob.c"), candidates: nil, expected: OtherLanguage}, 470 {name: "TestGetLanguagesByClassifier_3", filename: filepath.Join(s.samplesDir, "C++/runtime-compiler.cc"), candidates: []string{}, expected: OtherLanguage}, 471 {name: "TestGetLanguagesByClassifier_4", filename: filepath.Join(s.samplesDir, "C/blob.c"), candidates: []string{"python", "ruby", "c++"}, expected: "C++"}, 472 {name: "TestGetLanguagesByClassifier_5", filename: filepath.Join(s.samplesDir, "C/blob.c"), candidates: []string{"ruby"}, expected: "Ruby"}, 473 {name: "TestGetLanguagesByClassifier_6", filename: filepath.Join(s.samplesDir, "Python/django-models-base.py"), candidates: []string{"python", "ruby", "c", "c++"}, expected: "Python"}, 474 {name: "TestGetLanguagesByClassifier_7", filename: "", candidates: []string{"python"}, expected: "Python"}, 475 } 476 477 for _, test := range test { 478 var content []byte 479 var err error 480 481 if test.filename != "" { 482 content, err = ioutil.ReadFile(test.filename) 483 assert.NoError(s.T(), err) 484 } 485 486 languages := GetLanguagesByClassifier(test.filename, content, test.candidates) 487 var language string 488 if len(languages) == 0 { 489 language = OtherLanguage 490 } else { 491 language = languages[0] 492 } 493 494 assert.Equal(s.T(), test.expected, language, fmt.Sprintf("%v: language = %v, expected: %v", test.name, language, test.expected)) 495 } 496 } 497 498 func (s *enryTestSuite) TestGetLanguagesBySpecificClassifier() { 499 test := []struct { 500 name string 501 filename string 502 candidates []string 503 classifier classifier 504 expected string 505 }{ 506 {name: "TestGetLanguagesByClassifier_1", filename: filepath.Join(s.samplesDir, "C/blob.c"), candidates: []string{"python", "ruby", "c", "c++"}, classifier: defaultClassifier, expected: "C"}, 507 {name: "TestGetLanguagesByClassifier_2", filename: filepath.Join(s.samplesDir, "C/blob.c"), candidates: nil, classifier: defaultClassifier, expected: "C"}, 508 {name: "TestGetLanguagesByClassifier_3", filename: filepath.Join(s.samplesDir, "C++/runtime-compiler.cc"), candidates: []string{}, classifier: defaultClassifier, expected: "C++"}, 509 {name: "TestGetLanguagesByClassifier_4", filename: filepath.Join(s.samplesDir, "C/blob.c"), candidates: []string{"python", "ruby", "c++"}, classifier: defaultClassifier, expected: "C++"}, 510 {name: "TestGetLanguagesByClassifier_5", filename: filepath.Join(s.samplesDir, "C/blob.c"), candidates: []string{"ruby"}, classifier: defaultClassifier, expected: "Ruby"}, 511 {name: "TestGetLanguagesByClassifier_6", filename: filepath.Join(s.samplesDir, "Python/django-models-base.py"), candidates: []string{"python", "ruby", "c", "c++"}, classifier: defaultClassifier, expected: "Python"}, 512 {name: "TestGetLanguagesByClassifier_7", filename: os.DevNull, candidates: nil, classifier: defaultClassifier, expected: "XML"}, 513 } 514 515 for _, test := range test { 516 content, err := ioutil.ReadFile(test.filename) 517 assert.NoError(s.T(), err) 518 519 languages := getLanguagesBySpecificClassifier(content, test.candidates, test.classifier) 520 var language string 521 if len(languages) == 0 { 522 language = OtherLanguage 523 } else { 524 language = languages[0] 525 } 526 527 assert.Equal(s.T(), test.expected, language, fmt.Sprintf("%v: language = %v, expected: %v", test.name, language, test.expected)) 528 } 529 } 530 531 func (s *enryTestSuite) TestGetLanguageExtensions() { 532 tests := []struct { 533 name string 534 language string 535 expected []string 536 }{ 537 {name: "TestGetLanguageExtensions_1", language: "foo", expected: nil}, 538 {name: "TestGetLanguageExtensions_2", language: "COBOL", expected: []string{".cob", ".cbl", ".ccp", ".cobol", ".cpy"}}, 539 {name: "TestGetLanguageExtensions_3", language: "Maven POM", expected: nil}, 540 } 541 542 for _, test := range tests { 543 extensions := GetLanguageExtensions(test.language) 544 assert.EqualValues(s.T(), test.expected, extensions, fmt.Sprintf("%v: extensions = %v, expected: %v", test.name, extensions, test.expected)) 545 } 546 } 547 548 func (s *enryTestSuite) TestGetLanguageType() { 549 tests := []struct { 550 name string 551 language string 552 expected Type 553 }{ 554 {name: "TestGetLanguageType_1", language: "BestLanguageEver", expected: Unknown}, 555 {name: "TestGetLanguageType_2", language: "JSON", expected: Data}, 556 {name: "TestGetLanguageType_3", language: "COLLADA", expected: Data}, 557 {name: "TestGetLanguageType_4", language: "Go", expected: Programming}, 558 {name: "TestGetLanguageType_5", language: "Brainfuck", expected: Programming}, 559 {name: "TestGetLanguageType_6", language: "HTML", expected: Markup}, 560 {name: "TestGetLanguageType_7", language: "Sass", expected: Markup}, 561 {name: "TestGetLanguageType_8", language: "AsciiDoc", expected: Prose}, 562 {name: "TestGetLanguageType_9", language: "Textile", expected: Prose}, 563 } 564 565 for _, test := range tests { 566 langType := GetLanguageType(test.language) 567 assert.Equal(s.T(), test.expected, langType, fmt.Sprintf("%v: langType = %v, expected: %v", test.name, langType, test.expected)) 568 } 569 } 570 571 func (s *enryTestSuite) TestGetLanguageGroup() { 572 tests := []struct { 573 name string 574 language string 575 expected string 576 }{ 577 {name: "TestGetLanguageGroup_1", language: "BestLanguageEver", expected: ""}, 578 {name: "TestGetLanguageGroup_2", language: "Bison", expected: "Yacc"}, 579 {name: "TestGetLanguageGroup_3", language: "HTML+PHP", expected: "HTML"}, 580 {name: "TestGetLanguageGroup_4", language: "HTML", expected: ""}, 581 } 582 583 for _, test := range tests { 584 langGroup := GetLanguageGroup(test.language) 585 assert.Equal(s.T(), test.expected, langGroup, fmt.Sprintf("%v: langGroup = %v, expected: %v", test.name, langGroup, test.expected)) 586 } 587 } 588 589 func (s *enryTestSuite) TestGetLanguageByAlias() { 590 tests := []struct { 591 name string 592 alias string 593 expectedLang string 594 expectedOk bool 595 }{ 596 {name: "TestGetLanguageByAlias_1", alias: "BestLanguageEver", expectedLang: OtherLanguage, expectedOk: false}, 597 {name: "TestGetLanguageByAlias_2", alias: "aspx-vb", expectedLang: "ASP.NET", expectedOk: true}, 598 {name: "TestGetLanguageByAlias_3", alias: "C++", expectedLang: "C++", expectedOk: true}, 599 {name: "TestGetLanguageByAlias_4", alias: "c++", expectedLang: "C++", expectedOk: true}, 600 {name: "TestGetLanguageByAlias_5", alias: "objc", expectedLang: "Objective-C", expectedOk: true}, 601 {name: "TestGetLanguageByAlias_6", alias: "golang", expectedLang: "Go", expectedOk: true}, 602 {name: "TestGetLanguageByAlias_7", alias: "GOLANG", expectedLang: "Go", expectedOk: true}, 603 {name: "TestGetLanguageByAlias_8", alias: "bsdmake", expectedLang: "Makefile", expectedOk: true}, 604 {name: "TestGetLanguageByAlias_9", alias: "xhTmL", expectedLang: "HTML", expectedOk: true}, 605 {name: "TestGetLanguageByAlias_10", alias: "python", expectedLang: "Python", expectedOk: true}, 606 } 607 608 for _, test := range tests { 609 lang, ok := GetLanguageByAlias(test.alias) 610 assert.Equal(s.T(), test.expectedLang, lang, fmt.Sprintf("%v: lang = %v, expected: %v", test.name, lang, test.expectedLang)) 611 assert.Equal(s.T(), test.expectedOk, ok, fmt.Sprintf("%v: ok = %v, expected: %v", test.name, ok, test.expectedOk)) 612 } 613 } 614 615 func (s *enryTestSuite) TestGetLanguageID() { 616 tests := []struct { 617 name string 618 language string 619 expectedID int 620 found bool 621 }{ 622 {name: "TestGetLanguageID_1", language: "1C Enterprise", expectedID: 0, found: true}, 623 {name: "TestGetLanguageID_2", language: "BestLanguageEver", expectedID: 0, found: false}, 624 {name: "TestGetLanguageID_3", language: "C++", expectedID: 43, found: true}, 625 {name: "TestGetLanguageID_5", language: "Objective-C", expectedID: 257, found: true}, 626 {name: "TestGetLanguageID_6", language: "golang", expectedID: 0, found: false}, // Aliases are not supported 627 {name: "TestGetLanguageID_7", language: "Go", expectedID: 132, found: true}, 628 {name: "TestGetLanguageID_8", language: "Makefile", expectedID: 220, found: true}, 629 } 630 631 for _, test := range tests { 632 id, found := GetLanguageID(test.language) 633 assert.Equal(s.T(), test.expectedID, id, fmt.Sprintf("%v: id = %v, expected: %v", test.name, id, test.expectedID)) 634 assert.Equal(s.T(), test.found, found, fmt.Sprintf("%v: found = %t, expected: %t", test.name, found, test.found)) 635 } 636 } 637 638 func (s *enryTestSuite) TestGetLanguageInfo() { 639 tests := []struct { 640 name string 641 language string 642 expectedID int 643 error bool 644 }{ 645 {name: "TestGetLanguageID_1", language: "1C Enterprise", expectedID: 0}, 646 {name: "TestGetLanguageID_2", language: "BestLanguageEver", error: true}, 647 {name: "TestGetLanguageID_3", language: "C++", expectedID: 43}, 648 {name: "TestGetLanguageID_5", language: "Objective-C", expectedID: 257}, 649 {name: "TestGetLanguageID_6", language: "golang", error: true}, // Aliases are not supported 650 {name: "TestGetLanguageID_7", language: "Go", expectedID: 132}, 651 {name: "TestGetLanguageID_8", language: "Makefile", expectedID: 220}, 652 } 653 654 for _, test := range tests { 655 info, err := GetLanguageInfo(test.language) 656 if test.error { 657 assert.Error(s.T(), err, "%v: expected error for %q", test.name, test.language) 658 } else { 659 assert.NoError(s.T(), err) 660 assert.Equal(s.T(), test.expectedID, info.LanguageID, fmt.Sprintf("%v: id = %v, expected: %v", test.name, info.LanguageID, test.expectedID)) 661 } 662 } 663 } 664 665 func (s *enryTestSuite) TestGetLanguageInfoByID() { 666 tests := []struct { 667 name string 668 id int 669 expectedName string 670 error bool 671 }{ 672 {name: "TestGetLanguageID_1", id: 0, expectedName: "1C Enterprise"}, 673 {name: "TestGetLanguageID_2", id: -1, error: true}, 674 {name: "TestGetLanguageID_3", id: 43, expectedName: "C++"}, 675 {name: "TestGetLanguageID_5", id: 257, expectedName: "Objective-C"}, 676 {name: "TestGetLanguageID_7", id: 132, expectedName: "Go"}, 677 {name: "TestGetLanguageID_8", id: 220, expectedName: "Makefile"}, 678 } 679 680 for _, test := range tests { 681 info, err := GetLanguageInfoByID(test.id) 682 if test.error { 683 assert.Error(s.T(), err, "%v: expected error for %q", test.name, test.id) 684 } else { 685 assert.NoError(s.T(), err) 686 assert.Equal(s.T(), test.expectedName, info.Name, fmt.Sprintf("%v: id = %v, expected: %v", test.name, test.id, test.expectedName)) 687 } 688 } 689 }