github.com/go-enry/go-enry@v1.7.3/common_test.go (about)

     1  package enry
     2  
     3  import (
     4  	"fmt"
     5  	"io/ioutil"
     6  	"os"
     7  	"os/exec"
     8  	"path/filepath"
     9  	"testing"
    10  
    11  	"gopkg.in/src-d/enry.v1/data"
    12  
    13  	"github.com/stretchr/testify/assert"
    14  	"github.com/stretchr/testify/require"
    15  	"github.com/stretchr/testify/suite"
    16  )
    17  
    18  const linguistURL = "https://github.com/github/linguist.git"
    19  const linguistClonedEnvVar = "ENRY_TEST_REPO"
    20  
    21  type EnryTestSuite struct {
    22  	suite.Suite
    23  	tmpLinguist string
    24  	needToClone bool
    25  	samplesDir  string
    26  }
    27  
    28  func (s *EnryTestSuite) TestRegexpEdgeCases() {
    29  	var regexpEdgeCases = []struct {
    30  		lang     string
    31  		filename string
    32  	}{
    33  		{lang: "ActionScript", filename: "FooBar.as"},
    34  		{lang: "Forth", filename: "asm.fr"},
    35  		{lang: "X PixMap", filename: "cc-public_domain_mark_white.pm"},
    36  		//{lang: "SQL", filename: "drop_stuff.sql"}, // https://github.com/src-d/enry/issues/194
    37  		{lang: "Fstar", filename: "Hacl.Spec.Bignum.Fmul.fst"},
    38  		{lang: "C++", filename: "Types.h"},
    39  	}
    40  
    41  	for _, r := range regexpEdgeCases {
    42  		filename := fmt.Sprintf("%s/samples/%s/%s", s.tmpLinguist, r.lang, r.filename)
    43  
    44  		content, err := ioutil.ReadFile(filename)
    45  		require.NoError(s.T(), err)
    46  
    47  		lang := GetLanguage(r.filename, content)
    48  		s.T().Logf("File:%s, lang:%s", filename, lang)
    49  
    50  		expLang, _ := data.LanguageByAlias(r.lang)
    51  		require.EqualValues(s.T(), expLang, lang)
    52  	}
    53  }
    54  
    55  func Test_EnryTestSuite(t *testing.T) {
    56  	suite.Run(t, new(EnryTestSuite))
    57  }
    58  
    59  func (s *EnryTestSuite) SetupSuite() {
    60  	var err error
    61  	s.tmpLinguist = os.Getenv(linguistClonedEnvVar)
    62  	s.needToClone = s.tmpLinguist == ""
    63  	if s.needToClone {
    64  		s.tmpLinguist, err = ioutil.TempDir("", "linguist-")
    65  		require.NoError(s.T(), err)
    66  		s.T().Logf("Cloning Linguist repo to '%s' as %s was not set\n",
    67  			s.tmpLinguist, linguistClonedEnvVar)
    68  		cmd := exec.Command("git", "clone", linguistURL, s.tmpLinguist)
    69  		err = cmd.Run()
    70  		require.NoError(s.T(), err)
    71  	}
    72  	s.samplesDir = filepath.Join(s.tmpLinguist, "samples")
    73  	s.T().Logf("using samples from %s", s.samplesDir)
    74  
    75  	cwd, err := os.Getwd()
    76  	assert.NoError(s.T(), err)
    77  
    78  	err = os.Chdir(s.tmpLinguist)
    79  	assert.NoError(s.T(), err)
    80  
    81  	cmd := exec.Command("git", "checkout", data.LinguistCommit)
    82  	err = cmd.Run()
    83  	assert.NoError(s.T(), err)
    84  
    85  	err = os.Chdir(cwd)
    86  	assert.NoError(s.T(), err)
    87  }
    88  
    89  func (s *EnryTestSuite) TearDownSuite() {
    90  	if s.needToClone {
    91  		err := os.RemoveAll(s.tmpLinguist)
    92  		assert.NoError(s.T(), err)
    93  	}
    94  }
    95  
    96  func (s *EnryTestSuite) TestGetLanguage() {
    97  	tests := []struct {
    98  		name     string
    99  		filename string
   100  		content  []byte
   101  		expected string
   102  		safe     bool
   103  	}{
   104  		{name: "TestGetLanguage_0", filename: "foo.h", content: []byte{}, expected: "C"},
   105  		{name: "TestGetLanguage_1", filename: "foo.py", content: []byte{}, expected: "Python"},
   106  		{name: "TestGetLanguage_2", filename: "foo.m", content: []byte(":- module"), expected: "Mercury"},
   107  		{name: "TestGetLanguage_3", filename: "foo.m", content: nil, expected: "MATLAB"},
   108  		{name: "TestGetLanguage_4", filename: "foo.mo", content: []byte{0xDE, 0x12, 0x04, 0x95, 0x00, 0x00, 0x00, 0x00}, expected: OtherLanguage},
   109  		{name: "TestGetLanguage_5", filename: "", content: nil, expected: OtherLanguage},
   110  	}
   111  
   112  	for _, test := range tests {
   113  		language := GetLanguage(test.filename, test.content)
   114  		assert.Equal(s.T(), test.expected, language, fmt.Sprintf("%v: %v, expected: %v", test.name, language, test.expected))
   115  	}
   116  }
   117  
   118  func (s *EnryTestSuite) TestGetLanguagesByModelineLinguist() {
   119  	var modelinesDir = filepath.Join(s.tmpLinguist, "test/fixtures/Data/Modelines")
   120  
   121  	tests := []struct {
   122  		name       string
   123  		filename   string
   124  		candidates []string
   125  		expected   []string
   126  	}{
   127  		// Emacs
   128  		{name: "TestGetLanguagesByModelineLinguist_1", filename: filepath.Join(modelinesDir, "example_smalltalk.md"), expected: []string{"Smalltalk"}},
   129  		{name: "TestGetLanguagesByModelineLinguist_2", filename: filepath.Join(modelinesDir, "fundamentalEmacs.c"), expected: []string{"Text"}},
   130  		{name: "TestGetLanguagesByModelineLinguist_3", filename: filepath.Join(modelinesDir, "iamphp.inc"), expected: []string{"PHP"}},
   131  		{name: "TestGetLanguagesByModelineLinguist_4", filename: filepath.Join(modelinesDir, "seeplusplusEmacs1"), expected: []string{"C++"}},
   132  		{name: "TestGetLanguagesByModelineLinguist_5", filename: filepath.Join(modelinesDir, "seeplusplusEmacs2"), expected: []string{"C++"}},
   133  		{name: "TestGetLanguagesByModelineLinguist_6", filename: filepath.Join(modelinesDir, "seeplusplusEmacs3"), expected: []string{"C++"}},
   134  		{name: "TestGetLanguagesByModelineLinguist_7", filename: filepath.Join(modelinesDir, "seeplusplusEmacs4"), expected: []string{"C++"}},
   135  		{name: "TestGetLanguagesByModelineLinguist_8", filename: filepath.Join(modelinesDir, "seeplusplusEmacs5"), expected: []string{"C++"}},
   136  		{name: "TestGetLanguagesByModelineLinguist_9", filename: filepath.Join(modelinesDir, "seeplusplusEmacs6"), expected: []string{"C++"}},
   137  		{name: "TestGetLanguagesByModelineLinguist_10", filename: filepath.Join(modelinesDir, "seeplusplusEmacs7"), expected: []string{"C++"}},
   138  		{name: "TestGetLanguagesByModelineLinguist_11", filename: filepath.Join(modelinesDir, "seeplusplusEmacs9"), expected: []string{"C++"}},
   139  		{name: "TestGetLanguagesByModelineLinguist_12", filename: filepath.Join(modelinesDir, "seeplusplusEmacs10"), expected: []string{"C++"}},
   140  		{name: "TestGetLanguagesByModelineLinguist_13", filename: filepath.Join(modelinesDir, "seeplusplusEmacs11"), expected: []string{"C++"}},
   141  		{name: "TestGetLanguagesByModelineLinguist_14", filename: filepath.Join(modelinesDir, "seeplusplusEmacs12"), expected: []string{"C++"}},
   142  
   143  		// Vim
   144  		{name: "TestGetLanguagesByModelineLinguist_15", filename: filepath.Join(modelinesDir, "seeplusplus"), expected: []string{"C++"}},
   145  		{name: "TestGetLanguagesByModelineLinguist_16", filename: filepath.Join(modelinesDir, "iamjs.pl"), expected: []string{"JavaScript"}},
   146  		{name: "TestGetLanguagesByModelineLinguist_17", filename: filepath.Join(modelinesDir, "iamjs2.pl"), expected: []string{"JavaScript"}},
   147  		{name: "TestGetLanguagesByModelineLinguist_18", filename: filepath.Join(modelinesDir, "not_perl.pl"), expected: []string{"Prolog"}},
   148  		{name: "TestGetLanguagesByModelineLinguist_19", filename: filepath.Join(modelinesDir, "ruby"), expected: []string{"Ruby"}},
   149  		{name: "TestGetLanguagesByModelineLinguist_20", filename: filepath.Join(modelinesDir, "ruby2"), expected: []string{"Ruby"}},
   150  		{name: "TestGetLanguagesByModelineLinguist_21", filename: filepath.Join(modelinesDir, "ruby3"), expected: []string{"Ruby"}},
   151  		{name: "TestGetLanguagesByModelineLinguist_22", filename: filepath.Join(modelinesDir, "ruby4"), expected: []string{"Ruby"}},
   152  		{name: "TestGetLanguagesByModelineLinguist_23", filename: filepath.Join(modelinesDir, "ruby5"), expected: []string{"Ruby"}},
   153  		{name: "TestGetLanguagesByModelineLinguist_24", filename: filepath.Join(modelinesDir, "ruby6"), expected: []string{"Ruby"}},
   154  		{name: "TestGetLanguagesByModelineLinguist_25", filename: filepath.Join(modelinesDir, "ruby7"), expected: []string{"Ruby"}},
   155  		{name: "TestGetLanguagesByModelineLinguist_26", filename: filepath.Join(modelinesDir, "ruby8"), expected: []string{"Ruby"}},
   156  		{name: "TestGetLanguagesByModelineLinguist_27", filename: filepath.Join(modelinesDir, "ruby9"), expected: []string{"Ruby"}},
   157  		{name: "TestGetLanguagesByModelineLinguist_28", filename: filepath.Join(modelinesDir, "ruby10"), expected: []string{"Ruby"}},
   158  		{name: "TestGetLanguagesByModelineLinguist_29", filename: filepath.Join(modelinesDir, "ruby11"), expected: []string{"Ruby"}},
   159  		{name: "TestGetLanguagesByModelineLinguist_30", filename: filepath.Join(modelinesDir, "ruby12"), expected: []string{"Ruby"}},
   160  		{name: "TestGetLanguagesByModelineLinguist_31", filename: filepath.Join(s.samplesDir, "C/main.c"), expected: nil},
   161  		{name: "TestGetLanguagesByModelineLinguist_32", filename: "", expected: nil},
   162  	}
   163  
   164  	for _, test := range tests {
   165  		var content []byte
   166  		var err error
   167  
   168  		if test.filename != "" {
   169  			content, err = ioutil.ReadFile(test.filename)
   170  			assert.NoError(s.T(), err)
   171  		}
   172  
   173  		languages := GetLanguagesByModeline(test.filename, content, test.candidates)
   174  		assert.Equal(s.T(), test.expected, languages, fmt.Sprintf("%v: languages = %v, expected: %v", test.name, languages, test.expected))
   175  	}
   176  }
   177  
   178  func (s *EnryTestSuite) TestGetLanguagesByModeline() {
   179  	const (
   180  		wrongVim  = `# vim: set syntax=ruby ft  =python filetype=perl :`
   181  		rightVim  = `/* vim: set syntax=python ft   =python filetype=python */`
   182  		noLangVim = `/* vim: set shiftwidth=4 softtabstop=0 cindent cinoptions={1s: */`
   183  	)
   184  
   185  	tests := []struct {
   186  		name       string
   187  		filename   string
   188  		content    []byte
   189  		candidates []string
   190  		expected   []string
   191  	}{
   192  		{name: "TestGetLanguagesByModeline_1", content: []byte(wrongVim), expected: nil},
   193  		{name: "TestGetLanguagesByModeline_2", content: []byte(rightVim), expected: []string{"Python"}},
   194  		{name: "TestGetLanguagesByModeline_3", content: []byte(noLangVim), expected: nil},
   195  		{name: "TestGetLanguagesByModeline_4", content: nil, expected: nil},
   196  		{name: "TestGetLanguagesByModeline_5", content: []byte{}, expected: nil},
   197  	}
   198  
   199  	for _, test := range tests {
   200  		languages := GetLanguagesByModeline(test.filename, test.content, test.candidates)
   201  		assert.Equal(s.T(), test.expected, languages, fmt.Sprintf("%v: languages = %v, expected: %v", test.name, languages, test.expected))
   202  	}
   203  }
   204  
   205  func (s *EnryTestSuite) TestGetLanguagesByFilename() {
   206  	tests := []struct {
   207  		name       string
   208  		filename   string
   209  		content    []byte
   210  		candidates []string
   211  		expected   []string
   212  	}{
   213  		{name: "TestGetLanguagesByFilename_1", filename: "unknown.interpreter", expected: nil},
   214  		{name: "TestGetLanguagesByFilename_2", filename: ".bashrc", expected: []string{"Shell"}},
   215  		{name: "TestGetLanguagesByFilename_3", filename: "Dockerfile", expected: []string{"Dockerfile"}},
   216  		{name: "TestGetLanguagesByFilename_4", filename: "Makefile.frag", expected: []string{"Makefile"}},
   217  		{name: "TestGetLanguagesByFilename_5", filename: "makefile", expected: []string{"Makefile"}},
   218  		{name: "TestGetLanguagesByFilename_6", filename: "Vagrantfile", expected: []string{"Ruby"}},
   219  		{name: "TestGetLanguagesByFilename_7", filename: "_vimrc", expected: []string{"Vim script"}},
   220  		{name: "TestGetLanguagesByFilename_8", filename: "pom.xml", expected: []string{"Maven POM"}},
   221  		{name: "TestGetLanguagesByFilename_9", filename: "", expected: nil},
   222  	}
   223  
   224  	for _, test := range tests {
   225  		languages := GetLanguagesByFilename(test.filename, test.content, test.candidates)
   226  		assert.Equal(s.T(), test.expected, languages, fmt.Sprintf("%v: languages = %v, expected: %v", test.name, languages, test.expected))
   227  	}
   228  }
   229  
   230  func (s *EnryTestSuite) TestGetLanguagesByShebang() {
   231  	const (
   232  		multilineExecHack = `#!/bin/sh
   233  # Next line is comment in Tcl, but not in sh... \
   234  exec tclsh "$0" ${1+"$@"}`
   235  
   236  		multilineNoExecHack = `#!/bin/sh
   237  #<<<#
   238  echo "A shell script in a zkl program ($0)"
   239  echo "Now run zkl <this file> with Hello World as args"
   240  zkl $0 Hello World!
   241  exit
   242  #<<<#
   243  println("The shell script says ",vm.arglist.concat(" "));`
   244  	)
   245  
   246  	tests := []struct {
   247  		name       string
   248  		filename   string
   249  		content    []byte
   250  		candidates []string
   251  		expected   []string
   252  	}{
   253  		{name: "TestGetLanguagesByShebang_1", content: []byte(`#!/unknown/interpreter`), expected: nil},
   254  		{name: "TestGetLanguagesByShebang_2", content: []byte(`no shebang`), expected: nil},
   255  		{name: "TestGetLanguagesByShebang_3", content: []byte(`#!/usr/bin/env`), expected: nil},
   256  		{name: "TestGetLanguagesByShebang_4", content: []byte(`#!/usr/bin/python -tt`), expected: []string{"Python"}},
   257  		{name: "TestGetLanguagesByShebang_5", content: []byte(`#!/usr/bin/env python2.6`), expected: []string{"Python"}},
   258  		{name: "TestGetLanguagesByShebang_6", content: []byte(`#!/usr/bin/env perl`), expected: []string{"Perl", "Pod"}},
   259  		{name: "TestGetLanguagesByShebang_7", content: []byte(`#!	/bin/sh`), expected: []string{"Shell"}},
   260  		{name: "TestGetLanguagesByShebang_8", content: []byte(`#!bash`), expected: []string{"Shell"}},
   261  		{name: "TestGetLanguagesByShebang_9", content: []byte(multilineExecHack), expected: []string{"Tcl"}},
   262  		{name: "TestGetLanguagesByShebang_10", content: []byte(multilineNoExecHack), expected: []string{"Shell"}},
   263  		{name: "TestGetLanguagesByShebang_11", content: []byte(`#!`), expected: nil},
   264  	}
   265  
   266  	for _, test := range tests {
   267  		languages := GetLanguagesByShebang(test.filename, test.content, test.candidates)
   268  		assert.Equal(s.T(), test.expected, languages, fmt.Sprintf("%v: languages = %v, expected: %v", test.name, languages, test.expected))
   269  	}
   270  }
   271  
   272  func (s *EnryTestSuite) TestGetLanguagesByExtension() {
   273  	tests := []struct {
   274  		name       string
   275  		filename   string
   276  		content    []byte
   277  		candidates []string
   278  		expected   []string
   279  	}{
   280  		{name: "TestGetLanguagesByExtension_0", filename: "foo.h", expected: []string{"C", "C++", "Objective-C"}},
   281  		{name: "TestGetLanguagesByExtension_1", filename: "foo.foo", expected: nil},
   282  		{name: "TestGetLanguagesByExtension_2", filename: "foo.go", expected: []string{"Go"}},
   283  		{name: "TestGetLanguagesByExtension_3", filename: "foo.go.php", expected: []string{"Hack", "PHP"}},
   284  		{name: "TestGetLanguagesByExtension_4", filename: "", expected: nil},
   285  	}
   286  
   287  	for _, test := range tests {
   288  		languages := GetLanguagesByExtension(test.filename, test.content, test.candidates)
   289  		assert.Equal(s.T(), test.expected, languages, fmt.Sprintf("%v: languages = %v, expected: %v", test.name, languages, test.expected))
   290  	}
   291  }
   292  
   293  func (s *EnryTestSuite) TestGetLanguagesByClassifier() {
   294  	test := []struct {
   295  		name       string
   296  		filename   string
   297  		candidates []string
   298  		expected   string
   299  	}{
   300  		{name: "TestGetLanguagesByClassifier_1", filename: filepath.Join(s.samplesDir, "C/blob.c"), candidates: []string{"python", "ruby", "c", "c++"}, expected: "C"},
   301  		{name: "TestGetLanguagesByClassifier_2", filename: filepath.Join(s.samplesDir, "C/blob.c"), candidates: nil, expected: OtherLanguage},
   302  		{name: "TestGetLanguagesByClassifier_3", filename: filepath.Join(s.samplesDir, "C/main.c"), candidates: []string{}, expected: OtherLanguage},
   303  		{name: "TestGetLanguagesByClassifier_4", filename: filepath.Join(s.samplesDir, "C/blob.c"), candidates: []string{"python", "ruby", "c++"}, expected: "C++"},
   304  		{name: "TestGetLanguagesByClassifier_5", filename: filepath.Join(s.samplesDir, "C/blob.c"), candidates: []string{"ruby"}, expected: "Ruby"},
   305  		{name: "TestGetLanguagesByClassifier_6", filename: filepath.Join(s.samplesDir, "Python/django-models-base.py"), candidates: []string{"python", "ruby", "c", "c++"}, expected: "Python"},
   306  		{name: "TestGetLanguagesByClassifier_7", filename: "", candidates: []string{"python"}, expected: "Python"},
   307  	}
   308  
   309  	for _, test := range test {
   310  		var content []byte
   311  		var err error
   312  
   313  		if test.filename != "" {
   314  			content, err = ioutil.ReadFile(test.filename)
   315  			assert.NoError(s.T(), err)
   316  		}
   317  
   318  		languages := GetLanguagesByClassifier(test.filename, content, test.candidates)
   319  		var language string
   320  		if len(languages) == 0 {
   321  			language = OtherLanguage
   322  		} else {
   323  			language = languages[0]
   324  		}
   325  
   326  		assert.Equal(s.T(), test.expected, language, fmt.Sprintf("%v: language = %v, expected: %v", test.name, language, test.expected))
   327  	}
   328  }
   329  
   330  func (s *EnryTestSuite) TestGetLanguagesBySpecificClassifier() {
   331  	test := []struct {
   332  		name       string
   333  		filename   string
   334  		candidates []string
   335  		classifier Classifier
   336  		expected   string
   337  	}{
   338  		{name: "TestGetLanguagesByClassifier_1", filename: filepath.Join(s.samplesDir, "C/blob.c"), candidates: []string{"python", "ruby", "c", "c++"}, classifier: DefaultClassifier, expected: "C"},
   339  		{name: "TestGetLanguagesByClassifier_2", filename: filepath.Join(s.samplesDir, "C/blob.c"), candidates: nil, classifier: DefaultClassifier, expected: "C"},
   340  		{name: "TestGetLanguagesByClassifier_3", filename: filepath.Join(s.samplesDir, "C/main.c"), candidates: []string{}, classifier: DefaultClassifier, expected: "C"},
   341  		{name: "TestGetLanguagesByClassifier_4", filename: filepath.Join(s.samplesDir, "C/blob.c"), candidates: []string{"python", "ruby", "c++"}, classifier: DefaultClassifier, expected: "C++"},
   342  		{name: "TestGetLanguagesByClassifier_5", filename: filepath.Join(s.samplesDir, "C/blob.c"), candidates: []string{"ruby"}, classifier: DefaultClassifier, expected: "Ruby"},
   343  		{name: "TestGetLanguagesByClassifier_6", filename: filepath.Join(s.samplesDir, "Python/django-models-base.py"), candidates: []string{"python", "ruby", "c", "c++"}, classifier: DefaultClassifier, expected: "Python"},
   344  		{name: "TestGetLanguagesByClassifier_7", filename: os.DevNull, candidates: nil, classifier: DefaultClassifier, expected: "XML"},
   345  	}
   346  
   347  	for _, test := range test {
   348  		content, err := ioutil.ReadFile(test.filename)
   349  		assert.NoError(s.T(), err)
   350  
   351  		languages := GetLanguagesBySpecificClassifier(content, test.candidates, test.classifier)
   352  		var language string
   353  		if len(languages) == 0 {
   354  			language = OtherLanguage
   355  		} else {
   356  			language = languages[0]
   357  		}
   358  
   359  		assert.Equal(s.T(), test.expected, language, fmt.Sprintf("%v: language = %v, expected: %v", test.name, language, test.expected))
   360  	}
   361  }
   362  
   363  func (s *EnryTestSuite) TestGetLanguageExtensions() {
   364  	tests := []struct {
   365  		name     string
   366  		language string
   367  		expected []string
   368  	}{
   369  		{name: "TestGetLanguageExtensions_1", language: "foo", expected: nil},
   370  		{name: "TestGetLanguageExtensions_2", language: "COBOL", expected: []string{".cob", ".cbl", ".ccp", ".cobol", ".cpy"}},
   371  		{name: "TestGetLanguageExtensions_3", language: "Maven POM", expected: nil},
   372  	}
   373  
   374  	for _, test := range tests {
   375  		extensions := GetLanguageExtensions(test.language)
   376  		assert.EqualValues(s.T(), test.expected, extensions, fmt.Sprintf("%v: extensions = %v, expected: %v", test.name, extensions, test.expected))
   377  	}
   378  }
   379  
   380  func (s *EnryTestSuite) TestGetLanguageType() {
   381  	tests := []struct {
   382  		name     string
   383  		language string
   384  		expected Type
   385  	}{
   386  		{name: "TestGetLanguageType_1", language: "BestLanguageEver", expected: Unknown},
   387  		{name: "TestGetLanguageType_2", language: "JSON", expected: Data},
   388  		{name: "TestGetLanguageType_3", language: "COLLADA", expected: Data},
   389  		{name: "TestGetLanguageType_4", language: "Go", expected: Programming},
   390  		{name: "TestGetLanguageType_5", language: "Brainfuck", expected: Programming},
   391  		{name: "TestGetLanguageType_6", language: "HTML", expected: Markup},
   392  		{name: "TestGetLanguageType_7", language: "Sass", expected: Markup},
   393  		{name: "TestGetLanguageType_8", language: "AsciiDoc", expected: Prose},
   394  		{name: "TestGetLanguageType_9", language: "Textile", expected: Prose},
   395  	}
   396  
   397  	for _, test := range tests {
   398  		langType := GetLanguageType(test.language)
   399  		assert.Equal(s.T(), test.expected, langType, fmt.Sprintf("%v: langType = %v, expected: %v", test.name, langType, test.expected))
   400  	}
   401  }
   402  
   403  func (s *EnryTestSuite) TestGetLanguageByAlias() {
   404  	tests := []struct {
   405  		name         string
   406  		alias        string
   407  		expectedLang string
   408  		expectedOk   bool
   409  	}{
   410  		{name: "TestGetLanguageByAlias_1", alias: "BestLanguageEver", expectedLang: OtherLanguage, expectedOk: false},
   411  		{name: "TestGetLanguageByAlias_2", alias: "aspx-vb", expectedLang: "ASP", expectedOk: true},
   412  		{name: "TestGetLanguageByAlias_3", alias: "C++", expectedLang: "C++", expectedOk: true},
   413  		{name: "TestGetLanguageByAlias_4", alias: "c++", expectedLang: "C++", expectedOk: true},
   414  		{name: "TestGetLanguageByAlias_5", alias: "objc", expectedLang: "Objective-C", expectedOk: true},
   415  		{name: "TestGetLanguageByAlias_6", alias: "golang", expectedLang: "Go", expectedOk: true},
   416  		{name: "TestGetLanguageByAlias_7", alias: "GOLANG", expectedLang: "Go", expectedOk: true},
   417  		{name: "TestGetLanguageByAlias_8", alias: "bsdmake", expectedLang: "Makefile", expectedOk: true},
   418  		{name: "TestGetLanguageByAlias_9", alias: "xhTmL", expectedLang: "HTML", expectedOk: true},
   419  		{name: "TestGetLanguageByAlias_10", alias: "python", expectedLang: "Python", expectedOk: true},
   420  	}
   421  
   422  	for _, test := range tests {
   423  		lang, ok := GetLanguageByAlias(test.alias)
   424  		assert.Equal(s.T(), test.expectedLang, lang, fmt.Sprintf("%v: lang = %v, expected: %v", test.name, lang, test.expectedLang))
   425  		assert.Equal(s.T(), test.expectedOk, ok, fmt.Sprintf("%v: ok = %v, expected: %v", test.name, ok, test.expectedOk))
   426  	}
   427  }
   428  
   429  func (s *EnryTestSuite) TestLinguistCorpus() {
   430  	const filenamesDir = "filenames"
   431  	var cornerCases = map[string]bool{
   432  		"drop_stuff.sql": true, // https://github.com/src-d/enry/issues/194
   433  		// .es and .ice fail heuristics parsing, but do not fail any tests
   434  	}
   435  
   436  	var total, failed, ok, other int
   437  	var expected string
   438  	filepath.Walk(s.samplesDir, func(path string, f os.FileInfo, err error) error {
   439  		if f.IsDir() {
   440  			if f.Name() != filenamesDir {
   441  				expected, _ = data.LanguageByAlias(f.Name())
   442  			}
   443  
   444  			return nil
   445  		}
   446  
   447  		filename := filepath.Base(path)
   448  		content, _ := ioutil.ReadFile(path)
   449  
   450  		total++
   451  		obtained := GetLanguage(filename, content)
   452  		if obtained == OtherLanguage {
   453  			obtained = "Other"
   454  			other++
   455  		}
   456  
   457  		var status string
   458  		if expected == obtained {
   459  			status = "ok"
   460  			ok++
   461  		} else {
   462  			status = "failed"
   463  			failed++
   464  		}
   465  
   466  		if _, ok := cornerCases[filename]; ok {
   467  			s.T().Logf("\t\t[considered corner case] %s\texpected: %s\tobtained: %s\tstatus: %s\n", filename, expected, obtained, status)
   468  		} else {
   469  			assert.Equal(s.T(), expected, obtained, fmt.Sprintf("%s\texpected: %s\tobtained: %s\tstatus: %s\n", filename, expected, obtained, status))
   470  		}
   471  
   472  		return nil
   473  	})
   474  
   475  	s.T().Logf("\t\ttotal files: %d, ok: %d, failed: %d, other: %d\n", total, ok, failed, other)
   476  }