gitlab.com/thomasboni/go-enry/v2@v2.8.3-0.20220418031202-30b0d7a3de98/common_test.go (about)

     1  package enry
     2  
     3  import (
     4  	"fmt"
     5  	"io/ioutil"
     6  	"os"
     7  	"os/exec"
     8  	"path/filepath"
     9  	"strings"
    10  	"testing"
    11  
    12  	"gitlab.com/thomasboni/go-enry/v2/data"
    13  
    14  	"github.com/stretchr/testify/assert"
    15  	"github.com/stretchr/testify/require"
    16  	"github.com/stretchr/testify/suite"
    17  )
    18  
    19  const linguistURL = "https://github.com/github/linguist.git"
    20  const linguistClonedEnvVar = "ENRY_TEST_REPO"
    21  
// EnryTestSuite is the testify suite that carries the state shared by the
// tests in this file: the location of the Linguist checkout and the
// directories derived from it.
type EnryTestSuite struct {
	suite.Suite
	// tmpLinguist is the root directory of the Linguist repository in use.
	tmpLinguist     string
	// needToClone is true when the suite cloned Linguist itself into a
	// temporary directory (and therefore has to remove it afterwards).
	needToClone     bool
	// samplesDir is the "samples" directory inside the Linguist repository.
	samplesDir      string
	// testFixturesDir is the "test/fixtures" directory inside the Linguist
	// repository.
	testFixturesDir string
}
    29  
    30  func (s *EnryTestSuite) TestRegexpEdgeCases() {
    31  	var regexpEdgeCases = []struct {
    32  		lang     string
    33  		filename string
    34  	}{
    35  		{lang: "ActionScript", filename: "FooBar.as"},
    36  		{lang: "Forth", filename: "asm.fr"},
    37  		{lang: "X PixMap", filename: "cc-public_domain_mark_white.pm"},
    38  		//{lang: "SQL", filename: "drop_stuff.sql"}, // https://github.com/src-d/enry/issues/194
    39  		{lang: "Fstar", filename: "Hacl.Spec.Bignum.Fmul.fst"},
    40  		{lang: "C++", filename: "Types.h"},
    41  	}
    42  
    43  	for _, r := range regexpEdgeCases {
    44  		filename := filepath.Join(s.tmpLinguist, "samples", r.lang, r.filename)
    45  
    46  		content, err := ioutil.ReadFile(filename)
    47  		require.NoError(s.T(), err)
    48  
    49  		lang := GetLanguage(r.filename, content)
    50  		s.T().Logf("File:%s, lang:%s", filename, lang)
    51  
    52  		expLang, _ := data.LanguageByAlias(r.lang)
    53  		require.EqualValues(s.T(), expLang, lang)
    54  	}
    55  }
    56  
    57  func Test_EnryTestSuite(t *testing.T) {
    58  	suite.Run(t, new(EnryTestSuite))
    59  }
    60  
    61  func (s *EnryTestSuite) SetupSuite() {
    62  	var err error
    63  	s.tmpLinguist = os.Getenv(linguistClonedEnvVar)
    64  	s.needToClone = s.tmpLinguist == ""
    65  	if s.needToClone {
    66  		s.tmpLinguist, err = ioutil.TempDir("", "linguist-")
    67  		require.NoError(s.T(), err)
    68  		s.T().Logf("Cloning Linguist repo to '%s' as %s was not set\n",
    69  			s.tmpLinguist, linguistClonedEnvVar)
    70  		cmd := exec.Command("git", "clone", linguistURL, s.tmpLinguist)
    71  		err = cmd.Run()
    72  		require.NoError(s.T(), err)
    73  	}
    74  	s.samplesDir = filepath.Join(s.tmpLinguist, "samples")
    75  	s.T().Logf("using samples from %s", s.samplesDir)
    76  
    77  	s.testFixturesDir = filepath.Join(s.tmpLinguist, "test", "fixtures")
    78  	s.T().Logf("using test fixtures from %s", s.samplesDir)
    79  
    80  	cwd, err := os.Getwd()
    81  	assert.NoError(s.T(), err)
    82  
    83  	err = os.Chdir(s.tmpLinguist)
    84  	assert.NoError(s.T(), err)
    85  
    86  	cmd := exec.Command("git", "checkout", data.LinguistCommit)
    87  	err = cmd.Run()
    88  	assert.NoError(s.T(), err)
    89  
    90  	err = os.Chdir(cwd)
    91  	assert.NoError(s.T(), err)
    92  }
    93  
    94  func (s *EnryTestSuite) TearDownSuite() {
    95  	if s.needToClone {
    96  		err := os.RemoveAll(s.tmpLinguist)
    97  		assert.NoError(s.T(), err)
    98  	}
    99  }
   100  
   101  func (s *EnryTestSuite) TestGetLanguage() {
   102  	tests := []struct {
   103  		name     string
   104  		filename string
   105  		content  []byte
   106  		expected string
   107  		safe     bool
   108  	}{
   109  		{name: "TestGetLanguage_0", filename: "foo.h", content: []byte{}, expected: "C"},
   110  		{name: "TestGetLanguage_1", filename: "foo.py", content: []byte{}, expected: "Python"},
   111  		{name: "TestGetLanguage_2", filename: "foo.m", content: []byte(":- module"), expected: "Mercury"},
   112  		{name: "TestGetLanguage_3", filename: "foo.m", content: nil, expected: "MATLAB"},
   113  		{name: "TestGetLanguage_4", filename: "foo.mo", content: []byte{0xDE, 0x12, 0x04, 0x95, 0x00, 0x00, 0x00, 0x00}, expected: OtherLanguage},
   114  		{name: "TestGetLanguage_5", filename: "", content: nil, expected: OtherLanguage},
   115  	}
   116  
   117  	for _, test := range tests {
   118  		language := GetLanguage(test.filename, test.content)
   119  		assert.Equal(s.T(), test.expected, language, fmt.Sprintf("%v: %v, expected: %v", test.name, language, test.expected))
   120  	}
   121  }
   122  
   123  func (s *EnryTestSuite) TestGetLanguages() {
   124  	tests := []struct {
   125  		name     string
   126  		filename string
   127  		content  []byte
   128  		expected []string
   129  	}{
   130  		// With no content or filename, no language can be detected
   131  		{name: "TestGetLanguages_0", filename: "", content: []byte{}, expected: nil},
   132  		// The strategy that will match is GetLanguagesByExtension. Lacking content, it will return those results.
   133  		{name: "TestGetLanguages_1", filename: "foo.h", content: []byte{}, expected: []string{"C"}},
   134  		// GetLanguagesByExtension will return an unambiguous match when there is a single result.
   135  		{name: "TestGetLanguages_2", filename: "foo.groovy", content: []byte{}, expected: []string{"Groovy"}},
   136  		// GetLanguagesByExtension will return "Rust", "RenderScript" for .rs,
   137  		// then GetLanguagesByContent will take the first rule that matches (in this case Rust)
   138  		{name: "TestGetLanguages_3", filename: "foo.rs", content: []byte("use \n#include"), expected: []string{"Rust"}},
   139  		// .. and in this case, RenderScript (no content that matches a Rust regex can be included, because it runs first.)
   140  		{name: "TestGetLanguages_4", filename: "foo.rs", content: []byte("#include"), expected: []string{"RenderScript"}},
   141  		// GetLanguagesByExtension will return "AMPL", "Linux Kernel Module", "Modula-2", "XML",
   142  		// then GetLanguagesByContent will ALWAYS return Linux Kernel Module and AMPL when there is no content,
   143  		// and no further classifier can do anything without content
   144  		{name: "TestGetLanguages_5", filename: "foo.mod", content: []byte{}, expected: []string{"Linux Kernel Module", "AMPL"}},
   145  		// ...with some AMPL tokens, the DefaultClassifier will pick AMPL as the most likely language.
   146  		{name: "TestGetLanguages_6", filename: "foo.mod", content: []byte("BEAMS ROWS - TotalWeight"), expected: []string{"AMPL", "Linux Kernel Module"}},
   147  	}
   148  
   149  	for _, test := range tests {
   150  		languages := GetLanguages(test.filename, test.content)
   151  		assert.Equal(s.T(), test.expected, languages, fmt.Sprintf("%v: %v, expected: %v", test.name, languages, test.expected))
   152  	}
   153  }
   154  
   155  func (s *EnryTestSuite) TestGetLanguagesByModelineLinguist() {
   156  	var modelinesDir = filepath.Join(s.tmpLinguist, "test", "fixtures", "Data", "Modelines")
   157  
   158  	tests := []struct {
   159  		name       string
   160  		filename   string
   161  		candidates []string
   162  		expected   []string
   163  	}{
   164  		// Emacs
   165  		{name: "TestGetLanguagesByModelineLinguist_1", filename: filepath.Join(modelinesDir, "example_smalltalk.md"), expected: []string{"Smalltalk"}},
   166  		{name: "TestGetLanguagesByModelineLinguist_2", filename: filepath.Join(modelinesDir, "fundamentalEmacs.c"), expected: []string{"Text"}},
   167  		{name: "TestGetLanguagesByModelineLinguist_3", filename: filepath.Join(modelinesDir, "iamphp.inc"), expected: []string{"PHP"}},
   168  		{name: "TestGetLanguagesByModelineLinguist_4", filename: filepath.Join(modelinesDir, "seeplusplusEmacs1"), expected: []string{"C++"}},
   169  		{name: "TestGetLanguagesByModelineLinguist_5", filename: filepath.Join(modelinesDir, "seeplusplusEmacs2"), expected: []string{"C++"}},
   170  		{name: "TestGetLanguagesByModelineLinguist_6", filename: filepath.Join(modelinesDir, "seeplusplusEmacs3"), expected: []string{"C++"}},
   171  		{name: "TestGetLanguagesByModelineLinguist_7", filename: filepath.Join(modelinesDir, "seeplusplusEmacs4"), expected: []string{"C++"}},
   172  		{name: "TestGetLanguagesByModelineLinguist_8", filename: filepath.Join(modelinesDir, "seeplusplusEmacs5"), expected: []string{"C++"}},
   173  		{name: "TestGetLanguagesByModelineLinguist_9", filename: filepath.Join(modelinesDir, "seeplusplusEmacs6"), expected: []string{"C++"}},
   174  		{name: "TestGetLanguagesByModelineLinguist_10", filename: filepath.Join(modelinesDir, "seeplusplusEmacs7"), expected: []string{"C++"}},
   175  		{name: "TestGetLanguagesByModelineLinguist_11", filename: filepath.Join(modelinesDir, "seeplusplusEmacs9"), expected: []string{"C++"}},
   176  		{name: "TestGetLanguagesByModelineLinguist_12", filename: filepath.Join(modelinesDir, "seeplusplusEmacs10"), expected: []string{"C++"}},
   177  		{name: "TestGetLanguagesByModelineLinguist_13", filename: filepath.Join(modelinesDir, "seeplusplusEmacs11"), expected: []string{"C++"}},
   178  		{name: "TestGetLanguagesByModelineLinguist_14", filename: filepath.Join(modelinesDir, "seeplusplusEmacs12"), expected: []string{"C++"}},
   179  
   180  		// Vim
   181  		{name: "TestGetLanguagesByModelineLinguist_15", filename: filepath.Join(modelinesDir, "seeplusplus"), expected: []string{"C++"}},
   182  		{name: "TestGetLanguagesByModelineLinguist_16", filename: filepath.Join(modelinesDir, "iamjs.pl"), expected: []string{"JavaScript"}},
   183  		{name: "TestGetLanguagesByModelineLinguist_17", filename: filepath.Join(modelinesDir, "iamjs2.pl"), expected: []string{"JavaScript"}},
   184  		{name: "TestGetLanguagesByModelineLinguist_18", filename: filepath.Join(modelinesDir, "not_perl.pl"), expected: []string{"Prolog"}},
   185  		{name: "TestGetLanguagesByModelineLinguist_19", filename: filepath.Join(modelinesDir, "ruby"), expected: []string{"Ruby"}},
   186  		{name: "TestGetLanguagesByModelineLinguist_20", filename: filepath.Join(modelinesDir, "ruby2"), expected: []string{"Ruby"}},
   187  		{name: "TestGetLanguagesByModelineLinguist_21", filename: filepath.Join(modelinesDir, "ruby3"), expected: []string{"Ruby"}},
   188  		{name: "TestGetLanguagesByModelineLinguist_22", filename: filepath.Join(modelinesDir, "ruby4"), expected: []string{"Ruby"}},
   189  		{name: "TestGetLanguagesByModelineLinguist_23", filename: filepath.Join(modelinesDir, "ruby5"), expected: []string{"Ruby"}},
   190  		{name: "TestGetLanguagesByModelineLinguist_24", filename: filepath.Join(modelinesDir, "ruby6"), expected: []string{"Ruby"}},
   191  		{name: "TestGetLanguagesByModelineLinguist_25", filename: filepath.Join(modelinesDir, "ruby7"), expected: []string{"Ruby"}},
   192  		{name: "TestGetLanguagesByModelineLinguist_26", filename: filepath.Join(modelinesDir, "ruby8"), expected: []string{"Ruby"}},
   193  		{name: "TestGetLanguagesByModelineLinguist_27", filename: filepath.Join(modelinesDir, "ruby9"), expected: []string{"Ruby"}},
   194  		{name: "TestGetLanguagesByModelineLinguist_28", filename: filepath.Join(modelinesDir, "ruby10"), expected: []string{"Ruby"}},
   195  		{name: "TestGetLanguagesByModelineLinguist_29", filename: filepath.Join(modelinesDir, "ruby11"), expected: []string{"Ruby"}},
   196  		{name: "TestGetLanguagesByModelineLinguist_30", filename: filepath.Join(modelinesDir, "ruby12"), expected: []string{"Ruby"}},
   197  		{name: "TestGetLanguagesByModelineLinguist_31", filename: filepath.Join(s.samplesDir, "C++/runtime-compiler.cc"), expected: nil},
   198  		{name: "TestGetLanguagesByModelineLinguist_32", filename: "", expected: nil},
   199  	}
   200  
   201  	for _, test := range tests {
   202  		var content []byte
   203  		var err error
   204  
   205  		if test.filename != "" {
   206  			content, err = ioutil.ReadFile(test.filename)
   207  			assert.NoError(s.T(), err)
   208  		}
   209  
   210  		languages := GetLanguagesByModeline(test.filename, content, test.candidates)
   211  		assert.Equal(s.T(), test.expected, languages, fmt.Sprintf("%v: languages = %v, expected: %v", test.name, languages, test.expected))
   212  	}
   213  }
   214  
   215  func (s *EnryTestSuite) TestGetLanguagesByModeline() {
   216  	const (
   217  		wrongVim  = `# vim: set syntax=ruby ft  =python filetype=perl :`
   218  		rightVim  = `/* vim: set syntax=python ft   =python filetype=python */`
   219  		noLangVim = `/* vim: set shiftwidth=4 softtabstop=0 cindent cinoptions={1s: */`
   220  	)
   221  
   222  	tests := []struct {
   223  		name       string
   224  		filename   string
   225  		content    []byte
   226  		candidates []string
   227  		expected   []string
   228  	}{
   229  		{name: "TestGetLanguagesByModeline_1", content: []byte(wrongVim), expected: nil},
   230  		{name: "TestGetLanguagesByModeline_2", content: []byte(rightVim), expected: []string{"Python"}},
   231  		{name: "TestGetLanguagesByModeline_3", content: []byte(noLangVim), expected: nil},
   232  		{name: "TestGetLanguagesByModeline_4", content: nil, expected: nil},
   233  		{name: "TestGetLanguagesByModeline_5", content: []byte{}, expected: nil},
   234  	}
   235  
   236  	for _, test := range tests {
   237  		languages := GetLanguagesByModeline(test.filename, test.content, test.candidates)
   238  		assert.Equal(s.T(), test.expected, languages, fmt.Sprintf("%v: languages = %v, expected: %v", test.name, languages, test.expected))
   239  	}
   240  }
   241  
   242  func (s *EnryTestSuite) TestGetLanguagesByFilename() {
   243  	tests := []struct {
   244  		name       string
   245  		filename   string
   246  		content    []byte
   247  		candidates []string
   248  		expected   []string
   249  	}{
   250  		{name: "TestGetLanguagesByFilename_1", filename: "unknown.interpreter", expected: nil},
   251  		{name: "TestGetLanguagesByFilename_2", filename: ".bashrc", expected: []string{"Shell"}},
   252  		{name: "TestGetLanguagesByFilename_3", filename: "Dockerfile", expected: []string{"Dockerfile"}},
   253  		{name: "TestGetLanguagesByFilename_4", filename: "Makefile.frag", expected: []string{"Makefile"}},
   254  		{name: "TestGetLanguagesByFilename_5", filename: "makefile", expected: []string{"Makefile"}},
   255  		{name: "TestGetLanguagesByFilename_6", filename: "Vagrantfile", expected: []string{"Ruby"}},
   256  		{name: "TestGetLanguagesByFilename_7", filename: "_vimrc", expected: []string{"Vim Script"}},
   257  		{name: "TestGetLanguagesByFilename_8", filename: "pom.xml", expected: []string{"Maven POM"}},
   258  		{name: "TestGetLanguagesByFilename_9", filename: "", expected: nil},
   259  	}
   260  
   261  	for _, test := range tests {
   262  		languages := GetLanguagesByFilename(test.filename, test.content, test.candidates)
   263  		assert.Equal(s.T(), len(test.expected), len(languages), fmt.Sprintf("%v: number of languages = %v, expected: %v", test.name, len(languages), len(test.expected)))
   264  		for i := range languages { // case-insensitive name comparison
   265  			assert.True(s.T(), strings.EqualFold(test.expected[i], languages[i]), fmt.Sprintf("%v: languages = %v, expected: %v", test.name, languages, test.expected))
   266  		}
   267  	}
   268  }
   269  
   270  func (s *EnryTestSuite) TestGetLanguagesByShebang() {
   271  	const (
   272  		multilineExecHack = `#!/bin/sh
   273  # Next line is comment in Tcl, but not in sh... \
   274  exec tclsh "$0" ${1+"$@"}`
   275  
   276  		multilineNoExecHack = `#!/bin/sh
   277  #<<<#
   278  echo "A shell script in a zkl program ($0)"
   279  echo "Now run zkl <this file> with Hello World as args"
   280  zkl $0 Hello World!
   281  exit
   282  #<<<#
   283  println("The shell script says ",vm.arglist.concat(" "));`
   284  	)
   285  
   286  	tests := []struct {
   287  		name       string
   288  		filename   string
   289  		content    []byte
   290  		candidates []string
   291  		expected   []string
   292  	}{
   293  		{name: "TestGetLanguagesByShebang_1", content: []byte(`#!/unknown/interpreter`), expected: nil},
   294  		{name: "TestGetLanguagesByShebang_2", content: []byte(`no shebang`), expected: nil},
   295  		{name: "TestGetLanguagesByShebang_3", content: []byte(`#!/usr/bin/env`), expected: nil},
   296  		{name: "TestGetLanguagesByShebang_4", content: []byte(`#!/usr/bin/python -tt`), expected: []string{"Python"}},
   297  		{name: "TestGetLanguagesByShebang_5", content: []byte(`#!/usr/bin/env python2.6`), expected: []string{"Python"}},
   298  		{name: "TestGetLanguagesByShebang_6", content: []byte(`#!/usr/bin/env perl`), expected: []string{"Perl", "Pod"}},
   299  		{name: "TestGetLanguagesByShebang_7", content: []byte(`#!	/bin/sh`), expected: []string{"Shell"}},
   300  		{name: "TestGetLanguagesByShebang_8", content: []byte(`#!bash`), expected: []string{"Shell"}},
   301  		{name: "TestGetLanguagesByShebang_9", content: []byte(multilineExecHack), expected: []string{"Tcl"}},
   302  		{name: "TestGetLanguagesByShebang_10", content: []byte(multilineNoExecHack), expected: []string{"Shell"}},
   303  		{name: "TestGetLanguagesByShebang_11", content: []byte(`#!/envinpath/python`), expected: []string{"Python"}},
   304  
   305  		{name: "TestGetLanguagesByShebang_12", content: []byte(""), expected: nil},
   306  		{name: "TestGetLanguagesByShebang_13", content: []byte("foo"), expected: nil},
   307  		{name: "TestGetLanguagesByShebang_14", content: []byte("#bar"), expected: nil},
   308  		{name: "TestGetLanguagesByShebang_15", content: []byte("#baz"), expected: nil},
   309  		{name: "TestGetLanguagesByShebang_16", content: []byte("///"), expected: nil},
   310  		{name: "TestGetLanguagesByShebang_17", content: []byte("\n\n\n\n\n"), expected: nil},
   311  		{name: "TestGetLanguagesByShebang_18", content: []byte(" #!/usr/sbin/ruby"), expected: nil},
   312  		{name: "TestGetLanguagesByShebang_19", content: []byte("\n#!/usr/sbin/ruby"), expected: nil},
   313  		{name: "TestGetLanguagesByShebang_20", content: []byte("#!"), expected: nil},
   314  		{name: "TestGetLanguagesByShebang_21", content: []byte("#! "), expected: nil},
   315  		{name: "TestGetLanguagesByShebang_22", content: []byte("#!/usr/bin/env"), expected: nil},
   316  		{name: "TestGetLanguagesByShebang_23", content: []byte("#!/usr/bin/env osascript -l JavaScript"), expected: nil},
   317  		{name: "TestGetLanguagesByShebang_24", content: []byte("#!/usr/bin/env osascript -l AppleScript"), expected: nil},
   318  		{name: "TestGetLanguagesByShebang_25", content: []byte("#!/usr/bin/env osascript -l foobar"), expected: nil},
   319  		{name: "TestGetLanguagesByShebang_26", content: []byte("#!/usr/bin/osascript -l JavaScript"), expected: nil},
   320  		{name: "TestGetLanguagesByShebang_27", content: []byte("#!/usr/bin/osascript -l foobar"), expected: nil},
   321  
   322  		{name: "TestGetLanguagesByShebang_28", content: []byte("#!/usr/sbin/ruby\n# bar"), expected: []string{"Ruby"}},
   323  		{name: "TestGetLanguagesByShebang_29", content: []byte("#!/usr/bin/ruby\n# foo"), expected: []string{"Ruby"}},
   324  		{name: "TestGetLanguagesByShebang_30", content: []byte("#!/usr/sbin/ruby"), expected: []string{"Ruby"}},
   325  		{name: "TestGetLanguagesByShebang_31", content: []byte("#!/usr/sbin/ruby foo bar baz\n"), expected: []string{"Ruby"}},
   326  
   327  		{name: "TestGetLanguagesByShebang_32", content: []byte("#!/usr/bin/env Rscript\n# example R script\n#\n"), expected: []string{"R"}},
   328  		{name: "TestGetLanguagesByShebang_33", content: []byte("#!/usr/bin/env ruby\n# baz"), expected: []string{"Ruby"}},
   329  
   330  		{name: "TestGetLanguagesByShebang_34", content: []byte("#!/usr/bin/bash\n"), expected: []string{"Shell"}},
   331  		{name: "TestGetLanguagesByShebang_35", content: []byte("#!/bin/sh"), expected: []string{"Shell"}},
   332  		{name: "TestGetLanguagesByShebang_36", content: []byte("#!/bin/python\n# foo\n# bar\n# baz"), expected: []string{"Python"}},
   333  		{name: "TestGetLanguagesByShebang_37", content: []byte("#!/usr/bin/python2.7\n\n\n\n"), expected: []string{"Python"}},
   334  		{name: "TestGetLanguagesByShebang_38", content: []byte("#!/usr/bin/python3\n\n\n\n"), expected: []string{"Python"}},
   335  		{name: "TestGetLanguagesByShebang_39", content: []byte("#!/usr/bin/sbcl --script\n\n"), expected: []string{"Common Lisp"}},
   336  		{name: "TestGetLanguagesByShebang_40", content: []byte("#! perl"), expected: []string{"Perl", "Pod"}},
   337  
   338  		{name: "TestGetLanguagesByShebang_41", content: []byte("#!/bin/sh\n\n\nexec ruby $0 $@"), expected: []string{"Ruby"}},
   339  		{name: "TestGetLanguagesByShebang_42", content: []byte("#! /usr/bin/env A=003 B=149 C=150 D=xzd E=base64 F=tar G=gz H=head I=tail sh"), expected: []string{"Shell"}},
   340  		{name: "TestGetLanguagesByShebang_43", content: []byte("#!/usr/bin/env foo=bar bar=foo python -cos=__import__(\"os\");"), expected: []string{"Python"}},
   341  		{name: "TestGetLanguagesByShebang_44", content: []byte("#!/usr/bin/env osascript"), expected: []string{"AppleScript"}},
   342  		{name: "TestGetLanguagesByShebang_45", content: []byte("#!/usr/bin/osascript"), expected: []string{"AppleScript"}},
   343  
   344  		{name: "TestGetLanguagesByShebang_46", content: []byte("#!/usr/bin/env -vS ruby -wKU\nputs ?t+?e+?s+?t"), expected: []string{"Ruby"}},
   345  		{name: "TestGetLanguagesByShebang_47", content: []byte("#!/usr/bin/env --split-string sed -f\ny/a/A/"), expected: []string{"sed"}},
   346  		{name: "TestGetLanguagesByShebang_48", content: []byte("#!/usr/bin/env -S GH_TOKEN=ghp_*** deno run --allow-net\nconsole.log(1);"), expected: []string{"TypeScript"}},
   347  	}
   348  
   349  	for _, test := range tests {
   350  		languages := GetLanguagesByShebang(test.filename, test.content, test.candidates)
   351  		assert.Equal(s.T(), test.expected, languages, fmt.Sprintf("%v: languages = %v, expected: %v", test.name, languages, test.expected))
   352  	}
   353  }
   354  
   355  func (s *EnryTestSuite) TestGetLanguagesByExtension() {
   356  	tests := []struct {
   357  		name       string
   358  		filename   string
   359  		content    []byte
   360  		candidates []string
   361  		expected   []string
   362  	}{
   363  		{name: "TestGetLanguagesByExtension_0", filename: "foo.h", expected: []string{"C", "C++", "Objective-C"}},
   364  		{name: "TestGetLanguagesByExtension_1", filename: "foo.foo", expected: nil},
   365  		{name: "TestGetLanguagesByExtension_2", filename: "foo.go", expected: []string{"Go"}},
   366  		{name: "TestGetLanguagesByExtension_3", filename: "foo.go.php", expected: []string{"Hack", "PHP"}},
   367  		{name: "TestGetLanguagesByExtension_4", filename: "", expected: nil},
   368  	}
   369  
   370  	for _, test := range tests {
   371  		languages := GetLanguagesByExtension(test.filename, test.content, test.candidates)
   372  		assert.Equal(s.T(), test.expected, languages, fmt.Sprintf("%v: languages = %v, expected: %v", test.name, languages, test.expected))
   373  	}
   374  }
   375  
   376  func (s *EnryTestSuite) TestGetLanguagesByManpage() {
   377  	tests := []struct {
   378  		name       string
   379  		filename   string
   380  		content    []byte
   381  		candidates []string
   382  		expected   []string
   383  	}{
   384  		{name: "TestGetLanguagesByManpage_1", filename: "bsdmalloc.3malloc", expected: []string{"Roff Manpage", "Roff"}},
   385  		{name: "TestGetLanguagesByManpage_2", filename: "dirent.h.0p", expected: []string{"Roff Manpage", "Roff"}},
   386  		{name: "TestGetLanguagesByManpage_3", filename: "linguist.1gh", expected: []string{"Roff Manpage", "Roff"}},
   387  		{name: "TestGetLanguagesByManpage_4", filename: "test.1.in", expected: []string{"Roff Manpage", "Roff"}},
   388  		{name: "TestGetLanguagesByManpage_5", filename: "test.man.in", expected: []string{"Roff Manpage", "Roff"}},
   389  		{name: "TestGetLanguagesByManpage_6", filename: "test.mdoc.in", expected: []string{"Roff Manpage", "Roff"}},
   390  		{name: "TestGetLanguagesByManpage_7", filename: "foo.h", expected: nil},
   391  		{name: "TestGetLanguagesByManpage_8", filename: "", expected: nil},
   392  	}
   393  
   394  	for _, test := range tests {
   395  		languages := GetLanguagesByManpage(test.filename, test.content, test.candidates)
   396  		assert.Equal(s.T(), test.expected, languages, fmt.Sprintf("%v: languages = %v, expected: %v", test.name, languages, test.expected))
   397  	}
   398  }
   399  
   400  func (s *EnryTestSuite) TestGetLanguagesByXML() {
   401  	tests := []struct {
   402  		name       string
   403  		filename   string
   404  		candidates []string
   405  		expected   []string
   406  	}{
   407  		{name: "TestGetLanguagesByXML_1", filename: filepath.Join(s.testFixturesDir, "XML/app.config"), expected: []string{"XML"}},
   408  		{name: "TestGetLanguagesByXML_2", filename: filepath.Join(s.testFixturesDir, "XML/AssertionIDRequestOptionalAttributes.xml.svn-base"), expected: []string{"XML"}},
   409  		// no XML header so should not be identified by this strategy
   410  		{name: "TestGetLanguagesByXML_3", filename: filepath.Join(s.samplesDir, "XML/libsomething.dll.config"), expected: nil},
   411  		{name: "TestGetLanguagesByXML_4", filename: filepath.Join(s.samplesDir, "Eagle/Eagle.sch"), candidates: []string{"Eagle"}, expected: []string{"Eagle"}},
   412  	}
   413  
   414  	for _, test := range tests {
   415  		content, err := ioutil.ReadFile(test.filename)
   416  		assert.NoError(s.T(), err)
   417  
   418  		languages := GetLanguagesByXML(test.filename, content, test.candidates)
   419  		assert.Equal(s.T(), test.expected, languages, fmt.Sprintf("%v: languages = %v, expected: %v", test.name, languages, test.expected))
   420  	}
   421  }
   422  
   423  func (s *EnryTestSuite) TestGetLanguagesByClassifier() {
   424  	test := []struct {
   425  		name       string
   426  		filename   string
   427  		candidates []string
   428  		expected   string
   429  	}{
   430  		{name: "TestGetLanguagesByClassifier_1", filename: filepath.Join(s.samplesDir, "C/blob.c"), candidates: []string{"python", "ruby", "c", "c++"}, expected: "C"},
   431  		{name: "TestGetLanguagesByClassifier_2", filename: filepath.Join(s.samplesDir, "C/blob.c"), candidates: nil, expected: OtherLanguage},
   432  		{name: "TestGetLanguagesByClassifier_3", filename: filepath.Join(s.samplesDir, "C++/runtime-compiler.cc"), candidates: []string{}, expected: OtherLanguage},
   433  		{name: "TestGetLanguagesByClassifier_4", filename: filepath.Join(s.samplesDir, "C/blob.c"), candidates: []string{"python", "ruby", "c++"}, expected: "C++"},
   434  		{name: "TestGetLanguagesByClassifier_5", filename: filepath.Join(s.samplesDir, "C/blob.c"), candidates: []string{"ruby"}, expected: "Ruby"},
   435  		{name: "TestGetLanguagesByClassifier_6", filename: filepath.Join(s.samplesDir, "Python/django-models-base.py"), candidates: []string{"python", "ruby", "c", "c++"}, expected: "Python"},
   436  		{name: "TestGetLanguagesByClassifier_7", filename: "", candidates: []string{"python"}, expected: "Python"},
   437  	}
   438  
   439  	for _, test := range test {
   440  		var content []byte
   441  		var err error
   442  
   443  		if test.filename != "" {
   444  			content, err = ioutil.ReadFile(test.filename)
   445  			assert.NoError(s.T(), err)
   446  		}
   447  
   448  		languages := GetLanguagesByClassifier(test.filename, content, test.candidates)
   449  		var language string
   450  		if len(languages) == 0 {
   451  			language = OtherLanguage
   452  		} else {
   453  			language = languages[0]
   454  		}
   455  
   456  		assert.Equal(s.T(), test.expected, language, fmt.Sprintf("%v: language = %v, expected: %v", test.name, language, test.expected))
   457  	}
   458  }
   459  
   460  func (s *EnryTestSuite) TestGetLanguagesBySpecificClassifier() {
   461  	test := []struct {
   462  		name       string
   463  		filename   string
   464  		candidates []string
   465  		classifier classifier
   466  		expected   string
   467  	}{
   468  		{name: "TestGetLanguagesByClassifier_1", filename: filepath.Join(s.samplesDir, "C/blob.c"), candidates: []string{"python", "ruby", "c", "c++"}, classifier: defaultClassifier, expected: "C"},
   469  		{name: "TestGetLanguagesByClassifier_2", filename: filepath.Join(s.samplesDir, "C/blob.c"), candidates: nil, classifier: defaultClassifier, expected: "C"},
   470  		{name: "TestGetLanguagesByClassifier_3", filename: filepath.Join(s.samplesDir, "C++/runtime-compiler.cc"), candidates: []string{}, classifier: defaultClassifier, expected: "C++"},
   471  		{name: "TestGetLanguagesByClassifier_4", filename: filepath.Join(s.samplesDir, "C/blob.c"), candidates: []string{"python", "ruby", "c++"}, classifier: defaultClassifier, expected: "C++"},
   472  		{name: "TestGetLanguagesByClassifier_5", filename: filepath.Join(s.samplesDir, "C/blob.c"), candidates: []string{"ruby"}, classifier: defaultClassifier, expected: "Ruby"},
   473  		{name: "TestGetLanguagesByClassifier_6", filename: filepath.Join(s.samplesDir, "Python/django-models-base.py"), candidates: []string{"python", "ruby", "c", "c++"}, classifier: defaultClassifier, expected: "Python"},
   474  		{name: "TestGetLanguagesByClassifier_7", filename: os.DevNull, candidates: nil, classifier: defaultClassifier, expected: "XML"},
   475  	}
   476  
   477  	for _, test := range test {
   478  		content, err := ioutil.ReadFile(test.filename)
   479  		assert.NoError(s.T(), err)
   480  
   481  		languages := getLanguagesBySpecificClassifier(content, test.candidates, test.classifier)
   482  		var language string
   483  		if len(languages) == 0 {
   484  			language = OtherLanguage
   485  		} else {
   486  			language = languages[0]
   487  		}
   488  
   489  		assert.Equal(s.T(), test.expected, language, fmt.Sprintf("%v: language = %v, expected: %v", test.name, language, test.expected))
   490  	}
   491  }
   492  
   493  func (s *EnryTestSuite) TestGetLanguageExtensions() {
   494  	tests := []struct {
   495  		name     string
   496  		language string
   497  		expected []string
   498  	}{
   499  		{name: "TestGetLanguageExtensions_1", language: "foo", expected: nil},
   500  		{name: "TestGetLanguageExtensions_2", language: "COBOL", expected: []string{".cob", ".cbl", ".ccp", ".cobol", ".cpy"}},
   501  		{name: "TestGetLanguageExtensions_3", language: "Maven POM", expected: nil},
   502  	}
   503  
   504  	for _, test := range tests {
   505  		extensions := GetLanguageExtensions(test.language)
   506  		assert.EqualValues(s.T(), test.expected, extensions, fmt.Sprintf("%v: extensions = %v, expected: %v", test.name, extensions, test.expected))
   507  	}
   508  }
   509  
   510  func (s *EnryTestSuite) TestGetLanguageType() {
   511  	tests := []struct {
   512  		name     string
   513  		language string
   514  		expected Type
   515  	}{
   516  		{name: "TestGetLanguageType_1", language: "BestLanguageEver", expected: Unknown},
   517  		{name: "TestGetLanguageType_2", language: "JSON", expected: Data},
   518  		{name: "TestGetLanguageType_3", language: "COLLADA", expected: Data},
   519  		{name: "TestGetLanguageType_4", language: "Go", expected: Programming},
   520  		{name: "TestGetLanguageType_5", language: "Brainfuck", expected: Programming},
   521  		{name: "TestGetLanguageType_6", language: "HTML", expected: Markup},
   522  		{name: "TestGetLanguageType_7", language: "Sass", expected: Markup},
   523  		{name: "TestGetLanguageType_8", language: "AsciiDoc", expected: Prose},
   524  		{name: "TestGetLanguageType_9", language: "Textile", expected: Prose},
   525  	}
   526  
   527  	for _, test := range tests {
   528  		langType := GetLanguageType(test.language)
   529  		assert.Equal(s.T(), test.expected, langType, fmt.Sprintf("%v: langType = %v, expected: %v", test.name, langType, test.expected))
   530  	}
   531  }
   532  
   533  func (s *EnryTestSuite) TestGetLanguageGroup() {
   534  	tests := []struct {
   535  		name     string
   536  		language string
   537  		expected string
   538  	}{
   539  		{name: "TestGetLanguageGroup_1", language: "BestLanguageEver", expected: ""},
   540  		{name: "TestGetLanguageGroup_2", language: "Bison", expected: "Yacc"},
   541  		{name: "TestGetLanguageGroup_3", language: "HTML+PHP", expected: "HTML"},
   542  		{name: "TestGetLanguageGroup_4", language: "HTML", expected: ""},
   543  	}
   544  
   545  	for _, test := range tests {
   546  		langGroup := GetLanguageGroup(test.language)
   547  		assert.Equal(s.T(), test.expected, langGroup, fmt.Sprintf("%v: langGroup = %v, expected: %v", test.name, langGroup, test.expected))
   548  	}
   549  }
   550  
   551  func (s *EnryTestSuite) TestGetLanguageByAlias() {
   552  	tests := []struct {
   553  		name         string
   554  		alias        string
   555  		expectedLang string
   556  		expectedOk   bool
   557  	}{
   558  		{name: "TestGetLanguageByAlias_1", alias: "BestLanguageEver", expectedLang: OtherLanguage, expectedOk: false},
   559  		{name: "TestGetLanguageByAlias_2", alias: "aspx-vb", expectedLang: "ASP.NET", expectedOk: true},
   560  		{name: "TestGetLanguageByAlias_3", alias: "C++", expectedLang: "C++", expectedOk: true},
   561  		{name: "TestGetLanguageByAlias_4", alias: "c++", expectedLang: "C++", expectedOk: true},
   562  		{name: "TestGetLanguageByAlias_5", alias: "objc", expectedLang: "Objective-C", expectedOk: true},
   563  		{name: "TestGetLanguageByAlias_6", alias: "golang", expectedLang: "Go", expectedOk: true},
   564  		{name: "TestGetLanguageByAlias_7", alias: "GOLANG", expectedLang: "Go", expectedOk: true},
   565  		{name: "TestGetLanguageByAlias_8", alias: "bsdmake", expectedLang: "Makefile", expectedOk: true},
   566  		{name: "TestGetLanguageByAlias_9", alias: "xhTmL", expectedLang: "HTML", expectedOk: true},
   567  		{name: "TestGetLanguageByAlias_10", alias: "python", expectedLang: "Python", expectedOk: true},
   568  	}
   569  
   570  	for _, test := range tests {
   571  		lang, ok := GetLanguageByAlias(test.alias)
   572  		assert.Equal(s.T(), test.expectedLang, lang, fmt.Sprintf("%v: lang = %v, expected: %v", test.name, lang, test.expectedLang))
   573  		assert.Equal(s.T(), test.expectedOk, ok, fmt.Sprintf("%v: ok = %v, expected: %v", test.name, ok, test.expectedOk))
   574  	}
   575  }
   576  
   577  func (s *EnryTestSuite) TestLinguistCorpus() {
   578  	const filenamesDir = "filenames"
   579  	var cornerCases = map[string]bool{
   580  		"drop_stuff.sql":        true, // https://github.com/src-d/enry/issues/194
   581  		"textobj-rubyblock.vba": true, // Because of unsupported negative lookahead RE syntax (https://github.com/github/linguist/blob/8083cb5a89cee2d99f5a988f165994d0243f0d1e/lib/linguist/heuristics.yml#L521)
   582  		// .es and .ice fail heuristics parsing, but do not fail any tests
   583  	}
   584  
   585  	var total, failed, ok, other int
   586  	var expected string
   587  	filepath.Walk(s.samplesDir, func(path string, f os.FileInfo, err error) error {
   588  		if f.IsDir() {
   589  			if f.Name() != filenamesDir {
   590  				expected, _ = data.LanguageByAlias(f.Name())
   591  			}
   592  
   593  			return nil
   594  		}
   595  
   596  		filename := filepath.Base(path)
   597  		content, _ := ioutil.ReadFile(path)
   598  
   599  		total++
   600  		obtained := GetLanguage(filename, content)
   601  		if obtained == OtherLanguage {
   602  			obtained = "Other"
   603  			other++
   604  		}
   605  
   606  		var status string
   607  		if expected == obtained {
   608  			status = "ok"
   609  			ok++
   610  		} else {
   611  			status = "failed"
   612  			failed++
   613  		}
   614  
   615  		if _, ok := cornerCases[filename]; ok {
   616  			s.T().Logf("\t\t[considered corner case] %s\texpected: %s\tobtained: %s\tstatus: %s\n", filename, expected, obtained, status)
   617  		} else {
   618  			assert.Equal(s.T(), expected, obtained, fmt.Sprintf("%s\texpected: %s\tobtained: %s\tstatus: %s\n", filename, expected, obtained, status))
   619  		}
   620  
   621  		return nil
   622  	})
   623  
   624  	s.T().Logf("\t\ttotal files: %d, ok: %d, failed: %d, other: %d\n", total, ok, failed, other)
   625  }
   626  
   627  func (s *EnryTestSuite) TestGetLanguageID() {
   628  	tests := []struct {
   629  		name       string
   630  		language   string
   631  		expectedID int
   632  		found      bool
   633  	}{
   634  		{name: "TestGetLanguageID_1", language: "1C Enterprise", expectedID: 0, found: true},
   635  		{name: "TestGetLanguageID_2", language: "BestLanguageEver", expectedID: 0, found: false},
   636  		{name: "TestGetLanguageID_3", language: "C++", expectedID: 43, found: true},
   637  		{name: "TestGetLanguageID_5", language: "Objective-C", expectedID: 257, found: true},
   638  		{name: "TestGetLanguageID_6", language: "golang", expectedID: 0, found: false}, // Aliases are not supported
   639  		{name: "TestGetLanguageID_7", language: "Go", expectedID: 132, found: true},
   640  		{name: "TestGetLanguageID_8", language: "Makefile", expectedID: 220, found: true},
   641  	}
   642  
   643  	for _, test := range tests {
   644  		id, found := GetLanguageID(test.language)
   645  		assert.Equal(s.T(), test.expectedID, id, fmt.Sprintf("%v: id = %v, expected: %v", test.name, id, test.expectedID))
   646  		assert.Equal(s.T(), test.found, found, fmt.Sprintf("%v: found = %t, expected: %t", test.name, found, test.found))
   647  	}
   648  }
   649  
   650  func (s *EnryTestSuite) TestGetLanguageInfo() {
   651  	tests := []struct {
   652  		name       string
   653  		language   string
   654  		expectedID int
   655  		error      bool
   656  	}{
   657  		{name: "TestGetLanguageID_1", language: "1C Enterprise", expectedID: 0},
   658  		{name: "TestGetLanguageID_2", language: "BestLanguageEver", error: true},
   659  		{name: "TestGetLanguageID_3", language: "C++", expectedID: 43},
   660  		{name: "TestGetLanguageID_5", language: "Objective-C", expectedID: 257},
   661  		{name: "TestGetLanguageID_6", language: "golang", error: true}, // Aliases are not supported
   662  		{name: "TestGetLanguageID_7", language: "Go", expectedID: 132},
   663  		{name: "TestGetLanguageID_8", language: "Makefile", expectedID: 220},
   664  	}
   665  
   666  	for _, test := range tests {
   667  		info, err := GetLanguageInfo(test.language)
   668  		if test.error {
   669  			assert.Error(s.T(), err, "%v: expected error for %q", test.name, test.language)
   670  		} else {
   671  			assert.NoError(s.T(), err)
   672  			assert.Equal(s.T(), test.expectedID, info.LanguageID, fmt.Sprintf("%v: id = %v, expected: %v", test.name, info.LanguageID, test.expectedID))
   673  		}
   674  	}
   675  }
   676  
   677  func (s *EnryTestSuite) TestGetLanguageInfoByID() {
   678  	tests := []struct {
   679  		name         string
   680  		id           int
   681  		expectedName string
   682  		error        bool
   683  	}{
   684  		{name: "TestGetLanguageID_1", id: 0, expectedName: "1C Enterprise"},
   685  		{name: "TestGetLanguageID_2", id: -1, error: true},
   686  		{name: "TestGetLanguageID_3", id: 43, expectedName: "C++"},
   687  		{name: "TestGetLanguageID_5", id: 257, expectedName: "Objective-C"},
   688  		{name: "TestGetLanguageID_7", id: 132, expectedName: "Go"},
   689  		{name: "TestGetLanguageID_8", id: 220, expectedName: "Makefile"},
   690  	}
   691  
   692  	for _, test := range tests {
   693  		info, err := GetLanguageInfoByID(test.id)
   694  		if test.error {
   695  			assert.Error(s.T(), err, "%v: expected error for %q", test.name, test.id)
   696  		} else {
   697  			assert.NoError(s.T(), err)
   698  			assert.Equal(s.T(), test.expectedName, info.Name, fmt.Sprintf("%v: id = %v, expected: %v", test.name, test.id, test.expectedName))
   699  		}
   700  	}
   701  }