git.templeos.me/xultist/go-enry/v2@v2.0.0-20230215093429-6ef3e87f47c0/common_test.go (about)

     1  package enry
     2  
     3  import (
     4  	"fmt"
     5  	"io/ioutil"
     6  	"os"
     7  	"os/exec"
     8  	"path/filepath"
     9  	"strings"
    10  	"testing"
    11  
    12  	"github.com/go-enry/go-enry/v2/data"
    13  
    14  	"github.com/stretchr/testify/assert"
    15  	"github.com/stretchr/testify/require"
    16  	"github.com/stretchr/testify/suite"
    17  )
    18  
    19  const linguistURL = "https://github.com/github/linguist.git"
    20  const linguistClonedEnvVar = "ENRY_TEST_REPO"
    21  
    22  // not a part of the test Suite as benchmark does not use testify
    23  func maybeCloneLinguist() (string, bool, error) {
    24  	var err error
    25  	linguistTmpDir := os.Getenv(linguistClonedEnvVar)
    26  	isCleanupNeeded := false
    27  	isLinguistCloned := linguistTmpDir != ""
    28  	if !isLinguistCloned {
    29  		linguistTmpDir, err = ioutil.TempDir("", "linguist-")
    30  		if err != nil {
    31  			return "", false, err
    32  		}
    33  
    34  		isCleanupNeeded = true
    35  		cmd := exec.Command("git", "clone", "--depth", "100", linguistURL, linguistTmpDir)
    36  		if err := cmd.Run(); err != nil {
    37  			return linguistTmpDir, isCleanupNeeded, err
    38  		}
    39  	}
    40  
    41  	cwd, err := os.Getwd()
    42  	if err != nil {
    43  		return linguistTmpDir, isCleanupNeeded, err
    44  	}
    45  
    46  	if err = os.Chdir(linguistTmpDir); err != nil {
    47  		return linguistTmpDir, isCleanupNeeded, err
    48  	}
    49  
    50  	cmd := exec.Command("git", "checkout", data.LinguistCommit)
    51  	if err := cmd.Run(); err != nil {
    52  		return linguistTmpDir, isCleanupNeeded, err
    53  	}
    54  
    55  	if err = os.Chdir(cwd); err != nil {
    56  		return linguistTmpDir, isCleanupNeeded, err
    57  	}
    58  	return linguistTmpDir, isCleanupNeeded, nil
    59  }
    60  
// enryBaseTestSuite is the shared testify suite state: it records where the
// Linguist repository used by the tests lives and whether it must be removed
// once the suite is done.
type enryBaseTestSuite struct {
	suite.Suite
	// tmpLinguistDir is the repository path returned by maybeCloneLinguist.
	tmpLinguistDir  string
	// isCleanupNeeded tells TearDownSuite to remove tmpLinguistDir.
	isCleanupNeeded bool
	// samplesDir is the "samples" directory inside tmpLinguistDir.
	samplesDir      string
	// testFixturesDir is the "test/fixtures" directory inside tmpLinguistDir.
	testFixturesDir string
}
    68  
    69  func (s *enryBaseTestSuite) SetupSuite() {
    70  	var err error
    71  	s.tmpLinguistDir, s.isCleanupNeeded, err = maybeCloneLinguist()
    72  	require.NoError(s.T(), err)
    73  
    74  	s.samplesDir = filepath.Join(s.tmpLinguistDir, "samples")
    75  	s.testFixturesDir = filepath.Join(s.tmpLinguistDir, "test", "fixtures")
    76  }
    77  
    78  func (s *enryBaseTestSuite) TearDownSuite() {
    79  	if s.isCleanupNeeded {
    80  		err := os.RemoveAll(s.tmpLinguistDir)
    81  		require.NoError(s.T(), err)
    82  	}
    83  }
    84  
// enryTestSuite holds the language-detection tests in this file; it embeds
// enryBaseTestSuite and so uses its SetupSuite/TearDownSuite and directories.
type enryTestSuite struct {
	enryBaseTestSuite
}
    88  
    89  func Test_EnryTestSuite(t *testing.T) {
    90  	suite.Run(t, new(enryTestSuite))
    91  }
    92  
    93  func (s *enryTestSuite) TestRegexpEdgeCases() {
    94  	var regexpEdgeCases = []struct {
    95  		lang     string
    96  		filename string
    97  	}{
    98  		{lang: "ActionScript", filename: "FooBar.as"},
    99  		{lang: "Forth", filename: "asm.fr"},
   100  		{lang: "X PixMap", filename: "cc-public_domain_mark_white.pm"},
   101  		//{lang: "SQL", filename: "drop_stuff.sql"}, // https://github.com/src-d/enry/issues/194
   102  		{lang: "Fstar", filename: "Hacl.Spec.Bignum.Fmul.fst"},
   103  		{lang: "C++", filename: "Types.h"},
   104  	}
   105  
   106  	for _, r := range regexpEdgeCases {
   107  		filename := filepath.Join(s.tmpLinguistDir, "samples", r.lang, r.filename)
   108  
   109  		content, err := ioutil.ReadFile(filename)
   110  		require.NoError(s.T(), err)
   111  
   112  		lang := GetLanguage(r.filename, content)
   113  		s.T().Logf("File:%s, lang:%s", filename, lang)
   114  
   115  		expLang, _ := data.LanguageByAlias(r.lang)
   116  		require.EqualValues(s.T(), expLang, lang)
   117  	}
   118  }
   119  
   120  func (s *enryTestSuite) TestGetLanguage() {
   121  	tests := []struct {
   122  		name     string
   123  		filename string
   124  		content  []byte
   125  		expected string
   126  		safe     bool
   127  	}{
   128  		{name: "TestGetLanguage_0", filename: "foo.h", content: []byte{}, expected: "C"},
   129  		{name: "TestGetLanguage_1", filename: "foo.py", content: []byte{}, expected: "Python"},
   130  		{name: "TestGetLanguage_2", filename: "foo.m", content: []byte(":- module"), expected: "Mercury"},
   131  		{name: "TestGetLanguage_3", filename: "foo.m", content: nil, expected: "MATLAB"},
   132  		{name: "TestGetLanguage_4", filename: "foo.mo", content: []byte{0xDE, 0x12, 0x04, 0x95, 0x00, 0x00, 0x00, 0x00}, expected: OtherLanguage},
   133  		{name: "TestGetLanguage_5", filename: "", content: nil, expected: OtherLanguage},
   134  	}
   135  
   136  	for _, test := range tests {
   137  		language := GetLanguage(test.filename, test.content)
   138  		assert.Equal(s.T(), test.expected, language, fmt.Sprintf("%v: %v, expected: %v", test.name, language, test.expected))
   139  	}
   140  }
   141  
   142  func (s *enryTestSuite) TestGetLanguages() {
   143  	tests := []struct {
   144  		name     string
   145  		filename string
   146  		content  []byte
   147  		expected []string
   148  	}{
   149  		// With no content or filename, no language can be detected
   150  		{name: "TestGetLanguages_0", filename: "", content: []byte{}, expected: nil},
   151  		// The strategy that will match is GetLanguagesByExtension. Lacking content, it will return those results.
   152  		{name: "TestGetLanguages_1", filename: "foo.h", content: []byte{}, expected: []string{"C"}},
   153  		// GetLanguagesByExtension will return an unambiguous match when there is a single result.
   154  		{name: "TestGetLanguages_2", filename: "foo.groovy", content: []byte{}, expected: []string{"Groovy"}},
   155  		// GetLanguagesByExtension will return "Rust", "RenderScript" for .rs,
   156  		// then GetLanguagesByContent will take the first rule that matches (in this case Rust)
   157  		{name: "TestGetLanguages_3", filename: "foo.rs", content: []byte("use \n#include"), expected: []string{"Rust"}},
   158  		// .. and in this case, RenderScript (no content that matches a Rust regex can be included, because it runs first.)
   159  		{name: "TestGetLanguages_4", filename: "foo.rs", content: []byte("#include"), expected: []string{"RenderScript"}},
   160  		// GetLanguagesByExtension will return "AMPL", "Linux Kernel Module", "Modula-2", "XML",
   161  		// then GetLanguagesByContent will ALWAYS return Linux Kernel Module and AMPL when there is no content,
   162  		// and no further classifier can do anything without content
   163  		{name: "TestGetLanguages_5", filename: "foo.mod", content: []byte{}, expected: []string{"Linux Kernel Module", "AMPL"}},
   164  		// ...with some AMPL tokens, the DefaultClassifier will pick AMPL as the most likely language.
   165  		{name: "TestGetLanguages_6", filename: "foo.mod", content: []byte("BEAMS ROWS - TotalWeight"), expected: []string{"AMPL", "Linux Kernel Module"}},
   166  	}
   167  
   168  	for _, test := range tests {
   169  		languages := GetLanguages(test.filename, test.content)
   170  		assert.Equal(s.T(), test.expected, languages, fmt.Sprintf("%v: %v, expected: %v", test.name, languages, test.expected))
   171  	}
   172  }
   173  
   174  func (s *enryTestSuite) TestGetLanguagesByModelineLinguist() {
   175  	var modelinesDir = filepath.Join(s.tmpLinguistDir, "test", "fixtures", "Data", "Modelines")
   176  
   177  	tests := []struct {
   178  		name       string
   179  		filename   string
   180  		candidates []string
   181  		expected   []string
   182  	}{
   183  		// Emacs
   184  		{name: "TestGetLanguagesByModelineLinguist_1", filename: filepath.Join(modelinesDir, "example_smalltalk.md"), expected: []string{"Smalltalk"}},
   185  		{name: "TestGetLanguagesByModelineLinguist_2", filename: filepath.Join(modelinesDir, "fundamentalEmacs.c"), expected: []string{"Text"}},
   186  		{name: "TestGetLanguagesByModelineLinguist_3", filename: filepath.Join(modelinesDir, "iamphp.inc"), expected: []string{"PHP"}},
   187  		{name: "TestGetLanguagesByModelineLinguist_4", filename: filepath.Join(modelinesDir, "seeplusplusEmacs1"), expected: []string{"C++"}},
   188  		{name: "TestGetLanguagesByModelineLinguist_5", filename: filepath.Join(modelinesDir, "seeplusplusEmacs2"), expected: []string{"C++"}},
   189  		{name: "TestGetLanguagesByModelineLinguist_6", filename: filepath.Join(modelinesDir, "seeplusplusEmacs3"), expected: []string{"C++"}},
   190  		{name: "TestGetLanguagesByModelineLinguist_7", filename: filepath.Join(modelinesDir, "seeplusplusEmacs4"), expected: []string{"C++"}},
   191  		{name: "TestGetLanguagesByModelineLinguist_8", filename: filepath.Join(modelinesDir, "seeplusplusEmacs5"), expected: []string{"C++"}},
   192  		{name: "TestGetLanguagesByModelineLinguist_9", filename: filepath.Join(modelinesDir, "seeplusplusEmacs6"), expected: []string{"C++"}},
   193  		{name: "TestGetLanguagesByModelineLinguist_10", filename: filepath.Join(modelinesDir, "seeplusplusEmacs7"), expected: []string{"C++"}},
   194  		{name: "TestGetLanguagesByModelineLinguist_11", filename: filepath.Join(modelinesDir, "seeplusplusEmacs9"), expected: []string{"C++"}},
   195  		{name: "TestGetLanguagesByModelineLinguist_12", filename: filepath.Join(modelinesDir, "seeplusplusEmacs10"), expected: []string{"C++"}},
   196  		{name: "TestGetLanguagesByModelineLinguist_13", filename: filepath.Join(modelinesDir, "seeplusplusEmacs11"), expected: []string{"C++"}},
   197  		{name: "TestGetLanguagesByModelineLinguist_14", filename: filepath.Join(modelinesDir, "seeplusplusEmacs12"), expected: []string{"C++"}},
   198  
   199  		// Vim
   200  		{name: "TestGetLanguagesByModelineLinguist_15", filename: filepath.Join(modelinesDir, "seeplusplus"), expected: []string{"C++"}},
   201  		{name: "TestGetLanguagesByModelineLinguist_16", filename: filepath.Join(modelinesDir, "iamjs.pl"), expected: []string{"JavaScript"}},
   202  		{name: "TestGetLanguagesByModelineLinguist_17", filename: filepath.Join(modelinesDir, "iamjs2.pl"), expected: []string{"JavaScript"}},
   203  		{name: "TestGetLanguagesByModelineLinguist_18", filename: filepath.Join(modelinesDir, "not_perl.pl"), expected: []string{"Prolog"}},
   204  		{name: "TestGetLanguagesByModelineLinguist_19", filename: filepath.Join(modelinesDir, "ruby"), expected: []string{"Ruby"}},
   205  		{name: "TestGetLanguagesByModelineLinguist_20", filename: filepath.Join(modelinesDir, "ruby2"), expected: []string{"Ruby"}},
   206  		{name: "TestGetLanguagesByModelineLinguist_21", filename: filepath.Join(modelinesDir, "ruby3"), expected: []string{"Ruby"}},
   207  		{name: "TestGetLanguagesByModelineLinguist_22", filename: filepath.Join(modelinesDir, "ruby4"), expected: []string{"Ruby"}},
   208  		{name: "TestGetLanguagesByModelineLinguist_23", filename: filepath.Join(modelinesDir, "ruby5"), expected: []string{"Ruby"}},
   209  		{name: "TestGetLanguagesByModelineLinguist_24", filename: filepath.Join(modelinesDir, "ruby6"), expected: []string{"Ruby"}},
   210  		{name: "TestGetLanguagesByModelineLinguist_25", filename: filepath.Join(modelinesDir, "ruby7"), expected: []string{"Ruby"}},
   211  		{name: "TestGetLanguagesByModelineLinguist_26", filename: filepath.Join(modelinesDir, "ruby8"), expected: []string{"Ruby"}},
   212  		{name: "TestGetLanguagesByModelineLinguist_27", filename: filepath.Join(modelinesDir, "ruby9"), expected: []string{"Ruby"}},
   213  		{name: "TestGetLanguagesByModelineLinguist_28", filename: filepath.Join(modelinesDir, "ruby10"), expected: []string{"Ruby"}},
   214  		{name: "TestGetLanguagesByModelineLinguist_29", filename: filepath.Join(modelinesDir, "ruby11"), expected: []string{"Ruby"}},
   215  		{name: "TestGetLanguagesByModelineLinguist_30", filename: filepath.Join(modelinesDir, "ruby12"), expected: []string{"Ruby"}},
   216  		{name: "TestGetLanguagesByModelineLinguist_31", filename: filepath.Join(s.samplesDir, "C++/runtime-compiler.cc"), expected: nil},
   217  		{name: "TestGetLanguagesByModelineLinguist_32", filename: "", expected: nil},
   218  	}
   219  
   220  	for _, test := range tests {
   221  		var content []byte
   222  		var err error
   223  
   224  		if test.filename != "" {
   225  			content, err = ioutil.ReadFile(test.filename)
   226  			assert.NoError(s.T(), err)
   227  		}
   228  
   229  		languages := GetLanguagesByModeline(test.filename, content, test.candidates)
   230  		assert.Equal(s.T(), test.expected, languages, fmt.Sprintf("%v: languages = %v, expected: %v", test.name, languages, test.expected))
   231  	}
   232  }
   233  
   234  func (s *enryTestSuite) TestGetLanguagesByModeline() {
   235  	const (
   236  		wrongVim  = `# vim: set syntax=ruby ft  =python filetype=perl :`
   237  		rightVim  = `/* vim: set syntax=python ft   =python filetype=python */`
   238  		noLangVim = `/* vim: set shiftwidth=4 softtabstop=0 cindent cinoptions={1s: */`
   239  	)
   240  
   241  	tests := []struct {
   242  		name       string
   243  		filename   string
   244  		content    []byte
   245  		candidates []string
   246  		expected   []string
   247  	}{
   248  		{name: "TestGetLanguagesByModeline_1", content: []byte(wrongVim), expected: nil},
   249  		{name: "TestGetLanguagesByModeline_2", content: []byte(rightVim), expected: []string{"Python"}},
   250  		{name: "TestGetLanguagesByModeline_3", content: []byte(noLangVim), expected: nil},
   251  		{name: "TestGetLanguagesByModeline_4", content: nil, expected: nil},
   252  		{name: "TestGetLanguagesByModeline_5", content: []byte{}, expected: nil},
   253  	}
   254  
   255  	for _, test := range tests {
   256  		languages := GetLanguagesByModeline(test.filename, test.content, test.candidates)
   257  		assert.Equal(s.T(), test.expected, languages, fmt.Sprintf("%v: languages = %v, expected: %v", test.name, languages, test.expected))
   258  	}
   259  }
   260  
   261  func (s *enryTestSuite) TestGetLanguagesByFilename() {
   262  	tests := []struct {
   263  		name       string
   264  		filename   string
   265  		content    []byte
   266  		candidates []string
   267  		expected   []string
   268  	}{
   269  		{name: "TestGetLanguagesByFilename_1", filename: "unknown.interpreter", expected: nil},
   270  		{name: "TestGetLanguagesByFilename_2", filename: ".bashrc", expected: []string{"Shell"}},
   271  		{name: "TestGetLanguagesByFilename_3", filename: "Dockerfile", expected: []string{"Dockerfile"}},
   272  		{name: "TestGetLanguagesByFilename_4", filename: "Makefile.frag", expected: []string{"Makefile"}},
   273  		{name: "TestGetLanguagesByFilename_5", filename: "makefile", expected: []string{"Makefile"}},
   274  		{name: "TestGetLanguagesByFilename_6", filename: "Vagrantfile", expected: []string{"Ruby"}},
   275  		{name: "TestGetLanguagesByFilename_7", filename: "_vimrc", expected: []string{"Vim Script"}},
   276  		{name: "TestGetLanguagesByFilename_8", filename: "pom.xml", expected: []string{"Maven POM"}},
   277  		{name: "TestGetLanguagesByFilename_9", filename: "", expected: nil},
   278  	}
   279  
   280  	for _, test := range tests {
   281  		languages := GetLanguagesByFilename(test.filename, test.content, test.candidates)
   282  		assert.Equal(s.T(), len(test.expected), len(languages), fmt.Sprintf("%v: number of languages = %v, expected: %v", test.name, len(languages), len(test.expected)))
   283  		for i := range languages { // case-insensitive name comparison
   284  			assert.True(s.T(), strings.EqualFold(test.expected[i], languages[i]), fmt.Sprintf("%v: languages = %v, expected: %v", test.name, languages, test.expected))
   285  		}
   286  	}
   287  }
   288  
   289  func (s *enryTestSuite) TestGetLanguagesByShebang() {
   290  	const (
   291  		multilineExecHack = `#!/bin/sh
   292  # Next line is comment in Tcl, but not in sh... \
   293  exec tclsh "$0" ${1+"$@"}`
   294  
   295  		multilineNoExecHack = `#!/bin/sh
   296  #<<<#
   297  echo "A shell script in a zkl program ($0)"
   298  echo "Now run zkl <this file> with Hello World as args"
   299  zkl $0 Hello World!
   300  exit
   301  #<<<#
   302  println("The shell script says ",vm.arglist.concat(" "));`
   303  	)
   304  
   305  	tests := []struct {
   306  		name       string
   307  		filename   string
   308  		content    []byte
   309  		candidates []string
   310  		expected   []string
   311  	}{
   312  		{name: "TestGetLanguagesByShebang_1", content: []byte(`#!/unknown/interpreter`), expected: nil},
   313  		{name: "TestGetLanguagesByShebang_2", content: []byte(`no shebang`), expected: nil},
   314  		{name: "TestGetLanguagesByShebang_3", content: []byte(`#!/usr/bin/env`), expected: nil},
   315  		{name: "TestGetLanguagesByShebang_4", content: []byte(`#!/usr/bin/python -tt`), expected: []string{"Python"}},
   316  		{name: "TestGetLanguagesByShebang_5", content: []byte(`#!/usr/bin/env python2.6`), expected: []string{"Python"}},
   317  		{name: "TestGetLanguagesByShebang_6", content: []byte(`#!/usr/bin/env perl`), expected: []string{"Perl", "Pod"}},
   318  		{name: "TestGetLanguagesByShebang_7", content: []byte(`#!	/bin/sh`), expected: []string{"Shell"}},
   319  		{name: "TestGetLanguagesByShebang_8", content: []byte(`#!bash`), expected: []string{"Shell"}},
   320  		{name: "TestGetLanguagesByShebang_9", content: []byte(multilineExecHack), expected: []string{"Tcl"}},
   321  		{name: "TestGetLanguagesByShebang_10", content: []byte(multilineNoExecHack), expected: []string{"Shell"}},
   322  		{name: "TestGetLanguagesByShebang_11", content: []byte(`#!/envinpath/python`), expected: []string{"Python"}},
   323  
   324  		{name: "TestGetLanguagesByShebang_12", content: []byte(""), expected: nil},
   325  		{name: "TestGetLanguagesByShebang_13", content: []byte("foo"), expected: nil},
   326  		{name: "TestGetLanguagesByShebang_14", content: []byte("#bar"), expected: nil},
   327  		{name: "TestGetLanguagesByShebang_15", content: []byte("#baz"), expected: nil},
   328  		{name: "TestGetLanguagesByShebang_16", content: []byte("///"), expected: nil},
   329  		{name: "TestGetLanguagesByShebang_17", content: []byte("\n\n\n\n\n"), expected: nil},
   330  		{name: "TestGetLanguagesByShebang_18", content: []byte(" #!/usr/sbin/ruby"), expected: nil},
   331  		{name: "TestGetLanguagesByShebang_19", content: []byte("\n#!/usr/sbin/ruby"), expected: nil},
   332  		{name: "TestGetLanguagesByShebang_20", content: []byte("#!"), expected: nil},
   333  		{name: "TestGetLanguagesByShebang_21", content: []byte("#! "), expected: nil},
   334  		{name: "TestGetLanguagesByShebang_22", content: []byte("#!/usr/bin/env"), expected: nil},
   335  		{name: "TestGetLanguagesByShebang_23", content: []byte("#!/usr/bin/env osascript -l JavaScript"), expected: nil},
   336  		{name: "TestGetLanguagesByShebang_24", content: []byte("#!/usr/bin/env osascript -l AppleScript"), expected: nil},
   337  		{name: "TestGetLanguagesByShebang_25", content: []byte("#!/usr/bin/env osascript -l foobar"), expected: nil},
   338  		{name: "TestGetLanguagesByShebang_26", content: []byte("#!/usr/bin/osascript -l JavaScript"), expected: nil},
   339  		{name: "TestGetLanguagesByShebang_27", content: []byte("#!/usr/bin/osascript -l foobar"), expected: nil},
   340  
   341  		{name: "TestGetLanguagesByShebang_28", content: []byte("#!/usr/sbin/ruby\n# bar"), expected: []string{"Ruby"}},
   342  		{name: "TestGetLanguagesByShebang_29", content: []byte("#!/usr/bin/ruby\n# foo"), expected: []string{"Ruby"}},
   343  		{name: "TestGetLanguagesByShebang_30", content: []byte("#!/usr/sbin/ruby"), expected: []string{"Ruby"}},
   344  		{name: "TestGetLanguagesByShebang_31", content: []byte("#!/usr/sbin/ruby foo bar baz\n"), expected: []string{"Ruby"}},
   345  
   346  		{name: "TestGetLanguagesByShebang_32", content: []byte("#!/usr/bin/env Rscript\n# example R script\n#\n"), expected: []string{"R"}},
   347  		{name: "TestGetLanguagesByShebang_33", content: []byte("#!/usr/bin/env ruby\n# baz"), expected: []string{"Ruby"}},
   348  
   349  		{name: "TestGetLanguagesByShebang_34", content: []byte("#!/usr/bin/bash\n"), expected: []string{"Shell"}},
   350  		{name: "TestGetLanguagesByShebang_35", content: []byte("#!/bin/sh"), expected: []string{"Shell"}},
   351  		{name: "TestGetLanguagesByShebang_36", content: []byte("#!/bin/python\n# foo\n# bar\n# baz"), expected: []string{"Python"}},
   352  		{name: "TestGetLanguagesByShebang_37", content: []byte("#!/usr/bin/python2.7\n\n\n\n"), expected: []string{"Python"}},
   353  		{name: "TestGetLanguagesByShebang_38", content: []byte("#!/usr/bin/python3\n\n\n\n"), expected: []string{"Python"}},
   354  		{name: "TestGetLanguagesByShebang_39", content: []byte("#!/usr/bin/sbcl --script\n\n"), expected: []string{"Common Lisp"}},
   355  		{name: "TestGetLanguagesByShebang_40", content: []byte("#! perl"), expected: []string{"Perl", "Pod"}},
   356  
   357  		{name: "TestGetLanguagesByShebang_41", content: []byte("#!/bin/sh\n\n\nexec ruby $0 $@"), expected: []string{"Ruby"}},
   358  		{name: "TestGetLanguagesByShebang_42", content: []byte("#! /usr/bin/env A=003 B=149 C=150 D=xzd E=base64 F=tar G=gz H=head I=tail sh"), expected: []string{"Shell"}},
   359  		{name: "TestGetLanguagesByShebang_43", content: []byte("#!/usr/bin/env foo=bar bar=foo python -cos=__import__(\"os\");"), expected: []string{"Python"}},
   360  		{name: "TestGetLanguagesByShebang_44", content: []byte("#!/usr/bin/env osascript"), expected: []string{"AppleScript"}},
   361  		{name: "TestGetLanguagesByShebang_45", content: []byte("#!/usr/bin/osascript"), expected: []string{"AppleScript"}},
   362  
   363  		{name: "TestGetLanguagesByShebang_46", content: []byte("#!/usr/bin/env -vS ruby -wKU\nputs ?t+?e+?s+?t"), expected: []string{"Ruby"}},
   364  		{name: "TestGetLanguagesByShebang_47", content: []byte("#!/usr/bin/env --split-string sed -f\ny/a/A/"), expected: []string{"sed"}},
   365  		{name: "TestGetLanguagesByShebang_48", content: []byte("#!/usr/bin/env -S GH_TOKEN=ghp_*** deno run --allow-net\nconsole.log(1);"), expected: []string{"TypeScript"}},
   366  	}
   367  
   368  	for _, test := range tests {
   369  		languages := GetLanguagesByShebang(test.filename, test.content, test.candidates)
   370  		assert.Equal(s.T(), test.expected, languages, fmt.Sprintf("%v: languages = %v, expected: %v", test.name, languages, test.expected))
   371  	}
   372  }
   373  
   374  func (s *enryTestSuite) TestGetLanguageByContent() {
   375  	tests := []struct {
   376  		name     string
   377  		filename string
   378  		content  []byte
   379  		expected string
   380  	}{
   381  		{name: "TestGetLanguageByContent_0", filename: "", expected: ""},
   382  		{name: "TestGetLanguageByContent_1", filename: "foo.cpp", content: []byte("int main() { return 0; }"), expected: ""},                      // as .cpp is unambiguous ¯\_(ツ)_/¯
   383  		{name: "TestGetLanguageByContent_2", filename: "foo.h", content: []byte("int main() { return 0; }"), expected: "C"},                       // C, as it does not match any of the heuristics for C++ or Objective-C
   384  		{name: "TestGetLanguageByContent_3", filename: "foo.h", content: []byte("#include <string>\n int main() { return 0; }"), expected: "C++"}, // '#include <string>' matches regex heuristic
   385  	}
   386  
   387  	for _, test := range tests {
   388  		languages, _ := GetLanguageByContent(test.filename, test.content)
   389  		assert.Equal(s.T(), test.expected, languages, fmt.Sprintf("%v: languages = %v, expected: %v", test.name, languages, test.expected))
   390  	}
   391  }
   392  
   393  func (s *enryTestSuite) TestGetLanguagesByExtension() {
   394  	tests := []struct {
   395  		name       string
   396  		filename   string
   397  		content    []byte
   398  		candidates []string
   399  		expected   []string
   400  	}{
   401  		{name: "TestGetLanguagesByExtension_0", filename: "foo.h", expected: []string{"C", "C++", "Objective-C"}},
   402  		{name: "TestGetLanguagesByExtension_1", filename: "foo.foo", expected: nil},
   403  		{name: "TestGetLanguagesByExtension_2", filename: "foo.go", expected: []string{"Go"}},
   404  		{name: "TestGetLanguagesByExtension_3", filename: "foo.go.php", expected: []string{"Hack", "PHP"}},
   405  		{name: "TestGetLanguagesByExtension_4", filename: "", expected: nil},
   406  	}
   407  
   408  	for _, test := range tests {
   409  		languages := GetLanguagesByExtension(test.filename, test.content, test.candidates)
   410  		assert.Equal(s.T(), test.expected, languages, fmt.Sprintf("%v: languages = %v, expected: %v", test.name, languages, test.expected))
   411  	}
   412  }
   413  
   414  func (s *enryTestSuite) TestGetLanguagesByManpage() {
   415  	tests := []struct {
   416  		name       string
   417  		filename   string
   418  		content    []byte
   419  		candidates []string
   420  		expected   []string
   421  	}{
   422  		{name: "TestGetLanguagesByManpage_1", filename: "bsdmalloc.3malloc", expected: []string{"Roff Manpage", "Roff"}},
   423  		{name: "TestGetLanguagesByManpage_2", filename: "dirent.h.0p", expected: []string{"Roff Manpage", "Roff"}},
   424  		{name: "TestGetLanguagesByManpage_3", filename: "linguist.1gh", expected: []string{"Roff Manpage", "Roff"}},
   425  		{name: "TestGetLanguagesByManpage_4", filename: "test.1.in", expected: []string{"Roff Manpage", "Roff"}},
   426  		{name: "TestGetLanguagesByManpage_5", filename: "test.man.in", expected: []string{"Roff Manpage", "Roff"}},
   427  		{name: "TestGetLanguagesByManpage_6", filename: "test.mdoc.in", expected: []string{"Roff Manpage", "Roff"}},
   428  		{name: "TestGetLanguagesByManpage_7", filename: "foo.h", expected: nil},
   429  		{name: "TestGetLanguagesByManpage_8", filename: "", expected: nil},
   430  	}
   431  
   432  	for _, test := range tests {
   433  		languages := GetLanguagesByManpage(test.filename, test.content, test.candidates)
   434  		assert.Equal(s.T(), test.expected, languages, fmt.Sprintf("%v: languages = %v, expected: %v", test.name, languages, test.expected))
   435  	}
   436  }
   437  
   438  func (s *enryTestSuite) TestGetLanguagesByXML() {
   439  	tests := []struct {
   440  		name       string
   441  		filename   string
   442  		candidates []string
   443  		expected   []string
   444  	}{
   445  		{name: "TestGetLanguagesByXML_1", filename: filepath.Join(s.testFixturesDir, "XML/app.config"), expected: []string{"XML"}},
   446  		{name: "TestGetLanguagesByXML_2", filename: filepath.Join(s.testFixturesDir, "XML/AssertionIDRequestOptionalAttributes.xml.svn-base"), expected: []string{"XML"}},
   447  		// no XML header so should not be identified by this strategy
   448  		{name: "TestGetLanguagesByXML_3", filename: filepath.Join(s.samplesDir, "XML/libsomething.dll.config"), expected: nil},
   449  		{name: "TestGetLanguagesByXML_4", filename: filepath.Join(s.samplesDir, "Eagle/Eagle.sch"), candidates: []string{"Eagle"}, expected: []string{"Eagle"}},
   450  	}
   451  
   452  	for _, test := range tests {
   453  		content, err := ioutil.ReadFile(test.filename)
   454  		assert.NoError(s.T(), err)
   455  
   456  		languages := GetLanguagesByXML(test.filename, content, test.candidates)
   457  		assert.Equal(s.T(), test.expected, languages, fmt.Sprintf("%v: languages = %v, expected: %v", test.name, languages, test.expected))
   458  	}
   459  }
   460  
   461  func (s *enryTestSuite) TestGetLanguagesByClassifier() {
   462  	test := []struct {
   463  		name       string
   464  		filename   string
   465  		candidates []string
   466  		expected   string
   467  	}{
   468  		{name: "TestGetLanguagesByClassifier_1", filename: filepath.Join(s.samplesDir, "C/blob.c"), candidates: []string{"python", "ruby", "c", "c++"}, expected: "C"},
   469  		{name: "TestGetLanguagesByClassifier_2", filename: filepath.Join(s.samplesDir, "C/blob.c"), candidates: nil, expected: OtherLanguage},
   470  		{name: "TestGetLanguagesByClassifier_3", filename: filepath.Join(s.samplesDir, "C++/runtime-compiler.cc"), candidates: []string{}, expected: OtherLanguage},
   471  		{name: "TestGetLanguagesByClassifier_4", filename: filepath.Join(s.samplesDir, "C/blob.c"), candidates: []string{"python", "ruby", "c++"}, expected: "C++"},
   472  		{name: "TestGetLanguagesByClassifier_5", filename: filepath.Join(s.samplesDir, "C/blob.c"), candidates: []string{"ruby"}, expected: "Ruby"},
   473  		{name: "TestGetLanguagesByClassifier_6", filename: filepath.Join(s.samplesDir, "Python/django-models-base.py"), candidates: []string{"python", "ruby", "c", "c++"}, expected: "Python"},
   474  		{name: "TestGetLanguagesByClassifier_7", filename: "", candidates: []string{"python"}, expected: "Python"},
   475  	}
   476  
   477  	for _, test := range test {
   478  		var content []byte
   479  		var err error
   480  
   481  		if test.filename != "" {
   482  			content, err = ioutil.ReadFile(test.filename)
   483  			assert.NoError(s.T(), err)
   484  		}
   485  
   486  		languages := GetLanguagesByClassifier(test.filename, content, test.candidates)
   487  		var language string
   488  		if len(languages) == 0 {
   489  			language = OtherLanguage
   490  		} else {
   491  			language = languages[0]
   492  		}
   493  
   494  		assert.Equal(s.T(), test.expected, language, fmt.Sprintf("%v: language = %v, expected: %v", test.name, language, test.expected))
   495  	}
   496  }
   497  
   498  func (s *enryTestSuite) TestGetLanguagesBySpecificClassifier() {
   499  	test := []struct {
   500  		name       string
   501  		filename   string
   502  		candidates []string
   503  		classifier classifier
   504  		expected   string
   505  	}{
   506  		{name: "TestGetLanguagesByClassifier_1", filename: filepath.Join(s.samplesDir, "C/blob.c"), candidates: []string{"python", "ruby", "c", "c++"}, classifier: defaultClassifier, expected: "C"},
   507  		{name: "TestGetLanguagesByClassifier_2", filename: filepath.Join(s.samplesDir, "C/blob.c"), candidates: nil, classifier: defaultClassifier, expected: "C"},
   508  		{name: "TestGetLanguagesByClassifier_3", filename: filepath.Join(s.samplesDir, "C++/runtime-compiler.cc"), candidates: []string{}, classifier: defaultClassifier, expected: "C++"},
   509  		{name: "TestGetLanguagesByClassifier_4", filename: filepath.Join(s.samplesDir, "C/blob.c"), candidates: []string{"python", "ruby", "c++"}, classifier: defaultClassifier, expected: "C++"},
   510  		{name: "TestGetLanguagesByClassifier_5", filename: filepath.Join(s.samplesDir, "C/blob.c"), candidates: []string{"ruby"}, classifier: defaultClassifier, expected: "Ruby"},
   511  		{name: "TestGetLanguagesByClassifier_6", filename: filepath.Join(s.samplesDir, "Python/django-models-base.py"), candidates: []string{"python", "ruby", "c", "c++"}, classifier: defaultClassifier, expected: "Python"},
   512  		{name: "TestGetLanguagesByClassifier_7", filename: os.DevNull, candidates: nil, classifier: defaultClassifier, expected: "XML"},
   513  	}
   514  
   515  	for _, test := range test {
   516  		content, err := ioutil.ReadFile(test.filename)
   517  		assert.NoError(s.T(), err)
   518  
   519  		languages := getLanguagesBySpecificClassifier(content, test.candidates, test.classifier)
   520  		var language string
   521  		if len(languages) == 0 {
   522  			language = OtherLanguage
   523  		} else {
   524  			language = languages[0]
   525  		}
   526  
   527  		assert.Equal(s.T(), test.expected, language, fmt.Sprintf("%v: language = %v, expected: %v", test.name, language, test.expected))
   528  	}
   529  }
   530  
   531  func (s *enryTestSuite) TestGetLanguageExtensions() {
   532  	tests := []struct {
   533  		name     string
   534  		language string
   535  		expected []string
   536  	}{
   537  		{name: "TestGetLanguageExtensions_1", language: "foo", expected: nil},
   538  		{name: "TestGetLanguageExtensions_2", language: "COBOL", expected: []string{".cob", ".cbl", ".ccp", ".cobol", ".cpy"}},
   539  		{name: "TestGetLanguageExtensions_3", language: "Maven POM", expected: nil},
   540  	}
   541  
   542  	for _, test := range tests {
   543  		extensions := GetLanguageExtensions(test.language)
   544  		assert.EqualValues(s.T(), test.expected, extensions, fmt.Sprintf("%v: extensions = %v, expected: %v", test.name, extensions, test.expected))
   545  	}
   546  }
   547  
   548  func (s *enryTestSuite) TestGetLanguageType() {
   549  	tests := []struct {
   550  		name     string
   551  		language string
   552  		expected Type
   553  	}{
   554  		{name: "TestGetLanguageType_1", language: "BestLanguageEver", expected: Unknown},
   555  		{name: "TestGetLanguageType_2", language: "JSON", expected: Data},
   556  		{name: "TestGetLanguageType_3", language: "COLLADA", expected: Data},
   557  		{name: "TestGetLanguageType_4", language: "Go", expected: Programming},
   558  		{name: "TestGetLanguageType_5", language: "Brainfuck", expected: Programming},
   559  		{name: "TestGetLanguageType_6", language: "HTML", expected: Markup},
   560  		{name: "TestGetLanguageType_7", language: "Sass", expected: Markup},
   561  		{name: "TestGetLanguageType_8", language: "AsciiDoc", expected: Prose},
   562  		{name: "TestGetLanguageType_9", language: "Textile", expected: Prose},
   563  	}
   564  
   565  	for _, test := range tests {
   566  		langType := GetLanguageType(test.language)
   567  		assert.Equal(s.T(), test.expected, langType, fmt.Sprintf("%v: langType = %v, expected: %v", test.name, langType, test.expected))
   568  	}
   569  }
   570  
   571  func (s *enryTestSuite) TestGetLanguageGroup() {
   572  	tests := []struct {
   573  		name     string
   574  		language string
   575  		expected string
   576  	}{
   577  		{name: "TestGetLanguageGroup_1", language: "BestLanguageEver", expected: ""},
   578  		{name: "TestGetLanguageGroup_2", language: "Bison", expected: "Yacc"},
   579  		{name: "TestGetLanguageGroup_3", language: "HTML+PHP", expected: "HTML"},
   580  		{name: "TestGetLanguageGroup_4", language: "HTML", expected: ""},
   581  	}
   582  
   583  	for _, test := range tests {
   584  		langGroup := GetLanguageGroup(test.language)
   585  		assert.Equal(s.T(), test.expected, langGroup, fmt.Sprintf("%v: langGroup = %v, expected: %v", test.name, langGroup, test.expected))
   586  	}
   587  }
   588  
   589  func (s *enryTestSuite) TestGetLanguageByAlias() {
   590  	tests := []struct {
   591  		name         string
   592  		alias        string
   593  		expectedLang string
   594  		expectedOk   bool
   595  	}{
   596  		{name: "TestGetLanguageByAlias_1", alias: "BestLanguageEver", expectedLang: OtherLanguage, expectedOk: false},
   597  		{name: "TestGetLanguageByAlias_2", alias: "aspx-vb", expectedLang: "ASP.NET", expectedOk: true},
   598  		{name: "TestGetLanguageByAlias_3", alias: "C++", expectedLang: "C++", expectedOk: true},
   599  		{name: "TestGetLanguageByAlias_4", alias: "c++", expectedLang: "C++", expectedOk: true},
   600  		{name: "TestGetLanguageByAlias_5", alias: "objc", expectedLang: "Objective-C", expectedOk: true},
   601  		{name: "TestGetLanguageByAlias_6", alias: "golang", expectedLang: "Go", expectedOk: true},
   602  		{name: "TestGetLanguageByAlias_7", alias: "GOLANG", expectedLang: "Go", expectedOk: true},
   603  		{name: "TestGetLanguageByAlias_8", alias: "bsdmake", expectedLang: "Makefile", expectedOk: true},
   604  		{name: "TestGetLanguageByAlias_9", alias: "xhTmL", expectedLang: "HTML", expectedOk: true},
   605  		{name: "TestGetLanguageByAlias_10", alias: "python", expectedLang: "Python", expectedOk: true},
   606  	}
   607  
   608  	for _, test := range tests {
   609  		lang, ok := GetLanguageByAlias(test.alias)
   610  		assert.Equal(s.T(), test.expectedLang, lang, fmt.Sprintf("%v: lang = %v, expected: %v", test.name, lang, test.expectedLang))
   611  		assert.Equal(s.T(), test.expectedOk, ok, fmt.Sprintf("%v: ok = %v, expected: %v", test.name, ok, test.expectedOk))
   612  	}
   613  }
   614  
   615  func (s *enryTestSuite) TestGetLanguageID() {
   616  	tests := []struct {
   617  		name       string
   618  		language   string
   619  		expectedID int
   620  		found      bool
   621  	}{
   622  		{name: "TestGetLanguageID_1", language: "1C Enterprise", expectedID: 0, found: true},
   623  		{name: "TestGetLanguageID_2", language: "BestLanguageEver", expectedID: 0, found: false},
   624  		{name: "TestGetLanguageID_3", language: "C++", expectedID: 43, found: true},
   625  		{name: "TestGetLanguageID_5", language: "Objective-C", expectedID: 257, found: true},
   626  		{name: "TestGetLanguageID_6", language: "golang", expectedID: 0, found: false}, // Aliases are not supported
   627  		{name: "TestGetLanguageID_7", language: "Go", expectedID: 132, found: true},
   628  		{name: "TestGetLanguageID_8", language: "Makefile", expectedID: 220, found: true},
   629  	}
   630  
   631  	for _, test := range tests {
   632  		id, found := GetLanguageID(test.language)
   633  		assert.Equal(s.T(), test.expectedID, id, fmt.Sprintf("%v: id = %v, expected: %v", test.name, id, test.expectedID))
   634  		assert.Equal(s.T(), test.found, found, fmt.Sprintf("%v: found = %t, expected: %t", test.name, found, test.found))
   635  	}
   636  }
   637  
   638  func (s *enryTestSuite) TestGetLanguageInfo() {
   639  	tests := []struct {
   640  		name       string
   641  		language   string
   642  		expectedID int
   643  		error      bool
   644  	}{
   645  		{name: "TestGetLanguageID_1", language: "1C Enterprise", expectedID: 0},
   646  		{name: "TestGetLanguageID_2", language: "BestLanguageEver", error: true},
   647  		{name: "TestGetLanguageID_3", language: "C++", expectedID: 43},
   648  		{name: "TestGetLanguageID_5", language: "Objective-C", expectedID: 257},
   649  		{name: "TestGetLanguageID_6", language: "golang", error: true}, // Aliases are not supported
   650  		{name: "TestGetLanguageID_7", language: "Go", expectedID: 132},
   651  		{name: "TestGetLanguageID_8", language: "Makefile", expectedID: 220},
   652  	}
   653  
   654  	for _, test := range tests {
   655  		info, err := GetLanguageInfo(test.language)
   656  		if test.error {
   657  			assert.Error(s.T(), err, "%v: expected error for %q", test.name, test.language)
   658  		} else {
   659  			assert.NoError(s.T(), err)
   660  			assert.Equal(s.T(), test.expectedID, info.LanguageID, fmt.Sprintf("%v: id = %v, expected: %v", test.name, info.LanguageID, test.expectedID))
   661  		}
   662  	}
   663  }
   664  
   665  func (s *enryTestSuite) TestGetLanguageInfoByID() {
   666  	tests := []struct {
   667  		name         string
   668  		id           int
   669  		expectedName string
   670  		error        bool
   671  	}{
   672  		{name: "TestGetLanguageID_1", id: 0, expectedName: "1C Enterprise"},
   673  		{name: "TestGetLanguageID_2", id: -1, error: true},
   674  		{name: "TestGetLanguageID_3", id: 43, expectedName: "C++"},
   675  		{name: "TestGetLanguageID_5", id: 257, expectedName: "Objective-C"},
   676  		{name: "TestGetLanguageID_7", id: 132, expectedName: "Go"},
   677  		{name: "TestGetLanguageID_8", id: 220, expectedName: "Makefile"},
   678  	}
   679  
   680  	for _, test := range tests {
   681  		info, err := GetLanguageInfoByID(test.id)
   682  		if test.error {
   683  			assert.Error(s.T(), err, "%v: expected error for %q", test.name, test.id)
   684  		} else {
   685  			assert.NoError(s.T(), err)
   686  			assert.Equal(s.T(), test.expectedName, info.Name, fmt.Sprintf("%v: id = %v, expected: %v", test.name, test.id, test.expectedName))
   687  		}
   688  	}
   689  }