git.templeos.me/xultist/go-enry/v2@v2.0.0-20230215093429-6ef3e87f47c0/linguist_corpus_test.go (about)

     1  package enry
     2  
     3  import (
     4  	"fmt"
     5  	"io/ioutil"
     6  	"os"
     7  	"path/filepath"
     8  	"testing"
     9  
    10  	"github.com/go-enry/go-enry/v2/data"
    11  	"github.com/stretchr/testify/assert"
    12  	"github.com/stretchr/testify/suite"
    13  )
    14  
    15  type linguistCorpusSuite struct {
    16  	enryBaseTestSuite
    17  }
    18  
    19  func Test_EnryOnLinguistCorpus(t *testing.T) {
    20  	suite.Run(t, new(linguistCorpusSuite))
    21  }
    22  
    23  // First part of the test_blob.rb#test_language
    24  // https://github.com/github/linguist/blob/59b2d88b2242e6062384e5fb876668cc30ead951/test/test_blob.rb#L258
    25  func (s *linguistCorpusSuite) TestLinguistSamples() {
    26  	const filenamesDir = "filenames"
    27  	var cornerCases = map[string]bool{
    28  		"drop_stuff.sql":        true, // https://github.com/src-d/enry/issues/194
    29  		"textobj-rubyblock.vba": true, // Because of unsupported negative lookahead RE syntax (https://github.com/github/linguist/blob/8083cb5a89cee2d99f5a988f165994d0243f0d1e/lib/linguist/heuristics.yml#L521)
    30  		// .es and .ice fail heuristics parsing, but do not fail any tests
    31  	}
    32  
    33  	var total, failed, ok, other int
    34  	var expected string
    35  	filepath.Walk(s.samplesDir, func(path string, f os.FileInfo, err error) error {
    36  		if f.IsDir() {
    37  			if f.Name() != filenamesDir {
    38  				expected, _ = data.LanguageByAlias(f.Name())
    39  			}
    40  
    41  			return nil
    42  		}
    43  
    44  		filename := filepath.Base(path)
    45  		content, _ := ioutil.ReadFile(path)
    46  
    47  		total++
    48  		obtained := GetLanguage(filename, content)
    49  		if obtained == OtherLanguage {
    50  			obtained = "Other"
    51  			other++
    52  		}
    53  
    54  		var status string
    55  		if expected == obtained {
    56  			status = "ok"
    57  			ok++
    58  		} else {
    59  			status = "failed"
    60  			failed++
    61  		}
    62  
    63  		if _, ok := cornerCases[filename]; ok {
    64  			s.T().Logf("\t\t[considered corner case] %s\texpected: %s\tobtained: %s\tstatus: %s\n", filename, expected, obtained, status)
    65  		} else {
    66  			assert.Equal(s.T(), expected, obtained, fmt.Sprintf("%s\texpected: %s\tobtained: %s\tstatus: %s\n", filename, expected, obtained, status))
    67  		}
    68  		return nil
    69  	})
    70  	s.T().Logf("\t\ttotal files: %d, ok: %d, failed: %d, other: %d\n", total, ok, failed, other)
    71  }