github.com/rohankumardubey/draft-classic@v0.16.0/pkg/linguist/linguist.go (about)

     1  package linguist
     2  
     3  import (
     4  	"bufio"
     5  	"fmt"
     6  	"io"
     7  	"os"
     8  	"path/filepath"
     9  	"runtime"
    10  	"sort"
    11  	"strings"
    12  
    13  	"github.com/Azure/draft/pkg/osutil"
    14  	log "github.com/sirupsen/logrus"
    15  )
    16  
    17  var (
    18  	isIgnored                 func(string) bool
    19  	isDetectedInGitAttributes func(filename string) string
    20  )
    21  
    22  // used for displaying results
    23  type (
    24  	// Language is the programming langage and the percentage on how sure linguist feels about its
    25  	// decision.
    26  	Language struct {
    27  		Language string  `json:"language"`
    28  		Percent  float64 `json:"percent"`
    29  		// Color represents the color associated with the language in HTML hex notation.
    30  		Color string `json:"color"`
    31  	}
    32  )
    33  
    34  // sortableResult is a list or programming languages, sorted based on the likelihood of the
    35  // primary programming language the application was written in.
    36  type sortableResult []*Language
    37  
    38  func (s sortableResult) Len() int {
    39  	return len(s)
    40  }
    41  
    42  func (s sortableResult) Less(i, j int) bool {
    43  	return s[i].Percent < s[j].Percent
    44  }
    45  
    46  func (s sortableResult) Swap(i, j int) {
    47  	s[i], s[j] = s[j], s[i]
    48  }
    49  
    50  func initLinguistAttributes(dir string) error {
    51  	ignore := []string{}
    52  	except := []string{}
    53  	detected := make(map[string]string)
    54  
    55  	gitignoreExists, err := osutil.Exists(filepath.Join(dir, ".gitignore"))
    56  	if err != nil {
    57  		return err
    58  	}
    59  	if gitignoreExists {
    60  		log.Debugln("found .gitignore")
    61  
    62  		f, err := os.Open(filepath.Join(dir, ".gitignore"))
    63  		if err != nil {
    64  			return err
    65  		}
    66  		defer f.Close()
    67  
    68  		ignoreScanner := bufio.NewScanner(f)
    69  		for ignoreScanner.Scan() {
    70  			var isExcept bool
    71  			path := strings.TrimSpace(ignoreScanner.Text())
    72  			// if it's whitespace or a comment
    73  			if len(path) == 0 || string(path[0]) == "#" {
    74  				continue
    75  			}
    76  			if string(path[0]) == "!" {
    77  				isExcept = true
    78  				path = path[1:]
    79  			}
    80  			p := strings.Trim(path, string(filepath.Separator))
    81  			if isExcept {
    82  				except = append(except, p)
    83  			} else {
    84  				ignore = append(ignore, p)
    85  			}
    86  		}
    87  		if err := ignoreScanner.Err(); err != nil {
    88  			return fmt.Errorf("error reading .gitignore: %v", err)
    89  		}
    90  	}
    91  
    92  	gitAttributesExists, err := osutil.Exists(filepath.Join(dir, ".gitattributes"))
    93  	if err != nil {
    94  		return err
    95  	}
    96  	if gitAttributesExists {
    97  		log.Debugln("found .gitattributes")
    98  
    99  		f, err := os.Open(filepath.Join(dir, ".gitattributes"))
   100  		if err != nil {
   101  			return err
   102  		}
   103  		defer f.Close()
   104  
   105  		attributeScanner := bufio.NewScanner(f)
   106  		var lineNumber int
   107  		for attributeScanner.Scan() {
   108  			lineNumber++
   109  			line := strings.TrimSpace(attributeScanner.Text())
   110  			words := strings.Fields(line)
   111  			if len(words) != 2 {
   112  				log.Printf("invalid line in .gitattributes at L%d: '%s'\n", lineNumber, line)
   113  				continue
   114  			}
   115  			path := strings.Trim(words[0], string(filepath.Separator))
   116  			if runtime.GOOS == "windows" {
   117  				// on Windows, we also accept / as a path separator, so let's strip those as well
   118  				path = strings.Trim(words[0], "/")
   119  			}
   120  			attribute := words[1]
   121  			if strings.HasPrefix(attribute, "linguist-documentation") || strings.HasPrefix(attribute, "linguist-vendored") || strings.HasPrefix(attribute, "linguist-generated") {
   122  				if !strings.HasSuffix(strings.ToLower(attribute), "false") {
   123  					ignore = append(ignore, path)
   124  				}
   125  			} else if strings.HasPrefix(attribute, "linguist-language") {
   126  				attr := strings.Split(attribute, "=")
   127  				if len(attr) != 2 {
   128  					log.Printf("invalid line in .gitattributes at L%d: '%s'\n", lineNumber, line)
   129  					continue
   130  				}
   131  				language := attr[1]
   132  				detected[path] = language
   133  			}
   134  		}
   135  		if err := attributeScanner.Err(); err != nil {
   136  			return fmt.Errorf("error reading .gitattributes: %v", err)
   137  		}
   138  	}
   139  
   140  	isIgnored = func(filename string) bool {
   141  		for _, p := range ignore {
   142  			cleanPath, err := filepath.Rel(dir, filename)
   143  			if err != nil {
   144  				log.Debugf("could not get relative path: %v", err)
   145  				return false
   146  			}
   147  			if m, _ := filepath.Match(p, cleanPath); m {
   148  				for _, e := range except {
   149  					if m, _ := filepath.Match(e, cleanPath); m {
   150  						return false
   151  					}
   152  				}
   153  				return true
   154  			}
   155  		}
   156  		return false
   157  	}
   158  	isDetectedInGitAttributes = func(filename string) string {
   159  		for p, lang := range detected {
   160  			cleanPath, err := filepath.Rel(dir, filename)
   161  			if err != nil {
   162  				log.Debugf("could not get relative path: %v", err)
   163  				return ""
   164  			}
   165  			if m, _ := filepath.Match(p, cleanPath); m {
   166  				return lang
   167  			}
   168  		}
   169  		return ""
   170  	}
   171  	return nil
   172  }
   173  
   174  // shoutouts to php
   175  func fileGetContents(filename string) ([]byte, error) {
   176  	log.Debugln("reading contents of", filename)
   177  
   178  	// read only first 512 bytes of files
   179  	contents := make([]byte, 512)
   180  	f, err := os.Open(filename)
   181  	if err != nil {
   182  		return nil, err
   183  	}
   184  	_, err = f.Read(contents)
   185  	f.Close()
   186  	if err != io.EOF {
   187  		if err != nil {
   188  			return nil, err
   189  		}
   190  	}
   191  	return contents, nil
   192  }
   193  
   194  // ProcessDir walks through a directory and returns a list of sorted languages within that directory.
   195  func ProcessDir(dirname string) ([]*Language, error) {
   196  	var (
   197  		langs     = make(map[string]int)
   198  		totalSize int
   199  	)
   200  	if err := initLinguistAttributes(dirname); err != nil {
   201  		return nil, err
   202  	}
   203  	exists, err := osutil.Exists(dirname)
   204  	if err != nil {
   205  		return nil, err
   206  	}
   207  	if !exists {
   208  		return nil, os.ErrNotExist
   209  	}
   210  	filepath.Walk(dirname, func(path string, file os.FileInfo, err error) error {
   211  		size := int(file.Size())
   212  		log.Debugf("with file: %s", path)
   213  		log.Debugln(path, "is", size, "bytes")
   214  		if isIgnored(path) {
   215  			log.Debugln(path, "is ignored, skipping")
   216  			if file.IsDir() {
   217  				return filepath.SkipDir
   218  			}
   219  			return nil
   220  		}
   221  		if size == 0 {
   222  			log.Debugln(path, "is empty file, skipping")
   223  			return nil
   224  		}
   225  		if file.IsDir() {
   226  			if file.Name() == ".git" {
   227  				log.Debugln(".git directory, skipping")
   228  				return filepath.SkipDir
   229  			}
   230  		} else if (file.Mode() & os.ModeSymlink) == 0 {
   231  			if ShouldIgnoreFilename(path) {
   232  				log.Debugf("%s: filename should be ignored, skipping", path)
   233  				return nil
   234  			}
   235  
   236  			byGitAttr := isDetectedInGitAttributes(path)
   237  			if byGitAttr != "" {
   238  				log.Debugln(path, "got result by .gitattributes: ", byGitAttr)
   239  				langs[byGitAttr] += size
   240  				totalSize += size
   241  				return nil
   242  			}
   243  
   244  			if byName := LanguageByFilename(path); byName != "" {
   245  				log.Debugln(path, "got result by name: ", byName)
   246  				langs[byName] += size
   247  				totalSize += size
   248  				return nil
   249  			}
   250  
   251  			contents, err := fileGetContents(path)
   252  			if err != nil {
   253  				return err
   254  			}
   255  
   256  			if ShouldIgnoreContents(contents) {
   257  				log.Debugln(path, ": contents should be ignored, skipping")
   258  				return nil
   259  			}
   260  
   261  			hints := LanguageHints(path)
   262  			log.Debugf("%s got language hints: %#v\n", path, hints)
   263  			byData := LanguageByContents(contents, hints)
   264  
   265  			if byData != "" {
   266  				log.Debugln(path, "got result by data: ", byData)
   267  				langs[byData] += size
   268  				totalSize += size
   269  				return nil
   270  			}
   271  
   272  			log.Debugln(path, "got no result!!")
   273  			langs["(unknown)"] += size
   274  			totalSize += size
   275  		}
   276  		return nil
   277  	})
   278  
   279  	results := []*Language{}
   280  	for lang, size := range langs {
   281  		l := &Language{
   282  			Language: lang,
   283  			Percent:  (float64(size) / float64(totalSize)) * 100.0,
   284  			Color:    LanguageColor(lang),
   285  		}
   286  		results = append(results, l)
   287  		log.Debugf("language: %s percent: %f color: %s", l.Language, l.Percent, l.Color)
   288  	}
   289  	sort.Sort(sort.Reverse(sortableResult(results)))
   290  	return results, nil
   291  }
   292  
   293  // Alias returns the language name for a given known alias.
   294  //
   295  // Occasionally linguist comes up with odd language names, or determines a Java app as a "Maven POM"
   296  // app, which in essence is the same thing for Draft's intent.
   297  func Alias(lang *Language) *Language {
   298  	packAliases := map[string]string{
   299  		"maven pom": "Java",
   300  		"c#":        "csharp",
   301  	}
   302  
   303  	if alias, ok := packAliases[strings.ToLower(lang.Language)]; ok {
   304  		lang.Language = alias
   305  	}
   306  	return lang
   307  }