code.gitea.io/gitea@v1.19.3/modules/git/repo_language_stats_gogit.go (about)

     1  // Copyright 2020 The Gitea Authors. All rights reserved.
     2  // SPDX-License-Identifier: MIT
     3  
     4  //go:build gogit
     5  
     6  package git
     7  
     8  import (
     9  	"bytes"
    10  	"io"
    11  	"strings"
    12  
    13  	"code.gitea.io/gitea/modules/analyze"
    14  
    15  	"github.com/go-enry/go-enry/v2"
    16  	"github.com/go-git/go-git/v5"
    17  	"github.com/go-git/go-git/v5/plumbing"
    18  	"github.com/go-git/go-git/v5/plumbing/object"
    19  )
    20  
    21  // GetLanguageStats calculates language stats for git repository at specified commit
    22  func (repo *Repository) GetLanguageStats(commitID string) (map[string]int64, error) {
    23  	r, err := git.PlainOpen(repo.Path)
    24  	if err != nil {
    25  		return nil, err
    26  	}
    27  
    28  	rev, err := r.ResolveRevision(plumbing.Revision(commitID))
    29  	if err != nil {
    30  		return nil, err
    31  	}
    32  
    33  	commit, err := r.CommitObject(*rev)
    34  	if err != nil {
    35  		return nil, err
    36  	}
    37  
    38  	tree, err := commit.Tree()
    39  	if err != nil {
    40  		return nil, err
    41  	}
    42  
    43  	checker, deferable := repo.CheckAttributeReader(commitID)
    44  	defer deferable()
    45  
    46  	// sizes contains the current calculated size of all files by language
    47  	sizes := make(map[string]int64)
    48  	// by default we will only count the sizes of programming languages or markup languages
    49  	// unless they are explicitly set using linguist-language
    50  	includedLanguage := map[string]bool{}
    51  	// or if there's only one language in the repository
    52  	firstExcludedLanguage := ""
    53  	firstExcludedLanguageSize := int64(0)
    54  
    55  	err = tree.Files().ForEach(func(f *object.File) error {
    56  		if f.Size == 0 {
    57  			return nil
    58  		}
    59  
    60  		notVendored := false
    61  		notGenerated := false
    62  
    63  		if checker != nil {
    64  			attrs, err := checker.CheckPath(f.Name)
    65  			if err == nil {
    66  				if vendored, has := attrs["linguist-vendored"]; has {
    67  					if vendored == "set" || vendored == "true" {
    68  						return nil
    69  					}
    70  					notVendored = vendored == "false"
    71  				}
    72  				if generated, has := attrs["linguist-generated"]; has {
    73  					if generated == "set" || generated == "true" {
    74  						return nil
    75  					}
    76  					notGenerated = generated == "false"
    77  				}
    78  				if language, has := attrs["linguist-language"]; has && language != "unspecified" && language != "" {
    79  					// group languages, such as Pug -> HTML; SCSS -> CSS
    80  					group := enry.GetLanguageGroup(language)
    81  					if len(group) != 0 {
    82  						language = group
    83  					}
    84  
    85  					// this language will always be added to the size
    86  					sizes[language] += f.Size
    87  					return nil
    88  				} else if language, has := attrs["gitlab-language"]; has && language != "unspecified" && language != "" {
    89  					// strip off a ? if present
    90  					if idx := strings.IndexByte(language, '?'); idx >= 0 {
    91  						language = language[:idx]
    92  					}
    93  					if len(language) != 0 {
    94  						// group languages, such as Pug -> HTML; SCSS -> CSS
    95  						group := enry.GetLanguageGroup(language)
    96  						if len(group) != 0 {
    97  							language = group
    98  						}
    99  
   100  						// this language will always be added to the size
   101  						sizes[language] += f.Size
   102  						return nil
   103  					}
   104  				}
   105  			}
   106  		}
   107  
   108  		if (!notVendored && analyze.IsVendor(f.Name)) || enry.IsDotFile(f.Name) ||
   109  			enry.IsDocumentation(f.Name) || enry.IsConfiguration(f.Name) {
   110  			return nil
   111  		}
   112  
   113  		// If content can not be read or file is too big just do detection by filename
   114  		var content []byte
   115  		if f.Size <= bigFileSize {
   116  			content, _ = readFile(f, fileSizeLimit)
   117  		}
   118  		if !notGenerated && enry.IsGenerated(f.Name, content) {
   119  			return nil
   120  		}
   121  
   122  		// TODO: Use .gitattributes file for linguist overrides
   123  
   124  		language := analyze.GetCodeLanguage(f.Name, content)
   125  		if language == enry.OtherLanguage || language == "" {
   126  			return nil
   127  		}
   128  
   129  		// group languages, such as Pug -> HTML; SCSS -> CSS
   130  		group := enry.GetLanguageGroup(language)
   131  		if group != "" {
   132  			language = group
   133  		}
   134  
   135  		included, checked := includedLanguage[language]
   136  		if !checked {
   137  			langtype := enry.GetLanguageType(language)
   138  			included = langtype == enry.Programming || langtype == enry.Markup
   139  			includedLanguage[language] = included
   140  		}
   141  		if included {
   142  			sizes[language] += f.Size
   143  		} else if len(sizes) == 0 && (firstExcludedLanguage == "" || firstExcludedLanguage == language) {
   144  			firstExcludedLanguage = language
   145  			firstExcludedLanguageSize += f.Size
   146  		}
   147  
   148  		return nil
   149  	})
   150  	if err != nil {
   151  		return nil, err
   152  	}
   153  
   154  	// If there are no included languages add the first excluded language
   155  	if len(sizes) == 0 && firstExcludedLanguage != "" {
   156  		sizes[firstExcludedLanguage] = firstExcludedLanguageSize
   157  	}
   158  
   159  	return sizes, nil
   160  }
   161  
   162  func readFile(f *object.File, limit int64) ([]byte, error) {
   163  	r, err := f.Reader()
   164  	if err != nil {
   165  		return nil, err
   166  	}
   167  	defer r.Close()
   168  
   169  	if limit <= 0 {
   170  		return io.ReadAll(r)
   171  	}
   172  
   173  	size := f.Size
   174  	if limit > 0 && size > limit {
   175  		size = limit
   176  	}
   177  	buf := bytes.NewBuffer(nil)
   178  	buf.Grow(int(size))
   179  	_, err = io.Copy(buf, io.LimitReader(r, limit))
   180  	return buf.Bytes(), err
   181  }