code.gitea.io/gitea@v1.22.3/modules/git/repo_language_stats_gogit.go (about)

     1  // Copyright 2020 The Gitea Authors. All rights reserved.
     2  // SPDX-License-Identifier: MIT
     3  
     4  //go:build gogit
     5  
     6  package git
     7  
     8  import (
     9  	"bytes"
    10  	"io"
    11  
    12  	"code.gitea.io/gitea/modules/analyze"
    13  	"code.gitea.io/gitea/modules/optional"
    14  
    15  	"github.com/go-enry/go-enry/v2"
    16  	"github.com/go-git/go-git/v5"
    17  	"github.com/go-git/go-git/v5/plumbing"
    18  	"github.com/go-git/go-git/v5/plumbing/object"
    19  )
    20  
    21  // GetLanguageStats calculates language stats for git repository at specified commit
    22  func (repo *Repository) GetLanguageStats(commitID string) (map[string]int64, error) {
    23  	r, err := git.PlainOpen(repo.Path)
    24  	if err != nil {
    25  		return nil, err
    26  	}
    27  
    28  	rev, err := r.ResolveRevision(plumbing.Revision(commitID))
    29  	if err != nil {
    30  		return nil, err
    31  	}
    32  
    33  	commit, err := r.CommitObject(*rev)
    34  	if err != nil {
    35  		return nil, err
    36  	}
    37  
    38  	tree, err := commit.Tree()
    39  	if err != nil {
    40  		return nil, err
    41  	}
    42  
    43  	checker, deferable := repo.CheckAttributeReader(commitID)
    44  	defer deferable()
    45  
    46  	// sizes contains the current calculated size of all files by language
    47  	sizes := make(map[string]int64)
    48  	// by default we will only count the sizes of programming languages or markup languages
    49  	// unless they are explicitly set using linguist-language
    50  	includedLanguage := map[string]bool{}
    51  	// or if there's only one language in the repository
    52  	firstExcludedLanguage := ""
    53  	firstExcludedLanguageSize := int64(0)
    54  
    55  	err = tree.Files().ForEach(func(f *object.File) error {
    56  		if f.Size == 0 {
    57  			return nil
    58  		}
    59  
    60  		isVendored := optional.None[bool]()
    61  		isGenerated := optional.None[bool]()
    62  		isDocumentation := optional.None[bool]()
    63  		isDetectable := optional.None[bool]()
    64  
    65  		if checker != nil {
    66  			attrs, err := checker.CheckPath(f.Name)
    67  			if err == nil {
    68  				isVendored = AttributeToBool(attrs, AttributeLinguistVendored)
    69  				if isVendored.ValueOrDefault(false) {
    70  					return nil
    71  				}
    72  
    73  				isGenerated = AttributeToBool(attrs, AttributeLinguistGenerated)
    74  				if isGenerated.ValueOrDefault(false) {
    75  					return nil
    76  				}
    77  
    78  				isDocumentation = AttributeToBool(attrs, AttributeLinguistDocumentation)
    79  				if isDocumentation.ValueOrDefault(false) {
    80  					return nil
    81  				}
    82  
    83  				isDetectable = AttributeToBool(attrs, AttributeLinguistDetectable)
    84  				if !isDetectable.ValueOrDefault(true) {
    85  					return nil
    86  				}
    87  
    88  				hasLanguage := TryReadLanguageAttribute(attrs)
    89  				if hasLanguage.Value() != "" {
    90  					language := hasLanguage.Value()
    91  
    92  					// group languages, such as Pug -> HTML; SCSS -> CSS
    93  					group := enry.GetLanguageGroup(language)
    94  					if len(group) != 0 {
    95  						language = group
    96  					}
    97  
    98  					// this language will always be added to the size
    99  					sizes[language] += f.Size
   100  					return nil
   101  				}
   102  			}
   103  		}
   104  
   105  		if (!isVendored.Has() && analyze.IsVendor(f.Name)) ||
   106  			enry.IsDotFile(f.Name) ||
   107  			(!isDocumentation.Has() && enry.IsDocumentation(f.Name)) ||
   108  			enry.IsConfiguration(f.Name) {
   109  			return nil
   110  		}
   111  
   112  		// If content can not be read or file is too big just do detection by filename
   113  		var content []byte
   114  		if f.Size <= bigFileSize {
   115  			content, _ = readFile(f, fileSizeLimit)
   116  		}
   117  		if !isGenerated.Has() && enry.IsGenerated(f.Name, content) {
   118  			return nil
   119  		}
   120  
   121  		language := analyze.GetCodeLanguage(f.Name, content)
   122  		if language == enry.OtherLanguage || language == "" {
   123  			return nil
   124  		}
   125  
   126  		// group languages, such as Pug -> HTML; SCSS -> CSS
   127  		group := enry.GetLanguageGroup(language)
   128  		if group != "" {
   129  			language = group
   130  		}
   131  
   132  		included, checked := includedLanguage[language]
   133  		if !checked {
   134  			langtype := enry.GetLanguageType(language)
   135  			included = langtype == enry.Programming || langtype == enry.Markup
   136  			includedLanguage[language] = included
   137  		}
   138  		if included || isDetectable.ValueOrDefault(false) {
   139  			sizes[language] += f.Size
   140  		} else if len(sizes) == 0 && (firstExcludedLanguage == "" || firstExcludedLanguage == language) {
   141  			firstExcludedLanguage = language
   142  			firstExcludedLanguageSize += f.Size
   143  		}
   144  
   145  		return nil
   146  	})
   147  	if err != nil {
   148  		return nil, err
   149  	}
   150  
   151  	// If there are no included languages add the first excluded language
   152  	if len(sizes) == 0 && firstExcludedLanguage != "" {
   153  		sizes[firstExcludedLanguage] = firstExcludedLanguageSize
   154  	}
   155  
   156  	return mergeLanguageStats(sizes), nil
   157  }
   158  
   159  func readFile(f *object.File, limit int64) ([]byte, error) {
   160  	r, err := f.Reader()
   161  	if err != nil {
   162  		return nil, err
   163  	}
   164  	defer r.Close()
   165  
   166  	if limit <= 0 {
   167  		return io.ReadAll(r)
   168  	}
   169  
   170  	size := f.Size
   171  	if limit > 0 && size > limit {
   172  		size = limit
   173  	}
   174  	buf := bytes.NewBuffer(nil)
   175  	buf.Grow(int(size))
   176  	_, err = io.Copy(buf, io.LimitReader(r, limit))
   177  	return buf.Bytes(), err
   178  }