github.com/gitbundle/modules@v0.0.0-20231025071548-85b91c5c3b01/git/repo_language_stats_gogit.go (about)

     1  // Copyright 2023 The GitBundle Inc. All rights reserved.
     2  // Copyright 2017 The Gitea Authors. All rights reserved.
     3  // Use of this source code is governed by a MIT-style
     4  // license that can be found in the LICENSE file.
     5  
     6  //go:build gogit
     7  
     8  package git
     9  
    10  import (
    11  	"bytes"
    12  	"io"
    13  	"strings"
    14  
    15  	"github.com/gitbundle/modules/analyze"
    16  
    17  	"github.com/go-enry/go-enry/v2"
    18  	"github.com/go-git/go-git/v5"
    19  	"github.com/go-git/go-git/v5/plumbing"
    20  	"github.com/go-git/go-git/v5/plumbing/object"
    21  )
    22  
    23  // GetLanguageStats calculates language stats for git repository at specified commit
    24  func (repo *Repository) GetLanguageStats(commitID string) (map[string]int64, error) {
    25  	r, err := git.PlainOpen(repo.Path)
    26  	if err != nil {
    27  		return nil, err
    28  	}
    29  
    30  	rev, err := r.ResolveRevision(plumbing.Revision(commitID))
    31  	if err != nil {
    32  		return nil, err
    33  	}
    34  
    35  	commit, err := r.CommitObject(*rev)
    36  	if err != nil {
    37  		return nil, err
    38  	}
    39  
    40  	tree, err := commit.Tree()
    41  	if err != nil {
    42  		return nil, err
    43  	}
    44  
    45  	checker, deferable := repo.CheckAttributeReader(commitID)
    46  	defer deferable()
    47  
    48  	sizes := make(map[string]int64)
    49  	err = tree.Files().ForEach(func(f *object.File) error {
    50  		if f.Size == 0 {
    51  			return nil
    52  		}
    53  
    54  		notVendored := false
    55  		notGenerated := false
    56  
    57  		if checker != nil {
    58  			attrs, err := checker.CheckPath(f.Name)
    59  			if err == nil {
    60  				if vendored, has := attrs["linguist-vendored"]; has {
    61  					if vendored == "set" || vendored == "true" {
    62  						return nil
    63  					}
    64  					notVendored = vendored == "false"
    65  				}
    66  				if generated, has := attrs["linguist-generated"]; has {
    67  					if generated == "set" || generated == "true" {
    68  						return nil
    69  					}
    70  					notGenerated = generated == "false"
    71  				}
    72  				if language, has := attrs["linguist-language"]; has && language != "unspecified" && language != "" {
    73  					// group languages, such as Pug -> HTML; SCSS -> CSS
    74  					group := enry.GetLanguageGroup(language)
    75  					if len(group) != 0 {
    76  						language = group
    77  					}
    78  
    79  					sizes[language] += f.Size
    80  
    81  					return nil
    82  				} else if language, has := attrs["gitlab-language"]; has && language != "unspecified" && language != "" {
    83  					// strip off a ? if present
    84  					if idx := strings.IndexByte(language, '?'); idx >= 0 {
    85  						language = language[:idx]
    86  					}
    87  					if len(language) != 0 {
    88  						// group languages, such as Pug -> HTML; SCSS -> CSS
    89  						group := enry.GetLanguageGroup(language)
    90  						if len(group) != 0 {
    91  							language = group
    92  						}
    93  
    94  						sizes[language] += f.Size
    95  						return nil
    96  					}
    97  				}
    98  			}
    99  		}
   100  
   101  		if (!notVendored && analyze.IsVendor(f.Name)) || enry.IsDotFile(f.Name) ||
   102  			enry.IsDocumentation(f.Name) || enry.IsConfiguration(f.Name) {
   103  			return nil
   104  		}
   105  
   106  		// If content can not be read or file is too big just do detection by filename
   107  		var content []byte
   108  		if f.Size <= bigFileSize {
   109  			content, _ = readFile(f, fileSizeLimit)
   110  		}
   111  		if !notGenerated && enry.IsGenerated(f.Name, content) {
   112  			return nil
   113  		}
   114  
   115  		// TODO: Use .gitattributes file for linguist overrides
   116  
   117  		language := analyze.GetCodeLanguage(f.Name, content)
   118  		if language == enry.OtherLanguage || language == "" {
   119  			return nil
   120  		}
   121  
   122  		// group languages, such as Pug -> HTML; SCSS -> CSS
   123  		group := enry.GetLanguageGroup(language)
   124  		if group != "" {
   125  			language = group
   126  		}
   127  
   128  		sizes[language] += f.Size
   129  
   130  		return nil
   131  	})
   132  	if err != nil {
   133  		return nil, err
   134  	}
   135  
   136  	// filter special languages unless they are the only language
   137  	if len(sizes) > 1 {
   138  		for language := range sizes {
   139  			langtype := enry.GetLanguageType(language)
   140  			if langtype != enry.Programming && langtype != enry.Markup {
   141  				delete(sizes, language)
   142  			}
   143  		}
   144  	}
   145  
   146  	return sizes, nil
   147  }
   148  
   149  func readFile(f *object.File, limit int64) ([]byte, error) {
   150  	r, err := f.Reader()
   151  	if err != nil {
   152  		return nil, err
   153  	}
   154  	defer r.Close()
   155  
   156  	if limit <= 0 {
   157  		return io.ReadAll(r)
   158  	}
   159  
   160  	size := f.Size
   161  	if limit > 0 && size > limit {
   162  		size = limit
   163  	}
   164  	buf := bytes.NewBuffer(nil)
   165  	buf.Grow(int(size))
   166  	_, err = io.Copy(buf, io.LimitReader(r, limit))
   167  	return buf.Bytes(), err
   168  }