code.gitea.io/gitea@v1.19.3/modules/git/repo_language_stats_gogit.go (about) 1 // Copyright 2020 The Gitea Authors. All rights reserved. 2 // SPDX-License-Identifier: MIT 3 4 //go:build gogit 5 6 package git 7 8 import ( 9 "bytes" 10 "io" 11 "strings" 12 13 "code.gitea.io/gitea/modules/analyze" 14 15 "github.com/go-enry/go-enry/v2" 16 "github.com/go-git/go-git/v5" 17 "github.com/go-git/go-git/v5/plumbing" 18 "github.com/go-git/go-git/v5/plumbing/object" 19 ) 20 21 // GetLanguageStats calculates language stats for git repository at specified commit 22 func (repo *Repository) GetLanguageStats(commitID string) (map[string]int64, error) { 23 r, err := git.PlainOpen(repo.Path) 24 if err != nil { 25 return nil, err 26 } 27 28 rev, err := r.ResolveRevision(plumbing.Revision(commitID)) 29 if err != nil { 30 return nil, err 31 } 32 33 commit, err := r.CommitObject(*rev) 34 if err != nil { 35 return nil, err 36 } 37 38 tree, err := commit.Tree() 39 if err != nil { 40 return nil, err 41 } 42 43 checker, deferable := repo.CheckAttributeReader(commitID) 44 defer deferable() 45 46 // sizes contains the current calculated size of all files by language 47 sizes := make(map[string]int64) 48 // by default we will only count the sizes of programming languages or markup languages 49 // unless they are explicitly set using linguist-language 50 includedLanguage := map[string]bool{} 51 // or if there's only one language in the repository 52 firstExcludedLanguage := "" 53 firstExcludedLanguageSize := int64(0) 54 55 err = tree.Files().ForEach(func(f *object.File) error { 56 if f.Size == 0 { 57 return nil 58 } 59 60 notVendored := false 61 notGenerated := false 62 63 if checker != nil { 64 attrs, err := checker.CheckPath(f.Name) 65 if err == nil { 66 if vendored, has := attrs["linguist-vendored"]; has { 67 if vendored == "set" || vendored == "true" { 68 return nil 69 } 70 notVendored = vendored == "false" 71 } 72 if generated, has := attrs["linguist-generated"]; has { 73 if generated == "set" || generated == "true" { 74 return nil 75 } 76 notGenerated = generated == "false" 77 } 78 if language, has := attrs["linguist-language"]; has && language != "unspecified" && language != "" { 79 // group languages, such as Pug -> HTML; SCSS -> CSS 80 group := enry.GetLanguageGroup(language) 81 if len(group) != 0 { 82 language = group 83 } 84 85 // this language will always be added to the size 86 sizes[language] += f.Size 87 return nil 88 } else if language, has := attrs["gitlab-language"]; has && language != "unspecified" && language != "" { 89 // strip off a ? if present 90 if idx := strings.IndexByte(language, '?'); idx >= 0 { 91 language = language[:idx] 92 } 93 if len(language) != 0 { 94 // group languages, such as Pug -> HTML; SCSS -> CSS 95 group := enry.GetLanguageGroup(language) 96 if len(group) != 0 { 97 language = group 98 } 99 100 // this language will always be added to the size 101 sizes[language] += f.Size 102 return nil 103 } 104 } 105 } 106 } 107 108 if (!notVendored && analyze.IsVendor(f.Name)) || enry.IsDotFile(f.Name) || 109 enry.IsDocumentation(f.Name) || enry.IsConfiguration(f.Name) { 110 return nil 111 } 112 113 // If content can not be read or file is too big just do detection by filename 114 var content []byte 115 if f.Size <= bigFileSize { 116 content, _ = readFile(f, fileSizeLimit) 117 } 118 if !notGenerated && enry.IsGenerated(f.Name, content) { 119 return nil 120 } 121 122 // TODO: Use .gitattributes file for linguist overrides 123 124 language := analyze.GetCodeLanguage(f.Name, content) 125 if language == enry.OtherLanguage || language == "" { 126 return nil 127 } 128 129 // group languages, such as Pug -> HTML; SCSS -> CSS 130 group := enry.GetLanguageGroup(language) 131 if group != "" { 132 language = group 133 } 134 135 included, checked := includedLanguage[language] 136 if !checked { 137 langtype := enry.GetLanguageType(language) 138 included = langtype == enry.Programming || langtype == enry.Markup 139 includedLanguage[language] = included 140 } 141 if included { 142 sizes[language] += f.Size 143 } else if len(sizes) == 0 && (firstExcludedLanguage == "" || firstExcludedLanguage == language) { 144 firstExcludedLanguage = language 145 firstExcludedLanguageSize += f.Size 146 } 147 148 return nil 149 }) 150 if err != nil { 151 return nil, err 152 } 153 154 // If there are no included languages add the first excluded language 155 if len(sizes) == 0 && firstExcludedLanguage != "" { 156 sizes[firstExcludedLanguage] = firstExcludedLanguageSize 157 } 158 159 return sizes, nil 160 } 161 162 func readFile(f *object.File, limit int64) ([]byte, error) { 163 r, err := f.Reader() 164 if err != nil { 165 return nil, err 166 } 167 defer r.Close() 168 169 if limit <= 0 { 170 return io.ReadAll(r) 171 } 172 173 size := f.Size 174 if limit > 0 && size > limit { 175 size = limit 176 } 177 buf := bytes.NewBuffer(nil) 178 buf.Grow(int(size)) 179 _, err = io.Copy(buf, io.LimitReader(r, limit)) 180 return buf.Bytes(), err 181 }