github.com/gitbundle/modules@v0.0.0-20231025071548-85b91c5c3b01/git/repo_language_stats_gogit.go (about) 1 // Copyright 2023 The GitBundle Inc. All rights reserved. 2 // Copyright 2017 The Gitea Authors. All rights reserved. 3 // Use of this source code is governed by a MIT-style 4 // license that can be found in the LICENSE file. 5 6 //go:build gogit 7 8 package git 9 10 import ( 11 "bytes" 12 "io" 13 "strings" 14 15 "github.com/gitbundle/modules/analyze" 16 17 "github.com/go-enry/go-enry/v2" 18 "github.com/go-git/go-git/v5" 19 "github.com/go-git/go-git/v5/plumbing" 20 "github.com/go-git/go-git/v5/plumbing/object" 21 ) 22 23 // GetLanguageStats calculates language stats for git repository at specified commit 24 func (repo *Repository) GetLanguageStats(commitID string) (map[string]int64, error) { 25 r, err := git.PlainOpen(repo.Path) 26 if err != nil { 27 return nil, err 28 } 29 30 rev, err := r.ResolveRevision(plumbing.Revision(commitID)) 31 if err != nil { 32 return nil, err 33 } 34 35 commit, err := r.CommitObject(*rev) 36 if err != nil { 37 return nil, err 38 } 39 40 tree, err := commit.Tree() 41 if err != nil { 42 return nil, err 43 } 44 45 checker, deferable := repo.CheckAttributeReader(commitID) 46 defer deferable() 47 48 sizes := make(map[string]int64) 49 err = tree.Files().ForEach(func(f *object.File) error { 50 if f.Size == 0 { 51 return nil 52 } 53 54 notVendored := false 55 notGenerated := false 56 57 if checker != nil { 58 attrs, err := checker.CheckPath(f.Name) 59 if err == nil { 60 if vendored, has := attrs["linguist-vendored"]; has { 61 if vendored == "set" || vendored == "true" { 62 return nil 63 } 64 notVendored = vendored == "false" 65 } 66 if generated, has := attrs["linguist-generated"]; has { 67 if generated == "set" || generated == "true" { 68 return nil 69 } 70 notGenerated = generated == "false" 71 } 72 if language, has := attrs["linguist-language"]; has && language != "unspecified" && language != "" { 73 // group languages, such as Pug -> HTML; SCSS -> CSS 74 group := enry.GetLanguageGroup(language) 75 if len(group) != 0 { 76 language = group 77 } 78 79 sizes[language] += f.Size 80 81 return nil 82 } else if language, has := attrs["gitlab-language"]; has && language != "unspecified" && language != "" { 83 // strip off a ? if present 84 if idx := strings.IndexByte(language, '?'); idx >= 0 { 85 language = language[:idx] 86 } 87 if len(language) != 0 { 88 // group languages, such as Pug -> HTML; SCSS -> CSS 89 group := enry.GetLanguageGroup(language) 90 if len(group) != 0 { 91 language = group 92 } 93 94 sizes[language] += f.Size 95 return nil 96 } 97 } 98 } 99 } 100 101 if (!notVendored && analyze.IsVendor(f.Name)) || enry.IsDotFile(f.Name) || 102 enry.IsDocumentation(f.Name) || enry.IsConfiguration(f.Name) { 103 return nil 104 } 105 106 // If content can not be read or file is too big just do detection by filename 107 var content []byte 108 if f.Size <= bigFileSize { 109 content, _ = readFile(f, fileSizeLimit) 110 } 111 if !notGenerated && enry.IsGenerated(f.Name, content) { 112 return nil 113 } 114 115 // TODO: Use .gitattributes file for linguist overrides 116 117 language := analyze.GetCodeLanguage(f.Name, content) 118 if language == enry.OtherLanguage || language == "" { 119 return nil 120 } 121 122 // group languages, such as Pug -> HTML; SCSS -> CSS 123 group := enry.GetLanguageGroup(language) 124 if group != "" { 125 language = group 126 } 127 128 sizes[language] += f.Size 129 130 return nil 131 }) 132 if err != nil { 133 return nil, err 134 } 135 136 // filter special languages unless they are the only language 137 if len(sizes) > 1 { 138 for language := range sizes { 139 langtype := enry.GetLanguageType(language) 140 if langtype != enry.Programming && langtype != enry.Markup { 141 delete(sizes, language) 142 } 143 } 144 } 145 146 return sizes, nil 147 } 148 149 func readFile(f *object.File, limit int64) ([]byte, error) { 150 r, err := f.Reader() 151 if err != nil { 152 return nil, err 153 } 154 defer r.Close() 155 156 if limit <= 0 { 157 return io.ReadAll(r) 158 } 159 160 size := f.Size 161 if limit > 0 && size > limit { 162 size = limit 163 } 164 buf := bytes.NewBuffer(nil) 165 buf.Grow(int(size)) 166 _, err = io.Copy(buf, io.LimitReader(r, limit)) 167 return buf.Bytes(), err 168 }