code.gitea.io/gitea@v1.22.3/modules/git/repo_language_stats_nogogit.go (about) 1 // Copyright 2020 The Gitea Authors. All rights reserved. 2 // SPDX-License-Identifier: MIT 3 4 //go:build !gogit 5 6 package git 7 8 import ( 9 "bytes" 10 "io" 11 12 "code.gitea.io/gitea/modules/analyze" 13 "code.gitea.io/gitea/modules/log" 14 "code.gitea.io/gitea/modules/optional" 15 16 "github.com/go-enry/go-enry/v2" 17 ) 18 19 // GetLanguageStats calculates language stats for git repository at specified commit 20 func (repo *Repository) GetLanguageStats(commitID string) (map[string]int64, error) { 21 // We will feed the commit IDs in order into cat-file --batch, followed by blobs as necessary. 22 // so let's create a batch stdin and stdout 23 batchStdinWriter, batchReader, cancel, err := repo.CatFileBatch(repo.Ctx) 24 if err != nil { 25 return nil, err 26 } 27 defer cancel() 28 29 writeID := func(id string) error { 30 _, err := batchStdinWriter.Write([]byte(id + "\n")) 31 return err 32 } 33 34 if err := writeID(commitID); err != nil { 35 return nil, err 36 } 37 shaBytes, typ, size, err := ReadBatchLine(batchReader) 38 if typ != "commit" { 39 log.Debug("Unable to get commit for: %s. Err: %v", commitID, err) 40 return nil, ErrNotExist{commitID, ""} 41 } 42 43 sha, err := NewIDFromString(string(shaBytes)) 44 if err != nil { 45 log.Debug("Unable to get commit for: %s. Err: %v", commitID, err) 46 return nil, ErrNotExist{commitID, ""} 47 } 48 49 commit, err := CommitFromReader(repo, sha, io.LimitReader(batchReader, size)) 50 if err != nil { 51 log.Debug("Unable to get commit for: %s. Err: %v", commitID, err) 52 return nil, err 53 } 54 if _, err = batchReader.Discard(1); err != nil { 55 return nil, err 56 } 57 58 tree := commit.Tree 59 60 entries, err := tree.ListEntriesRecursiveWithSize() 61 if err != nil { 62 return nil, err 63 } 64 65 checker, deferable := repo.CheckAttributeReader(commitID) 66 defer deferable() 67 68 contentBuf := bytes.Buffer{} 69 var content []byte 70 71 // sizes contains the current calculated size of all files by language 72 sizes := make(map[string]int64) 73 // by default we will only count the sizes of programming languages or markup languages 74 // unless they are explicitly set using linguist-language 75 includedLanguage := map[string]bool{} 76 // or if there's only one language in the repository 77 firstExcludedLanguage := "" 78 firstExcludedLanguageSize := int64(0) 79 80 for _, f := range entries { 81 select { 82 case <-repo.Ctx.Done(): 83 return sizes, repo.Ctx.Err() 84 default: 85 } 86 87 contentBuf.Reset() 88 content = contentBuf.Bytes() 89 90 if f.Size() == 0 { 91 continue 92 } 93 94 isVendored := optional.None[bool]() 95 isGenerated := optional.None[bool]() 96 isDocumentation := optional.None[bool]() 97 isDetectable := optional.None[bool]() 98 99 if checker != nil { 100 attrs, err := checker.CheckPath(f.Name()) 101 if err == nil { 102 isVendored = AttributeToBool(attrs, AttributeLinguistVendored) 103 if isVendored.ValueOrDefault(false) { 104 continue 105 } 106 107 isGenerated = AttributeToBool(attrs, AttributeLinguistGenerated) 108 if isGenerated.ValueOrDefault(false) { 109 continue 110 } 111 112 isDocumentation = AttributeToBool(attrs, AttributeLinguistDocumentation) 113 if isDocumentation.ValueOrDefault(false) { 114 continue 115 } 116 117 isDetectable = AttributeToBool(attrs, AttributeLinguistDetectable) 118 if !isDetectable.ValueOrDefault(true) { 119 continue 120 } 121 122 hasLanguage := TryReadLanguageAttribute(attrs) 123 if hasLanguage.Value() != "" { 124 language := hasLanguage.Value() 125 126 // group languages, such as Pug -> HTML; SCSS -> CSS 127 group := enry.GetLanguageGroup(language) 128 if len(group) != 0 { 129 language = group 130 } 131 132 // this language will always be added to the size 133 sizes[language] += f.Size() 134 continue 135 } 136 } 137 } 138 139 if (!isVendored.Has() && analyze.IsVendor(f.Name())) || 140 enry.IsDotFile(f.Name()) || 141 (!isDocumentation.Has() && enry.IsDocumentation(f.Name())) || 142 enry.IsConfiguration(f.Name()) { 143 continue 144 } 145 146 // If content can not be read or file is too big just do detection by filename 147 148 if f.Size() <= bigFileSize { 149 if err := writeID(f.ID.String()); err != nil { 150 return nil, err 151 } 152 _, _, size, err := ReadBatchLine(batchReader) 153 if err != nil { 154 log.Debug("Error reading blob: %s Err: %v", f.ID.String(), err) 155 return nil, err 156 } 157 158 sizeToRead := size 159 discard := int64(1) 160 if size > fileSizeLimit { 161 sizeToRead = fileSizeLimit 162 discard = size - fileSizeLimit + 1 163 } 164 165 _, err = contentBuf.ReadFrom(io.LimitReader(batchReader, sizeToRead)) 166 if err != nil { 167 return nil, err 168 } 169 content = contentBuf.Bytes() 170 if err := DiscardFull(batchReader, discard); err != nil { 171 return nil, err 172 } 173 } 174 if !isGenerated.Has() && enry.IsGenerated(f.Name(), content) { 175 continue 176 } 177 178 // FIXME: Why can't we split this and the IsGenerated tests to avoid reading the blob unless absolutely necessary? 179 // - eg. do the all the detection tests using filename first before reading content. 180 language := analyze.GetCodeLanguage(f.Name(), content) 181 if language == "" { 182 continue 183 } 184 185 // group languages, such as Pug -> HTML; SCSS -> CSS 186 group := enry.GetLanguageGroup(language) 187 if group != "" { 188 language = group 189 } 190 191 included, checked := includedLanguage[language] 192 if !checked { 193 langType := enry.GetLanguageType(language) 194 included = langType == enry.Programming || langType == enry.Markup 195 includedLanguage[language] = included 196 } 197 if included || isDetectable.ValueOrDefault(false) { 198 sizes[language] += f.Size() 199 } else if len(sizes) == 0 && (firstExcludedLanguage == "" || firstExcludedLanguage == language) { 200 firstExcludedLanguage = language 201 firstExcludedLanguageSize += f.Size() 202 } 203 } 204 205 // If there are no included languages add the first excluded language 206 if len(sizes) == 0 && firstExcludedLanguage != "" { 207 sizes[firstExcludedLanguage] = firstExcludedLanguageSize 208 } 209 210 return mergeLanguageStats(sizes), nil 211 }