kythe.io@v0.0.68-0.20240422202219-7225dbc01741/kythe/go/platform/kzip/info/info.go (about) 1 /* 2 * Copyright 2019 The Kythe Authors. All rights reserved. 3 * 4 * Licensed under the Apache License, Version 2.0 (the "License"); 5 * you may not use this file except in compliance with the License. 6 * You may obtain a copy of the License at 7 * 8 * http://www.apache.org/licenses/LICENSE-2.0 9 * 10 * Unless required by applicable law or agreed to in writing, software 11 * distributed under the License is distributed on an "AS IS" BASIS, 12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 * See the License for the specific language governing permissions and 14 * limitations under the License. 15 */ 16 17 // Package info provides utilities for summarizing the contents of a kzip. 18 package info // import "kythe.io/kythe/go/platform/kzip/info" 19 20 import ( 21 "fmt" 22 "path/filepath" 23 "strconv" 24 "strings" 25 26 "kythe.io/kythe/go/platform/kzip" 27 "kythe.io/kythe/go/util/log" 28 29 "bitbucket.org/creachadair/stringset" 30 "google.golang.org/protobuf/encoding/prototext" 31 32 apb "kythe.io/kythe/proto/analysis_go_proto" 33 spb "kythe.io/kythe/proto/storage_go_proto" 34 ) 35 36 // KzipInfo scans the kzip in f and counts contained files and units, giving a 37 // breakdown by corpus and language. It also records the size (in bytes) of the 38 // kzip specified by fileSize in the returned KzipInfo. This is a convenience 39 // method and thin wrapper over the Accumulator. If you need to do more than 40 // just calculate KzipInfo while doing a kzip.Scan(), you should use the 41 // Accumulator directly. 42 func KzipInfo(f kzip.File, fileSize int64, scanOpts ...kzip.ScanOption) (*apb.KzipInfo, error) { 43 a := NewAccumulator(fileSize) 44 if err := kzip.Scan(f, func(r *kzip.Reader, unit *kzip.Unit) error { 45 a.Accumulate(unit) 46 return nil 47 }, scanOpts...); err != nil { 48 return nil, fmt.Errorf("scanning kzip: %v", err) 49 } 50 return a.Get(), nil 51 } 52 53 // Accumulator is used to build a summary of a collection of compilation units. 54 // Usage: 55 // 56 // a := NewAccumulator(fileSize) 57 // a.Accumulate(unit) // call for each compilation unit 58 // info := a.Get() // get the resulting KzipInfo 59 type Accumulator struct { 60 KzipInfo *apb.KzipInfo 61 } 62 63 // NewAccumulator creates a new Accumulator instance given the kzip fileSize (in 64 // bytes). 65 func NewAccumulator(fileSize int64) *Accumulator { 66 return &Accumulator{ 67 KzipInfo: &apb.KzipInfo{ 68 Corpora: make(map[string]*apb.KzipInfo_CorpusInfo), 69 Size: fileSize, 70 }, 71 } 72 } 73 74 // Accumulate should be called for each unit in the kzip so its counts can be 75 // recorded. 76 func (a *Accumulator) Accumulate(u *kzip.Unit) { 77 // Set of canonicalized source file paths in the kzip 78 srcs := stringset.New() 79 for _, p := range u.Proto.SourceFile { 80 srcs.Add(filepath.Clean(p)) 81 } 82 83 cuLang := u.Proto.GetVName().GetLanguage() 84 if cuLang == "" { 85 msg := fmt.Sprintf("CU(%s) does not specify a language", formatVName(u.Proto.GetVName())) 86 a.KzipInfo.CriticalKzipErrors = append(a.KzipInfo.CriticalKzipErrors, msg) 87 return 88 } 89 90 cuInfo(u.Proto.GetVName().GetCorpus(), cuLang, a.KzipInfo).Count++ 91 92 if cuLang == "java" { 93 v := javaSourceVersion(u.Proto.GetArgument()) 94 cuInfo(u.Proto.GetVName().GetCorpus(), cuLang, a.KzipInfo).JavaVersionCount[int32(v)]++ 95 } 96 97 var srcCorpora stringset.Set 98 var absPaths stringset.Set 99 srcsWithRI := stringset.New() 100 for _, ri := range u.Proto.RequiredInput { 101 if strings.HasPrefix(ri.GetVName().GetPath(), "/") && !strings.HasPrefix(ri.GetVName().GetPath(), "/kythe_builtins/") { 102 absPaths.Add(ri.GetVName().GetPath()) 103 } 104 105 riCorpus := requiredInputCorpus(u, ri) 106 if riCorpus == "" { 107 // Trim spaces to work around the fact that log("%v", proto) is inconsistent about trailing spaces in google3 vs open-source go. 108 msg := strings.TrimSpace(fmt.Sprintf("unable to determine corpus for required_input %q in CU(%s)", ri.Info.Path, formatVName(u.Proto.GetVName()))) 109 a.KzipInfo.CriticalKzipErrors = append(a.KzipInfo.CriticalKzipErrors, msg) 110 return 111 } 112 requiredInputInfo(riCorpus, cuLang, a.KzipInfo).Count++ 113 // canonicalize required_input path before checking against source 114 // files. In some cases, required_input paths may be non-canonical due 115 // to compiler idiosyncrasies (ahem c++), but it's ok to canonicalize 116 // for the purposes of this validation check. 117 normalizedInputPath := filepath.Clean(ri.Info.Path) 118 if srcs.Contains(normalizedInputPath) { 119 sourceInfo(riCorpus, cuLang, a.KzipInfo).Count++ 120 srcCorpora.Add(riCorpus) 121 srcsWithRI.Add(normalizedInputPath) 122 } 123 } 124 srcsWithoutRI := srcs.Diff(srcsWithRI) 125 for path := range srcsWithoutRI { 126 msg := fmt.Sprintf("source %q in CU(%s) doesn't have a required_input entry", path, formatVName(u.Proto.GetVName())) 127 a.KzipInfo.CriticalKzipErrors = append(a.KzipInfo.CriticalKzipErrors, msg) 128 } 129 if srcCorpora.Len() != 1 { 130 // This is a warning for now, but may become an error. 131 log.Infof("Multiple corpora in unit. unit vname={%v}; src corpora=%v; srcs=%v", u.Proto.GetVName(), srcCorpora, u.Proto.SourceFile) 132 } 133 134 a.KzipInfo.AbsolutePaths = absPaths.Elements() 135 } 136 137 // javaSourceVersion checks if the CU args include setting source to a language level and if it does 138 // it returns that version. If no source flag is set or it can't be read, it returns 0. 139 func javaSourceVersion(args []string) int { 140 for i, arg := range args { 141 if arg == "-source" || arg == "--source" { 142 if i+1 < len(args) { 143 version := args[i+1] 144 version = strings.TrimPrefix(version, "1.") 145 intVersion, err := strconv.Atoi(version) 146 if err != nil { 147 log.Errorf("Unable to parse java version string: %v", err) 148 return 0 149 } 150 return intVersion 151 } 152 } 153 } 154 return 0 155 } 156 157 // Get returns the final KzipInfo after info from each unit in the kzip has been 158 // accumulated. 159 func (a *Accumulator) Get() *apb.KzipInfo { 160 return a.KzipInfo 161 } 162 163 // requiredInputCorpus computes the corpus for a required input. It follows the rules in the 164 // CompilationUnit proto comments in kythe/proto/analysis.proto that say that any 165 // required_input that does not set corpus in its VName should inherit corpus from the compilation 166 // unit's VName. 167 func requiredInputCorpus(u *kzip.Unit, ri *apb.CompilationUnit_FileInput) string { 168 if c := ri.GetVName().GetCorpus(); c != "" { 169 return c 170 } 171 return u.Proto.GetVName().GetCorpus() 172 } 173 174 // KzipInfoTotalCount returns the total CompilationUnits counts for infos split apart by language. 175 func KzipInfoTotalCount(infos []*apb.KzipInfo) *apb.KzipInfo_CorpusInfo { 176 totals := &apb.KzipInfo_CorpusInfo{ 177 LanguageRequiredInputs: make(map[string]*apb.KzipInfo_CorpusInfo_Inputs), 178 LanguageSources: make(map[string]*apb.KzipInfo_CorpusInfo_Inputs), 179 LanguageCuInfo: make(map[string]*apb.KzipInfo_CorpusInfo_CUInfo), 180 } 181 for _, info := range infos { 182 for _, i := range info.GetCorpora() { 183 for lang, stats := range i.GetLanguageRequiredInputs() { 184 total := totals.LanguageRequiredInputs[lang] 185 if total == nil { 186 total = &apb.KzipInfo_CorpusInfo_Inputs{} 187 totals.LanguageRequiredInputs[lang] = total 188 } 189 total.Count += stats.GetCount() 190 } 191 for lang, stats := range i.GetLanguageSources() { 192 total := totals.LanguageSources[lang] 193 if total == nil { 194 total = &apb.KzipInfo_CorpusInfo_Inputs{} 195 totals.LanguageSources[lang] = total 196 } 197 total.Count += stats.GetCount() 198 } 199 for lang, stats := range i.GetLanguageCuInfo() { 200 total := totals.LanguageCuInfo[lang] 201 if total == nil { 202 total = makeCUInfo() 203 totals.LanguageCuInfo[lang] = total 204 } 205 total.Count += stats.GetCount() 206 for version, count := range stats.GetJavaVersionCount() { 207 total.JavaVersionCount[version] += count 208 } 209 } 210 } 211 } 212 return totals 213 } 214 215 // MergeKzipInfo combines the counts from multiple KzipInfos. 216 func MergeKzipInfo(infos []*apb.KzipInfo) *apb.KzipInfo { 217 kzipInfo := &apb.KzipInfo{Corpora: make(map[string]*apb.KzipInfo_CorpusInfo)} 218 219 for _, i := range infos { 220 for corpus, cinfo := range i.GetCorpora() { 221 for lang, inputs := range cinfo.GetLanguageRequiredInputs() { 222 c := requiredInputInfo(corpus, lang, kzipInfo) 223 c.Count += inputs.GetCount() 224 } 225 for lang, sources := range cinfo.GetLanguageSources() { 226 c := sourceInfo(corpus, lang, kzipInfo) 227 c.Count += sources.GetCount() 228 } 229 for lang, cu := range cinfo.GetLanguageCuInfo() { 230 c := cuInfo(corpus, lang, kzipInfo) 231 c.Count += cu.GetCount() 232 for version, count := range cu.GetJavaVersionCount() { 233 c.JavaVersionCount[version] += count 234 } 235 } 236 } 237 kzipInfo.CriticalKzipErrors = append(kzipInfo.GetCriticalKzipErrors(), i.GetCriticalKzipErrors()...) 238 kzipInfo.Size += i.Size 239 kzipInfo.AbsolutePaths = stringset.New(kzipInfo.AbsolutePaths...).Union(stringset.New(i.AbsolutePaths...)).Elements() 240 } 241 return kzipInfo 242 } 243 244 func requiredInputInfo(corpus, lang string, kzipInfo *apb.KzipInfo) *apb.KzipInfo_CorpusInfo_Inputs { 245 c := corpusInfo(corpus, kzipInfo) 246 lri := c.LanguageRequiredInputs[lang] 247 if lri == nil { 248 lri = &apb.KzipInfo_CorpusInfo_Inputs{} 249 c.LanguageRequiredInputs[lang] = lri 250 } 251 return lri 252 } 253 254 func sourceInfo(corpus, lang string, kzipInfo *apb.KzipInfo) *apb.KzipInfo_CorpusInfo_Inputs { 255 c := corpusInfo(corpus, kzipInfo) 256 ls := c.LanguageSources[lang] 257 if ls == nil { 258 ls = &apb.KzipInfo_CorpusInfo_Inputs{} 259 c.LanguageSources[lang] = ls 260 } 261 return ls 262 } 263 264 func cuInfo(corpus, lang string, kzipInfo *apb.KzipInfo) *apb.KzipInfo_CorpusInfo_CUInfo { 265 c := corpusInfo(corpus, kzipInfo) 266 cuInfo := c.LanguageCuInfo[lang] 267 if cuInfo == nil { 268 cuInfo = makeCUInfo() 269 c.LanguageCuInfo[lang] = cuInfo 270 } 271 return cuInfo 272 } 273 274 func makeCUInfo() *apb.KzipInfo_CorpusInfo_CUInfo { 275 return &apb.KzipInfo_CorpusInfo_CUInfo{ 276 JavaVersionCount: make(map[int32]int32), 277 } 278 } 279 280 func corpusInfo(corpus string, kzipInfo *apb.KzipInfo) *apb.KzipInfo_CorpusInfo { 281 i := kzipInfo.GetCorpora()[corpus] 282 if i == nil { 283 i = &apb.KzipInfo_CorpusInfo{ 284 LanguageRequiredInputs: make(map[string]*apb.KzipInfo_CorpusInfo_Inputs), 285 LanguageSources: make(map[string]*apb.KzipInfo_CorpusInfo_Inputs), 286 LanguageCuInfo: make(map[string]*apb.KzipInfo_CorpusInfo_CUInfo), 287 } 288 kzipInfo.Corpora[corpus] = i 289 } 290 return i 291 } 292 293 func formatVName(v *spb.VName) string { 294 return strings.ReplaceAll(prototext.MarshalOptions{}.Format(v), " ", " ") 295 }