kythe.io@v0.0.68-0.20240422202219-7225dbc01741/kythe/go/indexer/cmd/go_indexer/go_indexer.go (about) 1 /* 2 * Copyright 2016 The Kythe Authors. All rights reserved. 3 * 4 * Licensed under the Apache License, Version 2.0 (the "License"); 5 * you may not use this file except in compliance with the License. 6 * You may obtain a copy of the License at 7 * 8 * http://www.apache.org/licenses/LICENSE-2.0 9 * 10 * Unless required by applicable law or agreed to in writing, software 11 * distributed under the License is distributed on an "AS IS" BASIS, 12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 * See the License for the specific language governing permissions and 14 * limitations under the License. 15 */ 16 17 // Program go_indexer implements a Kythe indexer for the Go language. Input is 18 // read from one or more .kzip paths. 19 package main 20 21 import ( 22 "bytes" 23 "context" 24 "encoding/json" 25 "flag" 26 "fmt" 27 "net/url" 28 "os" 29 "path/filepath" 30 "strings" 31 32 "kythe.io/kythe/go/indexer" 33 "kythe.io/kythe/go/platform/delimited" 34 "kythe.io/kythe/go/platform/kzip" 35 "kythe.io/kythe/go/platform/vfs" 36 "kythe.io/kythe/go/util/log" 37 "kythe.io/kythe/go/util/metadata" 38 39 "github.com/golang/protobuf/proto" 40 "google.golang.org/protobuf/encoding/prototext" 41 42 protopb "github.com/golang/protobuf/protoc-gen-go/descriptor" 43 apb "kythe.io/kythe/proto/analysis_go_proto" 44 gopb "kythe.io/kythe/proto/go_go_proto" 45 spb "kythe.io/kythe/proto/storage_go_proto" 46 ) 47 48 var ( 49 doJSON = flag.Bool("json", false, "Write output as JSON") 50 doLibNodes = flag.Bool("libnodes", false, "Emit nodes for standard library packages") 51 doCodeFacts = flag.Bool("code", false, "Emit code facts containing MarkedSource markup") 52 doAnchorScopes = flag.Bool("anchor_scopes", false, "Emit childof edges to an anchor's semantic scope") 53 metaSuffix = flag.String("meta", "", "If set, treat files with this suffix as JSON linkage metadata") 54 docBase = flag.String("docbase", "http://godoc.org", "If set, use as the base URL for godoc links") 55 onlyEmitDocURIsForStandardLibs = flag.Bool("only_emit_doc_uris_for_standard_libs", false, "If true, the doc/uri fact is only emitted for go std library packages") 56 emitRefCallOverIdentifier = flag.Bool("emit_ref_call_over_identifier", false, "If true, emit ref/call anchor spans over the function identifier") 57 verbose = flag.Bool("verbose", false, "Emit verbose log information") 58 contOnErr = flag.Bool("continue", false, "Log errors encountered during analysis but do not exit unsuccessfully") 59 useCompilationCorpusForAll = flag.Bool("use_compilation_corpus_for_all", false, "If enabled, all Entry VNames are given the corpus of the compilation unit being indexed. This includes items in the go std library and builtin types.") 60 useFileAsTopLevelScope = flag.Bool("use_file_as_top_level_scope", false, "If enabled, use the file node for top-level callsite scopes") 61 overrideStdlibCorpus = flag.String("override_stdlib_corpus", "", "If set, all stdlib nodes are assigned this corpus. Note that this takes precedence over --use_compilation_corpus_for_all") 62 flagConstructorsPath = flag.String("flag_constructors", "", "Path to a textproto containing known FlagConstructors") 63 64 writeEntry func(context.Context, *spb.Entry) error 65 ) 66 67 func init() { 68 flag.Usage = func() { 69 fmt.Fprintf(os.Stderr, `Usage: %s [options] <path>... 70 71 Generate Kythe graph data for the compilations stored in .kzip format 72 named by the path arguments. Output is written to stdout. 73 74 By default, the output is a delimited stream of wire-format Kythe Entry 75 protobuf messages. With the --json flag, output is instead a stream of 76 undelimited JSON messages. 77 78 Options: 79 `, filepath.Base(os.Args[0])) 80 81 flag.PrintDefaults() 82 } 83 } 84 85 func main() { 86 flag.Parse() 87 88 if flag.NArg() == 0 { 89 log.Fatal("No input paths were specified to index") 90 } 91 if *doJSON { 92 enc := json.NewEncoder(os.Stdout) 93 writeEntry = func(_ context.Context, entry *spb.Entry) error { 94 return enc.Encode(entry) 95 } 96 } else { 97 rw := delimited.NewWriter(os.Stdout) 98 writeEntry = func(_ context.Context, entry *spb.Entry) error { 99 return rw.PutProto(entry) 100 } 101 } 102 var docURL *url.URL 103 if *docBase != "" { 104 u, err := url.Parse(*docBase) 105 if err != nil { 106 log.Fatalf("Invalid doc base URL: %v", err) 107 } 108 docURL = u 109 } 110 111 ctx := context.Background() 112 var flagConstructors *gopb.FlagConstructors 113 if *flagConstructorsPath != "" { 114 rec, err := vfs.ReadFile(ctx, *flagConstructorsPath) 115 if err != nil { 116 log.Exitf("Error reading --flag_constructors=%q file: %v", *flagConstructorsPath, err) 117 } 118 flagConstructors = new(gopb.FlagConstructors) 119 if err := prototext.Unmarshal(rec, flagConstructors); err != nil { 120 log.Exitf("Error parsing --flag_constructors=%q file: %v", *flagConstructorsPath, err) 121 } 122 } 123 124 opts := &indexer.EmitOptions{ 125 EmitStandardLibs: *doLibNodes, 126 EmitMarkedSource: *doCodeFacts, 127 EmitAnchorScopes: *doAnchorScopes, 128 EmitLinkages: *metaSuffix != "", 129 DocBase: docURL, 130 OnlyEmitDocURIsForStandardLibs: *onlyEmitDocURIsForStandardLibs, 131 UseCompilationCorpusForAll: *useCompilationCorpusForAll, 132 UseFileAsTopLevelScope: *useFileAsTopLevelScope, 133 OverrideStdlibCorpus: *overrideStdlibCorpus, 134 EmitRefCallOverIdentifier: *emitRefCallOverIdentifier, 135 FlagConstructors: flagConstructors, 136 Verbose: *verbose, 137 } 138 139 for _, path := range flag.Args() { 140 if err := visitPath(ctx, path, func(ctx context.Context, unit *apb.CompilationUnit, f indexer.Fetcher) error { 141 err := indexGo(ctx, unit, f, opts) 142 if err != nil && *contOnErr { 143 log.ErrorContextf(ctx, "Continuing after error: %v", err) 144 return nil 145 } 146 return err 147 }); err != nil { 148 log.Fatalf("Error indexing %q: %v", path, err) 149 } 150 } 151 } 152 153 // checkMetadata checks whether ri denotes a metadata file according to the 154 // setting of the -meta flag, and if so loads the corresponding ruleset. 155 func checkMetadata(ri *apb.CompilationUnit_FileInput, f indexer.Fetcher) (*indexer.Ruleset, error) { 156 if *metaSuffix == "" || !strings.HasSuffix(ri.Info.GetPath(), *metaSuffix) { 157 return nil, nil // nothing to do 158 } 159 bits, err := f.Fetch(ri.Info.GetPath(), ri.Info.GetDigest()) 160 if err != nil { 161 return nil, fmt.Errorf("reading metadata file: %w", err) 162 } 163 rules, err := metadata.Parse(bytes.NewReader(bits)) 164 if err != nil { 165 // Check if file is actually a GeneratedCodeInfo proto. 166 var gci protopb.GeneratedCodeInfo 167 if err := proto.UnmarshalText(string(bits), &gci); err != nil { 168 return nil, fmt.Errorf("cannot parse .meta file as JSON or textproto: %w", err) 169 } 170 rules = metadata.FromGeneratedCodeInfo(&gci, ri.VName) 171 } 172 return &indexer.Ruleset{ 173 Path: strings.TrimSuffix(ri.Info.GetPath(), *metaSuffix), 174 Rules: rules, 175 }, nil 176 } 177 178 // indexGo is a visitFunc that invokes the Kythe Go indexer on unit. 179 func indexGo(ctx context.Context, unit *apb.CompilationUnit, f indexer.Fetcher, opts *indexer.EmitOptions) error { 180 pi, err := indexer.Resolve(unit, f, &indexer.ResolveOptions{ 181 Info: indexer.XRefTypeInfo(), 182 CheckRules: checkMetadata, 183 }) 184 if err != nil { 185 return err 186 } 187 if *verbose { 188 log.InfoContextf(ctx, "Finished resolving compilation: %s", pi.String()) 189 } 190 return pi.Emit(ctx, writeEntry, opts) 191 } 192 193 type visitFunc func(context.Context, *apb.CompilationUnit, indexer.Fetcher) error 194 195 // visitPath invokes visit for each compilation denoted by path, which is 196 // must be a .kzip file (with a single compilation). 197 func visitPath(ctx context.Context, path string, visit visitFunc) error { 198 f, err := os.Open(path) 199 if err != nil { 200 return err 201 } 202 defer f.Close() 203 switch ext := filepath.Ext(path); ext { 204 case ".kzip": 205 return kzip.Scan(f, func(r *kzip.Reader, unit *kzip.Unit) error { 206 return visit(ctx, unit.Proto, kzipFetcher{r}) 207 }) 208 209 default: 210 return fmt.Errorf("unknown file extension %q", ext) 211 } 212 } 213 214 type kzipFetcher struct{ r *kzip.Reader } 215 216 // Fetch implements the analysis.Fetcher interface. Only the digest is used in 217 // this implementation, the path is ignored. 218 func (k kzipFetcher) Fetch(_, digest string) ([]byte, error) { 219 return k.r.ReadAll(digest) 220 }