kythe.io@v0.0.68-0.20240422202219-7225dbc01741/kythe/go/indexer/cmd/go_indexer/go_indexer.go (about)

     1  /*
     2   * Copyright 2016 The Kythe Authors. All rights reserved.
     3   *
     4   * Licensed under the Apache License, Version 2.0 (the "License");
     5   * you may not use this file except in compliance with the License.
     6   * You may obtain a copy of the License at
     7   *
     8   *   http://www.apache.org/licenses/LICENSE-2.0
     9   *
    10   * Unless required by applicable law or agreed to in writing, software
    11   * distributed under the License is distributed on an "AS IS" BASIS,
    12   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    13   * See the License for the specific language governing permissions and
    14   * limitations under the License.
    15   */
    16  
    17  // Program go_indexer implements a Kythe indexer for the Go language.  Input is
    18  // read from one or more .kzip paths.
    19  package main
    20  
    21  import (
    22  	"bytes"
    23  	"context"
    24  	"encoding/json"
    25  	"flag"
    26  	"fmt"
    27  	"net/url"
    28  	"os"
    29  	"path/filepath"
    30  	"strings"
    31  
    32  	"kythe.io/kythe/go/indexer"
    33  	"kythe.io/kythe/go/platform/delimited"
    34  	"kythe.io/kythe/go/platform/kzip"
    35  	"kythe.io/kythe/go/platform/vfs"
    36  	"kythe.io/kythe/go/util/log"
    37  	"kythe.io/kythe/go/util/metadata"
    38  
    39  	"github.com/golang/protobuf/proto"
    40  	"google.golang.org/protobuf/encoding/prototext"
    41  
    42  	protopb "github.com/golang/protobuf/protoc-gen-go/descriptor"
    43  	apb "kythe.io/kythe/proto/analysis_go_proto"
    44  	gopb "kythe.io/kythe/proto/go_go_proto"
    45  	spb "kythe.io/kythe/proto/storage_go_proto"
    46  )
    47  
    48  var (
    49  	doJSON                         = flag.Bool("json", false, "Write output as JSON")
    50  	doLibNodes                     = flag.Bool("libnodes", false, "Emit nodes for standard library packages")
    51  	doCodeFacts                    = flag.Bool("code", false, "Emit code facts containing MarkedSource markup")
    52  	doAnchorScopes                 = flag.Bool("anchor_scopes", false, "Emit childof edges to an anchor's semantic scope")
    53  	metaSuffix                     = flag.String("meta", "", "If set, treat files with this suffix as JSON linkage metadata")
    54  	docBase                        = flag.String("docbase", "http://godoc.org", "If set, use as the base URL for godoc links")
    55  	onlyEmitDocURIsForStandardLibs = flag.Bool("only_emit_doc_uris_for_standard_libs", false, "If true, the doc/uri fact is only emitted for go std library packages")
    56  	emitRefCallOverIdentifier      = flag.Bool("emit_ref_call_over_identifier", false, "If true, emit ref/call anchor spans over the function identifier")
    57  	verbose                        = flag.Bool("verbose", false, "Emit verbose log information")
    58  	contOnErr                      = flag.Bool("continue", false, "Log errors encountered during analysis but do not exit unsuccessfully")
    59  	useCompilationCorpusForAll     = flag.Bool("use_compilation_corpus_for_all", false, "If enabled, all Entry VNames are given the corpus of the compilation unit being indexed. This includes items in the go std library and builtin types.")
    60  	useFileAsTopLevelScope         = flag.Bool("use_file_as_top_level_scope", false, "If enabled, use the file node for top-level callsite scopes")
    61  	overrideStdlibCorpus           = flag.String("override_stdlib_corpus", "", "If set, all stdlib nodes are assigned this corpus. Note that this takes precedence over --use_compilation_corpus_for_all")
    62  	flagConstructorsPath           = flag.String("flag_constructors", "", "Path to a textproto containing known FlagConstructors")
    63  
    64  	writeEntry func(context.Context, *spb.Entry) error
    65  )
    66  
    67  func init() {
    68  	flag.Usage = func() {
    69  		fmt.Fprintf(os.Stderr, `Usage: %s [options] <path>...
    70  
    71  Generate Kythe graph data for the compilations stored in .kzip format
    72  named by the path arguments. Output is written to stdout.
    73  
    74  By default, the output is a delimited stream of wire-format Kythe Entry
    75  protobuf messages. With the --json flag, output is instead a stream of
    76  undelimited JSON messages.
    77  
    78  Options:
    79  `, filepath.Base(os.Args[0]))
    80  
    81  		flag.PrintDefaults()
    82  	}
    83  }
    84  
    85  func main() {
    86  	flag.Parse()
    87  
    88  	if flag.NArg() == 0 {
    89  		log.Fatal("No input paths were specified to index")
    90  	}
    91  	if *doJSON {
    92  		enc := json.NewEncoder(os.Stdout)
    93  		writeEntry = func(_ context.Context, entry *spb.Entry) error {
    94  			return enc.Encode(entry)
    95  		}
    96  	} else {
    97  		rw := delimited.NewWriter(os.Stdout)
    98  		writeEntry = func(_ context.Context, entry *spb.Entry) error {
    99  			return rw.PutProto(entry)
   100  		}
   101  	}
   102  	var docURL *url.URL
   103  	if *docBase != "" {
   104  		u, err := url.Parse(*docBase)
   105  		if err != nil {
   106  			log.Fatalf("Invalid doc base URL: %v", err)
   107  		}
   108  		docURL = u
   109  	}
   110  
   111  	ctx := context.Background()
   112  	var flagConstructors *gopb.FlagConstructors
   113  	if *flagConstructorsPath != "" {
   114  		rec, err := vfs.ReadFile(ctx, *flagConstructorsPath)
   115  		if err != nil {
   116  			log.Exitf("Error reading --flag_constructors=%q file: %v", *flagConstructorsPath, err)
   117  		}
   118  		flagConstructors = new(gopb.FlagConstructors)
   119  		if err := prototext.Unmarshal(rec, flagConstructors); err != nil {
   120  			log.Exitf("Error parsing --flag_constructors=%q file: %v", *flagConstructorsPath, err)
   121  		}
   122  	}
   123  
   124  	opts := &indexer.EmitOptions{
   125  		EmitStandardLibs:               *doLibNodes,
   126  		EmitMarkedSource:               *doCodeFacts,
   127  		EmitAnchorScopes:               *doAnchorScopes,
   128  		EmitLinkages:                   *metaSuffix != "",
   129  		DocBase:                        docURL,
   130  		OnlyEmitDocURIsForStandardLibs: *onlyEmitDocURIsForStandardLibs,
   131  		UseCompilationCorpusForAll:     *useCompilationCorpusForAll,
   132  		UseFileAsTopLevelScope:         *useFileAsTopLevelScope,
   133  		OverrideStdlibCorpus:           *overrideStdlibCorpus,
   134  		EmitRefCallOverIdentifier:      *emitRefCallOverIdentifier,
   135  		FlagConstructors:               flagConstructors,
   136  		Verbose:                        *verbose,
   137  	}
   138  
   139  	for _, path := range flag.Args() {
   140  		if err := visitPath(ctx, path, func(ctx context.Context, unit *apb.CompilationUnit, f indexer.Fetcher) error {
   141  			err := indexGo(ctx, unit, f, opts)
   142  			if err != nil && *contOnErr {
   143  				log.ErrorContextf(ctx, "Continuing after error: %v", err)
   144  				return nil
   145  			}
   146  			return err
   147  		}); err != nil {
   148  			log.Fatalf("Error indexing %q: %v", path, err)
   149  		}
   150  	}
   151  }
   152  
   153  // checkMetadata checks whether ri denotes a metadata file according to the
   154  // setting of the -meta flag, and if so loads the corresponding ruleset.
   155  func checkMetadata(ri *apb.CompilationUnit_FileInput, f indexer.Fetcher) (*indexer.Ruleset, error) {
   156  	if *metaSuffix == "" || !strings.HasSuffix(ri.Info.GetPath(), *metaSuffix) {
   157  		return nil, nil // nothing to do
   158  	}
   159  	bits, err := f.Fetch(ri.Info.GetPath(), ri.Info.GetDigest())
   160  	if err != nil {
   161  		return nil, fmt.Errorf("reading metadata file: %w", err)
   162  	}
   163  	rules, err := metadata.Parse(bytes.NewReader(bits))
   164  	if err != nil {
   165  		// Check if file is actually a GeneratedCodeInfo proto.
   166  		var gci protopb.GeneratedCodeInfo
   167  		if err := proto.UnmarshalText(string(bits), &gci); err != nil {
   168  			return nil, fmt.Errorf("cannot parse .meta file as JSON or textproto: %w", err)
   169  		}
   170  		rules = metadata.FromGeneratedCodeInfo(&gci, ri.VName)
   171  	}
   172  	return &indexer.Ruleset{
   173  		Path:  strings.TrimSuffix(ri.Info.GetPath(), *metaSuffix),
   174  		Rules: rules,
   175  	}, nil
   176  }
   177  
   178  // indexGo is a visitFunc that invokes the Kythe Go indexer on unit.
   179  func indexGo(ctx context.Context, unit *apb.CompilationUnit, f indexer.Fetcher, opts *indexer.EmitOptions) error {
   180  	pi, err := indexer.Resolve(unit, f, &indexer.ResolveOptions{
   181  		Info:       indexer.XRefTypeInfo(),
   182  		CheckRules: checkMetadata,
   183  	})
   184  	if err != nil {
   185  		return err
   186  	}
   187  	if *verbose {
   188  		log.InfoContextf(ctx, "Finished resolving compilation: %s", pi.String())
   189  	}
   190  	return pi.Emit(ctx, writeEntry, opts)
   191  }
   192  
   193  type visitFunc func(context.Context, *apb.CompilationUnit, indexer.Fetcher) error
   194  
   195  // visitPath invokes visit for each compilation denoted by path, which is
   196  // must be a .kzip file (with a single compilation).
   197  func visitPath(ctx context.Context, path string, visit visitFunc) error {
   198  	f, err := os.Open(path)
   199  	if err != nil {
   200  		return err
   201  	}
   202  	defer f.Close()
   203  	switch ext := filepath.Ext(path); ext {
   204  	case ".kzip":
   205  		return kzip.Scan(f, func(r *kzip.Reader, unit *kzip.Unit) error {
   206  			return visit(ctx, unit.Proto, kzipFetcher{r})
   207  		})
   208  
   209  	default:
   210  		return fmt.Errorf("unknown file extension %q", ext)
   211  	}
   212  }
   213  
   214  type kzipFetcher struct{ r *kzip.Reader }
   215  
   216  // Fetch implements the analysis.Fetcher interface. Only the digest is used in
   217  // this implementation, the path is ignored.
   218  func (k kzipFetcher) Fetch(_, digest string) ([]byte, error) {
   219  	return k.r.ReadAll(digest)
   220  }