github.com/amarpal/go-tools@v0.0.0-20240422043104-40142f59f616/lintcmd/runner/runner.go

github.com/amarpal/go-tools@v0.0.0-20240422043104-40142f59f616/lintcmd/runner/runner.go (about)

     1  // Package runner implements a go/analysis runner. It makes heavy use
     2  // of on-disk caching to reduce overall memory usage and to speed up
     3  // repeat runs.
     4  //
     5  // # Public API
     6  //
     7  // A Runner maps a list of analyzers and package patterns to a list of
     8  // results. Results provide access to diagnostics, directives, errors
     9  // encountered, and information about packages. Results explicitly do
    10  // not contain ASTs or type information. All position information is
    11  // returned in the form of token.Position, not token.Pos. All work
    12  // that requires access to the loaded representation of a package has
    13  // to occur inside analyzers.
    14  //
    15  // # Planning and execution
    16  //
    17  // Analyzing packages is split into two phases: planning and
    18  // execution.
    19  //
    20  // During planning, a directed acyclic graph of package dependencies
    21  // is computed. We materialize the full graph so that we can execute
    22  // the graph from the bottom up, without keeping unnecessary data in
    23  // memory during a DFS and with simplified parallel execution.
    24  //
    25  // During execution, leaf nodes (nodes with no outstanding
    26  // dependencies) get executed in parallel, bounded by a semaphore
    27  // sized according to the number of CPUs. Conceptually, this happens
    28  // in a loop, processing new leaf nodes as they appear, until no more
    29  // nodes are left. In the actual implementation, nodes know their
    30  // dependents, and the last dependency of a node to be processed is
    31  // responsible for scheduling its dependent.
    32  //
    33  // The graph is rooted at a synthetic root node. Upon execution of the
    34  // root node, the algorithm terminates.
    35  //
    36  // Analyzing a package repeats the same planning + execution steps,
    37  // but this time on a graph of analyzers for the package. Parallel
    38  // execution of individual analyzers is bounded by the same semaphore
    39  // as executing packages.
    40  //
    41  // # Parallelism
    42  //
    43  // Actions are executed in parallel where the dependency graph allows.
    44  // Overall parallelism is bounded by a semaphore, sized according to
    45  // GOMAXPROCS. Each concurrently processed package takes up a
    46  // token, as does each analyzer – but a package can always execute at
    47  // least one analyzer, using the package's token.
    48  //
    49  // Depending on the overall shape of the graph, there may be GOMAXPROCS
    50  // packages running a single analyzer each, a single package running
    51  // GOMAXPROCS analyzers, or anything in between.
    52  //
    53  // Total memory consumption grows roughly linearly with the number of
    54  // CPUs, while total execution time is inversely proportional to the
    55  // number of CPUs. Overall, parallelism is affected by the shape of
    56  // the dependency graph. A lot of inter-connected packages will see
    57  // less parallelism than a lot of independent packages.
    58  //
    59  // # Caching
    60  //
    61  // The runner caches facts, directives and diagnostics in a
    62  // content-addressable cache that is designed after Go's own cache.
    63  // Additionally, it makes use of Go's export data.
    64  //
    65  // This cache not only speeds up repeat runs, it also reduces peak
    66  // memory usage. When we've analyzed a package, we cache the results
    67  // and drop them from memory. When a dependent needs any of this
    68  // information, or when analysis is complete and we wish to render the
    69  // results, the data gets loaded from disk again.
    70  //
    71  // Data only exists in memory when it is immediately needed, not
    72  // retained for possible future uses. This trades increased CPU usage
    73  // for reduced memory usage. A single dependency may be loaded many
    74  // times over, but it greatly reduces peak memory usage, as an
    75  // arbitrary amount of time may pass between analyzing a dependency
    76  // and its dependent, during which other packages will be processed.
    77  package runner
    78  
    79  // OPT(dh): we could reduce disk storage usage of cached data by
    80  // compressing it, either directly at the cache layer, or by feeding
    81  // compressed data to the cache. Of course doing so may negatively
    82  // affect CPU usage, and there are lower hanging fruit, such as
    83  // needing to cache less data in the first place.
    84  
    85  // OPT(dh): right now, each package is analyzed completely
    86  // independently. Each package loads all of its dependencies from
    87  // export data and cached facts. If we have two packages A and B,
    88  // which both depend on C, and which both get analyzed in parallel,
    89  // then C will be loaded twice. This wastes CPU time and memory. It
    90  // would be nice if we could reuse a single C for the analysis of both
    91  // A and B.
    92  //
    93  // We can't reuse the actual types.Package or facts, because each
    94  // package gets its own token.FileSet. Sharing a global FileSet has
    95  // several drawbacks, including increased memory usage and running the
    96  // risk of running out of FileSet address space.
    97  //
    98  // We could however avoid loading the same raw export data from disk
    99  // twice, as well as deserializing gob data twice. One possible
   100  // solution would be a duplicate-suppressing in-memory cache that
   101  // caches data for a limited amount of time. When the same package
   102  // needs to be loaded twice in close succession, we can reuse work,
   103  // without holding unnecessary data in memory for an extended period
   104  // of time.
   105  //
   106  // We would likely need to do extensive benchmarking to figure out how
   107  // long to keep data around to find a sweet spot where we reduce CPU
   108  // load without increasing memory usage.
   109  //
   110  // We can probably populate the cache after we've analyzed a package,
   111  // on the assumption that it will have to be loaded again in the near
   112  // future.
   113  
   114  import (
   115  	"bytes"
   116  	"encoding/gob"
   117  	"fmt"
   118  	"go/token"
   119  	"go/types"
   120  	"io"
   121  	"os"
   122  	"reflect"
   123  	"runtime"
   124  	"sort"
   125  	"strings"
   126  	"sync/atomic"
   127  	"time"
   128  
   129  	"github.com/amarpal/go-tools/analysis/lint"
   130  	"github.com/amarpal/go-tools/analysis/report"
   131  	"github.com/amarpal/go-tools/config"
   132  	"github.com/amarpal/go-tools/go/loader"
   133  	tsync "github.com/amarpal/go-tools/internal/sync"
   134  	"github.com/amarpal/go-tools/lintcmd/cache"
   135  	"github.com/amarpal/go-tools/unused"
   136  
   137  	"golang.org/x/tools/go/analysis"
   138  	"golang.org/x/tools/go/packages"
   139  	"golang.org/x/tools/go/types/objectpath"
   140  )
   141  
// sanityCheck is a compile-time switch that is turned off here. Its uses
// are outside this part of the file; presumably it enables extra
// consistency checks on recorded facts — TODO confirm.
const sanityCheck = false
   143  
// Diagnostic is like go/analysis.Diagnostic, but with all token.Pos resolved to token.Position.
type Diagnostic struct {
	// Position and End are the resolved start and end of the diagnostic.
	Position token.Position
	End      token.Position
	// Category of the diagnostic. When an analyzer reports a diagnostic
	// without a category, the analyzer's name is used instead.
	Category string
	// Message is the diagnostic's text as reported by the analyzer.
	Message  string

	// SuggestedFixes and Related mirror the fields of the same name on
	// go/analysis.Diagnostic, with their positions resolved as well.
	SuggestedFixes []SuggestedFix
	Related        []RelatedInformation
}
   154  
// RelatedInformation provides additional context for a diagnostic.
// It is the position-resolved counterpart of the entries in
// go/analysis.Diagnostic.Related.
type RelatedInformation struct {
	// Position and End are the resolved start and end of the related range.
	Position token.Position
	End      token.Position
	// Message describes how the range relates to the diagnostic.
	Message  string
}
   161  
// SuggestedFix is the position-resolved counterpart of a suggested fix
// attached to a go/analysis diagnostic: a message plus the text edits
// that make up the fix.
type SuggestedFix struct {
	Message   string
	TextEdits []TextEdit
}
   166  
// TextEdit describes a single replacement of the text between Position
// and End with NewText. Positions are resolved token.Position values
// rather than token.Pos.
type TextEdit struct {
	Position token.Position
	End      token.Position
	NewText  []byte
}
   172  
// A Result describes the result of analyzing a single package.
//
// It holds references to cached diagnostics and directives. They can
// be loaded on demand with the Load method.
type Result struct {
	// Package is the package this result belongs to.
	Package *loader.PackageSpec
	// Config is the configuration associated with the package.
	Config  config.Config
	// NOTE(review): Initial and Skipped are populated outside this part
	// of the file. Presumably Initial marks packages that were requested
	// directly (as opposed to pure dependencies) and Skipped marks
	// packages that were not analyzed — confirm against the code that
	// builds Results.
	Initial bool
	Skipped bool

	// Failed reports that the package could not be processed. Load and
	// LoadTest must not be called on a failed Result; they panic.
	Failed bool
	// Errors holds the errors recorded for the package.
	Errors []error
	// Action results, path to file
	results string
	// Results relevant to testing, only set when test mode is enabled, path to file
	testData string
}
   190  
// SerializedDirective is the serializable form of a lint.Directive, with
// positions resolved to token.Position. It is what gets stored in
// ResultData.
type SerializedDirective struct {
	// Command and Arguments are copied verbatim from the directive.
	Command   string
	Arguments []string
	// The position of the comment
	DirectivePosition token.Position
	// The position of the node that the comment is attached to
	NodePosition token.Position
}
   199  
   200  func serializeDirective(dir lint.Directive, fset *token.FileSet) SerializedDirective {
   201  	return SerializedDirective{
   202  		Command:           dir.Command,
   203  		Arguments:         dir.Arguments,
   204  		DirectivePosition: report.DisplayPosition(fset, dir.Directive.Pos()),
   205  		NodePosition:      report.DisplayPosition(fset, dir.Node.Pos()),
   206  	}
   207  }
   208  
// ResultData is the gob-encoded payload that is cached for an analyzed
// package and returned by Result.Load.
type ResultData struct {
	// Directives are the lint directives found in the package.
	Directives  []SerializedDirective
	// Diagnostics are the diagnostics reported for the package.
	Diagnostics []Diagnostic
	// Unused is the result produced by the unused check.
	Unused      unused.Result
}
   214  
   215  func (r Result) Load() (ResultData, error) {
   216  	if r.Failed {
   217  		panic("Load called on failed Result")
   218  	}
   219  	if r.results == "" {
   220  		// this package was only a dependency
   221  		return ResultData{}, nil
   222  	}
   223  	f, err := os.Open(r.results)
   224  	if err != nil {
   225  		return ResultData{}, fmt.Errorf("failed loading result: %w", err)
   226  	}
   227  	defer f.Close()
   228  	var out ResultData
   229  	err = gob.NewDecoder(f).Decode(&out)
   230  	return out, err
   231  }
   232  
// TestData contains extra information about analysis runs that is only available in test mode.
// It is gob-encoded into the cache and read back by Result.LoadTest.
type TestData struct {
	// Facts contains facts produced by analyzers for a package.
	// Unlike vetx, this list only contains facts specific to this package,
	// not all facts for the transitive closure of dependencies.
	Facts []TestFact
	// List of files that were part of the package.
	Files []string
}
   242  
   243  // LoadTest returns data relevant to testing.
   244  // It should only be called if Runner.TestMode was set to true.
   245  func (r Result) LoadTest() (TestData, error) {
   246  	if r.Failed {
   247  		panic("Load called on failed Result")
   248  	}
   249  	if r.results == "" {
   250  		// this package was only a dependency
   251  		return TestData{}, nil
   252  	}
   253  	f, err := os.Open(r.testData)
   254  	if err != nil {
   255  		return TestData{}, fmt.Errorf("failed loading test data: %w", err)
   256  	}
   257  	defer f.Close()
   258  	var out TestData
   259  	err = gob.NewDecoder(f).Decode(&out)
   260  	return out, err
   261  }
   262  
// action is a node in a dependency graph that is executed bottom-up.
// Both packages and analyzers are modelled as actions.
type action interface {
	// Deps returns the actions this action depends on.
	Deps() []action
	// Triggers returns the actions that depend on this action.
	Triggers() []action
	// DecrementPending decrements the number of outstanding dependencies
	// and reports whether it has reached zero.
	DecrementPending() bool
	// MarkFailed marks the action as failed.
	MarkFailed()
	// IsFailed reports whether the action has been marked as failed.
	IsFailed() bool
	// AddError records an error for the action.
	AddError(error)
}
   271  
// baseAction holds the graph bookkeeping and failure state shared by
// packageAction and analyzerAction, and implements the action interface.
type baseAction struct {
	// Action description

	// deps are the actions this action depends on; triggers are the
	// actions that depend on this one. pending starts out as len(deps)
	// and is decremented atomically by DecrementPending.
	deps     []action
	triggers []action
	pending  uint32

	// Action results

	// failed is set to true if the action couldn't be processed. This
	// may either be due to an error specific to this action, in
	// which case the errors field will be populated, or due to a
	// dependency being marked as failed, in which case errors will be
	// empty.
	failed bool
	errors []error
}
   289  
   290  func (act *baseAction) Deps() []action     { return act.deps }
   291  func (act *baseAction) Triggers() []action { return act.triggers }
   292  func (act *baseAction) DecrementPending() bool {
   293  	return atomic.AddUint32(&act.pending, ^uint32(0)) == 0
   294  }
   295  func (act *baseAction) MarkFailed()        { act.failed = true }
   296  func (act *baseAction) IsFailed() bool     { return act.failed }
   297  func (act *baseAction) AddError(err error) { act.errors = append(act.errors, err) }
   298  
// packageAction describes the act of loading a package, fully
// analyzing it, and storing the results.
type packageAction struct {
	baseAction

	// Action description
	// Package is the package to process. factsOnly is true for packages
	// that are only needed for their facts; for those, only the vetx
	// file is produced. hash is the cache key of the action, computed
	// when the action is executed.
	Package   *loader.PackageSpec
	factsOnly bool
	hash      cache.ActionID

	// Action results
	// cfg is the package's configuration merged with the runner's.
	// vetx, results and testData are paths to the cached files holding
	// facts, analysis results and test-mode data, respectively.
	// skipped is copied from the result of the uncached run.
	cfg      config.Config
	vetx     string
	results  string
	testData string
	skipped  bool
}
   316  
   317  func (act *packageAction) String() string {
   318  	return fmt.Sprintf("packageAction(%s)", act.Package)
   319  }
   320  
// objectFact is a fact about an object, together with the object path
// it was stored under.
type objectFact struct {
	fact analysis.Fact
	// TODO(dh): why do we store the objectpath when producing the
	// fact? Is it just for the sanity checking, which compares the
	// stored path with a path recomputed from objectFactKey.Obj?
	path objectpath.Path
}
   328  
// objectFactKey is the map key for object facts: the object the fact is
// about, and the dynamic type of the fact.
type objectFactKey struct {
	Obj  types.Object
	Type reflect.Type
}
   333  
// packageFactKey is the map key for package facts: the package the fact
// is about, and the dynamic type of the fact.
type packageFactKey struct {
	Pkg  *types.Package
	Type reflect.Type
}
   338  
// gobFact is the gob-encoded representation of a single fact in a vetx
// file. An empty ObjPath denotes a package fact; otherwise ObjPath is
// the object path of the object the fact is about, relative to the
// package identified by PkgPath.
type gobFact struct {
	PkgPath string
	ObjPath string
	Fact    analysis.Fact
}
   344  
// TestFact is a serialization of facts that is specific to the test mode.
//
// NOTE(review): the values are populated outside this part of the file;
// judging by the field names, they carry the name and position of the
// object a fact belongs to, the fact's string form, and the name of the
// analyzer that produced it — confirm against the producer.
type TestFact struct {
	ObjectName string
	Position   token.Position
	FactString string
	Analyzer   string
}
   352  
// analyzerAction describes the act of analyzing a package with a
// single analyzer.
type analyzerAction struct {
	baseAction

	// Action description

	// Analyzer is the analyzer to run.
	Analyzer *analysis.Analyzer

	// Action results

	// We can store actual results here without worrying about memory
	// consumption because analyzer actions get garbage collected once
	// a package has been fully analyzed.
	//
	// Result is handed to dependent analyzers via the pass's ResultOf
	// map. Diagnostics collects what the analyzer reported. ObjectFacts
	// and PackageFacts hold facts keyed by object/package and fact type.
	// Pass is the analysis pass created for this action.
	Result       interface{}
	Diagnostics  []Diagnostic
	ObjectFacts  map[objectFactKey]objectFact
	PackageFacts map[packageFactKey]analysis.Fact
	Pass         *analysis.Pass
}
   373  
   374  func (act *analyzerAction) String() string {
   375  	return fmt.Sprintf("analyzerAction(%s)", act.Analyzer)
   376  }
   377  
// A Runner executes analyzers on packages.
type Runner struct {
	// Stats holds counters that are updated while packages are processed.
	Stats     Stats
	// GoVersion is the targeted Go version; see actualGoVersion for the
	// special value "module".
	GoVersion string
	// if GoVersion == "module", and we couldn't determine the
	// module's Go version, use this as the fallback
	FallbackGoVersion string
	// If set to true, Runner will populate results with data relevant to testing analyzers
	TestMode bool

	// GoVersion might be "module"; actualGoVersion contains the resolved version
	actualGoVersion string

	// Config that gets merged with per-package configs
	cfg       config.Config
	// cache is the on-disk cache used for facts, results and test data.
	cache     *cache.Cache
	// semaphore bounds parallelism; New sizes it to GOMAXPROCS.
	semaphore tsync.Semaphore
}
   396  
// subrunner is a Runner bound to a specific list of analyzers.
type subrunner struct {
	*Runner
	// analyzers is the full list of analyzers; factAnalyzers is the
	// subset that declares at least one fact type.
	analyzers     []*analysis.Analyzer
	factAnalyzers []*analysis.Analyzer
	// analyzerNames is the sorted, comma-separated list of analyzer
	// names. It is part of the cache key of package actions.
	analyzerNames string
	// cache is the Runner's cache, copied by newSubrunner.
	cache         *cache.Cache
}
   404  
   405  // New returns a new Runner.
   406  func New(cfg config.Config, c *cache.Cache) (*Runner, error) {
   407  	return &Runner{
   408  		cfg:       cfg,
   409  		cache:     c,
   410  		semaphore: tsync.NewSemaphore(runtime.GOMAXPROCS(0)),
   411  	}, nil
   412  }
   413  
   414  func newSubrunner(r *Runner, analyzers []*analysis.Analyzer) *subrunner {
   415  	analyzerNames := make([]string, len(analyzers))
   416  	for i, a := range analyzers {
   417  		analyzerNames[i] = a.Name
   418  	}
   419  	sort.Strings(analyzerNames)
   420  
   421  	var factAnalyzers []*analysis.Analyzer
   422  	for _, a := range analyzers {
   423  		if len(a.FactTypes) > 0 {
   424  			factAnalyzers = append(factAnalyzers, a)
   425  		}
   426  	}
   427  	return &subrunner{
   428  		Runner:        r,
   429  		analyzers:     analyzers,
   430  		factAnalyzers: factAnalyzers,
   431  		analyzerNames: strings.Join(analyzerNames, ","),
   432  		cache:         r.cache,
   433  	}
   434  }
   435  
   436  func newPackageActionRoot(pkg *loader.PackageSpec, cache map[*loader.PackageSpec]*packageAction) *packageAction {
   437  	a := newPackageAction(pkg, cache)
   438  	a.factsOnly = false
   439  	return a
   440  }
   441  
   442  func newPackageAction(pkg *loader.PackageSpec, cache map[*loader.PackageSpec]*packageAction) *packageAction {
   443  	if a, ok := cache[pkg]; ok {
   444  		return a
   445  	}
   446  
   447  	a := &packageAction{
   448  		Package:   pkg,
   449  		factsOnly: true, // will be overwritten by any call to Action
   450  	}
   451  	cache[pkg] = a
   452  
   453  	if len(pkg.Errors) > 0 {
   454  		a.errors = make([]error, len(pkg.Errors))
   455  		for i, err := range pkg.Errors {
   456  			a.errors[i] = err
   457  		}
   458  		a.failed = true
   459  
   460  		// We don't need to process our imports if this package is
   461  		// already broken.
   462  		return a
   463  	}
   464  
   465  	a.deps = make([]action, 0, len(pkg.Imports))
   466  	for _, dep := range pkg.Imports {
   467  		depa := newPackageAction(dep, cache)
   468  		depa.triggers = append(depa.triggers, a)
   469  		a.deps = append(a.deps, depa)
   470  
   471  		if depa.failed {
   472  			a.failed = true
   473  		}
   474  	}
   475  	// sort dependencies because the list of dependencies is part of
   476  	// the cache key
   477  	sort.Slice(a.deps, func(i, j int) bool {
   478  		return a.deps[i].(*packageAction).Package.ID < a.deps[j].(*packageAction).Package.ID
   479  	})
   480  
   481  	a.pending = uint32(len(a.deps))
   482  
   483  	return a
   484  }
   485  
   486  func newAnalyzerAction(an *analysis.Analyzer, cache map[*analysis.Analyzer]*analyzerAction) *analyzerAction {
   487  	if a, ok := cache[an]; ok {
   488  		return a
   489  	}
   490  
   491  	a := &analyzerAction{
   492  		Analyzer:     an,
   493  		ObjectFacts:  map[objectFactKey]objectFact{},
   494  		PackageFacts: map[packageFactKey]analysis.Fact{},
   495  	}
   496  	cache[an] = a
   497  	for _, dep := range an.Requires {
   498  		depa := newAnalyzerAction(dep, cache)
   499  		depa.triggers = append(depa.triggers, a)
   500  		a.deps = append(a.deps, depa)
   501  	}
   502  	a.pending = uint32(len(a.deps))
   503  	return a
   504  }
   505  
   506  func getCachedFiles(cache *cache.Cache, ids []cache.ActionID, out []*string) error {
   507  	for i, id := range ids {
   508  		var err error
   509  		*out[i], _, err = cache.GetFile(id)
   510  		if err != nil {
   511  			return err
   512  		}
   513  	}
   514  	return nil
   515  }
   516  
   517  func (r *subrunner) do(act action) error {
   518  	a := act.(*packageAction)
   519  	defer func() {
   520  		r.Stats.finishPackage()
   521  		if !a.factsOnly {
   522  			r.Stats.finishInitialPackage()
   523  		}
   524  	}()
   525  
   526  	// compute hash of action
   527  	a.cfg = a.Package.Config.Merge(r.cfg)
   528  	h := r.cache.NewHash("staticcheck " + a.Package.PkgPath)
   529  
   530  	// Note that we do not filter the list of analyzers by the
   531  	// package's configuration. We don't allow configuration to
   532  	// accidentally break dependencies between analyzers, and it's
   533  	// easier to always run all checks and filter the output. This
   534  	// also makes cached data more reusable.
   535  
   536  	// OPT(dh): not all changes in configuration invalidate cached
   537  	// data. specifically, when a.factsOnly == true, we only care
   538  	// about checks that produce facts, and settings that affect those
   539  	// checks.
   540  
   541  	// Config used for constructing the hash; this config doesn't have
   542  	// Checks populated, because we always run all checks.
   543  	//
   544  	// This even works for users who add custom checks, because we include the binary's hash.
   545  	hashCfg := a.cfg
   546  	hashCfg.Checks = nil
   547  	// note that we don't hash staticcheck's version; it is set as the
   548  	// salt by a package main.
   549  	fmt.Fprintf(h, "cfg %#v\n", hashCfg)
   550  	fmt.Fprintf(h, "pkg %x\n", a.Package.Hash)
   551  	fmt.Fprintf(h, "analyzers %s\n", r.analyzerNames)
   552  	fmt.Fprintf(h, "go %s\n", r.actualGoVersion)
   553  
   554  	// OPT(dh): do we actually need to hash vetx? can we not assume
   555  	// that for identical inputs, staticcheck will produce identical
   556  	// vetx?
   557  	for _, dep := range a.deps {
   558  		dep := dep.(*packageAction)
   559  		vetxHash, err := cache.FileHash(dep.vetx)
   560  		if err != nil {
   561  			return fmt.Errorf("failed computing hash: %w", err)
   562  		}
   563  		fmt.Fprintf(h, "vetout %q %x\n", dep.Package.PkgPath, vetxHash)
   564  	}
   565  	a.hash = cache.ActionID(h.Sum())
   566  
   567  	// try to fetch hashed data
   568  	ids := make([]cache.ActionID, 0, 2)
   569  	ids = append(ids, cache.Subkey(a.hash, "vetx"))
   570  	if !a.factsOnly {
   571  		ids = append(ids, cache.Subkey(a.hash, "results"))
   572  		if r.TestMode {
   573  			ids = append(ids, cache.Subkey(a.hash, "testdata"))
   574  		}
   575  	}
   576  	if err := getCachedFiles(r.cache, ids, []*string{&a.vetx, &a.results, &a.testData}); err != nil {
   577  		result, err := r.doUncached(a)
   578  		if err != nil {
   579  			return err
   580  		}
   581  		if a.failed {
   582  			return nil
   583  		}
   584  
   585  		a.skipped = result.skipped
   586  
   587  		// OPT(dh) instead of collecting all object facts and encoding
   588  		// them after analysis finishes, we could encode them as we
   589  		// go. however, that would require some locking.
   590  		//
   591  		// OPT(dh): We could sort gobFacts for more consistent output,
   592  		// but it doesn't matter. The hash of a package includes all
   593  		// of its files, so whether the vetx hash changes or not, a
   594  		// change to a package requires re-analyzing all dependents,
   595  		// even if the vetx data stayed the same. See also the note at
   596  		// the top of loader/hash.go.
   597  
   598  		tf := &bytes.Buffer{}
   599  		enc := gob.NewEncoder(tf)
   600  		for _, gf := range result.facts {
   601  			if err := enc.Encode(gf); err != nil {
   602  				return fmt.Errorf("failed gob encoding data: %w", err)
   603  			}
   604  		}
   605  
   606  		a.vetx, err = r.writeCacheReader(a, "vetx", bytes.NewReader(tf.Bytes()))
   607  		if err != nil {
   608  			return err
   609  		}
   610  
   611  		if a.factsOnly {
   612  			return nil
   613  		}
   614  
   615  		var out ResultData
   616  		out.Directives = make([]SerializedDirective, len(result.dirs))
   617  		for i, dir := range result.dirs {
   618  			out.Directives[i] = serializeDirective(dir, result.lpkg.Fset)
   619  		}
   620  
   621  		out.Diagnostics = result.diags
   622  		out.Unused = result.unused
   623  		a.results, err = r.writeCacheGob(a, "results", out)
   624  		if err != nil {
   625  			return err
   626  		}
   627  
   628  		if r.TestMode {
   629  			out := TestData{
   630  				Facts: result.testFacts,
   631  				Files: result.lpkg.GoFiles,
   632  			}
   633  			a.testData, err = r.writeCacheGob(a, "testdata", out)
   634  			if err != nil {
   635  				return err
   636  			}
   637  		}
   638  	}
   639  	return nil
   640  }
   641  
   642  // ActiveWorkers returns the number of currently running workers.
   643  func (r *Runner) ActiveWorkers() int {
   644  	return r.semaphore.Len()
   645  }
   646  
   647  // TotalWorkers returns the maximum number of possible workers.
   648  func (r *Runner) TotalWorkers() int {
   649  	return r.semaphore.Cap()
   650  }
   651  
   652  func (r *Runner) writeCacheReader(a *packageAction, kind string, rs io.ReadSeeker) (string, error) {
   653  	h := cache.Subkey(a.hash, kind)
   654  	out, _, err := r.cache.Put(h, rs)
   655  	if err != nil {
   656  		return "", fmt.Errorf("failed caching data: %w", err)
   657  	}
   658  	return r.cache.OutputFile(out), nil
   659  }
   660  
   661  func (r *Runner) writeCacheGob(a *packageAction, kind string, data interface{}) (string, error) {
   662  	f, err := os.CreateTemp("", "staticcheck")
   663  	if err != nil {
   664  		return "", err
   665  	}
   666  	defer f.Close()
   667  	os.Remove(f.Name())
   668  	if err := gob.NewEncoder(f).Encode(data); err != nil {
   669  		return "", fmt.Errorf("failed gob encoding data: %w", err)
   670  	}
   671  	if _, err := f.Seek(0, io.SeekStart); err != nil {
   672  		return "", err
   673  	}
   674  	return r.writeCacheReader(a, kind, f)
   675  }
   676  
// packageActionResult is the in-memory outcome of analyzing a package
// without the help of the cache.
type packageActionResult struct {
	// facts are the facts to be written to the package's vetx file.
	facts   []gobFact
	// diags, unused and dirs end up in the cached ResultData.
	diags   []Diagnostic
	unused  unused.Result
	dirs    []lint.Directive
	// lpkg is the loaded package.
	lpkg    *loader.Package
	// skipped is set for packages that have no syntax (other than
	// package unsafe); no analyzers are run for them.
	skipped bool

	// Only set when using test mode
	testFacts []TestFact
}
   688  
   689  func (r *subrunner) doUncached(a *packageAction) (packageActionResult, error) {
   690  	// OPT(dh): for a -> b; c -> b; if both a and b are being
   691  	// processed concurrently, we shouldn't load b's export data
   692  	// twice.
   693  
   694  	pkg, _, err := loader.Load(a.Package)
   695  	if err != nil {
   696  		return packageActionResult{}, err
   697  	}
   698  
   699  	if len(pkg.Errors) > 0 {
   700  		// this handles errors that occurred during type-checking the
   701  		// package in loader.Load
   702  		for _, err := range pkg.Errors {
   703  			a.errors = append(a.errors, err)
   704  		}
   705  		a.failed = true
   706  		return packageActionResult{}, nil
   707  	}
   708  
   709  	if len(pkg.Syntax) == 0 && pkg.PkgPath != "unsafe" {
   710  		return packageActionResult{lpkg: pkg, skipped: true}, nil
   711  	}
   712  
   713  	// OPT(dh): instead of parsing directives twice (twice because
   714  	// U1000 depends on the facts.Directives analyzer), reuse the
   715  	// existing result
   716  	var dirs []lint.Directive
   717  	if !a.factsOnly {
   718  		dirs = lint.ParseDirectives(pkg.Syntax, pkg.Fset)
   719  	}
   720  	res, err := r.runAnalyzers(a, pkg)
   721  
   722  	return packageActionResult{
   723  		facts:     res.facts,
   724  		testFacts: res.testFacts,
   725  		diags:     res.diagnostics,
   726  		unused:    res.unused,
   727  		dirs:      dirs,
   728  		lpkg:      pkg,
   729  	}, err
   730  }
   731  
   732  func pkgPaths(root *types.Package) map[string]*types.Package {
   733  	out := map[string]*types.Package{}
   734  	var dfs func(*types.Package)
   735  	dfs = func(pkg *types.Package) {
   736  		if _, ok := out[pkg.Path()]; ok {
   737  			return
   738  		}
   739  		out[pkg.Path()] = pkg
   740  		for _, imp := range pkg.Imports() {
   741  			dfs(imp)
   742  		}
   743  	}
   744  	dfs(root)
   745  	return out
   746  }
   747  
   748  func (r *Runner) loadFacts(root *types.Package, dep *packageAction, objFacts map[objectFactKey]objectFact, pkgFacts map[packageFactKey]analysis.Fact) error {
   749  	// Load facts of all imported packages
   750  	vetx, err := os.Open(dep.vetx)
   751  	if err != nil {
   752  		return fmt.Errorf("failed loading cached facts: %w", err)
   753  	}
   754  	defer vetx.Close()
   755  
   756  	pathToPkg := pkgPaths(root)
   757  	dec := gob.NewDecoder(vetx)
   758  	for {
   759  		var gf gobFact
   760  		err := dec.Decode(&gf)
   761  		if err != nil {
   762  			if err == io.EOF {
   763  				break
   764  			}
   765  			return fmt.Errorf("failed loading cached facts: %w", err)
   766  		}
   767  
   768  		pkg, ok := pathToPkg[gf.PkgPath]
   769  		if !ok {
   770  			continue
   771  		}
   772  		if gf.ObjPath == "" {
   773  			pkgFacts[packageFactKey{
   774  				Pkg:  pkg,
   775  				Type: reflect.TypeOf(gf.Fact),
   776  			}] = gf.Fact
   777  		} else {
   778  			obj, err := objectpath.Object(pkg, objectpath.Path(gf.ObjPath))
   779  			if err != nil {
   780  				continue
   781  			}
   782  			objFacts[objectFactKey{
   783  				Obj:  obj,
   784  				Type: reflect.TypeOf(gf.Fact),
   785  			}] = objectFact{gf.Fact, objectpath.Path(gf.ObjPath)}
   786  		}
   787  	}
   788  	return nil
   789  }
   790  
// genericHandle processes a single action of a graph rooted at root.
//
// If a is the root, the queue is closed, the semaphore (if any) is
// released, and nothing is executed. Otherwise a is marked as failed if
// any of its dependencies failed; if it isn't failed, exec is called on
// it, and an error from exec marks a as failed and is recorded on it.
// After that the semaphore (if any) is released, and every action
// triggered by a has its pending count decremented; those whose count
// reaches zero are sent to queue.
//
// sem may be nil, in which case nothing is released.
func genericHandle(a action, root action, queue chan action, sem *tsync.Semaphore, exec func(a action) error) {
	if a == root {
		close(queue)
		if sem != nil {
			sem.Release()
		}
		return
	}
	if !a.IsFailed() {
		// the action may have already been marked as failed during
		// construction of the action graph, for example because of
		// unresolved imports.

		for _, dep := range a.Deps() {
			if dep.IsFailed() {
				// One of our dependencies failed, so mark this package as
				// failed and bail. We don't need to record an error for
				// this package, the relevant error will have been
				// reported by the first package in the chain that failed.
				a.MarkFailed()
				break
			}
		}
	}

	if !a.IsFailed() {
		if err := exec(a); err != nil {
			a.MarkFailed()
			a.AddError(err)
		}
	}
	// The semaphore is released before dependents get queued.
	if sem != nil {
		sem.Release()
	}

	// Failed actions still notify their dependents, so that the graph
	// keeps draining; dependents notice the failure via IsFailed.
	for _, t := range a.Triggers() {
		if t.DecrementPending() {
			queue <- t
		}
	}
}
   832  
// analyzerRunner executes analyzer actions for a single loaded package.
type analyzerRunner struct {
	// pkg is the package being analyzed; it provides the file set,
	// syntax and type information for every analysis pass.
	pkg *loader.Package
	// object facts of our dependencies; may contain facts of
	// analyzers other than the current one
	depObjFacts map[objectFactKey]objectFact
	// package facts of our dependencies; may contain facts of
	// analyzers other than the current one
	depPkgFacts map[packageFactKey]analysis.Fact
	// factsOnly suppresses the recording of diagnostics.
	factsOnly   bool

	stats *Stats
}
   845  
   846  func (ar *analyzerRunner) do(act action) error {
   847  	a := act.(*analyzerAction)
   848  	results := map[*analysis.Analyzer]interface{}{}
   849  	// TODO(dh): does this have to be recursive?
   850  	for _, dep := range a.deps {
   851  		dep := dep.(*analyzerAction)
   852  		results[dep.Analyzer] = dep.Result
   853  	}
   854  	// OPT(dh): cache factTypes, it is the same for all packages for a given analyzer
   855  	//
   856  	// OPT(dh): do we need the factTypes map? most analyzers have 0-1
   857  	// fact types. iterating over the slice is probably faster than
   858  	// indexing a map.
   859  	factTypes := map[reflect.Type]struct{}{}
   860  	for _, typ := range a.Analyzer.FactTypes {
   861  		factTypes[reflect.TypeOf(typ)] = struct{}{}
   862  	}
   863  	filterFactType := func(typ reflect.Type) bool {
   864  		_, ok := factTypes[typ]
   865  		return ok
   866  	}
   867  	a.Pass = &analysis.Pass{
   868  		Analyzer:   a.Analyzer,
   869  		Fset:       ar.pkg.Fset,
   870  		Files:      ar.pkg.Syntax,
   871  		OtherFiles: ar.pkg.OtherFiles,
   872  		Pkg:        ar.pkg.Types,
   873  		TypesInfo:  ar.pkg.TypesInfo,
   874  		TypesSizes: ar.pkg.TypesSizes,
   875  		Report: func(diag analysis.Diagnostic) {
   876  			if !ar.factsOnly {
   877  				if diag.Category == "" {
   878  					diag.Category = a.Analyzer.Name
   879  				}
   880  				d := Diagnostic{
   881  					Position: report.DisplayPosition(ar.pkg.Fset, diag.Pos),
   882  					End:      report.DisplayPosition(ar.pkg.Fset, diag.End),
   883  					Category: diag.Category,
   884  					Message:  diag.Message,
   885  				}
   886  				for _, sugg := range diag.SuggestedFixes {
   887  					s := SuggestedFix{
   888  						Message: sugg.Message,
   889  					}
   890  					for _, edit := range sugg.TextEdits {
   891  						s.TextEdits = append(s.TextEdits, TextEdit{
   892  							Position: report.DisplayPosition(ar.pkg.Fset, edit.Pos),
   893  							End:      report.DisplayPosition(ar.pkg.Fset, edit.End),
   894  							NewText:  edit.NewText,
   895  						})
   896  					}
   897  					d.SuggestedFixes = append(d.SuggestedFixes, s)
   898  				}
   899  				for _, rel := range diag.Related {
   900  					d.Related = append(d.Related, RelatedInformation{
   901  						Position: report.DisplayPosition(ar.pkg.Fset, rel.Pos),
   902  						End:      report.DisplayPosition(ar.pkg.Fset, rel.End),
   903  						Message:  rel.Message,
   904  					})
   905  				}
   906  				a.Diagnostics = append(a.Diagnostics, d)
   907  			}
   908  		},
   909  		ResultOf: results,
   910  		ImportObjectFact: func(obj types.Object, fact analysis.Fact) bool {
   911  			key := objectFactKey{
   912  				Obj:  obj,
   913  				Type: reflect.TypeOf(fact),
   914  			}
   915  			if f, ok := ar.depObjFacts[key]; ok {
   916  				reflect.ValueOf(fact).Elem().Set(reflect.ValueOf(f.fact).Elem())
   917  				return true
   918  			} else if f, ok := a.ObjectFacts[key]; ok {
   919  				reflect.ValueOf(fact).Elem().Set(reflect.ValueOf(f.fact).Elem())
   920  				return true
   921  			}
   922  			return false
   923  		},
   924  		ImportPackageFact: func(pkg *types.Package, fact analysis.Fact) bool {
   925  			key := packageFactKey{
   926  				Pkg:  pkg,
   927  				Type: reflect.TypeOf(fact),
   928  			}
   929  			if f, ok := ar.depPkgFacts[key]; ok {
   930  				reflect.ValueOf(fact).Elem().Set(reflect.ValueOf(f).Elem())
   931  				return true
   932  			} else if f, ok := a.PackageFacts[key]; ok {
   933  				reflect.ValueOf(fact).Elem().Set(reflect.ValueOf(f).Elem())
   934  				return true
   935  			}
   936  			return false
   937  		},
   938  		ExportObjectFact: func(obj types.Object, fact analysis.Fact) {
   939  			key := objectFactKey{
   940  				Obj:  obj,
   941  				Type: reflect.TypeOf(fact),
   942  			}
   943  			path, _ := objectpath.For(obj)
   944  			a.ObjectFacts[key] = objectFact{fact, path}
   945  		},
   946  		ExportPackageFact: func(fact analysis.Fact) {
   947  			key := packageFactKey{
   948  				Pkg:  ar.pkg.Types,
   949  				Type: reflect.TypeOf(fact),
   950  			}
   951  			a.PackageFacts[key] = fact
   952  		},
   953  		AllPackageFacts: func() []analysis.PackageFact {
   954  			out := make([]analysis.PackageFact, 0, len(ar.depPkgFacts)+len(a.PackageFacts))
   955  			for key, fact := range ar.depPkgFacts {
   956  				out = append(out, analysis.PackageFact{
   957  					Package: key.Pkg,
   958  					Fact:    fact,
   959  				})
   960  			}
   961  			for key, fact := range a.PackageFacts {
   962  				out = append(out, analysis.PackageFact{
   963  					Package: key.Pkg,
   964  					Fact:    fact,
   965  				})
   966  			}
   967  			return out
   968  		},
   969  		AllObjectFacts: func() []analysis.ObjectFact {
   970  			out := make([]analysis.ObjectFact, 0, len(ar.depObjFacts)+len(a.ObjectFacts))
   971  			for key, fact := range ar.depObjFacts {
   972  				if filterFactType(key.Type) {
   973  					out = append(out, analysis.ObjectFact{
   974  						Object: key.Obj,
   975  						Fact:   fact.fact,
   976  					})
   977  				}
   978  			}
   979  			for key, fact := range a.ObjectFacts {
   980  				if filterFactType(key.Type) {
   981  					out = append(out, analysis.ObjectFact{
   982  						Object: key.Obj,
   983  						Fact:   fact.fact,
   984  					})
   985  				}
   986  			}
   987  			return out
   988  		},
   989  	}
   990  
   991  	t := time.Now()
   992  	res, err := a.Analyzer.Run(a.Pass)
   993  	ar.stats.measureAnalyzer(a.Analyzer, ar.pkg.PackageSpec, time.Since(t))
   994  	if err != nil {
   995  		return err
   996  	}
   997  	a.Result = res
   998  	return nil
   999  }
  1000  
// analysisResult is the value returned by runAnalyzers for a single
// package: the facts in their gob-encodable form, the diagnostics
// reported by the analyzers, and the result of the U1000 (unused)
// analyzer, if it ran successfully.
type analysisResult struct {
	facts       []gobFact
	diagnostics []Diagnostic
	unused      unused.Result

	// Only set when using test mode
	testFacts []TestFact
}
  1009  
  1010  func (r *subrunner) runAnalyzers(pkgAct *packageAction, pkg *loader.Package) (analysisResult, error) {
  1011  	depObjFacts := map[objectFactKey]objectFact{}
  1012  	depPkgFacts := map[packageFactKey]analysis.Fact{}
  1013  
  1014  	for _, dep := range pkgAct.deps {
  1015  		if err := r.loadFacts(pkg.Types, dep.(*packageAction), depObjFacts, depPkgFacts); err != nil {
  1016  			return analysisResult{}, err
  1017  		}
  1018  	}
  1019  
  1020  	root := &analyzerAction{}
  1021  	var analyzers []*analysis.Analyzer
  1022  	if pkgAct.factsOnly {
  1023  		// When analyzing non-initial packages, we only care about
  1024  		// analyzers that produce facts.
  1025  		analyzers = r.factAnalyzers
  1026  	} else {
  1027  		analyzers = r.analyzers
  1028  	}
  1029  
  1030  	all := map[*analysis.Analyzer]*analyzerAction{}
  1031  	for _, a := range analyzers {
  1032  		a := newAnalyzerAction(a, all)
  1033  		root.deps = append(root.deps, a)
  1034  		a.triggers = append(a.triggers, root)
  1035  	}
  1036  	root.pending = uint32(len(root.deps))
  1037  
  1038  	ar := &analyzerRunner{
  1039  		pkg:         pkg,
  1040  		factsOnly:   pkgAct.factsOnly,
  1041  		depObjFacts: depObjFacts,
  1042  		depPkgFacts: depPkgFacts,
  1043  		stats:       &r.Stats,
  1044  	}
  1045  	queue := make(chan action, len(all))
  1046  	for _, a := range all {
  1047  		if len(a.Deps()) == 0 {
  1048  			queue <- a
  1049  		}
  1050  	}
  1051  
  1052  	// Don't hang if there are no analyzers to run; for example
  1053  	// because we are analyzing a dependency but have no analyzers
  1054  	// that produce facts.
  1055  	if len(all) == 0 {
  1056  		close(queue)
  1057  	}
  1058  	for item := range queue {
  1059  		b := r.semaphore.AcquireMaybe()
  1060  		if b {
  1061  			go genericHandle(item, root, queue, &r.semaphore, ar.do)
  1062  		} else {
  1063  			// the semaphore is exhausted; run the analysis under the
  1064  			// token we've acquired for analyzing the package.
  1065  			genericHandle(item, root, queue, nil, ar.do)
  1066  		}
  1067  	}
  1068  
  1069  	var unusedResult unused.Result
  1070  	for _, a := range all {
  1071  		if a != root && a.Analyzer.Name == "U1000" && !a.failed {
  1072  			// TODO(dh): figure out a clean abstraction, instead of
  1073  			// special-casing U1000.
  1074  			unusedResult = a.Result.(unused.Result)
  1075  		}
  1076  
  1077  		for key, fact := range a.ObjectFacts {
  1078  			depObjFacts[key] = fact
  1079  		}
  1080  		for key, fact := range a.PackageFacts {
  1081  			depPkgFacts[key] = fact
  1082  		}
  1083  	}
  1084  
  1085  	// OPT(dh): cull objects not reachable via the exported closure
  1086  	var testFacts []TestFact
  1087  	gobFacts := make([]gobFact, 0, len(depObjFacts)+len(depPkgFacts))
  1088  	for key, fact := range depObjFacts {
  1089  		if fact.path == "" {
  1090  			continue
  1091  		}
  1092  		if sanityCheck {
  1093  			p, _ := objectpath.For(key.Obj)
  1094  			if p != fact.path {
  1095  				panic(fmt.Sprintf("got different object paths for %v. old: %q new: %q", key.Obj, fact.path, p))
  1096  			}
  1097  		}
  1098  		gf := gobFact{
  1099  			PkgPath: key.Obj.Pkg().Path(),
  1100  			ObjPath: string(fact.path),
  1101  			Fact:    fact.fact,
  1102  		}
  1103  		gobFacts = append(gobFacts, gf)
  1104  	}
  1105  
  1106  	for key, fact := range depPkgFacts {
  1107  		gf := gobFact{
  1108  			PkgPath: key.Pkg.Path(),
  1109  			Fact:    fact,
  1110  		}
  1111  		gobFacts = append(gobFacts, gf)
  1112  	}
  1113  
  1114  	if r.TestMode {
  1115  		for _, a := range all {
  1116  			for key, fact := range a.ObjectFacts {
  1117  				tgf := TestFact{
  1118  					ObjectName: key.Obj.Name(),
  1119  					Position:   pkg.Fset.Position(key.Obj.Pos()),
  1120  					FactString: fmt.Sprint(fact.fact),
  1121  					Analyzer:   a.Analyzer.Name,
  1122  				}
  1123  				testFacts = append(testFacts, tgf)
  1124  			}
  1125  
  1126  			for _, fact := range a.PackageFacts {
  1127  				tgf := TestFact{
  1128  					ObjectName: "",
  1129  					Position:   pkg.Fset.Position(pkg.Syntax[0].Pos()),
  1130  					FactString: fmt.Sprint(fact),
  1131  					Analyzer:   a.Analyzer.Name,
  1132  				}
  1133  				testFacts = append(testFacts, tgf)
  1134  			}
  1135  		}
  1136  	}
  1137  
  1138  	var diags []Diagnostic
  1139  	for _, a := range root.deps {
  1140  		a := a.(*analyzerAction)
  1141  		diags = append(diags, a.Diagnostics...)
  1142  	}
  1143  	return analysisResult{
  1144  		facts:       gobFacts,
  1145  		testFacts:   testFacts,
  1146  		diagnostics: diags,
  1147  		unused:      unusedResult,
  1148  	}, nil
  1149  }
  1150  
  1151  func registerGobTypes(analyzers []*analysis.Analyzer) {
  1152  	for _, a := range analyzers {
  1153  		for _, typ := range a.FactTypes {
  1154  			// FIXME(dh): use RegisterName so we can work around collisions
  1155  			// in names. For pointer-types, gob incorrectly qualifies
  1156  			// type names with the package name, not the import path.
  1157  			gob.Register(typ)
  1158  		}
  1159  	}
  1160  }
  1161  
  1162  func allAnalyzers(analyzers []*analysis.Analyzer) []*analysis.Analyzer {
  1163  	seen := map[*analysis.Analyzer]struct{}{}
  1164  	out := make([]*analysis.Analyzer, 0, len(analyzers))
  1165  	var dfs func(*analysis.Analyzer)
  1166  	dfs = func(a *analysis.Analyzer) {
  1167  		if _, ok := seen[a]; ok {
  1168  			return
  1169  		}
  1170  		seen[a] = struct{}{}
  1171  		out = append(out, a)
  1172  		for _, dep := range a.Requires {
  1173  			dfs(dep)
  1174  		}
  1175  	}
  1176  	for _, a := range analyzers {
  1177  		dfs(a)
  1178  	}
  1179  	return out
  1180  }
  1181  
  1182  // Run loads the packages specified by patterns, runs analyzers on
  1183  // them and returns the results. Each result corresponds to a single
  1184  // package. Results will be returned for all packages, including
  1185  // dependencies. Errors specific to packages will be reported in the
  1186  // respective results.
  1187  //
  1188  // If cfg is nil, a default config will be used. Otherwise, cfg will
  1189  // be used, with the exception of the Mode field.
  1190  func (r *Runner) Run(cfg *packages.Config, analyzers []*analysis.Analyzer, patterns []string) ([]Result, error) {
  1191  	analyzers = allAnalyzers(analyzers)
  1192  	registerGobTypes(analyzers)
  1193  
  1194  	r.Stats.setState(StateLoadPackageGraph)
  1195  	lpkgs, err := loader.Graph(r.cache, cfg, patterns...)
  1196  	if err != nil {
  1197  		return nil, err
  1198  	}
  1199  	r.Stats.setInitialPackages(len(lpkgs))
  1200  
  1201  	if len(lpkgs) == 0 {
  1202  		return nil, nil
  1203  	}
  1204  
  1205  	var goVersion string
  1206  	if r.GoVersion == "module" {
  1207  		for _, lpkg := range lpkgs {
  1208  			if m := lpkg.Module; m != nil {
  1209  				if goVersion == "" {
  1210  					goVersion = m.GoVersion
  1211  				} else if goVersion != m.GoVersion {
  1212  					// Theoretically, we should only ever see a single Go
  1213  					// module. At least that's currently (as of Go 1.15)
  1214  					// true when using 'go list'.
  1215  					fmt.Fprintln(os.Stderr, "warning: encountered multiple modules and could not deduce targeted Go version")
  1216  					goVersion = ""
  1217  					break
  1218  				}
  1219  			}
  1220  		}
  1221  	} else {
  1222  		goVersion = r.GoVersion
  1223  	}
  1224  
  1225  	if goVersion == "" {
  1226  		if r.FallbackGoVersion == "" {
  1227  			panic("could not determine Go version of module, and fallback version hasn't been set")
  1228  		}
  1229  		goVersion = r.FallbackGoVersion
  1230  	}
  1231  	r.actualGoVersion = goVersion
  1232  	for _, a := range analyzers {
  1233  		flag := a.Flags.Lookup("go")
  1234  		if flag == nil {
  1235  			continue
  1236  		}
  1237  		if err := flag.Value.Set(goVersion); err != nil {
  1238  			return nil, err
  1239  		}
  1240  	}
  1241  
  1242  	r.Stats.setState(StateBuildActionGraph)
  1243  	all := map[*loader.PackageSpec]*packageAction{}
  1244  	root := &packageAction{}
  1245  	for _, lpkg := range lpkgs {
  1246  		a := newPackageActionRoot(lpkg, all)
  1247  		root.deps = append(root.deps, a)
  1248  		a.triggers = append(a.triggers, root)
  1249  	}
  1250  	root.pending = uint32(len(root.deps))
  1251  
  1252  	queue := make(chan action)
  1253  	r.Stats.setTotalPackages(len(all) - 1)
  1254  
  1255  	r.Stats.setState(StateProcessing)
  1256  	go func() {
  1257  		for _, a := range all {
  1258  			if len(a.Deps()) == 0 {
  1259  				queue <- a
  1260  			}
  1261  		}
  1262  	}()
  1263  
  1264  	sr := newSubrunner(r, analyzers)
  1265  	for item := range queue {
  1266  		r.semaphore.Acquire()
  1267  		go genericHandle(item, root, queue, &r.semaphore, func(act action) error {
  1268  			return sr.do(act)
  1269  		})
  1270  	}
  1271  
  1272  	r.Stats.setState(StateFinalizing)
  1273  	out := make([]Result, 0, len(all))
  1274  	for _, item := range all {
  1275  		if item.Package == nil {
  1276  			continue
  1277  		}
  1278  		out = append(out, Result{
  1279  			Package:  item.Package,
  1280  			Config:   item.cfg,
  1281  			Initial:  !item.factsOnly,
  1282  			Skipped:  item.skipped,
  1283  			Failed:   item.failed,
  1284  			Errors:   item.errors,
  1285  			results:  item.results,
  1286  			testData: item.testData,
  1287  		})
  1288  	}
  1289  	return out, nil
  1290  }