// github.com/amarpal/go-tools@v0.0.0-20240422043104-40142f59f616/lintcmd/runner/runner.go

// Package runner implements a go/analysis runner. It makes heavy use
// of on-disk caching to reduce overall memory usage and to speed up
// repeat runs.
//
// # Public API
//
// A Runner maps a list of analyzers and package patterns to a list of
// results. Results provide access to diagnostics, directives, errors
// encountered, and information about packages. Results explicitly do
// not contain ASTs or type information. All position information is
// returned in the form of token.Position, not token.Pos. All work
// that requires access to the loaded representation of a package has
// to occur inside analyzers.
//
// # Planning and execution
//
// Analyzing packages is split into two phases: planning and
// execution.
//
// During planning, a directed acyclic graph of package dependencies
// is computed. We materialize the full graph so that we can execute
// the graph from the bottom up, without keeping unnecessary data in
// memory during a DFS and with simplified parallel execution.
//
// During execution, leaf nodes (nodes with no outstanding
// dependencies) get executed in parallel, bounded by a semaphore
// sized according to the number of CPUs. Conceptually, this happens
// in a loop, processing new leaf nodes as they appear, until no more
// nodes are left. In the actual implementation, nodes know their
// dependents, and the last dependency of a node to be processed is
// responsible for scheduling its dependent.
//
// The graph is rooted at a synthetic root node. Upon execution of the
// root node, the algorithm terminates.
//
// Analyzing a package repeats the same planning + execution steps,
// but this time on a graph of analyzers for the package. Parallel
// execution of individual analyzers is bounded by the same semaphore
// as executing packages.
//
// # Parallelism
//
// Actions are executed in parallel where the dependency graph allows.
// Overall parallelism is bounded by a semaphore, sized according to
// GOMAXPROCS. Each concurrently processed package takes up a
// token, as does each analyzer – but a package can always execute at
// least one analyzer, using the package's token.
//
// Depending on the overall shape of the graph, there may be GOMAXPROCS
// packages running a single analyzer each, a single package running
// GOMAXPROCS analyzers, or anything in between.
//
// Total memory consumption grows roughly linearly with the number of
// CPUs, while total execution time is inversely proportional to the
// number of CPUs. Overall, parallelism is affected by the shape of
// the dependency graph. A lot of inter-connected packages will see
// less parallelism than a lot of independent packages.
//
// # Caching
//
// The runner caches facts, directives and diagnostics in a
// content-addressable cache that is designed after Go's own cache.
// Additionally, it makes use of Go's export data.
//
// This cache not only speeds up repeat runs, it also reduces peak
// memory usage. When we've analyzed a package, we cache the results
// and drop them from memory. When a dependent needs any of this
// information, or when analysis is complete and we wish to render the
// results, the data gets loaded from disk again.
//
// Data only exists in memory when it is immediately needed, not
// retained for possible future uses. This trades increased CPU usage
// for reduced memory usage. A single dependency may be loaded many
// times over, but it greatly reduces peak memory usage, as an
// arbitrary amount of time may pass between analyzing a dependency
// and its dependent, during which other packages will be processed.
package runner

// OPT(dh): we could reduce disk storage usage of cached data by
// compressing it, either directly at the cache layer, or by feeding
// compressed data to the cache. Of course doing so may negatively
// affect CPU usage, and there is lower-hanging fruit, such as
// needing to cache less data in the first place.

// OPT(dh): right now, each package is analyzed completely
// independently. Each package loads all of its dependencies from
// export data and cached facts. If we have two packages A and B,
// which both depend on C, and which both get analyzed in parallel,
// then C will be loaded twice. This wastes CPU time and memory. It
// would be nice if we could reuse a single C for the analysis of both
// A and B.
//
// We can't reuse the actual types.Package or facts, because each
// package gets its own token.FileSet. Sharing a global FileSet has
// several drawbacks, including increased memory usage and running the
// risk of running out of FileSet address space.
//
// We could however avoid loading the same raw export data from disk
// twice, as well as deserializing gob data twice. One possible
// solution would be a duplicate-suppressing in-memory cache that
// caches data for a limited amount of time. When the same package
// needs to be loaded twice in close succession, we can reuse work,
// without holding unnecessary data in memory for an extended period
// of time.
//
// We would likely need to do extensive benchmarking to figure out how
// long to keep data around to find a sweet spot where we reduce CPU
// load without increasing memory usage.
109 // 110 // We can probably populate the cache after we've analyzed a package, 111 // on the assumption that it will have to be loaded again in the near 112 // future. 113 114 import ( 115 "bytes" 116 "encoding/gob" 117 "fmt" 118 "go/token" 119 "go/types" 120 "io" 121 "os" 122 "reflect" 123 "runtime" 124 "sort" 125 "strings" 126 "sync/atomic" 127 "time" 128 129 "github.com/amarpal/go-tools/analysis/lint" 130 "github.com/amarpal/go-tools/analysis/report" 131 "github.com/amarpal/go-tools/config" 132 "github.com/amarpal/go-tools/go/loader" 133 tsync "github.com/amarpal/go-tools/internal/sync" 134 "github.com/amarpal/go-tools/lintcmd/cache" 135 "github.com/amarpal/go-tools/unused" 136 137 "golang.org/x/tools/go/analysis" 138 "golang.org/x/tools/go/packages" 139 "golang.org/x/tools/go/types/objectpath" 140 ) 141 142 const sanityCheck = false 143 144 // Diagnostic is like go/analysis.Diagnostic, but with all token.Pos resolved to token.Position. 145 type Diagnostic struct { 146 Position token.Position 147 End token.Position 148 Category string 149 Message string 150 151 SuggestedFixes []SuggestedFix 152 Related []RelatedInformation 153 } 154 155 // RelatedInformation provides additional context for a diagnostic. 156 type RelatedInformation struct { 157 Position token.Position 158 End token.Position 159 Message string 160 } 161 162 type SuggestedFix struct { 163 Message string 164 TextEdits []TextEdit 165 } 166 167 type TextEdit struct { 168 Position token.Position 169 End token.Position 170 NewText []byte 171 } 172 173 // A Result describes the result of analyzing a single package. 174 // 175 // It holds references to cached diagnostics and directives. They can 176 // be loaded on demand with the Load method. 
177 type Result struct { 178 Package *loader.PackageSpec 179 Config config.Config 180 Initial bool 181 Skipped bool 182 183 Failed bool 184 Errors []error 185 // Action results, path to file 186 results string 187 // Results relevant to testing, only set when test mode is enabled, path to file 188 testData string 189 } 190 191 type SerializedDirective struct { 192 Command string 193 Arguments []string 194 // The position of the comment 195 DirectivePosition token.Position 196 // The position of the node that the comment is attached to 197 NodePosition token.Position 198 } 199 200 func serializeDirective(dir lint.Directive, fset *token.FileSet) SerializedDirective { 201 return SerializedDirective{ 202 Command: dir.Command, 203 Arguments: dir.Arguments, 204 DirectivePosition: report.DisplayPosition(fset, dir.Directive.Pos()), 205 NodePosition: report.DisplayPosition(fset, dir.Node.Pos()), 206 } 207 } 208 209 type ResultData struct { 210 Directives []SerializedDirective 211 Diagnostics []Diagnostic 212 Unused unused.Result 213 } 214 215 func (r Result) Load() (ResultData, error) { 216 if r.Failed { 217 panic("Load called on failed Result") 218 } 219 if r.results == "" { 220 // this package was only a dependency 221 return ResultData{}, nil 222 } 223 f, err := os.Open(r.results) 224 if err != nil { 225 return ResultData{}, fmt.Errorf("failed loading result: %w", err) 226 } 227 defer f.Close() 228 var out ResultData 229 err = gob.NewDecoder(f).Decode(&out) 230 return out, err 231 } 232 233 // TestData contains extra information about analysis runs that is only available in test mode. 234 type TestData struct { 235 // Facts contains facts produced by analyzers for a package. 236 // Unlike vetx, this list only contains facts specific to this package, 237 // not all facts for the transitive closure of dependencies. 238 Facts []TestFact 239 // List of files that were part of the package. 240 Files []string 241 } 242 243 // LoadTest returns data relevant to testing. 
244 // It should only be called if Runner.TestMode was set to true. 245 func (r Result) LoadTest() (TestData, error) { 246 if r.Failed { 247 panic("Load called on failed Result") 248 } 249 if r.results == "" { 250 // this package was only a dependency 251 return TestData{}, nil 252 } 253 f, err := os.Open(r.testData) 254 if err != nil { 255 return TestData{}, fmt.Errorf("failed loading test data: %w", err) 256 } 257 defer f.Close() 258 var out TestData 259 err = gob.NewDecoder(f).Decode(&out) 260 return out, err 261 } 262 263 type action interface { 264 Deps() []action 265 Triggers() []action 266 DecrementPending() bool 267 MarkFailed() 268 IsFailed() bool 269 AddError(error) 270 } 271 272 type baseAction struct { 273 // Action description 274 275 deps []action 276 triggers []action 277 pending uint32 278 279 // Action results 280 281 // failed is set to true if the action couldn't be processed. This 282 // may either be due to an error specific to this action, in 283 // which case the errors field will be populated, or due to a 284 // dependency being marked as failed, in which case errors will be 285 // empty. 286 failed bool 287 errors []error 288 } 289 290 func (act *baseAction) Deps() []action { return act.deps } 291 func (act *baseAction) Triggers() []action { return act.triggers } 292 func (act *baseAction) DecrementPending() bool { 293 return atomic.AddUint32(&act.pending, ^uint32(0)) == 0 294 } 295 func (act *baseAction) MarkFailed() { act.failed = true } 296 func (act *baseAction) IsFailed() bool { return act.failed } 297 func (act *baseAction) AddError(err error) { act.errors = append(act.errors, err) } 298 299 // packageAction describes the act of loading a package, fully 300 // analyzing it, and storing the results. 
301 type packageAction struct { 302 baseAction 303 304 // Action description 305 Package *loader.PackageSpec 306 factsOnly bool 307 hash cache.ActionID 308 309 // Action results 310 cfg config.Config 311 vetx string 312 results string 313 testData string 314 skipped bool 315 } 316 317 func (act *packageAction) String() string { 318 return fmt.Sprintf("packageAction(%s)", act.Package) 319 } 320 321 type objectFact struct { 322 fact analysis.Fact 323 // TODO(dh): why do we store the objectpath when producing the 324 // fact? Is it just for the sanity checking, which compares the 325 // stored path with a path recomputed from objectFactKey.Obj? 326 path objectpath.Path 327 } 328 329 type objectFactKey struct { 330 Obj types.Object 331 Type reflect.Type 332 } 333 334 type packageFactKey struct { 335 Pkg *types.Package 336 Type reflect.Type 337 } 338 339 type gobFact struct { 340 PkgPath string 341 ObjPath string 342 Fact analysis.Fact 343 } 344 345 // TestFact is a serialization of facts that is specific to the test mode. 346 type TestFact struct { 347 ObjectName string 348 Position token.Position 349 FactString string 350 Analyzer string 351 } 352 353 // analyzerAction describes the act of analyzing a package with a 354 // single analyzer. 355 type analyzerAction struct { 356 baseAction 357 358 // Action description 359 360 Analyzer *analysis.Analyzer 361 362 // Action results 363 364 // We can store actual results here without worrying about memory 365 // consumption because analyzer actions get garbage collected once 366 // a package has been fully analyzed. 367 Result interface{} 368 Diagnostics []Diagnostic 369 ObjectFacts map[objectFactKey]objectFact 370 PackageFacts map[packageFactKey]analysis.Fact 371 Pass *analysis.Pass 372 } 373 374 func (act *analyzerAction) String() string { 375 return fmt.Sprintf("analyzerAction(%s)", act.Analyzer) 376 } 377 378 // A Runner executes analyzers on packages. 
379 type Runner struct { 380 Stats Stats 381 GoVersion string 382 // if GoVersion == "module", and we couldn't determine the 383 // module's Go version, use this as the fallback 384 FallbackGoVersion string 385 // If set to true, Runner will populate results with data relevant to testing analyzers 386 TestMode bool 387 388 // GoVersion might be "module"; actualGoVersion contains the resolved version 389 actualGoVersion string 390 391 // Config that gets merged with per-package configs 392 cfg config.Config 393 cache *cache.Cache 394 semaphore tsync.Semaphore 395 } 396 397 type subrunner struct { 398 *Runner 399 analyzers []*analysis.Analyzer 400 factAnalyzers []*analysis.Analyzer 401 analyzerNames string 402 cache *cache.Cache 403 } 404 405 // New returns a new Runner. 406 func New(cfg config.Config, c *cache.Cache) (*Runner, error) { 407 return &Runner{ 408 cfg: cfg, 409 cache: c, 410 semaphore: tsync.NewSemaphore(runtime.GOMAXPROCS(0)), 411 }, nil 412 } 413 414 func newSubrunner(r *Runner, analyzers []*analysis.Analyzer) *subrunner { 415 analyzerNames := make([]string, len(analyzers)) 416 for i, a := range analyzers { 417 analyzerNames[i] = a.Name 418 } 419 sort.Strings(analyzerNames) 420 421 var factAnalyzers []*analysis.Analyzer 422 for _, a := range analyzers { 423 if len(a.FactTypes) > 0 { 424 factAnalyzers = append(factAnalyzers, a) 425 } 426 } 427 return &subrunner{ 428 Runner: r, 429 analyzers: analyzers, 430 factAnalyzers: factAnalyzers, 431 analyzerNames: strings.Join(analyzerNames, ","), 432 cache: r.cache, 433 } 434 } 435 436 func newPackageActionRoot(pkg *loader.PackageSpec, cache map[*loader.PackageSpec]*packageAction) *packageAction { 437 a := newPackageAction(pkg, cache) 438 a.factsOnly = false 439 return a 440 } 441 442 func newPackageAction(pkg *loader.PackageSpec, cache map[*loader.PackageSpec]*packageAction) *packageAction { 443 if a, ok := cache[pkg]; ok { 444 return a 445 } 446 447 a := &packageAction{ 448 Package: pkg, 449 factsOnly: true, 
// will be overwritten by any call to Action 450 } 451 cache[pkg] = a 452 453 if len(pkg.Errors) > 0 { 454 a.errors = make([]error, len(pkg.Errors)) 455 for i, err := range pkg.Errors { 456 a.errors[i] = err 457 } 458 a.failed = true 459 460 // We don't need to process our imports if this package is 461 // already broken. 462 return a 463 } 464 465 a.deps = make([]action, 0, len(pkg.Imports)) 466 for _, dep := range pkg.Imports { 467 depa := newPackageAction(dep, cache) 468 depa.triggers = append(depa.triggers, a) 469 a.deps = append(a.deps, depa) 470 471 if depa.failed { 472 a.failed = true 473 } 474 } 475 // sort dependencies because the list of dependencies is part of 476 // the cache key 477 sort.Slice(a.deps, func(i, j int) bool { 478 return a.deps[i].(*packageAction).Package.ID < a.deps[j].(*packageAction).Package.ID 479 }) 480 481 a.pending = uint32(len(a.deps)) 482 483 return a 484 } 485 486 func newAnalyzerAction(an *analysis.Analyzer, cache map[*analysis.Analyzer]*analyzerAction) *analyzerAction { 487 if a, ok := cache[an]; ok { 488 return a 489 } 490 491 a := &analyzerAction{ 492 Analyzer: an, 493 ObjectFacts: map[objectFactKey]objectFact{}, 494 PackageFacts: map[packageFactKey]analysis.Fact{}, 495 } 496 cache[an] = a 497 for _, dep := range an.Requires { 498 depa := newAnalyzerAction(dep, cache) 499 depa.triggers = append(depa.triggers, a) 500 a.deps = append(a.deps, depa) 501 } 502 a.pending = uint32(len(a.deps)) 503 return a 504 } 505 506 func getCachedFiles(cache *cache.Cache, ids []cache.ActionID, out []*string) error { 507 for i, id := range ids { 508 var err error 509 *out[i], _, err = cache.GetFile(id) 510 if err != nil { 511 return err 512 } 513 } 514 return nil 515 } 516 517 func (r *subrunner) do(act action) error { 518 a := act.(*packageAction) 519 defer func() { 520 r.Stats.finishPackage() 521 if !a.factsOnly { 522 r.Stats.finishInitialPackage() 523 } 524 }() 525 526 // compute hash of action 527 a.cfg = a.Package.Config.Merge(r.cfg) 528 h 
:= r.cache.NewHash("staticcheck " + a.Package.PkgPath) 529 530 // Note that we do not filter the list of analyzers by the 531 // package's configuration. We don't allow configuration to 532 // accidentally break dependencies between analyzers, and it's 533 // easier to always run all checks and filter the output. This 534 // also makes cached data more reusable. 535 536 // OPT(dh): not all changes in configuration invalidate cached 537 // data. specifically, when a.factsOnly == true, we only care 538 // about checks that produce facts, and settings that affect those 539 // checks. 540 541 // Config used for constructing the hash; this config doesn't have 542 // Checks populated, because we always run all checks. 543 // 544 // This even works for users who add custom checks, because we include the binary's hash. 545 hashCfg := a.cfg 546 hashCfg.Checks = nil 547 // note that we don't hash staticcheck's version; it is set as the 548 // salt by a package main. 549 fmt.Fprintf(h, "cfg %#v\n", hashCfg) 550 fmt.Fprintf(h, "pkg %x\n", a.Package.Hash) 551 fmt.Fprintf(h, "analyzers %s\n", r.analyzerNames) 552 fmt.Fprintf(h, "go %s\n", r.actualGoVersion) 553 554 // OPT(dh): do we actually need to hash vetx? can we not assume 555 // that for identical inputs, staticcheck will produce identical 556 // vetx? 
557 for _, dep := range a.deps { 558 dep := dep.(*packageAction) 559 vetxHash, err := cache.FileHash(dep.vetx) 560 if err != nil { 561 return fmt.Errorf("failed computing hash: %w", err) 562 } 563 fmt.Fprintf(h, "vetout %q %x\n", dep.Package.PkgPath, vetxHash) 564 } 565 a.hash = cache.ActionID(h.Sum()) 566 567 // try to fetch hashed data 568 ids := make([]cache.ActionID, 0, 2) 569 ids = append(ids, cache.Subkey(a.hash, "vetx")) 570 if !a.factsOnly { 571 ids = append(ids, cache.Subkey(a.hash, "results")) 572 if r.TestMode { 573 ids = append(ids, cache.Subkey(a.hash, "testdata")) 574 } 575 } 576 if err := getCachedFiles(r.cache, ids, []*string{&a.vetx, &a.results, &a.testData}); err != nil { 577 result, err := r.doUncached(a) 578 if err != nil { 579 return err 580 } 581 if a.failed { 582 return nil 583 } 584 585 a.skipped = result.skipped 586 587 // OPT(dh) instead of collecting all object facts and encoding 588 // them after analysis finishes, we could encode them as we 589 // go. however, that would require some locking. 590 // 591 // OPT(dh): We could sort gobFacts for more consistent output, 592 // but it doesn't matter. The hash of a package includes all 593 // of its files, so whether the vetx hash changes or not, a 594 // change to a package requires re-analyzing all dependents, 595 // even if the vetx data stayed the same. See also the note at 596 // the top of loader/hash.go. 
597 598 tf := &bytes.Buffer{} 599 enc := gob.NewEncoder(tf) 600 for _, gf := range result.facts { 601 if err := enc.Encode(gf); err != nil { 602 return fmt.Errorf("failed gob encoding data: %w", err) 603 } 604 } 605 606 a.vetx, err = r.writeCacheReader(a, "vetx", bytes.NewReader(tf.Bytes())) 607 if err != nil { 608 return err 609 } 610 611 if a.factsOnly { 612 return nil 613 } 614 615 var out ResultData 616 out.Directives = make([]SerializedDirective, len(result.dirs)) 617 for i, dir := range result.dirs { 618 out.Directives[i] = serializeDirective(dir, result.lpkg.Fset) 619 } 620 621 out.Diagnostics = result.diags 622 out.Unused = result.unused 623 a.results, err = r.writeCacheGob(a, "results", out) 624 if err != nil { 625 return err 626 } 627 628 if r.TestMode { 629 out := TestData{ 630 Facts: result.testFacts, 631 Files: result.lpkg.GoFiles, 632 } 633 a.testData, err = r.writeCacheGob(a, "testdata", out) 634 if err != nil { 635 return err 636 } 637 } 638 } 639 return nil 640 } 641 642 // ActiveWorkers returns the number of currently running workers. 643 func (r *Runner) ActiveWorkers() int { 644 return r.semaphore.Len() 645 } 646 647 // TotalWorkers returns the maximum number of possible workers. 
648 func (r *Runner) TotalWorkers() int { 649 return r.semaphore.Cap() 650 } 651 652 func (r *Runner) writeCacheReader(a *packageAction, kind string, rs io.ReadSeeker) (string, error) { 653 h := cache.Subkey(a.hash, kind) 654 out, _, err := r.cache.Put(h, rs) 655 if err != nil { 656 return "", fmt.Errorf("failed caching data: %w", err) 657 } 658 return r.cache.OutputFile(out), nil 659 } 660 661 func (r *Runner) writeCacheGob(a *packageAction, kind string, data interface{}) (string, error) { 662 f, err := os.CreateTemp("", "staticcheck") 663 if err != nil { 664 return "", err 665 } 666 defer f.Close() 667 os.Remove(f.Name()) 668 if err := gob.NewEncoder(f).Encode(data); err != nil { 669 return "", fmt.Errorf("failed gob encoding data: %w", err) 670 } 671 if _, err := f.Seek(0, io.SeekStart); err != nil { 672 return "", err 673 } 674 return r.writeCacheReader(a, kind, f) 675 } 676 677 type packageActionResult struct { 678 facts []gobFact 679 diags []Diagnostic 680 unused unused.Result 681 dirs []lint.Directive 682 lpkg *loader.Package 683 skipped bool 684 685 // Only set when using test mode 686 testFacts []TestFact 687 } 688 689 func (r *subrunner) doUncached(a *packageAction) (packageActionResult, error) { 690 // OPT(dh): for a -> b; c -> b; if both a and b are being 691 // processed concurrently, we shouldn't load b's export data 692 // twice. 
693 694 pkg, _, err := loader.Load(a.Package) 695 if err != nil { 696 return packageActionResult{}, err 697 } 698 699 if len(pkg.Errors) > 0 { 700 // this handles errors that occurred during type-checking the 701 // package in loader.Load 702 for _, err := range pkg.Errors { 703 a.errors = append(a.errors, err) 704 } 705 a.failed = true 706 return packageActionResult{}, nil 707 } 708 709 if len(pkg.Syntax) == 0 && pkg.PkgPath != "unsafe" { 710 return packageActionResult{lpkg: pkg, skipped: true}, nil 711 } 712 713 // OPT(dh): instead of parsing directives twice (twice because 714 // U1000 depends on the facts.Directives analyzer), reuse the 715 // existing result 716 var dirs []lint.Directive 717 if !a.factsOnly { 718 dirs = lint.ParseDirectives(pkg.Syntax, pkg.Fset) 719 } 720 res, err := r.runAnalyzers(a, pkg) 721 722 return packageActionResult{ 723 facts: res.facts, 724 testFacts: res.testFacts, 725 diags: res.diagnostics, 726 unused: res.unused, 727 dirs: dirs, 728 lpkg: pkg, 729 }, err 730 } 731 732 func pkgPaths(root *types.Package) map[string]*types.Package { 733 out := map[string]*types.Package{} 734 var dfs func(*types.Package) 735 dfs = func(pkg *types.Package) { 736 if _, ok := out[pkg.Path()]; ok { 737 return 738 } 739 out[pkg.Path()] = pkg 740 for _, imp := range pkg.Imports() { 741 dfs(imp) 742 } 743 } 744 dfs(root) 745 return out 746 } 747 748 func (r *Runner) loadFacts(root *types.Package, dep *packageAction, objFacts map[objectFactKey]objectFact, pkgFacts map[packageFactKey]analysis.Fact) error { 749 // Load facts of all imported packages 750 vetx, err := os.Open(dep.vetx) 751 if err != nil { 752 return fmt.Errorf("failed loading cached facts: %w", err) 753 } 754 defer vetx.Close() 755 756 pathToPkg := pkgPaths(root) 757 dec := gob.NewDecoder(vetx) 758 for { 759 var gf gobFact 760 err := dec.Decode(&gf) 761 if err != nil { 762 if err == io.EOF { 763 break 764 } 765 return fmt.Errorf("failed loading cached facts: %w", err) 766 } 767 768 pkg, ok := 
pathToPkg[gf.PkgPath] 769 if !ok { 770 continue 771 } 772 if gf.ObjPath == "" { 773 pkgFacts[packageFactKey{ 774 Pkg: pkg, 775 Type: reflect.TypeOf(gf.Fact), 776 }] = gf.Fact 777 } else { 778 obj, err := objectpath.Object(pkg, objectpath.Path(gf.ObjPath)) 779 if err != nil { 780 continue 781 } 782 objFacts[objectFactKey{ 783 Obj: obj, 784 Type: reflect.TypeOf(gf.Fact), 785 }] = objectFact{gf.Fact, objectpath.Path(gf.ObjPath)} 786 } 787 } 788 return nil 789 } 790 791 func genericHandle(a action, root action, queue chan action, sem *tsync.Semaphore, exec func(a action) error) { 792 if a == root { 793 close(queue) 794 if sem != nil { 795 sem.Release() 796 } 797 return 798 } 799 if !a.IsFailed() { 800 // the action may have already been marked as failed during 801 // construction of the action graph, for example because of 802 // unresolved imports. 803 804 for _, dep := range a.Deps() { 805 if dep.IsFailed() { 806 // One of our dependencies failed, so mark this package as 807 // failed and bail. We don't need to record an error for 808 // this package, the relevant error will have been 809 // reported by the first package in the chain that failed. 
810 a.MarkFailed() 811 break 812 } 813 } 814 } 815 816 if !a.IsFailed() { 817 if err := exec(a); err != nil { 818 a.MarkFailed() 819 a.AddError(err) 820 } 821 } 822 if sem != nil { 823 sem.Release() 824 } 825 826 for _, t := range a.Triggers() { 827 if t.DecrementPending() { 828 queue <- t 829 } 830 } 831 } 832 833 type analyzerRunner struct { 834 pkg *loader.Package 835 // object facts of our dependencies; may contain facts of 836 // analyzers other than the current one 837 depObjFacts map[objectFactKey]objectFact 838 // package facts of our dependencies; may contain facts of 839 // analyzers other than the current one 840 depPkgFacts map[packageFactKey]analysis.Fact 841 factsOnly bool 842 843 stats *Stats 844 } 845 846 func (ar *analyzerRunner) do(act action) error { 847 a := act.(*analyzerAction) 848 results := map[*analysis.Analyzer]interface{}{} 849 // TODO(dh): does this have to be recursive? 850 for _, dep := range a.deps { 851 dep := dep.(*analyzerAction) 852 results[dep.Analyzer] = dep.Result 853 } 854 // OPT(dh): cache factTypes, it is the same for all packages for a given analyzer 855 // 856 // OPT(dh): do we need the factTypes map? most analyzers have 0-1 857 // fact types. iterating over the slice is probably faster than 858 // indexing a map. 
859 factTypes := map[reflect.Type]struct{}{} 860 for _, typ := range a.Analyzer.FactTypes { 861 factTypes[reflect.TypeOf(typ)] = struct{}{} 862 } 863 filterFactType := func(typ reflect.Type) bool { 864 _, ok := factTypes[typ] 865 return ok 866 } 867 a.Pass = &analysis.Pass{ 868 Analyzer: a.Analyzer, 869 Fset: ar.pkg.Fset, 870 Files: ar.pkg.Syntax, 871 OtherFiles: ar.pkg.OtherFiles, 872 Pkg: ar.pkg.Types, 873 TypesInfo: ar.pkg.TypesInfo, 874 TypesSizes: ar.pkg.TypesSizes, 875 Report: func(diag analysis.Diagnostic) { 876 if !ar.factsOnly { 877 if diag.Category == "" { 878 diag.Category = a.Analyzer.Name 879 } 880 d := Diagnostic{ 881 Position: report.DisplayPosition(ar.pkg.Fset, diag.Pos), 882 End: report.DisplayPosition(ar.pkg.Fset, diag.End), 883 Category: diag.Category, 884 Message: diag.Message, 885 } 886 for _, sugg := range diag.SuggestedFixes { 887 s := SuggestedFix{ 888 Message: sugg.Message, 889 } 890 for _, edit := range sugg.TextEdits { 891 s.TextEdits = append(s.TextEdits, TextEdit{ 892 Position: report.DisplayPosition(ar.pkg.Fset, edit.Pos), 893 End: report.DisplayPosition(ar.pkg.Fset, edit.End), 894 NewText: edit.NewText, 895 }) 896 } 897 d.SuggestedFixes = append(d.SuggestedFixes, s) 898 } 899 for _, rel := range diag.Related { 900 d.Related = append(d.Related, RelatedInformation{ 901 Position: report.DisplayPosition(ar.pkg.Fset, rel.Pos), 902 End: report.DisplayPosition(ar.pkg.Fset, rel.End), 903 Message: rel.Message, 904 }) 905 } 906 a.Diagnostics = append(a.Diagnostics, d) 907 } 908 }, 909 ResultOf: results, 910 ImportObjectFact: func(obj types.Object, fact analysis.Fact) bool { 911 key := objectFactKey{ 912 Obj: obj, 913 Type: reflect.TypeOf(fact), 914 } 915 if f, ok := ar.depObjFacts[key]; ok { 916 reflect.ValueOf(fact).Elem().Set(reflect.ValueOf(f.fact).Elem()) 917 return true 918 } else if f, ok := a.ObjectFacts[key]; ok { 919 reflect.ValueOf(fact).Elem().Set(reflect.ValueOf(f.fact).Elem()) 920 return true 921 } 922 return false 923 }, 924 
ImportPackageFact: func(pkg *types.Package, fact analysis.Fact) bool { 925 key := packageFactKey{ 926 Pkg: pkg, 927 Type: reflect.TypeOf(fact), 928 } 929 if f, ok := ar.depPkgFacts[key]; ok { 930 reflect.ValueOf(fact).Elem().Set(reflect.ValueOf(f).Elem()) 931 return true 932 } else if f, ok := a.PackageFacts[key]; ok { 933 reflect.ValueOf(fact).Elem().Set(reflect.ValueOf(f).Elem()) 934 return true 935 } 936 return false 937 }, 938 ExportObjectFact: func(obj types.Object, fact analysis.Fact) { 939 key := objectFactKey{ 940 Obj: obj, 941 Type: reflect.TypeOf(fact), 942 } 943 path, _ := objectpath.For(obj) 944 a.ObjectFacts[key] = objectFact{fact, path} 945 }, 946 ExportPackageFact: func(fact analysis.Fact) { 947 key := packageFactKey{ 948 Pkg: ar.pkg.Types, 949 Type: reflect.TypeOf(fact), 950 } 951 a.PackageFacts[key] = fact 952 }, 953 AllPackageFacts: func() []analysis.PackageFact { 954 out := make([]analysis.PackageFact, 0, len(ar.depPkgFacts)+len(a.PackageFacts)) 955 for key, fact := range ar.depPkgFacts { 956 out = append(out, analysis.PackageFact{ 957 Package: key.Pkg, 958 Fact: fact, 959 }) 960 } 961 for key, fact := range a.PackageFacts { 962 out = append(out, analysis.PackageFact{ 963 Package: key.Pkg, 964 Fact: fact, 965 }) 966 } 967 return out 968 }, 969 AllObjectFacts: func() []analysis.ObjectFact { 970 out := make([]analysis.ObjectFact, 0, len(ar.depObjFacts)+len(a.ObjectFacts)) 971 for key, fact := range ar.depObjFacts { 972 if filterFactType(key.Type) { 973 out = append(out, analysis.ObjectFact{ 974 Object: key.Obj, 975 Fact: fact.fact, 976 }) 977 } 978 } 979 for key, fact := range a.ObjectFacts { 980 if filterFactType(key.Type) { 981 out = append(out, analysis.ObjectFact{ 982 Object: key.Obj, 983 Fact: fact.fact, 984 }) 985 } 986 } 987 return out 988 }, 989 } 990 991 t := time.Now() 992 res, err := a.Analyzer.Run(a.Pass) 993 ar.stats.measureAnalyzer(a.Analyzer, ar.pkg.PackageSpec, time.Since(t)) 994 if err != nil { 995 return err 996 } 997 a.Result = 
res 998 return nil 999 } 1000 1001 type analysisResult struct { 1002 facts []gobFact 1003 diagnostics []Diagnostic 1004 unused unused.Result 1005 1006 // Only set when using test mode 1007 testFacts []TestFact 1008 } 1009 1010 func (r *subrunner) runAnalyzers(pkgAct *packageAction, pkg *loader.Package) (analysisResult, error) { 1011 depObjFacts := map[objectFactKey]objectFact{} 1012 depPkgFacts := map[packageFactKey]analysis.Fact{} 1013 1014 for _, dep := range pkgAct.deps { 1015 if err := r.loadFacts(pkg.Types, dep.(*packageAction), depObjFacts, depPkgFacts); err != nil { 1016 return analysisResult{}, err 1017 } 1018 } 1019 1020 root := &analyzerAction{} 1021 var analyzers []*analysis.Analyzer 1022 if pkgAct.factsOnly { 1023 // When analyzing non-initial packages, we only care about 1024 // analyzers that produce facts. 1025 analyzers = r.factAnalyzers 1026 } else { 1027 analyzers = r.analyzers 1028 } 1029 1030 all := map[*analysis.Analyzer]*analyzerAction{} 1031 for _, a := range analyzers { 1032 a := newAnalyzerAction(a, all) 1033 root.deps = append(root.deps, a) 1034 a.triggers = append(a.triggers, root) 1035 } 1036 root.pending = uint32(len(root.deps)) 1037 1038 ar := &analyzerRunner{ 1039 pkg: pkg, 1040 factsOnly: pkgAct.factsOnly, 1041 depObjFacts: depObjFacts, 1042 depPkgFacts: depPkgFacts, 1043 stats: &r.Stats, 1044 } 1045 queue := make(chan action, len(all)) 1046 for _, a := range all { 1047 if len(a.Deps()) == 0 { 1048 queue <- a 1049 } 1050 } 1051 1052 // Don't hang if there are no analyzers to run; for example 1053 // because we are analyzing a dependency but have no analyzers 1054 // that produce facts. 1055 if len(all) == 0 { 1056 close(queue) 1057 } 1058 for item := range queue { 1059 b := r.semaphore.AcquireMaybe() 1060 if b { 1061 go genericHandle(item, root, queue, &r.semaphore, ar.do) 1062 } else { 1063 // the semaphore is exhausted; run the analysis under the 1064 // token we've acquired for analyzing the package. 
1065 genericHandle(item, root, queue, nil, ar.do) 1066 } 1067 } 1068 1069 var unusedResult unused.Result 1070 for _, a := range all { 1071 if a != root && a.Analyzer.Name == "U1000" && !a.failed { 1072 // TODO(dh): figure out a clean abstraction, instead of 1073 // special-casing U1000. 1074 unusedResult = a.Result.(unused.Result) 1075 } 1076 1077 for key, fact := range a.ObjectFacts { 1078 depObjFacts[key] = fact 1079 } 1080 for key, fact := range a.PackageFacts { 1081 depPkgFacts[key] = fact 1082 } 1083 } 1084 1085 // OPT(dh): cull objects not reachable via the exported closure 1086 var testFacts []TestFact 1087 gobFacts := make([]gobFact, 0, len(depObjFacts)+len(depPkgFacts)) 1088 for key, fact := range depObjFacts { 1089 if fact.path == "" { 1090 continue 1091 } 1092 if sanityCheck { 1093 p, _ := objectpath.For(key.Obj) 1094 if p != fact.path { 1095 panic(fmt.Sprintf("got different object paths for %v. old: %q new: %q", key.Obj, fact.path, p)) 1096 } 1097 } 1098 gf := gobFact{ 1099 PkgPath: key.Obj.Pkg().Path(), 1100 ObjPath: string(fact.path), 1101 Fact: fact.fact, 1102 } 1103 gobFacts = append(gobFacts, gf) 1104 } 1105 1106 for key, fact := range depPkgFacts { 1107 gf := gobFact{ 1108 PkgPath: key.Pkg.Path(), 1109 Fact: fact, 1110 } 1111 gobFacts = append(gobFacts, gf) 1112 } 1113 1114 if r.TestMode { 1115 for _, a := range all { 1116 for key, fact := range a.ObjectFacts { 1117 tgf := TestFact{ 1118 ObjectName: key.Obj.Name(), 1119 Position: pkg.Fset.Position(key.Obj.Pos()), 1120 FactString: fmt.Sprint(fact.fact), 1121 Analyzer: a.Analyzer.Name, 1122 } 1123 testFacts = append(testFacts, tgf) 1124 } 1125 1126 for _, fact := range a.PackageFacts { 1127 tgf := TestFact{ 1128 ObjectName: "", 1129 Position: pkg.Fset.Position(pkg.Syntax[0].Pos()), 1130 FactString: fmt.Sprint(fact), 1131 Analyzer: a.Analyzer.Name, 1132 } 1133 testFacts = append(testFacts, tgf) 1134 } 1135 } 1136 } 1137 1138 var diags []Diagnostic 1139 for _, a := range root.deps { 1140 a := 
a.(*analyzerAction) 1141 diags = append(diags, a.Diagnostics...) 1142 } 1143 return analysisResult{ 1144 facts: gobFacts, 1145 testFacts: testFacts, 1146 diagnostics: diags, 1147 unused: unusedResult, 1148 }, nil 1149 } 1150 1151 func registerGobTypes(analyzers []*analysis.Analyzer) { 1152 for _, a := range analyzers { 1153 for _, typ := range a.FactTypes { 1154 // FIXME(dh): use RegisterName so we can work around collisions 1155 // in names. For pointer-types, gob incorrectly qualifies 1156 // type names with the package name, not the import path. 1157 gob.Register(typ) 1158 } 1159 } 1160 } 1161 1162 func allAnalyzers(analyzers []*analysis.Analyzer) []*analysis.Analyzer { 1163 seen := map[*analysis.Analyzer]struct{}{} 1164 out := make([]*analysis.Analyzer, 0, len(analyzers)) 1165 var dfs func(*analysis.Analyzer) 1166 dfs = func(a *analysis.Analyzer) { 1167 if _, ok := seen[a]; ok { 1168 return 1169 } 1170 seen[a] = struct{}{} 1171 out = append(out, a) 1172 for _, dep := range a.Requires { 1173 dfs(dep) 1174 } 1175 } 1176 for _, a := range analyzers { 1177 dfs(a) 1178 } 1179 return out 1180 } 1181 1182 // Run loads the packages specified by patterns, runs analyzers on 1183 // them and returns the results. Each result corresponds to a single 1184 // package. Results will be returned for all packages, including 1185 // dependencies. Errors specific to packages will be reported in the 1186 // respective results. 1187 // 1188 // If cfg is nil, a default config will be used. Otherwise, cfg will 1189 // be used, with the exception of the Mode field. 1190 func (r *Runner) Run(cfg *packages.Config, analyzers []*analysis.Analyzer, patterns []string) ([]Result, error) { 1191 analyzers = allAnalyzers(analyzers) 1192 registerGobTypes(analyzers) 1193 1194 r.Stats.setState(StateLoadPackageGraph) 1195 lpkgs, err := loader.Graph(r.cache, cfg, patterns...) 
1196 if err != nil { 1197 return nil, err 1198 } 1199 r.Stats.setInitialPackages(len(lpkgs)) 1200 1201 if len(lpkgs) == 0 { 1202 return nil, nil 1203 } 1204 1205 var goVersion string 1206 if r.GoVersion == "module" { 1207 for _, lpkg := range lpkgs { 1208 if m := lpkg.Module; m != nil { 1209 if goVersion == "" { 1210 goVersion = m.GoVersion 1211 } else if goVersion != m.GoVersion { 1212 // Theoretically, we should only ever see a single Go 1213 // module. At least that's currently (as of Go 1.15) 1214 // true when using 'go list'. 1215 fmt.Fprintln(os.Stderr, "warning: encountered multiple modules and could not deduce targeted Go version") 1216 goVersion = "" 1217 break 1218 } 1219 } 1220 } 1221 } else { 1222 goVersion = r.GoVersion 1223 } 1224 1225 if goVersion == "" { 1226 if r.FallbackGoVersion == "" { 1227 panic("could not determine Go version of module, and fallback version hasn't been set") 1228 } 1229 goVersion = r.FallbackGoVersion 1230 } 1231 r.actualGoVersion = goVersion 1232 for _, a := range analyzers { 1233 flag := a.Flags.Lookup("go") 1234 if flag == nil { 1235 continue 1236 } 1237 if err := flag.Value.Set(goVersion); err != nil { 1238 return nil, err 1239 } 1240 } 1241 1242 r.Stats.setState(StateBuildActionGraph) 1243 all := map[*loader.PackageSpec]*packageAction{} 1244 root := &packageAction{} 1245 for _, lpkg := range lpkgs { 1246 a := newPackageActionRoot(lpkg, all) 1247 root.deps = append(root.deps, a) 1248 a.triggers = append(a.triggers, root) 1249 } 1250 root.pending = uint32(len(root.deps)) 1251 1252 queue := make(chan action) 1253 r.Stats.setTotalPackages(len(all) - 1) 1254 1255 r.Stats.setState(StateProcessing) 1256 go func() { 1257 for _, a := range all { 1258 if len(a.Deps()) == 0 { 1259 queue <- a 1260 } 1261 } 1262 }() 1263 1264 sr := newSubrunner(r, analyzers) 1265 for item := range queue { 1266 r.semaphore.Acquire() 1267 go genericHandle(item, root, queue, &r.semaphore, func(act action) error { 1268 return sr.do(act) 1269 }) 1270 } 
1271 1272 r.Stats.setState(StateFinalizing) 1273 out := make([]Result, 0, len(all)) 1274 for _, item := range all { 1275 if item.Package == nil { 1276 continue 1277 } 1278 out = append(out, Result{ 1279 Package: item.Package, 1280 Config: item.cfg, 1281 Initial: !item.factsOnly, 1282 Skipped: item.skipped, 1283 Failed: item.failed, 1284 Errors: item.errors, 1285 results: item.results, 1286 testData: item.testData, 1287 }) 1288 } 1289 return out, nil 1290 }