github.com/google/capslock@v0.2.3-0.20240517042941-dac19fc347c0/analyzer/analyzer.go (about)

     1  // Copyright 2023 Google LLC
     2  //
     3  // Use of this source code is governed by a BSD-style
     4  // license that can be found in the LICENSE file or at
     5  // https://developers.google.com/open-source/licenses/bsd
     6  
     7  package analyzer
     8  
     9  import (
    10  	"fmt"
    11  	"go/ast"
    12  	"go/types"
    13  	"path"
    14  	"sort"
    15  	"strings"
    16  
    17  	"github.com/google/capslock/interesting"
    18  	cpb "github.com/google/capslock/proto"
    19  	"golang.org/x/tools/go/callgraph"
    20  	"golang.org/x/tools/go/packages"
    21  	"golang.org/x/tools/go/ssa"
    22  	"google.golang.org/protobuf/proto"
    23  )
    24  
// Config holds configuration for the analyzer.
type Config struct {
	// Classifier maps functions to capabilities and decides which call edges
	// are followed during the callgraph searches.
	Classifier Classifier
	// DisableBuiltin, when true, skips the analyzer's own source-based
	// detection of extra capabilities (stores of reflect.Value objects,
	// unsafe.Pointer conversions, and functions with no source code), so that
	// only the Classifier's categorizations are used.
	DisableBuiltin bool
}
    30  
// Classifier is an interface for types that help map code features to
// capabilities.
type Classifier interface {
	// FunctionCategory returns a Category for the given function specified by
	// a package name and function name.  Examples of function names include
	// "math.Cos", "(time.Time).Clock", and "(*sync.Cond).Signal".
	//
	// If the return value is Unspecified, then we have not declared it to be
	// either safe or unsafe, so its descendants will have to be considered by the
	// static analysis.  If the return value is CAPABILITY_SAFE, the function is
	// recorded as safe and the callgraph searches do not pass through it.  Any
	// other value is recorded as an explicit capability of the function.
	FunctionCategory(pkg string, name string) cpb.Capability

	// IncludeCall returns true if a call from one function to another should be
	// considered when searching for transitive capabilities.  Usually this should
	// return true, unless there is some reason to know that the particular call
	// cannot lead to additional capabilities for a function.
	//
	// caller and callee are function names in the same format as the names
	// passed to FunctionCategory.
	IncludeCall(caller string, callee string) bool
}
    49  
    50  // GetClassifier returns a classifier for mapping packages and functions to the
    51  // appropriate capability.
    52  // If excludedUnanalyzed is true, the UNANALYZED capability is never returned.
    53  func GetClassifier(excludeUnanalyzed bool) *interesting.Classifier {
    54  	classifier := interesting.DefaultClassifier()
    55  	if excludeUnanalyzed {
    56  		return interesting.ClassifierExcludingUnanalyzed(classifier)
    57  	}
    58  	return classifier
    59  }
    60  
    61  // GetCapabilityInfo analyzes the packages in pkgs.  For each function in those
    62  // packages which have a path in the callgraph to an "interesting" function
    63  // (see the "interesting" package), we log details of the capability usage.
    64  //
    65  // One CapabilityInfo is returned for every (function, capability) pair, with
    66  // one example path in the callgraph that demonstrates that capability.
    67  func GetCapabilityInfo(pkgs []*packages.Package, queriedPackages map[*types.Package]struct{}, config *Config) *cpb.CapabilityInfoList {
    68  	type output struct {
    69  		*cpb.CapabilityInfo
    70  		*ssa.Function // used for sorting
    71  	}
    72  	var caps []output
    73  	forEachPath(pkgs, queriedPackages,
    74  		func(cap cpb.Capability, nodes map[*callgraph.Node]bfsState,
    75  			v *callgraph.Node,
    76  		) {
    77  			i := 0
    78  			c := cpb.CapabilityInfo{}
    79  			fn := v.Func
    80  			var n string
    81  			var ctype cpb.CapabilityType
    82  			var b strings.Builder
    83  			var incomingEdge *callgraph.Edge
    84  			for v != nil {
    85  				s := v.Func.String()
    86  				fn := &cpb.Function{Name: proto.String(s)}
    87  				if position := callsitePosition(incomingEdge); position.IsValid() {
    88  					fn.Site = &cpb.Function_Site{
    89  						Filename: proto.String(path.Base(position.Filename)),
    90  						Line:     proto.Int64(int64(position.Line)),
    91  						Column:   proto.Int64(int64(position.Column)),
    92  					}
    93  				}
    94  				c.Path = append(c.Path, fn)
    95  				if i == 0 {
    96  					n = v.Func.Package().Pkg.Path()
    97  					ctype = cpb.CapabilityType_CAPABILITY_TYPE_DIRECT
    98  					fmt.Fprintf(&b, "%s", s)
    99  					c.Capability = cap.Enum()
   100  					c.PackageDir = proto.String(v.Func.Package().Pkg.Path())
   101  					c.PackageName = proto.String(v.Func.Package().Pkg.Name())
   102  				} else {
   103  					fmt.Fprintf(&b, " %s", s)
   104  				}
   105  				i++
   106  				if pName := packagePath(v.Func); n != pName && !isStdLib(pName) {
   107  					ctype = cpb.CapabilityType_CAPABILITY_TYPE_TRANSITIVE
   108  				}
   109  				incomingEdge, v = nodes[v].edge, nodes[v].next()
   110  			}
   111  			c.CapabilityType = &ctype
   112  			c.DepPath = proto.String(b.String())
   113  			caps = append(caps, output{&c, fn})
   114  		}, config)
   115  	sort.Slice(caps, func(i, j int) bool {
   116  		if x, y := caps[i].CapabilityInfo.GetCapability(), caps[j].CapabilityInfo.GetCapability(); x != y {
   117  			return x < y
   118  		}
   119  		return funcCompare(caps[i].Function, caps[j].Function) < 0
   120  	})
   121  	cil := &cpb.CapabilityInfoList{
   122  		CapabilityInfo: make([]*cpb.CapabilityInfo, len(caps)),
   123  		ModuleInfo:     collectModuleInfo(pkgs),
   124  		PackageInfo:    collectPackageInfo(pkgs),
   125  	}
   126  	for i := range caps {
   127  		cil.CapabilityInfo[i] = caps[i].CapabilityInfo
   128  	}
   129  	return cil
   130  }
   131  
// CapabilityCounter accumulates the statistics that GetCapabilityStats
// gathers for a single capability.
type CapabilityCounter struct {
	// capability is the capability these statistics describe.
	capability cpb.Capability
	// count is the number of (function, capability) pairs seen.
	count int64
	// direct_count is the number of those pairs whose example call path only
	// passes through the first function's own package and packages that
	// isStdLib accepts.
	direct_count int64
	// transitive_count is the number of pairs whose example call path passes
	// through some other package.
	transitive_count int64
	// example is the call path of the most recently seen pair.
	example []*cpb.Function
}
   139  
   140  // GetCapabilityStats analyzes the packages in pkgs.  For each function in
   141  // those packages which have a path in the callgraph to an "interesting"
   142  // function (see the "interesting" package), we give aggregated statistics
   143  // about the capability usage.
   144  func GetCapabilityStats(pkgs []*packages.Package, queriedPackages map[*types.Package]struct{}, config *Config) *cpb.CapabilityStatList {
   145  	var cs []*cpb.CapabilityStats
   146  	cm := make(map[string]*CapabilityCounter)
   147  	forEachPath(pkgs, queriedPackages,
   148  		func(cap cpb.Capability, nodes map[*callgraph.Node]bfsState, v *callgraph.Node) {
   149  			if _, ok := cm[cap.String()]; !ok {
   150  				cm[cap.String()] = &CapabilityCounter{count: 1, capability: cap}
   151  			} else {
   152  				cm[cap.String()].count += 1
   153  			}
   154  			i := 0
   155  			var n string
   156  			var incomingEdge *callgraph.Edge
   157  			isDirect := true
   158  			e := []*cpb.Function{}
   159  			for v != nil {
   160  				s := v.Func.String()
   161  				fn := &cpb.Function{Name: proto.String(s)}
   162  				if position := callsitePosition(incomingEdge); position.IsValid() {
   163  					fn.Site = &cpb.Function_Site{
   164  						Filename: proto.String(path.Base(position.Filename)),
   165  						Line:     proto.Int64(int64(position.Line)),
   166  						Column:   proto.Int64(int64(position.Column)),
   167  					}
   168  				}
   169  				e = append(e, fn)
   170  				if i == 0 {
   171  					n = v.Func.Package().Pkg.Path()
   172  				}
   173  				i++
   174  				if pName := packagePath(v.Func); n != pName && !isStdLib(pName) {
   175  					isDirect = false
   176  				}
   177  				incomingEdge, v = nodes[v].edge, nodes[v].next()
   178  			}
   179  			if isDirect {
   180  				if _, ok := cm[cap.String()]; !ok {
   181  					cm[cap.String()] = &CapabilityCounter{count: 1, direct_count: 1}
   182  				} else {
   183  					cm[cap.String()].direct_count += 1
   184  				}
   185  			} else {
   186  				if _, ok := cm[cap.String()]; !ok {
   187  					cm[cap.String()] = &CapabilityCounter{count: 1, transitive_count: 1}
   188  				} else {
   189  					cm[cap.String()].transitive_count += 1
   190  				}
   191  			}
   192  			if _, ok := cm[cap.String()]; !ok {
   193  				cm[cap.String()] = &CapabilityCounter{example: e}
   194  			} else {
   195  				cm[cap.String()].example = e
   196  			}
   197  		}, config)
   198  	for _, counts := range cm {
   199  		cs = append(cs, &cpb.CapabilityStats{
   200  			Capability:      &counts.capability,
   201  			Count:           &counts.count,
   202  			DirectCount:     &counts.direct_count,
   203  			TransitiveCount: &counts.transitive_count,
   204  			ExampleCallpath: counts.example,
   205  		})
   206  	}
   207  	sort.Slice(cs, func(i, j int) bool {
   208  		return cs[i].GetCapability() < cs[j].GetCapability()
   209  	})
   210  	return &cpb.CapabilityStatList{
   211  		CapabilityStats: cs,
   212  		ModuleInfo:      collectModuleInfo(pkgs),
   213  	}
   214  }
   215  
   216  // GetCapabilityCount analyzes the packages in pkgs.  For each function in
   217  // those packages which have a path in the callgraph to an "interesting"
   218  // function (see the "interesting" package), we give an aggregate count of the
   219  // capability usage.
   220  func GetCapabilityCounts(pkgs []*packages.Package, queriedPackages map[*types.Package]struct{}, config *Config) *cpb.CapabilityCountList {
   221  	cm := make(map[string]int64)
   222  	forEachPath(pkgs, queriedPackages,
   223  		func(cap cpb.Capability, nodes map[*callgraph.Node]bfsState, v *callgraph.Node) {
   224  			if _, ok := cm[cap.String()]; !ok {
   225  				cm[cap.String()] = 1
   226  			} else {
   227  				cm[cap.String()] += 1
   228  			}
   229  		}, config)
   230  	return &cpb.CapabilityCountList{
   231  		CapabilityCounts: cm,
   232  		ModuleInfo:       collectModuleInfo(pkgs),
   233  	}
   234  }
   235  
   236  // searchBackwardsFromCapabilities returns the set of all function nodes that
   237  // have a path to a function with some capability.
   238  func searchBackwardsFromCapabilities(nodesByCapability nodesetPerCapability, safe nodeset, classifier Classifier) nodeset {
   239  	var (
   240  		visited = make(nodeset)
   241  		q       []*callgraph.Node
   242  	)
   243  	// Initialize the queue to contain the nodes with a capability.
   244  	for _, nodes := range nodesByCapability {
   245  		for v := range nodes {
   246  			if _, ok := safe[v]; ok {
   247  				continue
   248  			}
   249  			q = append(q, v)
   250  			visited[v] = struct{}{}
   251  		}
   252  	}
   253  	// Perform a BFS backwards through the call graph from the interesting
   254  	// nodes.
   255  	for len(q) > 0 {
   256  		v := q[0]
   257  		q = q[1:]
   258  		calleeName := v.Func.String()
   259  		for _, edge := range v.In {
   260  			callerName := edge.Caller.Func.String()
   261  			if !classifier.IncludeCall(callerName, calleeName) {
   262  				continue
   263  			}
   264  			w := edge.Caller
   265  			if _, ok := safe[w]; ok {
   266  				continue
   267  			}
   268  			if _, ok := visited[w]; ok {
   269  				// We have already visited w.
   270  				continue
   271  			}
   272  			visited[w] = struct{}{}
   273  			q = append(q, w)
   274  		}
   275  	}
   276  	return visited
   277  }
   278  
   279  // searchForwardsFromQueriedFunctions searches from a set of function nodes to
   280  // find all the nodes they can reach which themselves reach a node with some
   281  // capability.
   282  //
   283  // outputCall is called for each edge between two such nodes.
   284  // outputCapability is called for each node reached in the graph that has some
   285  // direct capability.
   286  func searchForwardsFromQueriedFunctions(
   287  	nodes nodeset,
   288  	nodesByCapability nodesetPerCapability,
   289  	allNodesWithExplicitCapability,
   290  	canReachCapability nodeset,
   291  	classifier Classifier,
   292  	outputCall func(from, to *callgraph.Node),
   293  	outputCapability func(fn *callgraph.Node, c cpb.Capability),
   294  ) {
   295  	var q []*callgraph.Node
   296  	for v := range nodes {
   297  		q = append(q, v)
   298  	}
   299  	for len(q) > 0 {
   300  		v := q[0]
   301  		q = q[1:]
   302  		for c, nodes := range nodesByCapability {
   303  			if _, ok := nodes[v]; ok {
   304  				outputCapability(v, c)
   305  			}
   306  		}
   307  		if _, ok := allNodesWithExplicitCapability[v]; ok {
   308  			continue
   309  		}
   310  		calleeName := v.Func.String()
   311  		out := make(nodeset)
   312  		for _, edge := range v.Out {
   313  			callerName := edge.Caller.Func.String()
   314  			if !classifier.IncludeCall(callerName, calleeName) {
   315  				continue
   316  			}
   317  			w := edge.Callee
   318  			if _, ok := canReachCapability[w]; !ok {
   319  				continue
   320  			}
   321  			out[w] = struct{}{}
   322  		}
   323  		for w := range out {
   324  			outputCall(v, w)
   325  			if _, ok := nodes[w]; ok {
   326  				// We have already visited w.
   327  				continue
   328  			}
   329  			nodes[w] = struct{}{}
   330  			q = append(q, w)
   331  		}
   332  	}
   333  }
   334  
   335  // CapabilityGraph analyzes the callgraph for the packages in pkgs.
   336  //
   337  // It outputs the graph containing all paths from a function belonging
   338  // to one of the packages in queriedPackages to a function which has
   339  // some capability.
   340  //
   341  // outputCall is called for each edge between two nodes.
   342  // outputCapability is called for each node in the graph that has some
   343  // capability.
   344  func CapabilityGraph(pkgs []*packages.Package,
   345  	queriedPackages map[*types.Package]struct{},
   346  	config *Config,
   347  	outputCall func(from, to *callgraph.Node),
   348  	outputCapability func(fn *callgraph.Node, c cpb.Capability),
   349  ) {
   350  	safe, nodesByCapability, extraNodesByCapability := getPackageNodesWithCapability(pkgs, config)
   351  	nodesByCapability, allNodesWithExplicitCapability := mergeCapabilities(nodesByCapability, extraNodesByCapability)
   352  	extraNodesByCapability = nil
   353  
   354  	canReachCapability := searchBackwardsFromCapabilities(nodesByCapability, safe, config.Classifier)
   355  
   356  	canBeReachedFromQuery := make(nodeset)
   357  	for v := range canReachCapability {
   358  		if v.Func.Package() == nil {
   359  			continue
   360  		}
   361  		if _, ok := queriedPackages[v.Func.Package().Pkg]; ok {
   362  			canBeReachedFromQuery[v] = struct{}{}
   363  		}
   364  	}
   365  
   366  	searchForwardsFromQueriedFunctions(
   367  		canBeReachedFromQuery,
   368  		nodesByCapability,
   369  		allNodesWithExplicitCapability,
   370  		canReachCapability,
   371  		config.Classifier,
   372  		outputCall,
   373  		outputCapability)
   374  }
   375  
   376  // getPackageNodesWithCapability analyzes all the functions in pkgs and their
   377  // transitive dependencies, and returns three sets of callgraph nodes.
   378  //
   379  // safe contains the set of nodes for functions that have been explicitly
   380  // classified as safe.
   381  // nodesByCapability contains nodes that have been explicitly categorized
   382  // as having some particular capability.  These are in a map from capability
   383  // to a set of nodes.
   384  // extraNodesByCapability contains nodes for functions that use unsafe pointers
   385  // or the reflect package in a way that we want to report to the user.
   386  func getPackageNodesWithCapability(pkgs []*packages.Package,
   387  	config *Config,
   388  ) (safe nodeset, nodesByCapability, extraNodesByCapability nodesetPerCapability) {
   389  	graph, ssaProg, allFunctions := buildGraph(pkgs, true)
   390  	unsafePointerFunctions := findUnsafePointerConversions(pkgs, ssaProg, allFunctions)
   391  	ssaProg = nil // possibly save memory; we don't use ssaProg again
   392  	safe, nodesByCapability = getNodeCapabilities(graph, config.Classifier)
   393  
   394  	if !config.DisableBuiltin {
   395  		extraNodesByCapability = getExtraNodesByCapability(graph, allFunctions, unsafePointerFunctions)
   396  	}
   397  	return safe, nodesByCapability, extraNodesByCapability
   398  }
   399  
   400  func getExtraNodesByCapability(graph *callgraph.Graph, allFunctions map[*ssa.Function]bool, unsafePointerFunctions map[*ssa.Function]struct{}) nodesetPerCapability {
   401  	// Find functions that copy reflect.Value objects in a way that could
   402  	// possibly cause a data race, and add their nodes to
   403  	// extraNodesByCapability[Capability_CAPABILITY_REFLECT].
   404  	extraNodesByCapability := make(nodesetPerCapability)
   405  	for f := range allFunctions {
   406  		// Find the function variables that do not escape.
   407  		locals := map[ssa.Value]struct{}{}
   408  		for _, l := range f.Locals {
   409  			if !l.Heap {
   410  				locals[l] = struct{}{}
   411  			}
   412  		}
   413  		for _, b := range f.Blocks {
   414  			for _, i := range b.Instrs {
   415  				// An IndexAddr instruction creates an SSA value which refers to an
   416  				// element of an array.  An element of a local array is also local.
   417  				if ia, ok := i.(*ssa.IndexAddr); ok {
   418  					if _, islocal := locals[ia.X]; islocal {
   419  						locals[ia] = struct{}{}
   420  					}
   421  				}
   422  				// A FieldAddr instruction creates an SSA value which refers to a
   423  				// field of a struct.  A field of a local struct is also local.
   424  				if f, ok := i.(*ssa.FieldAddr); ok {
   425  					if _, islocal := locals[f.X]; islocal {
   426  						locals[f] = struct{}{}
   427  					}
   428  				}
   429  				// Check the destination of store instructions.
   430  				if s, ok := i.(*ssa.Store); ok {
   431  					dest := s.Addr
   432  					if _, islocal := locals[dest]; islocal {
   433  						continue
   434  					}
   435  					// dest.Type should be a types.Pointer pointing to the type of the
   436  					// value that is copied by this instruction.
   437  					typ, ok := dest.Type().(*types.Pointer)
   438  					if !ok {
   439  						continue
   440  					}
   441  					if !containsReflectValue(typ.Elem()) {
   442  						continue
   443  					}
   444  					if node, ok := graph.Nodes[f]; ok {
   445  						// This is a store to a non-local reflect.Value, or to a non-local
   446  						// object that contains a reflect.Value.
   447  						extraNodesByCapability.add(cpb.Capability_CAPABILITY_REFLECT, node)
   448  					}
   449  				}
   450  			}
   451  		}
   452  	}
   453  	// Add nodes for the functions in unsafePointerFunctions to
   454  	// extraNodesByCapability[Capability_CAPABILITY_UNSAFE_POINTER].
   455  	for f := range unsafePointerFunctions {
   456  		if node, ok := graph.Nodes[f]; ok {
   457  			extraNodesByCapability.add(cpb.Capability_CAPABILITY_UNSAFE_POINTER, node)
   458  		}
   459  	}
   460  	// Add the arbitrary-execution capability to asm function nodes.
   461  	for f, node := range graph.Nodes {
   462  		if f.Blocks == nil {
   463  			// No source code for this function.
   464  			if f.Synthetic != "" {
   465  				// Exclude synthetic functions, such as those loaded from object files.
   466  				continue
   467  			}
   468  			extraNodesByCapability.add(cpb.Capability_CAPABILITY_ARBITRARY_EXECUTION, node)
   469  		}
   470  	}
   471  	return extraNodesByCapability
   472  }
   473  
   474  // findUnsafePointerConversions uses analysis of the syntax tree to find
   475  // functions which convert unsafe.Pointer values to another type.
   476  func findUnsafePointerConversions(pkgs []*packages.Package, ssaProg *ssa.Program, allFunctions map[*ssa.Function]bool) (unsafePointer map[*ssa.Function]struct{}) {
   477  	// AST nodes corresponding to functions which convert unsafe.Pointer values.
   478  	unsafeFunctionNodes := make(map[ast.Node]struct{})
   479  	// Packages which contain variables that are initialized using
   480  	// unsafe.Pointer conversions.  We will later find the function nodes
   481  	// corresponding to the init functions for these packages.
   482  	packagesWithUnsafePointerUseInInitialization := make(map[*types.Package]struct{})
   483  	forEachPackageIncludingDependencies(pkgs, func(pkg *packages.Package) {
   484  		seenUnsafePointerUseInInitialization := false
   485  		for _, file := range pkg.Syntax {
   486  			vis := visitor{
   487  				unsafeFunctionNodes:                  unsafeFunctionNodes,
   488  				seenUnsafePointerUseInInitialization: &seenUnsafePointerUseInInitialization,
   489  				pkg:                                  pkg,
   490  			}
   491  			ast.Walk(vis, file)
   492  		}
   493  		if seenUnsafePointerUseInInitialization {
   494  			// One of the files in this package contained an unsafe.Pointer
   495  			// conversion in the initialization expression for a package-scoped
   496  			// variable.
   497  			// We want to find later the *ssa.Package object corresponding to the
   498  			// *packages.Package object we have now.  There is no direct pointer
   499  			// between the two, but each has a pointer to the corresponding
   500  			// *types.Package object, so we store that here.
   501  			packagesWithUnsafePointerUseInInitialization[pkg.Types] = struct{}{}
   502  		}
   503  	})
   504  	// Find the *ssa.Function pointers corresponding to the syntax nodes found
   505  	// above.
   506  	unsafePointerFunctions := make(map[*ssa.Function]struct{})
   507  	for f := range allFunctions {
   508  		if _, ok := unsafeFunctionNodes[f.Syntax()]; ok {
   509  			unsafePointerFunctions[f] = struct{}{}
   510  		}
   511  	}
   512  	for _, pkg := range ssaProg.AllPackages() {
   513  		if _, ok := packagesWithUnsafePointerUseInInitialization[pkg.Pkg]; ok {
   514  			// This package had an unsafe.Pointer conversion in the initialization
   515  			// expression for a package-scoped variable, so we add the package's
   516  			// "init" function to unsafePointerFunctions.
   517  			// There will always be an init function for each package; if one
   518  			// didn't exist in the source, a synthetic one will have been
   519  			// created.
   520  			if f := pkg.Func("init"); f != nil {
   521  				unsafePointerFunctions[f] = struct{}{}
   522  			}
   523  		}
   524  	}
   525  	return unsafePointerFunctions
   526  }
   527  
   528  func getNodeCapabilities(graph *callgraph.Graph,
   529  	classifier Classifier,
   530  ) (safe nodeset, nodesByCapability nodesetPerCapability) {
   531  	safe = make(nodeset)
   532  	nodesByCapability = make(nodesetPerCapability)
   533  	for _, v := range graph.Nodes {
   534  		if v.Func == nil {
   535  			continue
   536  		}
   537  		var c cpb.Capability
   538  		if v.Func.Package() != nil && v.Func.Package().Pkg != nil {
   539  			// Categorize v.Func.
   540  			pkg := v.Func.Package().Pkg.Path()
   541  			name := v.Func.String()
   542  			c = classifier.FunctionCategory(pkg, name)
   543  		} else {
   544  			origin := v.Func.Origin()
   545  			if origin == nil || origin.Package() == nil || origin.Package().Pkg == nil {
   546  				continue
   547  			}
   548  			// v.Func is an instantiation of a generic function.  Get the package
   549  			// name and function name of the generic function, and categorize that
   550  			// instead.
   551  			pkg := origin.Package().Pkg.Path()
   552  			name := origin.String()
   553  			c = classifier.FunctionCategory(pkg, name)
   554  		}
   555  		if c == cpb.Capability_CAPABILITY_SAFE {
   556  			safe[v] = struct{}{}
   557  		} else if c != cpb.Capability_CAPABILITY_UNSPECIFIED {
   558  			nodesByCapability.add(c, v)
   559  		}
   560  	}
   561  	return safe, nodesByCapability
   562  }
   563  
   564  func mergeCapabilities(nodesByCapability, extraNodesByCapability nodesetPerCapability) (nodesetPerCapability, nodeset) {
   565  	// We gather here all the nodes which were given an explicit categorization.
   566  	// We will not search for paths that go through these nodes to reach other
   567  	// capabilities; for example, we do not report that os.ReadFile also has
   568  	// a descendant that will make system calls.
   569  	allNodesWithExplicitCapability := make(nodeset)
   570  	for _, nodes := range nodesByCapability {
   571  		for v := range nodes {
   572  			allNodesWithExplicitCapability[v] = struct{}{}
   573  		}
   574  	}
   575  	// Now that we have constructed allNodesWithExplicitCapability, we add the
   576  	// nodes from extraNodesByCapability to nodesByCapability, so that we find
   577  	// paths to all these nodes together when we do a BFS.
   578  	// extraNodesByCapability contains function capabilities that our analyzer
   579  	// found by examining the function's source code.  These findings are
   580  	// ignored when they apply to a function that already has an explicit
   581  	// category.
   582  	for cap, ns := range extraNodesByCapability {
   583  		for node := range ns {
   584  			if _, ok := allNodesWithExplicitCapability[node]; ok {
   585  				// This function already has an explicit category; don't add this
   586  				// extra capability.
   587  				continue
   588  			}
   589  			nodesByCapability.add(cap, node)
   590  		}
   591  	}
   592  	return nodesByCapability, allNodesWithExplicitCapability
   593  }
   594  
   595  // forEachPath analyzes the callgraph rooted at the packages in pkgs.
   596  //
   597  // For each capability, a BFS is run to find all functions in queriedPackages
   598  // which have a path in the callgraph to a function with that capability.
   599  //
   600  // fn is called for each of these (capability, function) pairs.  fn is passed
   601  // the capability, a map describing the current state of the BFS, and the node
   602  // in the callgraph representing the function.  fn can use this information
   603  // to reconstruct the path.
   604  //
   605  // forEachPath may modify pkgs.
   606  func forEachPath(pkgs []*packages.Package, queriedPackages map[*types.Package]struct{},
   607  	fn func(cpb.Capability, map[*callgraph.Node]bfsState, *callgraph.Node), config *Config,
   608  ) {
   609  	safe, nodesByCapability, extraNodesByCapability := getPackageNodesWithCapability(pkgs, config)
   610  	nodesByCapability, allNodesWithExplicitCapability := mergeCapabilities(nodesByCapability, extraNodesByCapability)
   611  	extraNodesByCapability = nil // we don't use extraNodesByCapability again.
   612  	var caps []cpb.Capability
   613  	for cap := range nodesByCapability {
   614  		caps = append(caps, cap)
   615  	}
   616  	sort.Slice(caps, func(i, j int) bool { return caps[i] < caps[j] })
   617  	for _, cap := range caps {
   618  		nodes := nodesByCapability[cap]
   619  		var (
   620  			visited = make(map[*callgraph.Node]bfsState)
   621  			q       []*callgraph.Node // queue for the BFS
   622  		)
   623  		// Initialize the queue to contain the nodes with the capability.
   624  		for v := range nodes {
   625  			if _, ok := safe[v]; ok {
   626  				continue
   627  			}
   628  			q = append(q, v)
   629  			visited[v] = bfsState{}
   630  		}
   631  		sort.Sort(byFunction(q))
   632  		for _, v := range q {
   633  			// Skipping cases where v.Func.Package() doesn't exist.
   634  			if v.Func.Package() == nil {
   635  				continue
   636  			}
   637  			if _, ok := queriedPackages[v.Func.Package().Pkg]; ok {
   638  				// v itself is in one of the queried packages.  Call fn here because
   639  				// the BFS below will only call fn for functions that call v
   640  				// directly or transitively.
   641  				fn(cap, visited, v)
   642  			}
   643  		}
   644  		// Perform a BFS backwards through the call graph from the interesting
   645  		// nodes.
   646  		for len(q) > 0 {
   647  			v := q[0]
   648  			q = q[1:]
   649  			var incomingEdges []*callgraph.Edge
   650  			calleeName := v.Func.String()
   651  			for _, edge := range v.In {
   652  				callerName := edge.Caller.Func.String()
   653  				if config.Classifier.IncludeCall(callerName, calleeName) {
   654  					incomingEdges = append(incomingEdges, edge)
   655  				}
   656  			}
   657  			sort.Sort(byCaller(incomingEdges))
   658  			for _, edge := range incomingEdges {
   659  				w := edge.Caller
   660  				if w.Func == nil {
   661  					// Synthetic nodes may not have this information.
   662  					continue
   663  				}
   664  				if _, ok := safe[w]; ok {
   665  					continue
   666  				}
   667  				if _, ok := visited[w]; ok {
   668  					// We have already visited w.
   669  					continue
   670  				}
   671  				if _, ok := allNodesWithExplicitCapability[w]; ok {
   672  					// w already has an explicit categorization.
   673  					continue
   674  				}
   675  				visited[w] = bfsState{edge: edge}
   676  				q = append(q, w)
   677  				if w.Func.Package() != nil {
   678  					if _, ok := queriedPackages[w.Func.Package().Pkg]; ok {
   679  						fn(cap, visited, w)
   680  					}
   681  				}
   682  			}
   683  		}
   684  	}
   685  }