github.com/google/osv-scalibr@v0.4.1/guidedremediation/internal/resolution/dependency_subgraph.go (about)

     1  // Copyright 2025 Google LLC
     2  //
     3  // Licensed under the Apache License, Version 2.0 (the "License");
     4  // you may not use this file except in compliance with the License.
     5  // You may obtain a copy of the License at
     6  //
     7  //      http://www.apache.org/licenses/LICENSE-2.0
     8  //
     9  // Unless required by applicable law or agreed to in writing, software
    10  // distributed under the License is distributed on an "AS IS" BASIS,
    11  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    12  // See the License for the specific language governing permissions and
    13  // limitations under the License.
    14  
    15  package resolution
    16  
    17  import (
    18  	"context"
    19  	"slices"
    20  
    21  	"deps.dev/util/resolve"
    22  	"deps.dev/util/resolve/dep"
    23  	"github.com/google/osv-scalibr/guidedremediation/internal/manifest"
    24  	"github.com/google/osv-scalibr/guidedremediation/internal/vulns"
    25  	osvpb "github.com/ossf/osv-schema/bindings/go/osvschema"
    26  )
    27  
    28  // DependencySubgraph is a subgraph of dependencies that contains all paths to a specific node.
    29  type DependencySubgraph struct {
    30  	Dependency resolve.NodeID // The NodeID of the end dependency of this subgraph.
    31  	Nodes      map[resolve.NodeID]GraphNode
    32  }
    33  
    34  // GraphNode is a node in a DependencySubgraph
    35  type GraphNode struct {
    36  	Version  resolve.VersionKey
    37  	Distance int            // The shortest distance to the end Dependency Node (which has a Distance of 0)
    38  	Parents  []resolve.Edge // Parent edges i.e. with Edge.To == this ID
    39  	Children []resolve.Edge // Child edges i.e. with Edge.From == this ID
    40  }
    41  
    42  // ComputeSubgraphs computes the DependencySubgraphs for each specified NodeID.
    43  // The computed Subgraphs contains all nodes and edges that transitively depend on the specified node, and the node itself.
    44  //
    45  // Modifying any of the returned DependencySubgraphs may cause unexpected behaviour.
    46  func ComputeSubgraphs(g *resolve.Graph, nodes []resolve.NodeID) []*DependencySubgraph {
    47  	// Find the parent nodes of each node in graph, for easier traversal.
    48  	// These slices are shared between the returned subgraphs.
    49  	parentEdges := make(map[resolve.NodeID][]resolve.Edge)
    50  	for _, e := range g.Edges {
    51  		// Check for a self-dependency, just in case.
    52  		if e.From == e.To {
    53  			continue
    54  		}
    55  		parentEdges[e.To] = append(parentEdges[e.To], e)
    56  	}
    57  
    58  	// For each node, compute the subgraph.
    59  	subGraphs := make([]*DependencySubgraph, 0, len(nodes))
    60  	for _, nodeID := range nodes {
    61  		// Starting at the node of interest, visit all unvisited parents,
    62  		// adding the corresponding edges to the GraphNodes.
    63  		gNodes := make(map[resolve.NodeID]GraphNode)
    64  		seen := make(map[resolve.NodeID]struct{})
    65  		seen[nodeID] = struct{}{}
    66  		toProcess := []resolve.NodeID{nodeID}
    67  		currDistance := 0 // The current distance from end dependency.
    68  		for len(toProcess) > 0 {
    69  			// Track the next set of nodes to process, which will be +1 Distance away from end.
    70  			var next []resolve.NodeID
    71  			for _, node := range toProcess {
    72  				// Construct the GraphNode
    73  				parents := parentEdges[node]
    74  				gNode := gNodes[node] // Grab the existing GraphNode, which will have some Children populated.
    75  				gNode.Version = g.Nodes[node].Version
    76  				gNode.Distance = currDistance
    77  				gNode.Parents = parents
    78  				gNodes[node] = gNode
    79  				// Populate parent's children and add to next set.
    80  				for _, edge := range parents {
    81  					nID := edge.From
    82  					pNode := gNodes[nID]
    83  					pNode.Children = append(pNode.Children, edge)
    84  					gNodes[nID] = pNode
    85  					if _, ok := seen[nID]; !ok {
    86  						seen[nID] = struct{}{}
    87  						next = append(next, nID)
    88  					}
    89  				}
    90  			}
    91  			toProcess = next
    92  			currDistance++
    93  		}
    94  
    95  		subGraphs = append(subGraphs, &DependencySubgraph{
    96  			Dependency: nodeID,
    97  			Nodes:      gNodes,
    98  		})
    99  	}
   100  
   101  	return subGraphs
   102  }
   103  
   104  // IsDevOnly checks if this DependencySubgraph solely contains dev (or test) dependencies.
   105  // If groups is nil, checks the dep.Type of the direct graph edges for the Dev Attr (for in-place).
   106  // Otherwise, uses the groups of the direct dependencies to determine if a non-dev path exists (for relax/override).
   107  func (ds *DependencySubgraph) IsDevOnly(groups map[manifest.RequirementKey][]string) bool {
   108  	if groups != nil {
   109  		// Check if any of the direct dependencies are not in the dev group.
   110  		return !slices.ContainsFunc(ds.Nodes[0].Children, func(e resolve.Edge) bool {
   111  			req := resolve.RequirementVersion{
   112  				VersionKey: ds.Nodes[e.To].Version,
   113  				Type:       e.Type.Clone(),
   114  			}
   115  			reqGroups := groups[MakeRequirementKey(req)]
   116  			switch req.System {
   117  			case resolve.NPM:
   118  				return !slices.Contains(reqGroups, "dev")
   119  			case resolve.Maven:
   120  				return !slices.Contains(reqGroups, "test")
   121  			case resolve.PyPI, resolve.UnknownSystem:
   122  				fallthrough
   123  			default:
   124  				return true
   125  			}
   126  		})
   127  	}
   128  
   129  	// groups == nil
   130  	// Check if any of the direct dependencies do not have the Dev attr.
   131  	for _, e := range ds.Nodes[0].Children {
   132  		if e.Type.HasAttr(dep.Dev) {
   133  			continue
   134  		}
   135  		// As a workaround for npm workspaces, check for the a Dev attr in the direct dependency's dependencies.
   136  		for _, e2 := range ds.Nodes[e.To].Children {
   137  			if !e2.Type.HasAttr(dep.Dev) {
   138  				return false
   139  			}
   140  		}
   141  		// If the vulnerable dependency is a direct dependency, it'd have no Children.
   142  		// Since we've already checked that it doesn't have the Dev attr, it must be a non-dev dependency.
   143  		if e.To == ds.Dependency {
   144  			return false
   145  		}
   146  	}
   147  
   148  	return true
   149  }
   150  
   151  // ConstrainingSubgraph tries to construct a subgraph of the subgraph that includes only the edges that contribute to a vulnerability.
   152  // It identifies the dependencies which constrain the vulnerable package to use a vulnerable version.
   153  // This is used by the 'relax' remediation strategy to identify which direct dependencies need to be updated.
   154  //
   155  // e.g. for a subgraph with:
   156  //
   157  //	A -> C@<2.0
   158  //	B -> C@<3.0
   159  //	C resolves to C@1.9
   160  //
   161  // If the vuln affecting C is fixed in version 2.0, the constraining subgraph would only contain A,
   162  // since B would allow versions >=2.0 of C to be selected if not for A.
   163  //
   164  // This is a heuristic approach and may produce false positives (meaning possibly unnecessary dependencies would be flagged to be relaxed).
   165  // If the constraining subgraph cannot be computed for some reason, returns the original DependencySubgraph.
   166  func (ds *DependencySubgraph) ConstrainingSubgraph(ctx context.Context, cl resolve.Client, vuln *osvpb.Vulnerability) *DependencySubgraph {
   167  	// Just check if the direct requirement of the vulnerable package is constraining it.
   168  	// This still has some false positives.
   169  	// e.g. if we have
   170  	// A@* -> B@2.*
   171  	// D@* -> B@2.1.1 -> C@1.0.0
   172  	// resolving both together picks B@2.1.1 & thus constrains C to C@1.0.0 for A
   173  	// But resolving A alone could pick B@2.2.0 which might not depend on C
   174  	// Similarly, a direct dependency could be constrained by an indirect dependency with similar results.
   175  	end := ds.Nodes[ds.Dependency]
   176  	newParents := make([]resolve.Edge, 0, len(end.Parents))
   177  	for _, pEdge := range end.Parents {
   178  		// Check if the latest allowable version of the package is vulnerable
   179  		vk := end.Version
   180  		vk.Version = pEdge.Requirement
   181  		vk.VersionType = resolve.Requirement
   182  		vers, err := cl.MatchingVersions(ctx, vk)
   183  		if err != nil || len(vers) == 0 {
   184  			// Could not determine MatchingVersions - assume this is constraining.
   185  			newParents = append(newParents, pEdge)
   186  			continue
   187  		}
   188  		bestVK := vers[len(vers)-1] // This should be the highest version for npm
   189  
   190  		if vulns.IsAffected(vuln, vulns.VKToPackage(bestVK.VersionKey)) {
   191  			newParents = append(newParents, pEdge)
   192  		}
   193  	}
   194  
   195  	if len(newParents) == 0 {
   196  		// There has to be at least one constraining path for the vulnerability to appear.
   197  		// If our heuristic couldn't determine any, treat the whole subgraph as constraining.
   198  		return ds
   199  	}
   200  
   201  	// Rebuild the DependencySubgraph using the dependency's newParents.
   202  	// Same logic as in ComputeSubgraphs.
   203  	newNodes := make(map[resolve.NodeID]GraphNode)
   204  	newNodes[ds.Dependency] = GraphNode{
   205  		Version:  end.Version,
   206  		Distance: 0,
   207  		Parents:  newParents,
   208  	}
   209  
   210  	seen := make(map[resolve.NodeID]struct{})
   211  	seen[ds.Dependency] = struct{}{}
   212  	toProcess := make([]resolve.NodeID, 0, len(newParents))
   213  	for _, e := range newParents {
   214  		toProcess = append(toProcess, e.From)
   215  		seen[e.From] = struct{}{}
   216  	}
   217  
   218  	currDistance := 1
   219  	for len(toProcess) > 0 {
   220  		var next []resolve.NodeID
   221  		for _, nID := range toProcess {
   222  			oldNode := ds.Nodes[nID]
   223  			newNode := GraphNode{
   224  				Version:  oldNode.Version,
   225  				Distance: currDistance,
   226  				Parents:  slices.Clone(oldNode.Parents),
   227  				Children: slices.Clone(oldNode.Children),
   228  			}
   229  			// Remove the non-constraining edge from the node's children if it ends up in the subgraph.
   230  			newNode.Children = slices.DeleteFunc(newNode.Children, func(e resolve.Edge) bool {
   231  				if e.To != ds.Dependency {
   232  					return false
   233  				}
   234  
   235  				return !slices.ContainsFunc(newParents, func(pEdge resolve.Edge) bool {
   236  					return pEdge.From == e.From &&
   237  						pEdge.Requirement == e.Requirement &&
   238  						pEdge.Type.Compare(e.Type) == 0
   239  				})
   240  			})
   241  			newNodes[nID] = newNode
   242  			for _, e := range newNode.Parents {
   243  				if _, ok := seen[e.From]; !ok {
   244  					seen[e.From] = struct{}{}
   245  					next = append(next, e.From)
   246  				}
   247  			}
   248  		}
   249  		toProcess = next
   250  		currDistance++
   251  	}
   252  	// Remove children edges to nodes that are not in the computed subgraph.
   253  	for nID, edge := range newNodes {
   254  		edge.Children = slices.DeleteFunc(edge.Children, func(e resolve.Edge) bool {
   255  			_, ok := seen[e.To]
   256  			return !ok
   257  		})
   258  		newNodes[nID] = edge
   259  	}
   260  
   261  	return &DependencySubgraph{
   262  		Dependency: ds.Dependency,
   263  		Nodes:      newNodes,
   264  	}
   265  }