github.com/google/osv-scalibr@v0.4.1/enricher/vulnmatch/osvdev/osvdev.go (about)

     1  // Copyright 2025 Google LLC
     2  //
     3  // Licensed under the Apache License, Version 2.0 (the "License");
     4  // you may not use this file except in compliance with the License.
     5  // You may obtain a copy of the License at
     6  //
     7  //      http://www.apache.org/licenses/LICENSE-2.0
     8  //
     9  // Unless required by applicable law or agreed to in writing, software
    10  // distributed under the License is distributed on an "AS IS" BASIS,
    11  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    12  // See the License for the specific language governing permissions and
    13  // limitations under the License.
    14  
    15  // Package osvdev queries the OSV.dev API to find vulnerabilities in the inventory packages
    16  package osvdev
    17  
    18  import (
    19  	"context"
    20  	"errors"
    21  	"maps"
    22  	"slices"
    23  	"time"
    24  
    25  	"github.com/google/osv-scalibr/enricher"
    26  	"github.com/google/osv-scalibr/extractor"
    27  	"github.com/google/osv-scalibr/inventory"
    28  	"github.com/google/osv-scalibr/inventory/vex"
    29  	"github.com/google/osv-scalibr/plugin"
    30  	scalibrversion "github.com/google/osv-scalibr/version"
    31  	"golang.org/x/sync/errgroup"
    32  	"osv.dev/bindings/go/osvdev"
    33  	"osv.dev/bindings/go/osvdevexperimental"
    34  
    35  	osvpb "github.com/ossf/osv-schema/bindings/go/osvschema"
    36  	osvapipb "osv.dev/bindings/go/api"
    37  )
    38  
    39  const (
    40  	// Name is the unique name of this Enricher.
    41  	Name    = "vulnmatch/osvdev"
    42  	version = 1
    43  )
    44  
    45  const (
    46  	maxConcurrentRequests = 1000
    47  )
    48  
    49  // ErrInitialQueryTimeout is returned if the initial query to OSV.dev partially fails due to timeout
    50  var ErrInitialQueryTimeout = errors.New("initialQueryTimeout reached")
    51  
    52  var _ enricher.Enricher = &Enricher{}
    53  
    54  // Enricher queries the OSV.dev API to find vulnerabilities in the inventory packages
    55  type Enricher struct {
    56  	client              Client
    57  	initialQueryTimeout time.Duration
    58  }
    59  
    60  // NewWithClient returns an Enricher which uses a specified deps.dev client.
    61  func NewWithClient(c Client, initialQueryTimeout time.Duration) enricher.Enricher {
    62  	return &Enricher{
    63  		client:              c,
    64  		initialQueryTimeout: initialQueryTimeout,
    65  	}
    66  }
    67  
    68  // NewDefault creates a new Enricher with the default configuration and OSV.dev client
    69  func NewDefault() enricher.Enricher {
    70  	client := osvdev.DefaultClient()
    71  	client.Config.UserAgent = "osv-scanner_scan/" + scalibrversion.ScannerVersion
    72  	return &Enricher{
    73  		initialQueryTimeout: 5 * time.Minute,
    74  		client:              client,
    75  	}
    76  }
    77  
// Name returns the unique name of the Enricher, i.e. the package-level Name constant.
func (Enricher) Name() string {
	return Name
}
    82  
// Version returns the version of the Enricher, i.e. the package-level version constant.
func (Enricher) Version() int {
	return version
}
    87  
    88  // Requirements of the Enricher.
    89  // Needs network access so it can validate Secrets.
    90  func (Enricher) Requirements() *plugin.Capabilities {
    91  	return &plugin.Capabilities{
    92  		Network: plugin.NetworkOnline,
    93  	}
    94  }
    95  
    96  // RequiredPlugins returns the plugins that are required to be enabled for this
    97  // Enricher to run. While it works on the results of other extractors,
    98  // the Enricher itself can run independently.
    99  func (Enricher) RequiredPlugins() []string {
   100  	return []string{}
   101  }
   102  
   103  // Enrich queries the OSV.dev API to find vulnerabilities in the inventory packages
   104  func (e *Enricher) Enrich(ctx context.Context, _ *enricher.ScanInput, inv *inventory.Inventory) error {
   105  	pkgs := make([]*extractor.Package, 0, len(inv.Packages))
   106  	queries := make([]*osvapipb.Query, 0, len(inv.Packages))
   107  	for _, pkg := range inv.Packages {
   108  		if query := pkgToQuery(pkg); query != nil {
   109  			pkgs = append(pkgs, pkg)
   110  			queries = append(queries, query)
   111  		}
   112  	}
   113  
   114  	if len(queries) == 0 {
   115  		return nil
   116  	}
   117  
   118  	queryCtx, cancel := withOptionalTimeoutCause(ctx, e.initialQueryTimeout, ErrInitialQueryTimeout)
   119  	defer cancel()
   120  
   121  	batchResp, initialQueryErr := osvdevexperimental.BatchQueryPaging(queryCtx, e.client, queries)
   122  	initialQueryErr = errors.Join(initialQueryErr, context.Cause(queryCtx))
   123  
   124  	// if an error happened and is not caused by the initialQueryTimeout return it
   125  	if initialQueryErr != nil && !errors.Is(initialQueryErr, ErrInitialQueryTimeout) {
   126  		return initialQueryErr
   127  	}
   128  
   129  	// if batchResp is not usable return the initial error anyway
   130  	if batchResp == nil {
   131  		return initialQueryErr
   132  	}
   133  
   134  	vulnToPkgs := map[string][]*extractor.Package{}
   135  	for i, batch := range batchResp.Results {
   136  		for _, vv := range batch.Vulns {
   137  			vulnToPkgs[vv.Id] = append(vulnToPkgs[vv.Id], pkgs[i])
   138  		}
   139  	}
   140  
   141  	vulnIDs := slices.Collect(maps.Keys(vulnToPkgs))
   142  	vulnerabilities, err := e.makeVulnerabilitiesRequest(ctx, vulnIDs)
   143  	if err != nil {
   144  		return err
   145  	}
   146  
   147  	for _, vuln := range vulnerabilities {
   148  		for _, pkg := range vulnToPkgs[vuln.Id] {
   149  			inv.PackageVulns = append(inv.PackageVulns, &inventory.PackageVuln{
   150  				Vulnerability:         vuln,
   151  				Package:               pkg,
   152  				ExploitabilitySignals: vex.FindingVEXFromPackageVEX(vuln.Id, pkg.ExploitabilitySignals),
   153  				Plugins:               []string{Name},
   154  			})
   155  		}
   156  	}
   157  
   158  	// It's possible for other enrichers/detectors to have already added the same vulnerability
   159  	// for the same package, so we deduplicate and merge the results.
   160  	inv.PackageVulns = dedupPackageVulns(inv.PackageVulns)
   161  
   162  	// return to the caller the initialQueryErr, which if not nil indicates that
   163  	// the list of vulnerabilities is not complete
   164  	return initialQueryErr
   165  }
   166  
   167  // dedupPackageVulns deduplicate package vulnerabilities that have the same pkg and vulnID
   168  func dedupPackageVulns(vulns []*inventory.PackageVuln) []*inventory.PackageVuln {
   169  	if len(vulns) == 0 {
   170  		return vulns
   171  	}
   172  
   173  	type key struct {
   174  		pkg    *extractor.Package
   175  		vulnID string
   176  	}
   177  	dedupVulns := map[key]*inventory.PackageVuln{}
   178  
   179  	for _, vv := range vulns {
   180  		k := key{vv.Package, vv.Vulnerability.Id}
   181  		if v, ok := dedupVulns[k]; !ok {
   182  			dedupVulns[k] = vv
   183  		} else {
   184  			// use the latest (from OSV.dev) as source of truth
   185  			vv.Plugins = append(v.Plugins, vv.Plugins...)
   186  			dedupVulns[k] = vv
   187  		}
   188  	}
   189  
   190  	return slices.Collect(maps.Values(dedupVulns))
   191  }
   192  
   193  func (e *Enricher) makeVulnerabilitiesRequest(ctx context.Context, vulnIDs []string) ([]*osvpb.Vulnerability, error) {
   194  	vulnerabilities := make([]*osvpb.Vulnerability, len(vulnIDs))
   195  	g, ctx := errgroup.WithContext(ctx)
   196  	g.SetLimit(maxConcurrentRequests)
   197  
   198  	for i, vulnID := range vulnIDs {
   199  		g.Go(func() error {
   200  			// exit early if another hydration request has already failed
   201  			// results are thrown away later, so avoid needless work
   202  			if ctx.Err() != nil {
   203  				return nil //nolint:nilerr // this value doesn't matter to errgroup.Wait()
   204  			}
   205  			vuln, err := e.client.GetVulnByID(ctx, vulnID)
   206  			if err != nil {
   207  				return err
   208  			}
   209  			vulnerabilities[i] = vuln
   210  
   211  			return nil
   212  		})
   213  	}
   214  	if err := g.Wait(); err != nil {
   215  		return nil, err
   216  	}
   217  
   218  	return vulnerabilities, nil
   219  }
   220  
   221  func pkgToQuery(pkg *extractor.Package) *osvapipb.Query {
   222  	if pkg.Name != "" && !pkg.Ecosystem().IsEmpty() && pkg.Version != "" {
   223  		// TODO(#1222): Ecosystems could return ecosystems
   224  		return &osvapipb.Query{
   225  			Package: &osvpb.Package{
   226  				Name:      pkg.Name,
   227  				Ecosystem: pkg.Ecosystem().String(),
   228  			},
   229  			Param: &osvapipb.Query_Version{
   230  				Version: pkg.Version,
   231  			},
   232  		}
   233  	}
   234  
   235  	if pkg.SourceCode != nil && pkg.SourceCode.Commit != "" {
   236  		return &osvapipb.Query{
   237  			Param: &osvapipb.Query_Commit{
   238  				Commit: pkg.SourceCode.Commit,
   239  			},
   240  		}
   241  	}
   242  
   243  	return nil
   244  }
   245  
   246  // withOptionalTimeoutCause creates a context that may time out after d.
   247  // If d == 0, it just returns the original context.
   248  func withOptionalTimeoutCause(ctx context.Context, d time.Duration, clause error) (context.Context, context.CancelFunc) {
   249  	if d == 0 {
   250  		return ctx, func() {}
   251  	}
   252  	return context.WithTimeoutCause(ctx, d, clause)
   253  }