github.com/google/osv-scalibr@v0.4.1/enricher/transitivedependency/requirements/requirements.go (about)

     1  // Copyright 2025 Google LLC
     2  //
     3  // Licensed under the Apache License, Version 2.0 (the "License");
     4  // you may not use this file except in compliance with the License.
     5  // You may obtain a copy of the License at
     6  //
     7  //      http://www.apache.org/licenses/LICENSE-2.0
     8  //
     9  // Unless required by applicable law or agreed to in writing, software
    10  // distributed under the License is distributed on an "AS IS" BASIS,
    11  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    12  // See the License for the specific language governing permissions and
    13  // limitations under the License.
    14  
    15  // Package requirements implements an enricher to perform dependency resolution for Python requirements.txt.
    16  package requirements
    17  
    18  import (
    19  	"context"
    20  	"errors"
    21  	"slices"
    22  
    23  	"deps.dev/util/pypi"
    24  	"deps.dev/util/resolve"
    25  	"deps.dev/util/resolve/dep"
    26  	pypiresolve "deps.dev/util/resolve/pypi"
    27  	cpb "github.com/google/osv-scalibr/binary/proto/config_go_proto"
    28  	"github.com/google/osv-scalibr/clients/resolution"
    29  	"github.com/google/osv-scalibr/enricher"
    30  	"github.com/google/osv-scalibr/extractor"
    31  	"github.com/google/osv-scalibr/extractor/filesystem/language/python/requirements"
    32  	"github.com/google/osv-scalibr/inventory"
    33  	"github.com/google/osv-scalibr/log"
    34  	"github.com/google/osv-scalibr/plugin"
    35  	"github.com/google/osv-scalibr/purl"
    36  )
    37  
    38  const (
    39  	// Name is the unique name of this enricher.
    40  	Name = "transitivedependency/requirements"
    41  )
    42  
// Enricher performs dependency resolution for requirements.txt.
type Enricher struct {
	// Client is the embedded resolution client. The resolve method wraps it
	// in an override client before handing it to the PyPI resolver.
	resolve.Client
}
    47  
    48  // Name returns the name of the enricher.
    49  func (Enricher) Name() string {
    50  	return Name
    51  }
    52  
    53  // Version returns the version of the enricher.
    54  func (Enricher) Version() int {
    55  	return 0
    56  }
    57  
    58  // Requirements returns the requirements of the enricher.
    59  func (Enricher) Requirements() *plugin.Capabilities {
    60  	return &plugin.Capabilities{
    61  		Network: plugin.NetworkOnline,
    62  	}
    63  }
    64  
    65  // RequiredPlugins returns the names of the plugins required by the enricher.
    66  func (Enricher) RequiredPlugins() []string {
    67  	return []string{requirements.Name}
    68  }
    69  
    70  // New creates a new Enricher.
    71  func New(cfg *cpb.PluginConfig) enricher.Enricher {
    72  	client := resolution.NewPyPIRegistryClient("", "")
    73  	client.SetLocalRegistry(cfg.LocalRegistry)
    74  	return &Enricher{Client: client}
    75  }
    76  
    77  // Enrich enriches the inventory in requirements.txt with transitive dependencies.
    78  func (e Enricher) Enrich(ctx context.Context, input *enricher.ScanInput, inv *inventory.Inventory) error {
    79  	pkgGroups := groupPackages(inv.Packages)
    80  	for path, pkgMap := range pkgGroups {
    81  		packages := make([]packageWithIndex, 0, len(pkgMap))
    82  		for _, indexPkg := range pkgMap {
    83  			packages = append(packages, indexPkg)
    84  		}
    85  		slices.SortFunc(packages, func(a, b packageWithIndex) int {
    86  			return a.index - b.index
    87  		})
    88  
    89  		list := make([]*extractor.Package, 0, len(packages))
    90  		for _, indexPkg := range packages {
    91  			list = append(list, indexPkg.pkg)
    92  		}
    93  		if len(list) == 0 || len(list[0].Metadata.(*requirements.Metadata).HashCheckingModeValues) > 0 {
    94  			// Do not perform transitive extraction with hash-checking mode.
    95  			// Hash-checking is an all-or-nothing proposition so we can assume the
    96  			// requirements is in hash-checking mode if the first package has hashes.
    97  			// https://pip.pypa.io/en/stable/topics/secure-installs/#hash-checking-mode
    98  			continue
    99  		}
   100  
   101  		// For each manifest, perform dependency resolution.
   102  		pkgs, err := e.resolve(ctx, path, list)
   103  		if err != nil {
   104  			log.Warnf("failed resolution: %v", err)
   105  			continue
   106  		}
   107  
   108  		for _, pkg := range pkgs {
   109  			indexPkg, ok := pkgMap[pkg.Name]
   110  			if ok {
   111  				// This dependency is in manifest, update the version and plugins.
   112  				i := indexPkg.index
   113  				inv.Packages[i].Version = pkg.Version
   114  				inv.Packages[i].Plugins = append(inv.Packages[i].Plugins, Name)
   115  			} else {
   116  				// This dependency is not found in manifest, so it's a transitive dependency.
   117  				inv.Packages = append(inv.Packages, pkg)
   118  			}
   119  		}
   120  	}
   121  	return nil
   122  }
   123  
// packageWithIndex holds the package with its index in inv.Packages
type packageWithIndex struct {
	// pkg is the package as it appears in the inventory.
	pkg *extractor.Package
	// index is the position of pkg in the slice passed to groupPackages;
	// Enrich uses it to update the matching inv.Packages entry in place.
	index int
}
   129  
   130  // groupPackages groups packages found in requirements.txt by the first location that they are found
   131  // and returns a map of location -> package name -> package with index.
   132  func groupPackages(pkgs []*extractor.Package) map[string]map[string]packageWithIndex {
   133  	result := make(map[string]map[string]packageWithIndex)
   134  	for i, pkg := range pkgs {
   135  		if !slices.Contains(pkg.Plugins, requirements.Name) {
   136  			continue
   137  		}
   138  		if len(pkg.Locations) == 0 {
   139  			log.Warnf("package %s has no locations", pkg.Name)
   140  			continue
   141  		}
   142  		// Use the path where this package is first found.
   143  		path := pkg.Locations[0]
   144  		if _, ok := result[path]; !ok {
   145  			result[path] = make(map[string]packageWithIndex)
   146  		}
   147  		result[path][pkg.Name] = packageWithIndex{pkg, i}
   148  	}
   149  	return result
   150  }
   151  
   152  // resolve performs dependency resolution for packages found in a single requirements.txt.
   153  func (e Enricher) resolve(ctx context.Context, path string, list []*extractor.Package) ([]*extractor.Package, error) {
   154  	overrideClient := resolution.NewOverrideClient(e.Client)
   155  	resolver := pypiresolve.NewResolver(overrideClient)
   156  
   157  	// Resolve the dependencies.
   158  	root := resolve.Version{
   159  		VersionKey: resolve.VersionKey{
   160  			PackageKey: resolve.PackageKey{
   161  				System: resolve.PyPI,
   162  				// Name of root node does not matter
   163  			},
   164  			VersionType: resolve.Concrete,
   165  			// Version of root node does not matter
   166  		}}
   167  	reqs := make([]resolve.RequirementVersion, len(list))
   168  	for i, pkg := range list {
   169  		m := pkg.Metadata.(*requirements.Metadata)
   170  		d, err := pypi.ParseDependency(m.Requirement)
   171  		if err != nil {
   172  			log.Errorf("failed to parse requirement %s: %v", m.Requirement, err)
   173  			continue
   174  		}
   175  
   176  		t := dep.NewType()
   177  		if d.Extras != "" {
   178  			t.AddAttr(dep.EnabledDependencies, d.Extras)
   179  		}
   180  		if d.Environment != "" {
   181  			t.AddAttr(dep.Environment, d.Environment)
   182  		}
   183  
   184  		reqs[i] = resolve.RequirementVersion{
   185  			VersionKey: resolve.VersionKey{
   186  				PackageKey: resolve.PackageKey{
   187  					System: resolve.PyPI,
   188  					Name:   d.Name,
   189  				},
   190  				VersionType: resolve.Requirement,
   191  				Version:     d.Constraint,
   192  			},
   193  			Type: t,
   194  		}
   195  	}
   196  	overrideClient.AddVersion(root, reqs)
   197  
   198  	g, err := resolver.Resolve(ctx, root.VersionKey)
   199  	if err != nil {
   200  		return nil, err
   201  	}
   202  	if g.Error != "" {
   203  		return nil, errors.New(g.Error)
   204  	}
   205  
   206  	pkgs := make([]*extractor.Package, len(g.Nodes)-1)
   207  	for i := 1; i < len(g.Nodes); i++ {
   208  		// Ignore the first node which is the root.
   209  		node := g.Nodes[i]
   210  		pkgs[i-1] = &extractor.Package{
   211  			Name:      node.Version.Name,
   212  			Version:   node.Version.Version,
   213  			PURLType:  purl.TypePyPi,
   214  			Locations: []string{path},
   215  			Plugins:   []string{Name},
   216  		}
   217  	}
   218  	return pkgs, nil
   219  }