github.com/anchore/syft@v1.38.2/syft/pkg/cataloger/python/dependency.go (about)

     1  package python
     2  
     3  import (
     4  	"context"
     5  	"fmt"
     6  	"path"
     7  	"strings"
     8  
     9  	"github.com/anchore/syft/internal"
    10  	"github.com/anchore/syft/internal/log"
    11  	"github.com/anchore/syft/internal/relationship"
    12  	"github.com/anchore/syft/syft/artifact"
    13  	"github.com/anchore/syft/syft/file"
    14  	"github.com/anchore/syft/syft/pkg"
    15  	"github.com/anchore/syft/syft/pkg/cataloger/internal/dependency"
    16  )
    17  
    18  func poetryLockDependencySpecifier(p pkg.Package) dependency.Specification { //nolint:dupl // this is very similar to the uv lock dependency specifier, but should remain separate
    19  	meta, ok := p.Metadata.(pkg.PythonPoetryLockEntry)
    20  	if !ok {
    21  		log.Tracef("cataloger failed to extract poetry lock metadata for package %+v", p.Name)
    22  		return dependency.Specification{}
    23  	}
    24  
    25  	// this package reference always includes the package name and no extras
    26  	provides := []string{packageRef(p.Name, "")}
    27  
    28  	var requires []string
    29  	// add required dependencies (those which a marker is not present indicating it is explicitly optional or needs an extra marker)
    30  	for _, dep := range meta.Dependencies {
    31  		if isDependencyForExtra(dep) {
    32  			continue
    33  		}
    34  
    35  		// we always have the base package requirement without any extras to get base dependencies
    36  		requires = append(requires, packageRef(dep.Name, ""))
    37  
    38  		// if there are extras, we need to add a requirement for each extra individually
    39  		// for example:
    40  		//    uvicorn = {version = ">=0.12.0", extras = ["standard", "else"]}
    41  		// then we must install uvicorn with the extras "standard" and "else" to satisfy the requirement
    42  		for _, extra := range dep.Extras {
    43  			// always refer to extras with the package name (e.g. name[extra])
    44  			// note: this must always be done independent of other extras (e.g.  name[extra1] and name[extra2] separately
    45  			// is correct and name[extra1,extra2] will result in dependency resolution failure)
    46  			requires = append(requires, packageRef(dep.Name, extra))
    47  		}
    48  	}
    49  
    50  	var variants []dependency.ProvidesRequires
    51  	for _, extra := range meta.Extras {
    52  		variants = append(variants,
    53  			dependency.ProvidesRequires{
    54  				// always refer to extras with the package name (e.g. name[extra])
    55  				// note: this must always be done independent of other extras (e.g.  name[extra1] and name[extra2] separately
    56  				// is correct and name[extra1,extra2] will result in dependency resolution failure)
    57  				Provides: []string{packageRef(p.Name, extra.Name)},
    58  				Requires: extractPackageNames(extra.Dependencies),
    59  			},
    60  		)
    61  	}
    62  
    63  	return dependency.Specification{
    64  		ProvidesRequires: dependency.ProvidesRequires{
    65  			Provides: provides,
    66  			Requires: requires,
    67  		},
    68  		Variants: variants,
    69  	}
    70  }
    71  
    72  func isDependencyForExtra(dep pkg.PythonPoetryLockDependencyEntry) bool {
    73  	return strings.Contains(dep.Markers, "extra ==")
    74  }
    75  
    76  func packageRef(name, extra string) string {
    77  	cleanExtra := strings.TrimSpace(extra)
    78  	cleanName := strings.TrimSpace(name)
    79  	if cleanExtra == "" {
    80  		return cleanName
    81  	}
    82  	return cleanName + "[" + cleanExtra + "]"
    83  }
    84  
    85  func pdmLockDependencySpecifier(p pkg.Package) dependency.Specification {
    86  	meta, ok := p.Metadata.(pkg.PythonPdmLockEntry)
    87  	if !ok {
    88  		log.Tracef("cataloger failed to extract pdm lock metadata for package %+v", p.Name)
    89  		return dependency.Specification{}
    90  	}
    91  
    92  	// base package provides the package name without extras
    93  	provides := []string{p.Name}
    94  
    95  	// base requirements from Dependencies field
    96  	var requires []string
    97  	for _, dep := range meta.Dependencies {
    98  		depName := extractPackageName(dep)
    99  		if depName == "" {
   100  			continue
   101  		}
   102  		requires = append(requires, depName)
   103  	}
   104  
   105  	// create variants for each extras combination
   106  	var variants []dependency.ProvidesRequires
   107  	for _, extraVariant := range meta.Extras {
   108  		// each extra in the variant provides packagename[extra]
   109  		var variantProvides []string
   110  		for _, extra := range extraVariant.Extras {
   111  			variantProvides = append(variantProvides, packageRef(p.Name, extra))
   112  		}
   113  
   114  		// extract dependencies for this variant, excluding self-references
   115  		var variantRequires []string
   116  		for _, dep := range extraVariant.Dependencies {
   117  			depName := extractPackageName(dep)
   118  			if depName == "" || depName == p.Name {
   119  				// skip empty or self-references (e.g., coverage[toml] depends on coverage==7.4.1)
   120  				continue
   121  			}
   122  			variantRequires = append(variantRequires, depName)
   123  		}
   124  
   125  		if len(variantProvides) > 0 {
   126  			variants = append(variants, dependency.ProvidesRequires{
   127  				Provides: variantProvides,
   128  				Requires: variantRequires,
   129  			})
   130  		}
   131  	}
   132  
   133  	return dependency.Specification{
   134  		ProvidesRequires: dependency.ProvidesRequires{
   135  			Provides: provides,
   136  			Requires: requires,
   137  		},
   138  		Variants: variants,
   139  	}
   140  }
   141  
   142  func wheelEggDependencySpecifier(p pkg.Package) dependency.Specification {
   143  	meta, ok := p.Metadata.(pkg.PythonPackage)
   144  	if !ok {
   145  		log.Tracef("cataloger failed to extract wheel/egg metadata for package %+v", p.Name)
   146  		return dependency.Specification{}
   147  	}
   148  
   149  	provides := []string{p.Name}
   150  
   151  	var requires []string
   152  	// extract dependencies from the Requires-Dist field
   153  	// note: this also includes Extras, which are currently partially supported.
   154  	// Specifically, we claim that a package needs all extra dependencies and a relationship will be created
   155  	// if that dependency happens to be installed. We currently do not do any version constraint resolution
   156  	// or similar behaviors to ensure what is installed will function correctly. This is somewhat consistent with
   157  	// how extras function, where there tends to be a try/except around imports as an indication if that extra
   158  	// functionality should be executed or not (there isn't a package declaration to reference at runtime).
   159  	for _, depSpecifier := range meta.RequiresDist {
   160  		depSpecifier = extractPackageName(depSpecifier)
   161  		if depSpecifier == "" {
   162  			continue
   163  		}
   164  		requires = append(requires, depSpecifier)
   165  	}
   166  
   167  	return dependency.Specification{
   168  		ProvidesRequires: dependency.ProvidesRequires{
   169  			Provides: provides,
   170  			Requires: requires,
   171  		},
   172  	}
   173  }
   174  
   175  // extractPackageName removes any extras, version constraints or environment markers from a dependency specifier string.
   176  // For example: "requests[security] >= 2.8.1 ; python_version < '3'" becomes "requests"
   177  func extractPackageName(s string) string {
   178  	// examples:
   179  	// requests [security,tests]		--> requests
   180  	// requests >= 2.8.1			--> requests
   181  	// requests (>= 2.8.1)			--> requests
   182  	// requests ; python_version < "2.7"	--> requests
   183  
   184  	name := strings.TrimSpace(internal.SplitAny(s, "[(<!=>~;")[0])
   185  	// normalize the name to match how packages are stored (lowercase, with hyphens instead of underscores)
   186  	return normalize(name)
   187  }
   188  
   189  // extractPackageNames applies extractPackageName to each string in the slice.
   190  func extractPackageNames(ss []string) []string {
   191  	var names []string
   192  	for _, s := range ss {
   193  		names = append(names, extractPackageName(s))
   194  	}
   195  	return names
   196  }
   197  
   198  func wheelEggRelationships(ctx context.Context, resolver file.Resolver, pkgs []pkg.Package, rels []artifact.Relationship, err error) ([]pkg.Package, []artifact.Relationship, error) {
   199  	if err != nil {
   200  		return pkgs, rels, err
   201  	}
   202  
   203  	pkgsBySitePackageAndName := make(map[string]map[string]pkg.Package)
   204  
   205  	for _, p := range pkgs {
   206  		sitePackagesDir := deriveSitePackageDir(p)
   207  		if pkgsBySitePackageAndName[sitePackagesDir] == nil {
   208  			pkgsBySitePackageAndName[sitePackagesDir] = make(map[string]pkg.Package)
   209  		}
   210  		pkgsBySitePackageAndName[sitePackagesDir][p.Name] = p
   211  	}
   212  
   213  	var sitePackagesDirs []string
   214  	for site := range pkgsBySitePackageAndName {
   215  		sitePackagesDirs = append(sitePackagesDirs, site)
   216  	}
   217  
   218  	venvs, globalSitePackages, err := findVirtualEnvs(ctx, resolver, sitePackagesDirs)
   219  	if err != nil {
   220  		return nil, nil, err
   221  	}
   222  
   223  	relationshipsProcessor := dependency.Processor(wheelEggDependencySpecifier)
   224  	relationshipIndex := relationship.NewIndex(rels...)
   225  
   226  	// create relationships between packages within each global site package directory
   227  	for _, globalSitePackage := range globalSitePackages {
   228  		sitePkgs := collectPackages(pkgsBySitePackageAndName, []string{globalSitePackage})
   229  		_, siteRels, err := relationshipsProcessor(sitePkgs, nil, nil)
   230  		if err != nil {
   231  			return nil, nil, fmt.Errorf("failed to resolve relationships for global site package %q: %w", globalSitePackage, err)
   232  		}
   233  		relationshipIndex.Add(siteRels...)
   234  	}
   235  
   236  	// create relationships between packages within each virtual env site package directory (that doesn't link to a global site-packages directory)
   237  	for _, venv := range venvs {
   238  		if venv.IncludeSystemSitePackages {
   239  			continue
   240  		}
   241  		sitePkgs := collectPackages(pkgsBySitePackageAndName, []string{venv.SitePackagesPath})
   242  		_, siteRels, err := relationshipsProcessor(sitePkgs, nil, nil)
   243  		if err != nil {
   244  			return nil, nil, fmt.Errorf("failed to resolve relationships for virtualenv site package %q: %w", venv.SitePackagesPath, err)
   245  		}
   246  		relationshipIndex.Add(siteRels...)
   247  	}
   248  
   249  	// create relationships between packages within each virtual env site package directory (that links to a global site package directory)
   250  	for _, venv := range venvs {
   251  		if !venv.IncludeSystemSitePackages {
   252  			continue
   253  		}
   254  
   255  		globalSitePackage := venv.matchSystemPackagesPath(globalSitePackages)
   256  
   257  		sitePkgs := collectPackages(pkgsBySitePackageAndName, []string{venv.SitePackagesPath, globalSitePackage})
   258  		_, siteRels, err := relationshipsProcessor(sitePkgs, nil, nil)
   259  		if err != nil {
   260  			return nil, nil, fmt.Errorf("failed to resolve relationships for virtualenv + global site package path %q + %q: %w", venv.SitePackagesPath, globalSitePackage, err)
   261  		}
   262  
   263  		relationshipIndex.Add(siteRels...)
   264  	}
   265  
   266  	return pkgs, relationshipIndex.All(), err
   267  }
   268  
   269  func collectPackages(pkgsBySitePackageAndName map[string]map[string]pkg.Package, sites []string) []pkg.Package {
   270  	// get packages for all sites, preferring packages from earlier sites for packages with the same name
   271  
   272  	pkgByName := make(map[string]struct{})
   273  	var pkgs []pkg.Package
   274  	for _, site := range sites {
   275  		for name, p := range pkgsBySitePackageAndName[site] {
   276  			if _, ok := pkgByName[name]; !ok {
   277  				pkgByName[name] = struct{}{}
   278  				pkgs = append(pkgs, p)
   279  			}
   280  		}
   281  	}
   282  
   283  	return pkgs
   284  }
   285  
   286  func deriveSitePackageDir(p pkg.Package) string {
   287  	for _, l := range packagePrimaryLocations(p) {
   288  		sitePackageDir := extractSitePackageDir(l.RealPath)
   289  		if sitePackageDir != "" {
   290  			return sitePackageDir
   291  		}
   292  	}
   293  	return ""
   294  }
   295  
   296  func packagePrimaryLocations(p pkg.Package) []file.Location {
   297  	var locs []file.Location
   298  	for _, l := range p.Locations.ToSlice() {
   299  		a, ok := l.Annotations[pkg.EvidenceAnnotationKey]
   300  		if !ok {
   301  			continue
   302  		}
   303  		if a == pkg.PrimaryEvidenceAnnotation {
   304  			locs = append(locs, l)
   305  		}
   306  	}
   307  	return locs
   308  }
   309  
   310  func extractSitePackageDir(p string) string {
   311  	// walk up the path until we find a site-packages or dist-packages directory
   312  	fields := strings.Split(path.Dir(p), "/")
   313  	for i := len(fields) - 1; i >= 0; i-- {
   314  		if fields[i] == "site-packages" || fields[i] == "dist-packages" {
   315  			return path.Join(fields[:i+1]...)
   316  		}
   317  	}
   318  	return ""
   319  }