// Source: github.com/anchore/syft@v1.38.2/syft/pkg/cataloger/python/parse_pdm_lock.go

     1  package python
     2  
     3  import (
     4  	"context"
     5  	"fmt"
     6  	"strings"
     7  
     8  	"github.com/BurntSushi/toml"
     9  
    10  	"github.com/anchore/syft/internal/unknown"
    11  	"github.com/anchore/syft/syft/artifact"
    12  	"github.com/anchore/syft/syft/file"
    13  	"github.com/anchore/syft/syft/pkg"
    14  	"github.com/anchore/syft/syft/pkg/cataloger/generic"
    15  	"github.com/anchore/syft/syft/pkg/cataloger/internal/dependency"
    16  )
    17  
    18  type pdmLock struct {
    19  	Metadata struct {
    20  		Groups      []string `toml:"groups"`
    21  		Strategy    []string `toml:"strategy"`
    22  		LockVersion string   `toml:"lock_version"`
    23  		ContentHash string   `toml:"content_hash"`
    24  	} `toml:"metadata"`
    25  	Package []pdmLockPackage `toml:"package"`
    26  }
    27  
    28  type pdmLockPackage struct {
    29  	Name           string               `toml:"name"`
    30  	Version        string               `toml:"version"`
    31  	RequiresPython string               `toml:"requires_python"`
    32  	Summary        string               `toml:"summary"`
    33  	Marker         string               `toml:"marker"`
    34  	Dependencies   []string             `toml:"dependencies"`
    35  	Extras         []string             `toml:"extras"`
    36  	Files          []pdmLockPackageFile `toml:"files"`
    37  }
    38  
    39  type pdmLockPackageFile struct {
    40  	File string `toml:"file"`
    41  	Hash string `toml:"hash"`
    42  }
    43  
    44  type pdmLockParser struct {
    45  	cfg             CatalogerConfig
    46  	licenseResolver pythonLicenseResolver
    47  }
    48  
    49  func newPdmLockParser(cfg CatalogerConfig) pdmLockParser {
    50  	return pdmLockParser{
    51  		cfg:             cfg,
    52  		licenseResolver: newPythonLicenseResolver(cfg),
    53  	}
    54  }
    55  
    56  // mergePdmLockPackages merges multiple package entries (with different extras) into a single PythonPdmLockEntry.
    57  //
    58  // PDM vs Poetry Lock File Behavior:
    59  //
    60  // PDM creates separate [[package]] entries in the lock file for each extras combination that is actually used
    61  // in the dependency tree. For example, if your project depends on coverage[toml], PDM will create TWO entries:
    62  //  1. A base "coverage" package entry (no extras field)
    63  //  2. A "coverage" package entry with extras = ["toml"] and its own dependencies
    64  //
    65  // Poetry, in contrast, creates a SINGLE package entry per package and uses conditional markers to indicate
    66  // when extra dependencies should be included.
    67  //
    68  // SBOM Representation:
    69  //
    70  // Semantically, "coverage" and "coverage[toml]" are NOT separate packages - they represent the same package
    71  // with optional features enabled. The [toml] syntax is Python's way of requesting optional dependencies.
    72  // Therefore, in the SBOM we create a SINGLE package node per name+version to accurately represent that:
    73  //
    74  //   - There is one logical package (e.g., "coverage")
    75  //   - The package may be used with different feature sets (extras) by different dependents
    76  //   - For example: "pytest-cov" depends on "coverage[toml]" while another package might depend on base "coverage"
    77  //
    78  // This function consolidates PDM's multiple entries into:
    79  //   - Base package metadata (files, summary, dependencies without extras)
    80  //   - Extras variants (each combination of extras with its specific dependencies)
    81  //
    82  // This approach ensures dependency resolution works correctly: when a package requires "coverage[toml]",
    83  // the dependency resolver can match it to the "coverage" package node and its "toml" variant.
    84  func mergePdmLockPackages(packages []pdmLockPackage) pkg.PythonPdmLockEntry {
    85  	if len(packages) == 0 {
    86  		return pkg.PythonPdmLockEntry{}
    87  	}
    88  
    89  	var entry pkg.PythonPdmLockEntry
    90  	var baseFiles []pkg.PythonPdmFileEntry
    91  
    92  	// Separate base package from extras variants
    93  	// note: this logic processes packages in order and assumes the base package (no extras) appears
    94  	// before extras variants in the PDM lock file, which is PDM's current behavior
    95  	for _, p := range packages {
    96  		// Convert files format
    97  		var files []pkg.PythonPdmFileEntry
    98  		for _, f := range p.Files {
    99  			// skip files with invalid hash format (missing colon separator between algorithm and value)
   100  			if colonIndex := strings.Index(f.Hash, ":"); colonIndex != -1 {
   101  				algorithm := f.Hash[:colonIndex]
   102  				value := f.Hash[colonIndex+1:]
   103  
   104  				files = append(files, pkg.PythonPdmFileEntry{
   105  					URL: f.File,
   106  					Digest: pkg.PythonFileDigest{
   107  						Algorithm: algorithm,
   108  						Value:     value,
   109  					},
   110  				})
   111  			}
   112  		}
   113  
   114  		// Base package (no extras field or empty extras)
   115  		if len(p.Extras) == 0 {
   116  			entry.Summary = p.Summary
   117  			entry.RequiresPython = p.RequiresPython
   118  			entry.Dependencies = p.Dependencies
   119  			entry.Marker = p.Marker
   120  			baseFiles = files
   121  		} else {
   122  			// Extras variant
   123  			variant := pkg.PythonPdmLockExtraVariant{
   124  				Extras:       p.Extras,
   125  				Dependencies: p.Dependencies,
   126  				Marker:       p.Marker,
   127  			}
   128  
   129  			// Only include files if different from base
   130  			// For now, we'll compare lengths as a simple check
   131  			if len(baseFiles) == 0 || !filesEqual(baseFiles, files) {
   132  				variant.Files = files
   133  			}
   134  
   135  			entry.Extras = append(entry.Extras, variant)
   136  		}
   137  	}
   138  
   139  	// Store base files
   140  	entry.Files = baseFiles
   141  
   142  	// If no base package was found but we have extras, use first package's metadata as base
   143  	if entry.Summary == "" && len(packages) > 0 {
   144  		entry.Summary = packages[0].Summary
   145  		entry.RequiresPython = packages[0].RequiresPython
   146  		entry.Dependencies = packages[0].Dependencies
   147  		entry.Marker = packages[0].Marker
   148  	}
   149  
   150  	return entry
   151  }
   152  
   153  // filesEqual checks if two file slices are equal by comparing URL and digest fields.
   154  // assumes files appear in the same order in both slices.
   155  func filesEqual(a, b []pkg.PythonPdmFileEntry) bool {
   156  	if len(a) != len(b) {
   157  		return false
   158  	}
   159  	for i := range a {
   160  		if a[i].URL != b[i].URL || a[i].Digest.Algorithm != b[i].Digest.Algorithm || a[i].Digest.Value != b[i].Digest.Value {
   161  			return false
   162  		}
   163  	}
   164  	return true
   165  }
   166  
   167  // parsePdmLock is a parser function for pdm.lock contents, returning python packages discovered.
   168  func (plp pdmLockParser) parsePdmLock(ctx context.Context, _ file.Resolver, _ *generic.Environment, reader file.LocationReadCloser) ([]pkg.Package, []artifact.Relationship, error) {
   169  	var lock pdmLock
   170  	_, err := toml.NewDecoder(reader).Decode(&lock)
   171  	if err != nil {
   172  		return nil, nil, fmt.Errorf("failed to parse pdm.lock file: %w", err)
   173  	}
   174  
   175  	// Group packages by name@version since PDM creates separate entries for different extras combinations
   176  	packageGroups := make(map[string][]pdmLockPackage)
   177  	for _, p := range lock.Package {
   178  		key := p.Name + "@" + p.Version
   179  		packageGroups[key] = append(packageGroups[key], p)
   180  	}
   181  
   182  	// Merge package groups and create packages
   183  	var pkgs []pkg.Package
   184  	for _, group := range packageGroups {
   185  		if len(group) == 0 {
   186  			continue
   187  		}
   188  
   189  		// Use first package for name/version (same across all entries in group)
   190  		name := group[0].Name
   191  		version := group[0].Version
   192  
   193  		// Merge all entries into single metadata
   194  		pythonPkgMetadata := mergePdmLockPackages(group)
   195  
   196  		pkgs = append(pkgs, newPackageForIndexWithMetadata(
   197  			ctx,
   198  			plp.licenseResolver,
   199  			name,
   200  			version,
   201  			pythonPkgMetadata,
   202  			reader.WithAnnotation(pkg.EvidenceAnnotationKey, pkg.PrimaryEvidenceAnnotation),
   203  		))
   204  	}
   205  
   206  	relationships := dependency.Resolve(pdmLockDependencySpecifier, pkgs)
   207  
   208  	return pkgs, relationships, unknown.IfEmptyf(pkgs, "unable to determine packages")
   209  }