github.com/anchore/syft@v1.38.2/syft/pkg/cataloger/python/parse_pdm_lock.go (about) 1 package python 2 3 import ( 4 "context" 5 "fmt" 6 "strings" 7 8 "github.com/BurntSushi/toml" 9 10 "github.com/anchore/syft/internal/unknown" 11 "github.com/anchore/syft/syft/artifact" 12 "github.com/anchore/syft/syft/file" 13 "github.com/anchore/syft/syft/pkg" 14 "github.com/anchore/syft/syft/pkg/cataloger/generic" 15 "github.com/anchore/syft/syft/pkg/cataloger/internal/dependency" 16 ) 17 18 type pdmLock struct { 19 Metadata struct { 20 Groups []string `toml:"groups"` 21 Strategy []string `toml:"strategy"` 22 LockVersion string `toml:"lock_version"` 23 ContentHash string `toml:"content_hash"` 24 } `toml:"metadata"` 25 Package []pdmLockPackage `toml:"package"` 26 } 27 28 type pdmLockPackage struct { 29 Name string `toml:"name"` 30 Version string `toml:"version"` 31 RequiresPython string `toml:"requires_python"` 32 Summary string `toml:"summary"` 33 Marker string `toml:"marker"` 34 Dependencies []string `toml:"dependencies"` 35 Extras []string `toml:"extras"` 36 Files []pdmLockPackageFile `toml:"files"` 37 } 38 39 type pdmLockPackageFile struct { 40 File string `toml:"file"` 41 Hash string `toml:"hash"` 42 } 43 44 type pdmLockParser struct { 45 cfg CatalogerConfig 46 licenseResolver pythonLicenseResolver 47 } 48 49 func newPdmLockParser(cfg CatalogerConfig) pdmLockParser { 50 return pdmLockParser{ 51 cfg: cfg, 52 licenseResolver: newPythonLicenseResolver(cfg), 53 } 54 } 55 56 // mergePdmLockPackages merges multiple package entries (with different extras) into a single PythonPdmLockEntry. 57 // 58 // PDM vs Poetry Lock File Behavior: 59 // 60 // PDM creates separate [[package]] entries in the lock file for each extras combination that is actually used 61 // in the dependency tree. For example, if your project depends on coverage[toml], PDM will create TWO entries: 62 // 1. A base "coverage" package entry (no extras field) 63 // 2. A "coverage" package entry with extras = ["toml"] and its own dependencies 64 // 65 // Poetry, in contrast, creates a SINGLE package entry per package and uses conditional markers to indicate 66 // when extra dependencies should be included. 67 // 68 // SBOM Representation: 69 // 70 // Semantically, "coverage" and "coverage[toml]" are NOT separate packages - they represent the same package 71 // with optional features enabled. The [toml] syntax is Python's way of requesting optional dependencies. 72 // Therefore, in the SBOM we create a SINGLE package node per name+version to accurately represent that: 73 // 74 // - There is one logical package (e.g., "coverage") 75 // - The package may be used with different feature sets (extras) by different dependents 76 // - For example: "pytest-cov" depends on "coverage[toml]" while another package might depend on base "coverage" 77 // 78 // This function consolidates PDM's multiple entries into: 79 // - Base package metadata (files, summary, dependencies without extras) 80 // - Extras variants (each combination of extras with its specific dependencies) 81 // 82 // This approach ensures dependency resolution works correctly: when a package requires "coverage[toml]", 83 // the dependency resolver can match it to the "coverage" package node and its "toml" variant. 84 func mergePdmLockPackages(packages []pdmLockPackage) pkg.PythonPdmLockEntry { 85 if len(packages) == 0 { 86 return pkg.PythonPdmLockEntry{} 87 } 88 89 var entry pkg.PythonPdmLockEntry 90 var baseFiles []pkg.PythonPdmFileEntry 91 92 // Separate base package from extras variants 93 // note: this logic processes packages in order and assumes the base package (no extras) appears 94 // before extras variants in the PDM lock file, which is PDM's current behavior 95 for _, p := range packages { 96 // Convert files format 97 var files []pkg.PythonPdmFileEntry 98 for _, f := range p.Files { 99 // skip files with invalid hash format (missing colon separator between algorithm and value) 100 if colonIndex := strings.Index(f.Hash, ":"); colonIndex != -1 { 101 algorithm := f.Hash[:colonIndex] 102 value := f.Hash[colonIndex+1:] 103 104 files = append(files, pkg.PythonPdmFileEntry{ 105 URL: f.File, 106 Digest: pkg.PythonFileDigest{ 107 Algorithm: algorithm, 108 Value: value, 109 }, 110 }) 111 } 112 } 113 114 // Base package (no extras field or empty extras) 115 if len(p.Extras) == 0 { 116 entry.Summary = p.Summary 117 entry.RequiresPython = p.RequiresPython 118 entry.Dependencies = p.Dependencies 119 entry.Marker = p.Marker 120 baseFiles = files 121 } else { 122 // Extras variant 123 variant := pkg.PythonPdmLockExtraVariant{ 124 Extras: p.Extras, 125 Dependencies: p.Dependencies, 126 Marker: p.Marker, 127 } 128 129 // Only include files if different from base 130 // For now, we'll compare lengths as a simple check 131 if len(baseFiles) == 0 || !filesEqual(baseFiles, files) { 132 variant.Files = files 133 } 134 135 entry.Extras = append(entry.Extras, variant) 136 } 137 } 138 139 // Store base files 140 entry.Files = baseFiles 141 142 // If no base package was found but we have extras, use first package's metadata as base 143 if entry.Summary == "" && len(packages) > 0 { 144 entry.Summary = packages[0].Summary 145 entry.RequiresPython = packages[0].RequiresPython 146 entry.Dependencies = packages[0].Dependencies 147 entry.Marker = packages[0].Marker 148 } 149 150 return entry 151 } 152 153 // filesEqual checks if two file slices are equal by comparing URL and digest fields. 154 // assumes files appear in the same order in both slices. 155 func filesEqual(a, b []pkg.PythonPdmFileEntry) bool { 156 if len(a) != len(b) { 157 return false 158 } 159 for i := range a { 160 if a[i].URL != b[i].URL || a[i].Digest.Algorithm != b[i].Digest.Algorithm || a[i].Digest.Value != b[i].Digest.Value { 161 return false 162 } 163 } 164 return true 165 } 166 167 // parsePdmLock is a parser function for pdm.lock contents, returning python packages discovered. 168 func (plp pdmLockParser) parsePdmLock(ctx context.Context, _ file.Resolver, _ *generic.Environment, reader file.LocationReadCloser) ([]pkg.Package, []artifact.Relationship, error) { 169 var lock pdmLock 170 _, err := toml.NewDecoder(reader).Decode(&lock) 171 if err != nil { 172 return nil, nil, fmt.Errorf("failed to parse pdm.lock file: %w", err) 173 } 174 175 // Group packages by name@version since PDM creates separate entries for different extras combinations 176 packageGroups := make(map[string][]pdmLockPackage) 177 for _, p := range lock.Package { 178 key := p.Name + "@" + p.Version 179 packageGroups[key] = append(packageGroups[key], p) 180 } 181 182 // Merge package groups and create packages 183 var pkgs []pkg.Package 184 for _, group := range packageGroups { 185 if len(group) == 0 { 186 continue 187 } 188 189 // Use first package for name/version (same across all entries in group) 190 name := group[0].Name 191 version := group[0].Version 192 193 // Merge all entries into single metadata 194 pythonPkgMetadata := mergePdmLockPackages(group) 195 196 pkgs = append(pkgs, newPackageForIndexWithMetadata( 197 ctx, 198 plp.licenseResolver, 199 name, 200 version, 201 pythonPkgMetadata, 202 reader.WithAnnotation(pkg.EvidenceAnnotationKey, pkg.PrimaryEvidenceAnnotation), 203 )) 204 } 205 206 relationships := dependency.Resolve(pdmLockDependencySpecifier, pkgs) 207 208 return pkgs, relationships, unknown.IfEmptyf(pkgs, "unable to determine packages") 209 }