github.com/anchore/syft@v1.38.2/syft/pkg/cataloger/python/dependency.go (about) 1 package python 2 3 import ( 4 "context" 5 "fmt" 6 "path" 7 "strings" 8 9 "github.com/anchore/syft/internal" 10 "github.com/anchore/syft/internal/log" 11 "github.com/anchore/syft/internal/relationship" 12 "github.com/anchore/syft/syft/artifact" 13 "github.com/anchore/syft/syft/file" 14 "github.com/anchore/syft/syft/pkg" 15 "github.com/anchore/syft/syft/pkg/cataloger/internal/dependency" 16 ) 17 18 func poetryLockDependencySpecifier(p pkg.Package) dependency.Specification { //nolint:dupl // this is very similar to the uv lock dependency specifier, but should remain separate 19 meta, ok := p.Metadata.(pkg.PythonPoetryLockEntry) 20 if !ok { 21 log.Tracef("cataloger failed to extract poetry lock metadata for package %+v", p.Name) 22 return dependency.Specification{} 23 } 24 25 // this package reference always includes the package name and no extras 26 provides := []string{packageRef(p.Name, "")} 27 28 var requires []string 29 // add required dependencies (those which a marker is not present indicating it is explicitly optional or needs an extra marker) 30 for _, dep := range meta.Dependencies { 31 if isDependencyForExtra(dep) { 32 continue 33 } 34 35 // we always have the base package requirement without any extras to get base dependencies 36 requires = append(requires, packageRef(dep.Name, "")) 37 38 // if there are extras, we need to add a requirement for each extra individually 39 // for example: 40 // uvicorn = {version = ">=0.12.0", extras = ["standard", "else"]} 41 // then we must install uvicorn with the extras "standard" and "else" to satisfy the requirement 42 for _, extra := range dep.Extras { 43 // always refer to extras with the package name (e.g. name[extra]) 44 // note: this must always be done independent of other extras (e.g. name[extra1] and name[extra2] separately 45 // is correct and name[extra1,extra2] will result in dependency resolution failure) 46 requires = append(requires, packageRef(dep.Name, extra)) 47 } 48 } 49 50 var variants []dependency.ProvidesRequires 51 for _, extra := range meta.Extras { 52 variants = append(variants, 53 dependency.ProvidesRequires{ 54 // always refer to extras with the package name (e.g. name[extra]) 55 // note: this must always be done independent of other extras (e.g. name[extra1] and name[extra2] separately 56 // is correct and name[extra1,extra2] will result in dependency resolution failure) 57 Provides: []string{packageRef(p.Name, extra.Name)}, 58 Requires: extractPackageNames(extra.Dependencies), 59 }, 60 ) 61 } 62 63 return dependency.Specification{ 64 ProvidesRequires: dependency.ProvidesRequires{ 65 Provides: provides, 66 Requires: requires, 67 }, 68 Variants: variants, 69 } 70 } 71 72 func isDependencyForExtra(dep pkg.PythonPoetryLockDependencyEntry) bool { 73 return strings.Contains(dep.Markers, "extra ==") 74 } 75 76 func packageRef(name, extra string) string { 77 cleanExtra := strings.TrimSpace(extra) 78 cleanName := strings.TrimSpace(name) 79 if cleanExtra == "" { 80 return cleanName 81 } 82 return cleanName + "[" + cleanExtra + "]" 83 } 84 85 func pdmLockDependencySpecifier(p pkg.Package) dependency.Specification { 86 meta, ok := p.Metadata.(pkg.PythonPdmLockEntry) 87 if !ok { 88 log.Tracef("cataloger failed to extract pdm lock metadata for package %+v", p.Name) 89 return dependency.Specification{} 90 } 91 92 // base package provides the package name without extras 93 provides := []string{p.Name} 94 95 // base requirements from Dependencies field 96 var requires []string 97 for _, dep := range meta.Dependencies { 98 depName := extractPackageName(dep) 99 if depName == "" { 100 continue 101 } 102 requires = append(requires, depName) 103 } 104 105 // create variants for each extras combination 106 var variants []dependency.ProvidesRequires 107 for _, extraVariant := range meta.Extras { 108 // each extra in the variant provides packagename[extra] 109 var variantProvides []string 110 for _, extra := range extraVariant.Extras { 111 variantProvides = append(variantProvides, packageRef(p.Name, extra)) 112 } 113 114 // extract dependencies for this variant, excluding self-references 115 var variantRequires []string 116 for _, dep := range extraVariant.Dependencies { 117 depName := extractPackageName(dep) 118 if depName == "" || depName == p.Name { 119 // skip empty or self-references (e.g., coverage[toml] depends on coverage==7.4.1) 120 continue 121 } 122 variantRequires = append(variantRequires, depName) 123 } 124 125 if len(variantProvides) > 0 { 126 variants = append(variants, dependency.ProvidesRequires{ 127 Provides: variantProvides, 128 Requires: variantRequires, 129 }) 130 } 131 } 132 133 return dependency.Specification{ 134 ProvidesRequires: dependency.ProvidesRequires{ 135 Provides: provides, 136 Requires: requires, 137 }, 138 Variants: variants, 139 } 140 } 141 142 func wheelEggDependencySpecifier(p pkg.Package) dependency.Specification { 143 meta, ok := p.Metadata.(pkg.PythonPackage) 144 if !ok { 145 log.Tracef("cataloger failed to extract wheel/egg metadata for package %+v", p.Name) 146 return dependency.Specification{} 147 } 148 149 provides := []string{p.Name} 150 151 var requires []string 152 // extract dependencies from the Requires-Dist field 153 // note: this also includes Extras, which are currently partially supported. 154 // Specifically, we claim that a package needs all extra dependencies and a relationship will be created 155 // if that dependency happens to be installed. We currently do not do any version constraint resolution 156 // or similar behaviors to ensure what is installed will function correctly. This is somewhat consistent with 157 // how extras function, where there tends to be a try/except around imports as an indication if that extra 158 // functionality should be executed or not (there isn't a package declaration to reference at runtime). 159 for _, depSpecifier := range meta.RequiresDist { 160 depSpecifier = extractPackageName(depSpecifier) 161 if depSpecifier == "" { 162 continue 163 } 164 requires = append(requires, depSpecifier) 165 } 166 167 return dependency.Specification{ 168 ProvidesRequires: dependency.ProvidesRequires{ 169 Provides: provides, 170 Requires: requires, 171 }, 172 } 173 } 174 175 // extractPackageName removes any extras, version constraints or environment markers from a dependency specifier string. 176 // For example: "requests[security] >= 2.8.1 ; python_version < '3'" becomes "requests" 177 func extractPackageName(s string) string { 178 // examples: 179 // requests [security,tests] --> requests 180 // requests >= 2.8.1 --> requests 181 // requests (>= 2.8.1) --> requests 182 // requests ; python_version < "2.7" --> requests 183 184 name := strings.TrimSpace(internal.SplitAny(s, "[(<!=>~;")[0]) 185 // normalize the name to match how packages are stored (lowercase, with hyphens instead of underscores) 186 return normalize(name) 187 } 188 189 // extractPackageNames applies extractPackageName to each string in the slice. 190 func extractPackageNames(ss []string) []string { 191 var names []string 192 for _, s := range ss { 193 names = append(names, extractPackageName(s)) 194 } 195 return names 196 } 197 198 func wheelEggRelationships(ctx context.Context, resolver file.Resolver, pkgs []pkg.Package, rels []artifact.Relationship, err error) ([]pkg.Package, []artifact.Relationship, error) { 199 if err != nil { 200 return pkgs, rels, err 201 } 202 203 pkgsBySitePackageAndName := make(map[string]map[string]pkg.Package) 204 205 for _, p := range pkgs { 206 sitePackagesDir := deriveSitePackageDir(p) 207 if pkgsBySitePackageAndName[sitePackagesDir] == nil { 208 pkgsBySitePackageAndName[sitePackagesDir] = make(map[string]pkg.Package) 209 } 210 pkgsBySitePackageAndName[sitePackagesDir][p.Name] = p 211 } 212 213 var sitePackagesDirs []string 214 for site := range pkgsBySitePackageAndName { 215 sitePackagesDirs = append(sitePackagesDirs, site) 216 } 217 218 venvs, globalSitePackages, err := findVirtualEnvs(ctx, resolver, sitePackagesDirs) 219 if err != nil { 220 return nil, nil, err 221 } 222 223 relationshipsProcessor := dependency.Processor(wheelEggDependencySpecifier) 224 relationshipIndex := relationship.NewIndex(rels...) 225 226 // create relationships between packages within each global site package directory 227 for _, globalSitePackage := range globalSitePackages { 228 sitePkgs := collectPackages(pkgsBySitePackageAndName, []string{globalSitePackage}) 229 _, siteRels, err := relationshipsProcessor(sitePkgs, nil, nil) 230 if err != nil { 231 return nil, nil, fmt.Errorf("failed to resolve relationships for global site package %q: %w", globalSitePackage, err) 232 } 233 relationshipIndex.Add(siteRels...) 234 } 235 236 // create relationships between packages within each virtual env site package directory (that doesn't link to a global site-packages directory) 237 for _, venv := range venvs { 238 if venv.IncludeSystemSitePackages { 239 continue 240 } 241 sitePkgs := collectPackages(pkgsBySitePackageAndName, []string{venv.SitePackagesPath}) 242 _, siteRels, err := relationshipsProcessor(sitePkgs, nil, nil) 243 if err != nil { 244 return nil, nil, fmt.Errorf("failed to resolve relationships for virtualenv site package %q: %w", venv.SitePackagesPath, err) 245 } 246 relationshipIndex.Add(siteRels...) 247 } 248 249 // create relationships between packages within each virtual env site package directory (that links to a global site package directory) 250 for _, venv := range venvs { 251 if !venv.IncludeSystemSitePackages { 252 continue 253 } 254 255 globalSitePackage := venv.matchSystemPackagesPath(globalSitePackages) 256 257 sitePkgs := collectPackages(pkgsBySitePackageAndName, []string{venv.SitePackagesPath, globalSitePackage}) 258 _, siteRels, err := relationshipsProcessor(sitePkgs, nil, nil) 259 if err != nil { 260 return nil, nil, fmt.Errorf("failed to resolve relationships for virtualenv + global site package path %q + %q: %w", venv.SitePackagesPath, globalSitePackage, err) 261 } 262 263 relationshipIndex.Add(siteRels...) 264 } 265 266 return pkgs, relationshipIndex.All(), err 267 } 268 269 func collectPackages(pkgsBySitePackageAndName map[string]map[string]pkg.Package, sites []string) []pkg.Package { 270 // get packages for all sites, preferring packages from earlier sites for packages with the same name 271 272 pkgByName := make(map[string]struct{}) 273 var pkgs []pkg.Package 274 for _, site := range sites { 275 for name, p := range pkgsBySitePackageAndName[site] { 276 if _, ok := pkgByName[name]; !ok { 277 pkgByName[name] = struct{}{} 278 pkgs = append(pkgs, p) 279 } 280 } 281 } 282 283 return pkgs 284 } 285 286 func deriveSitePackageDir(p pkg.Package) string { 287 for _, l := range packagePrimaryLocations(p) { 288 sitePackageDir := extractSitePackageDir(l.RealPath) 289 if sitePackageDir != "" { 290 return sitePackageDir 291 } 292 } 293 return "" 294 } 295 296 func packagePrimaryLocations(p pkg.Package) []file.Location { 297 var locs []file.Location 298 for _, l := range p.Locations.ToSlice() { 299 a, ok := l.Annotations[pkg.EvidenceAnnotationKey] 300 if !ok { 301 continue 302 } 303 if a == pkg.PrimaryEvidenceAnnotation { 304 locs = append(locs, l) 305 } 306 } 307 return locs 308 } 309 310 func extractSitePackageDir(p string) string { 311 // walk up the path until we find a site-packages or dist-packages directory 312 fields := strings.Split(path.Dir(p), "/") 313 for i := len(fields) - 1; i >= 0; i-- { 314 if fields[i] == "site-packages" || fields[i] == "dist-packages" { 315 return path.Join(fields[:i+1]...) 316 } 317 } 318 return "" 319 }