github.com/anchore/syft@v1.38.2/syft/pkg/cataloger/python/parse_uv_lock.go (about)

     1  package python
     2  
     3  import (
     4  	"bytes"
     5  	"context"
     6  	"fmt"
     7  	"io"
     8  	"sort"
     9  	"strings"
    10  
    11  	"github.com/BurntSushi/toml"
    12  
    13  	"github.com/anchore/syft/internal/log"
    14  	"github.com/anchore/syft/internal/unknown"
    15  	"github.com/anchore/syft/syft/artifact"
    16  	"github.com/anchore/syft/syft/file"
    17  	"github.com/anchore/syft/syft/pkg"
    18  	"github.com/anchore/syft/syft/pkg/cataloger/generic"
    19  	"github.com/anchore/syft/syft/pkg/cataloger/internal/dependency"
    20  )
    21  
    22  // We use this to check for the version before we try to parse.
    23  // The TOML library handily ignores everything that isn't mentioend in the struct.
    24  type uvLockFileVersion struct {
    25  	Version  int `toml:"version"`
    26  	Revision int `toml:"revision"`
    27  }
    28  
    29  type uvLockFile struct {
    30  	Version        int         `toml:"version"`
    31  	Revision       int         `toml:"revision"`
    32  	RequiresPython string      `toml:"requires-python"`
    33  	Packages       []uvPackage `toml:"package"`
    34  }
    35  
    36  type uvPackage struct {
    37  	Name                 string                    `toml:"name"`
    38  	Version              string                    `toml:"version"`
    39  	Source               map[string]string         `toml:"source"` // Possible key values for Source are: registry, git, direct, path, directory, editable, virtual
    40  	Dependencies         uvDependencies            `toml:"dependencies"`
    41  	DevDependencies      map[string]uvDependencies `toml:"dev-dependencies"`
    42  	OptionalDependencies map[string]uvDependencies `toml:"optional-dependencies"`
    43  	Sdist                uvDistribution            `toml:"sdist"`
    44  	Wheels               []uvDistribution          `toml:"wheels"`
    45  	Metadata             uvMetadata                `toml:"metadata"`
    46  }
    47  
    48  type uvDependencies []struct {
    49  	Name    string   `toml:"name"`
    50  	Extras  []string `toml:"extra"`
    51  	Markers string   `toml:"marker"`
    52  }
    53  
    54  type uvDistribution struct {
    55  	URL  string `toml:"url"`
    56  	Hash string `toml:"hash"`
    57  	Size int    `toml:"size"`
    58  }
    59  
    60  type uvRequiresDist []struct {
    61  	Name      string   `toml:"name"`
    62  	Markers   string   `toml:"marker"`
    63  	Extras    []string `toml:"extras"`
    64  	Specifier string   `toml:"specifier"`
    65  }
    66  
    67  type uvMetadata struct {
    68  	RequiresDist   uvRequiresDist `toml:"requires-dist"`
    69  	ProvidesExtras []string       `toml:"provides-extras"`
    70  }
    71  
    72  type uvLockParser struct {
    73  	cfg             CatalogerConfig
    74  	licenseResolver pythonLicenseResolver
    75  }
    76  
    77  func newUvLockParser(cfg CatalogerConfig) uvLockParser {
    78  	return uvLockParser{
    79  		cfg:             cfg,
    80  		licenseResolver: newPythonLicenseResolver(cfg),
    81  	}
    82  }
    83  
    84  // parseUvLock is a parser function for uv.lock contents, returning all the pakcages discovered
    85  func (ulp uvLockParser) parseUvLock(ctx context.Context, _ file.Resolver, _ *generic.Environment, reader file.LocationReadCloser) ([]pkg.Package, []artifact.Relationship, error) {
    86  	pkgs, err := ulp.uvLockPackages(ctx, reader)
    87  	if err != nil {
    88  		return nil, nil, err
    89  	}
    90  
    91  	return pkgs, dependency.Resolve(uvLockDependencySpecifier, pkgs), err
    92  }
    93  
    94  func extractUvIndex(p uvPackage) string {
    95  	// This is a map, but there should only be one key, value pair
    96  	var rvalue string
    97  	for _, value := range p.Source {
    98  		rvalue = value
    99  	}
   100  
   101  	return rvalue
   102  }
   103  
   104  func extractUvDependencies(p uvPackage) []pkg.PythonUvLockDependencyEntry {
   105  	var deps []pkg.PythonUvLockDependencyEntry
   106  	for _, d := range p.Dependencies {
   107  		deps = append(deps, pkg.PythonUvLockDependencyEntry{
   108  			Name:    d.Name,
   109  			Extras:  d.Extras,
   110  			Markers: d.Markers,
   111  		})
   112  	}
   113  	sort.Slice(deps, func(i, j int) bool {
   114  		return deps[i].Name < deps[j].Name
   115  	})
   116  	return deps
   117  }
   118  
   119  func extractUvExtras(p uvPackage) []pkg.PythonUvLockExtraEntry {
   120  	var extras []pkg.PythonUvLockExtraEntry
   121  	for name, depsStruct := range p.OptionalDependencies {
   122  		var extraDeps []string
   123  		for _, deps := range depsStruct {
   124  			extraDeps = append(extraDeps, deps.Name)
   125  		}
   126  		extras = append(extras, pkg.PythonUvLockExtraEntry{
   127  			Name:         name,
   128  			Dependencies: extraDeps,
   129  		})
   130  	}
   131  	return extras
   132  }
   133  
   134  func newPythonUvLockEntry(p uvPackage) pkg.PythonUvLockEntry {
   135  	return pkg.PythonUvLockEntry{
   136  		Index:        extractUvIndex(p),
   137  		Dependencies: extractUvDependencies(p),
   138  		Extras:       extractUvExtras(p),
   139  	}
   140  }
   141  
   142  func (ulp uvLockParser) uvLockPackages(ctx context.Context, reader file.LocationReadCloser) ([]pkg.Package, error) {
   143  	var parsedLockFileVersion uvLockFileVersion
   144  
   145  	// we cannot use the reader twice, so we read the contents first --uv.lock files tend to be small enough
   146  	contents, err := io.ReadAll(reader)
   147  	if err != nil {
   148  		return nil, unknown.New(reader.Location, fmt.Errorf("failed to read uv lock file: %w", err))
   149  	}
   150  
   151  	_, err = toml.NewDecoder(bytes.NewReader(contents)).Decode(&parsedLockFileVersion)
   152  	if err != nil {
   153  		return nil, fmt.Errorf("failed to read uv lock version: %w", err)
   154  	}
   155  
   156  	// We will need to add some logic to parse and branch on different
   157  	// lock file versions should they arise, but this gets us
   158  	// started down this road for now.
   159  	if parsedLockFileVersion.Version > 1 {
   160  		return nil, fmt.Errorf("could not parse uv lock file version %d", parsedLockFileVersion.Version)
   161  	}
   162  
   163  	var parsedLockFile uvLockFile
   164  	_, err = toml.NewDecoder(bytes.NewReader(contents)).Decode(&parsedLockFile)
   165  
   166  	if err != nil {
   167  		return nil, fmt.Errorf("failed to parse uv lock packages: %w", err)
   168  	}
   169  
   170  	// The uv lock file doesn't store the dependency version in the dependency structure.
   171  	// Thus, we need a name -> version map for invoking extractUvDependencies.
   172  	// We then, of course, have to pass it down the call stack.
   173  	var pkgVerMap = make(map[string]string)
   174  	for _, p := range parsedLockFile.Packages {
   175  		pkgVerMap[p.Name] = p.Version
   176  	}
   177  
   178  	var pkgs []pkg.Package
   179  	for _, p := range parsedLockFile.Packages {
   180  		pkgs = append(pkgs,
   181  			newPackageForIndexWithMetadata(
   182  				ctx,
   183  				ulp.licenseResolver,
   184  				p.Name,
   185  				p.Version,
   186  				newPythonUvLockEntry(p),
   187  				reader.WithAnnotation(pkg.EvidenceAnnotationKey, pkg.PrimaryEvidenceAnnotation),
   188  			),
   189  		)
   190  	}
   191  
   192  	return pkgs, unknown.IfEmptyf(pkgs, "unable to determine packages")
   193  }
   194  
   195  func isDependencyForUvExtra(dep pkg.PythonUvLockDependencyEntry) bool {
   196  	return strings.Contains(dep.Markers, "extra ==")
   197  }
   198  
   199  // This is identical to poetryLockDependencySpecifier since it operates on identical
   200  // data structures. Keeping it separate for now since it's always possible for data
   201  // structures to change down the line.
   202  // It *is* possible we may be able to merge the Uv and Poetry data structures
   203  func uvLockDependencySpecifier(p pkg.Package) dependency.Specification { //nolint:dupl // this is very similar to the poetry lock dependency specifier, but should remain separate
   204  	meta, ok := p.Metadata.(pkg.PythonUvLockEntry)
   205  	if !ok {
   206  		log.Tracef("cataloger failed to extract UV lock metadata for package %+v", p.Name)
   207  		return dependency.Specification{}
   208  	}
   209  
   210  	provides := []string{packageRef(p.Name, "")}
   211  
   212  	var requires []string
   213  
   214  	for _, dep := range meta.Dependencies {
   215  		if isDependencyForUvExtra(dep) {
   216  			continue
   217  		}
   218  
   219  		requires = append(requires, packageRef(dep.Name, ""))
   220  
   221  		for _, extra := range dep.Extras {
   222  			requires = append(requires, packageRef(dep.Name, extra))
   223  		}
   224  	}
   225  
   226  	var variants []dependency.ProvidesRequires
   227  	for _, extra := range meta.Extras {
   228  		variants = append(variants,
   229  			dependency.ProvidesRequires{
   230  				Provides: []string{packageRef(p.Name, extra.Name)},
   231  				Requires: extractPackageNames(extra.Dependencies),
   232  			},
   233  		)
   234  	}
   235  
   236  	return dependency.Specification{
   237  		ProvidesRequires: dependency.ProvidesRequires{
   238  			Provides: provides,
   239  			Requires: requires,
   240  		},
   241  		Variants: variants,
   242  	}
   243  }