github.com/kastenhq/syft@v0.0.0-20230821225854-0710af25cdbe/syft/pkg/cataloger/python/parse_wheel_egg.go (about)

     1  package python
     2  
     3  import (
     4  	"bufio"
     5  	"encoding/json"
     6  	"fmt"
     7  	"io"
     8  	"path/filepath"
     9  
    10  	"github.com/kastenhq/syft/internal"
    11  	"github.com/kastenhq/syft/internal/log"
    12  	"github.com/kastenhq/syft/syft/artifact"
    13  	"github.com/kastenhq/syft/syft/file"
    14  	"github.com/kastenhq/syft/syft/pkg"
    15  	"github.com/kastenhq/syft/syft/pkg/cataloger/generic"
    16  )
    17  
    18  // parseWheelOrEgg takes the primary metadata file reference and returns the python package it represents.
    19  func parseWheelOrEgg(resolver file.Resolver, _ *generic.Environment, reader file.LocationReadCloser) ([]pkg.Package, []artifact.Relationship, error) {
    20  	pd, sources, err := assembleEggOrWheelMetadata(resolver, reader.Location)
    21  	if err != nil {
    22  		return nil, nil, err
    23  	}
    24  	if pd == nil {
    25  		return nil, nil, nil
    26  	}
    27  
    28  	// This can happen for Python 2.7 where it is reported from an egg-info, but Python is
    29  	// the actual runtime, it isn't a "package". The special-casing here allows to skip it
    30  	if pd.Name == "Python" {
    31  		return nil, nil, nil
    32  	}
    33  
    34  	pkgs := []pkg.Package{newPackageForPackage(*pd, sources...)}
    35  
    36  	return pkgs, nil, nil
    37  }
    38  
    39  // fetchRecordFiles finds a corresponding installed-files.txt file for the given python package metadata file and returns the set of file records contained.
    40  func fetchInstalledFiles(resolver file.Resolver, metadataLocation file.Location, sitePackagesRootPath string) (files []pkg.PythonFileRecord, sources []file.Location, err error) {
    41  	// we've been given a file reference to a specific wheel METADATA file. note: this may be for a directory
    42  	// or for an image... for an image the METADATA file may be present within multiple layers, so it is important
    43  	// to reconcile the installed-files.txt path to the same layer (or the next adjacent lower layer).
    44  
    45  	// find the installed-files.txt file relative to the directory where the METADATA file resides (in path AND layer structure)
    46  	installedFilesPath := filepath.Join(filepath.Dir(metadataLocation.RealPath), "installed-files.txt")
    47  	installedFilesRef := resolver.RelativeFileByPath(metadataLocation, installedFilesPath)
    48  
    49  	if installedFilesRef != nil {
    50  		sources = append(sources, installedFilesRef.WithAnnotation(pkg.EvidenceAnnotationKey, pkg.SupportingEvidenceAnnotation))
    51  
    52  		installedFilesContents, err := resolver.FileContentsByLocation(*installedFilesRef)
    53  		if err != nil {
    54  			return nil, nil, err
    55  		}
    56  		defer internal.CloseAndLogError(installedFilesContents, installedFilesPath)
    57  
    58  		// parse the installed-files contents
    59  		installedFiles, err := parseInstalledFiles(installedFilesContents, metadataLocation.RealPath, sitePackagesRootPath)
    60  		if err != nil {
    61  			log.Warnf("unable to parse installed-files.txt for python package=%+v: %w", metadataLocation.RealPath, err)
    62  			return files, sources, nil
    63  		}
    64  
    65  		files = append(files, installedFiles...)
    66  	}
    67  	return files, sources, nil
    68  }
    69  
    70  // fetchRecordFiles finds a corresponding RECORD file for the given python package metadata file and returns the set of file records contained.
    71  func fetchRecordFiles(resolver file.Resolver, metadataLocation file.Location) (files []pkg.PythonFileRecord, sources []file.Location, err error) {
    72  	// we've been given a file reference to a specific wheel METADATA file. note: this may be for a directory
    73  	// or for an image... for an image the METADATA file may be present within multiple layers, so it is important
    74  	// to reconcile the RECORD path to the same layer (or the next adjacent lower layer).
    75  
    76  	// find the RECORD file relative to the directory where the METADATA file resides (in path AND layer structure)
    77  	recordPath := filepath.Join(filepath.Dir(metadataLocation.RealPath), "RECORD")
    78  	recordRef := resolver.RelativeFileByPath(metadataLocation, recordPath)
    79  
    80  	if recordRef != nil {
    81  		sources = append(sources, recordRef.WithAnnotation(pkg.EvidenceAnnotationKey, pkg.SupportingEvidenceAnnotation))
    82  
    83  		recordContents, err := resolver.FileContentsByLocation(*recordRef)
    84  		if err != nil {
    85  			return nil, nil, err
    86  		}
    87  		defer internal.CloseAndLogError(recordContents, recordPath)
    88  
    89  		// parse the record contents
    90  		records := parseWheelOrEggRecord(recordContents)
    91  
    92  		files = append(files, records...)
    93  	}
    94  	return files, sources, nil
    95  }
    96  
    97  // fetchTopLevelPackages finds a corresponding top_level.txt file for the given python package metadata file and returns the set of package names contained.
    98  func fetchTopLevelPackages(resolver file.Resolver, metadataLocation file.Location) (pkgs []string, sources []file.Location, err error) {
    99  	// a top_level.txt file specifies the python top-level packages (provided by this python package) installed into site-packages
   100  	parentDir := filepath.Dir(metadataLocation.RealPath)
   101  	topLevelPath := filepath.Join(parentDir, "top_level.txt")
   102  	topLevelLocation := resolver.RelativeFileByPath(metadataLocation, topLevelPath)
   103  
   104  	if topLevelLocation == nil {
   105  		return nil, nil, nil
   106  	}
   107  
   108  	sources = append(sources, topLevelLocation.WithAnnotation(pkg.EvidenceAnnotationKey, pkg.SupportingEvidenceAnnotation))
   109  
   110  	topLevelContents, err := resolver.FileContentsByLocation(*topLevelLocation)
   111  	if err != nil {
   112  		return nil, nil, err
   113  	}
   114  	defer internal.CloseAndLogError(topLevelContents, topLevelLocation.VirtualPath)
   115  
   116  	scanner := bufio.NewScanner(topLevelContents)
   117  	for scanner.Scan() {
   118  		pkgs = append(pkgs, scanner.Text())
   119  	}
   120  
   121  	if err := scanner.Err(); err != nil {
   122  		return nil, nil, fmt.Errorf("could not read python package top_level.txt: %w", err)
   123  	}
   124  
   125  	return pkgs, sources, nil
   126  }
   127  
   128  func fetchDirectURLData(resolver file.Resolver, metadataLocation file.Location) (d *pkg.PythonDirectURLOriginInfo, sources []file.Location, err error) {
   129  	parentDir := filepath.Dir(metadataLocation.RealPath)
   130  	directURLPath := filepath.Join(parentDir, "direct_url.json")
   131  	directURLLocation := resolver.RelativeFileByPath(metadataLocation, directURLPath)
   132  
   133  	if directURLLocation == nil {
   134  		return nil, nil, nil
   135  	}
   136  
   137  	sources = append(sources, directURLLocation.WithAnnotation(pkg.EvidenceAnnotationKey, pkg.SupportingEvidenceAnnotation))
   138  
   139  	directURLContents, err := resolver.FileContentsByLocation(*directURLLocation)
   140  	if err != nil {
   141  		return nil, nil, err
   142  	}
   143  	defer internal.CloseAndLogError(directURLContents, directURLLocation.VirtualPath)
   144  
   145  	buffer, err := io.ReadAll(directURLContents)
   146  	if err != nil {
   147  		return nil, nil, err
   148  	}
   149  
   150  	var directURLJson pkg.DirectURLOrigin
   151  	if err := json.Unmarshal(buffer, &directURLJson); err != nil {
   152  		return nil, nil, err
   153  	}
   154  
   155  	return &pkg.PythonDirectURLOriginInfo{
   156  		URL:      directURLJson.URL,
   157  		CommitID: directURLJson.VCSInfo.CommitID,
   158  		VCS:      directURLJson.VCSInfo.VCS,
   159  	}, sources, nil
   160  }
   161  
   162  // assembleEggOrWheelMetadata discovers and accumulates python package metadata from multiple file sources and returns a single metadata object as well as a list of files where the metadata was derived from.
   163  func assembleEggOrWheelMetadata(resolver file.Resolver, metadataLocation file.Location) (*parsedData, []file.Location, error) {
   164  	var sources = []file.Location{
   165  		metadataLocation.WithAnnotation(pkg.EvidenceAnnotationKey, pkg.PrimaryEvidenceAnnotation),
   166  	}
   167  
   168  	metadataContents, err := resolver.FileContentsByLocation(metadataLocation)
   169  	if err != nil {
   170  		return nil, nil, err
   171  	}
   172  	defer internal.CloseAndLogError(metadataContents, metadataLocation.VirtualPath)
   173  
   174  	pd, err := parseWheelOrEggMetadata(metadataLocation.RealPath, metadataContents)
   175  	if err != nil {
   176  		return nil, nil, err
   177  	}
   178  
   179  	if pd.Name == "" {
   180  		return nil, nil, nil
   181  	}
   182  
   183  	// attach any python files found for the given wheel/egg installation
   184  	r, s, err := fetchRecordFiles(resolver, metadataLocation)
   185  	if err != nil {
   186  		return nil, nil, err
   187  	}
   188  	if len(r) == 0 {
   189  		r, s, err = fetchInstalledFiles(resolver, metadataLocation, pd.SitePackagesRootPath)
   190  		if err != nil {
   191  			return nil, nil, err
   192  		}
   193  	}
   194  
   195  	sources = append(sources, s...)
   196  	pd.Files = r
   197  
   198  	// attach any top-level package names found for the given wheel/egg installation
   199  	p, s, err := fetchTopLevelPackages(resolver, metadataLocation)
   200  	if err != nil {
   201  		return nil, nil, err
   202  	}
   203  	sources = append(sources, s...)
   204  	pd.TopLevelPackages = p
   205  
   206  	// attach any direct-url package data found for the given wheel/egg installation
   207  	d, s, err := fetchDirectURLData(resolver, metadataLocation)
   208  	if err != nil {
   209  		return nil, nil, err
   210  	}
   211  
   212  	sources = append(sources, s...)
   213  	pd.DirectURLOrigin = d
   214  	return &pd, sources, nil
   215  }