github.com/noqcks/syft@v0.0.0-20230920222752-a9e2c4e288e5/syft/pkg/cataloger/python/parse_wheel_egg_metadata.go (about)

     1  package python
     2  
     3  import (
     4  	"bufio"
     5  	"fmt"
     6  	"io"
     7  	"path/filepath"
     8  	"strings"
     9  
    10  	"github.com/mitchellh/mapstructure"
    11  
    12  	intFile "github.com/anchore/syft/internal/file"
    13  	"github.com/anchore/syft/internal/log"
    14  	"github.com/anchore/syft/syft/file"
    15  	"github.com/anchore/syft/syft/pkg"
    16  )
    17  
// parsedData is the decode target for the key/value fields collected by parseWheelOrEggMetadata.
type parsedData struct {
	// Licenses holds the raw value of the "License" field from the metadata file.
	Licenses string `mapstructure:"License"`
	// LicenseLocation is set by parseWheelOrEggMetadata to the metadata file path when Licenses is non-empty.
	LicenseLocation file.Location
	// the embedded metadata is squashed so that its fields are decoded from the same flat field map
	pkg.PythonPackageMetadata `mapstructure:",squash"`
}
    23  
    24  // parseWheelOrEggMetadata takes a Python Egg or Wheel (which share the same format and values for our purposes),
    25  // returning all Python packages listed.
    26  func parseWheelOrEggMetadata(path string, reader io.Reader) (parsedData, error) {
    27  	fields := make(map[string]string)
    28  	var key string
    29  
    30  	scanner := bufio.NewScanner(reader)
    31  	for scanner.Scan() {
    32  		line := scanner.Text()
    33  		line = strings.TrimRight(line, "\n")
    34  
    35  		// An empty line means we are done parsing (either because there's no more data,
    36  		// or because a description follows as specified in
    37  		// https://packaging.python.org/specifications/core-metadata/#description;
    38  		// and at this time, we're not interested in the description).
    39  		if len(line) == 0 {
    40  			if len(fields) > 0 {
    41  				break
    42  			}
    43  
    44  			// however, if the field parsing has not started yet, keep scanning lines
    45  			continue
    46  		}
    47  
    48  		switch {
    49  		case strings.HasPrefix(line, " "):
    50  			// a field-body continuation
    51  			updatedValue, err := handleFieldBodyContinuation(key, line, fields)
    52  			if err != nil {
    53  				return parsedData{}, err
    54  			}
    55  
    56  			fields[key] = updatedValue
    57  		default:
    58  			// parse a new key (note, duplicate keys are overridden)
    59  			if i := strings.Index(line, ":"); i > 0 {
    60  				// mapstruct cannot map keys with dashes, and we are expected to persist the "Author-email" field
    61  				key = strings.ReplaceAll(strings.TrimSpace(line[0:i]), "-", "")
    62  				val := strings.TrimSpace(line[i+1:])
    63  
    64  				fields[key] = val
    65  			} else {
    66  				log.Warnf("cannot parse field from path: %q from line: %q", path, line)
    67  			}
    68  		}
    69  	}
    70  
    71  	if err := scanner.Err(); err != nil {
    72  		return parsedData{}, fmt.Errorf("failed to parse python wheel/egg: %w", err)
    73  	}
    74  
    75  	var pd parsedData
    76  	if err := mapstructure.Decode(fields, &pd); err != nil {
    77  		return pd, fmt.Errorf("unable to parse APK metadata: %w", err)
    78  	}
    79  
    80  	// add additional metadata not stored in the egg/wheel metadata file
    81  
    82  	pd.SitePackagesRootPath = determineSitePackagesRootPath(path)
    83  	if pd.Licenses != "" {
    84  		pd.LicenseLocation = file.NewLocation(path)
    85  	}
    86  
    87  	return pd, nil
    88  }
    89  
    90  // isEggRegularFile determines if the specified path is the regular file variant
    91  // of egg metadata (as opposed to a directory that contains more metadata
    92  // files).
    93  func isEggRegularFile(path string) bool {
    94  	return intFile.GlobMatch(eggInfoGlob, path)
    95  }
    96  
    97  // determineSitePackagesRootPath returns the path of the site packages root,
    98  // given the egg metadata file or directory specified in the path.
    99  func determineSitePackagesRootPath(path string) string {
   100  	if isEggRegularFile(path) {
   101  		return filepath.Clean(filepath.Dir(path))
   102  	}
   103  
   104  	return filepath.Clean(filepath.Dir(filepath.Dir(path)))
   105  }
   106  
   107  // handleFieldBodyContinuation returns the updated value for the specified field after processing the specified line.
   108  // If the continuation cannot be processed, it returns an error.
   109  func handleFieldBodyContinuation(key, line string, fields map[string]string) (string, error) {
   110  	if len(key) == 0 {
   111  		return "", fmt.Errorf("no match for continuation: line: '%s'", line)
   112  	}
   113  
   114  	val, ok := fields[key]
   115  	if !ok {
   116  		return "", fmt.Errorf("no previous key exists, expecting: %s", key)
   117  	}
   118  
   119  	// concatenate onto previous value
   120  	return fmt.Sprintf("%s\n %s", val, strings.TrimSpace(line)), nil
   121  }