github.com/nextlinux/gosbom@v0.81.1-0.20230627115839-1ff50c281391/gosbom/pkg/cataloger/python/parse_wheel_egg_metadata.go (about)

     1  package python
     2  
     3  import (
     4  	"bufio"
     5  	"fmt"
     6  	"io"
     7  	"path/filepath"
     8  	"strings"
     9  
    10  	"github.com/mitchellh/mapstructure"
    11  	"github.com/nextlinux/gosbom/gosbom/file"
    12  	"github.com/nextlinux/gosbom/gosbom/pkg"
    13  	intFile "github.com/nextlinux/gosbom/internal/file"
    14  	"github.com/nextlinux/gosbom/internal/log"
    15  )
    16  
// parsedData is the decoded form of a wheel/egg metadata file: the Python package metadata fields
// plus the license information captured while parsing.
type parsedData struct {
	// Licenses is the raw value of the "License" field.
	Licenses                  string `mapstructure:"License"`
	// LicenseLocation is filled in by parseWheelOrEggMetadata with the location of the metadata
	// file, and only when Licenses is non-empty.
	LicenseLocation           file.Location
	// the embedded package metadata is decoded from the same flat key/value map (squash)
	pkg.PythonPackageMetadata `mapstructure:",squash"`
}
    22  
    23  // parseWheelOrEggMetadata takes a Python Egg or Wheel (which share the same format and values for our purposes),
    24  // returning all Python packages listed.
    25  func parseWheelOrEggMetadata(path string, reader io.Reader) (parsedData, error) {
    26  	fields := make(map[string]string)
    27  	var key string
    28  
    29  	scanner := bufio.NewScanner(reader)
    30  	for scanner.Scan() {
    31  		line := scanner.Text()
    32  		line = strings.TrimRight(line, "\n")
    33  
    34  		// An empty line means we are done parsing (either because there's no more data,
    35  		// or because a description follows as specified in
    36  		// https://packaging.python.org/specifications/core-metadata/#description;
    37  		// and at this time, we're not interested in the description).
    38  		if len(line) == 0 {
    39  			if len(fields) > 0 {
    40  				break
    41  			}
    42  
    43  			// however, if the field parsing has not started yet, keep scanning lines
    44  			continue
    45  		}
    46  
    47  		switch {
    48  		case strings.HasPrefix(line, " "):
    49  			// a field-body continuation
    50  			updatedValue, err := handleFieldBodyContinuation(key, line, fields)
    51  			if err != nil {
    52  				return parsedData{}, err
    53  			}
    54  
    55  			fields[key] = updatedValue
    56  		default:
    57  			// parse a new key (note, duplicate keys are overridden)
    58  			if i := strings.Index(line, ":"); i > 0 {
    59  				// mapstruct cannot map keys with dashes, and we are expected to persist the "Author-email" field
    60  				key = strings.ReplaceAll(strings.TrimSpace(line[0:i]), "-", "")
    61  				val := strings.TrimSpace(line[i+1:])
    62  
    63  				fields[key] = val
    64  			} else {
    65  				log.Warnf("cannot parse field from path: %q from line: %q", path, line)
    66  			}
    67  		}
    68  	}
    69  
    70  	if err := scanner.Err(); err != nil {
    71  		return parsedData{}, fmt.Errorf("failed to parse python wheel/egg: %w", err)
    72  	}
    73  
    74  	var pd parsedData
    75  	if err := mapstructure.Decode(fields, &pd); err != nil {
    76  		return pd, fmt.Errorf("unable to parse APK metadata: %w", err)
    77  	}
    78  
    79  	// add additional metadata not stored in the egg/wheel metadata file
    80  
    81  	pd.SitePackagesRootPath = determineSitePackagesRootPath(path)
    82  	if pd.Licenses != "" {
    83  		pd.LicenseLocation = file.NewLocation(path)
    84  	}
    85  
    86  	return pd, nil
    87  }
    88  
    89  // isEggRegularFile determines if the specified path is the regular file variant
    90  // of egg metadata (as opposed to a directory that contains more metadata
    91  // files).
    92  func isEggRegularFile(path string) bool {
    93  	return intFile.GlobMatch(eggInfoGlob, path)
    94  }
    95  
    96  // determineSitePackagesRootPath returns the path of the site packages root,
    97  // given the egg metadata file or directory specified in the path.
    98  func determineSitePackagesRootPath(path string) string {
    99  	if isEggRegularFile(path) {
   100  		return filepath.Clean(filepath.Dir(path))
   101  	}
   102  
   103  	return filepath.Clean(filepath.Dir(filepath.Dir(path)))
   104  }
   105  
   106  // handleFieldBodyContinuation returns the updated value for the specified field after processing the specified line.
   107  // If the continuation cannot be processed, it returns an error.
   108  func handleFieldBodyContinuation(key, line string, fields map[string]string) (string, error) {
   109  	if len(key) == 0 {
   110  		return "", fmt.Errorf("no match for continuation: line: '%s'", line)
   111  	}
   112  
   113  	val, ok := fields[key]
   114  	if !ok {
   115  		return "", fmt.Errorf("no previous key exists, expecting: %s", key)
   116  	}
   117  
   118  	// concatenate onto previous value
   119  	return fmt.Sprintf("%s\n %s", val, strings.TrimSpace(line)), nil
   120  }