github.com/anchore/syft@v1.4.2-0.20240516191711-1bec1fc5d397/syft/pkg/cataloger/python/parse_wheel_egg_metadata.go (about) 1 package python 2 3 import ( 4 "bufio" 5 "fmt" 6 "io" 7 "path/filepath" 8 "strings" 9 10 "github.com/mitchellh/mapstructure" 11 12 intFile "github.com/anchore/syft/internal/file" 13 "github.com/anchore/syft/internal/log" 14 "github.com/anchore/syft/syft/file" 15 "github.com/anchore/syft/syft/pkg" 16 ) 17 18 type parsedData struct { 19 Licenses string `mapstructure:"License"` 20 LicenseFile string `mapstructure:"LicenseFile"` 21 LicenseExpression string `mapstructure:"LicenseExpression"` 22 LicenseLocation file.Location 23 pkg.PythonPackage `mapstructure:",squash"` 24 } 25 26 // parseWheelOrEggMetadata takes a Python Egg or Wheel (which share the same format and values for our purposes), 27 // returning all Python packages listed. 28 func parseWheelOrEggMetadata(path string, reader io.Reader) (parsedData, error) { 29 fields := make(map[string]string) 30 var key string 31 32 scanner := bufio.NewScanner(reader) 33 for scanner.Scan() { 34 line := scanner.Text() 35 line = strings.TrimRight(line, "\n") 36 37 // An empty line means we are done parsing (either because there's no more data, 38 // or because a description follows as specified in 39 // https://packaging.python.org/specifications/core-metadata/#description; 40 // and at this time, we're not interested in the description). 41 if len(line) == 0 { 42 if len(fields) > 0 { 43 break 44 } 45 46 // however, if the field parsing has not started yet, keep scanning lines 47 continue 48 } 49 50 switch { 51 case strings.HasPrefix(line, " "): 52 // a field-body continuation 53 updatedValue, err := handleFieldBodyContinuation(key, line, fields) 54 if err != nil { 55 return parsedData{}, err 56 } 57 58 fields[key] = updatedValue 59 default: 60 // parse a new key (note, duplicate keys are overridden) 61 if i := strings.Index(line, ":"); i > 0 { 62 // mapstruct cannot map keys with dashes, and we are expected to persist the "Author-email" field 63 key = strings.ReplaceAll(strings.TrimSpace(line[0:i]), "-", "") 64 val := strings.TrimSpace(line[i+1:]) 65 66 fields[key] = val 67 } else { 68 log.Warnf("cannot parse field from path: %q from line: %q", path, line) 69 } 70 } 71 } 72 73 if err := scanner.Err(); err != nil { 74 return parsedData{}, fmt.Errorf("failed to parse python wheel/egg: %w", err) 75 } 76 77 var pd parsedData 78 if err := mapstructure.Decode(fields, &pd); err != nil { 79 return pd, fmt.Errorf("unable to parse APK metadata: %w", err) 80 } 81 82 // add additional metadata not stored in the egg/wheel metadata file 83 84 pd.SitePackagesRootPath = determineSitePackagesRootPath(path) 85 if pd.Licenses != "" || pd.LicenseExpression != "" { 86 pd.LicenseLocation = file.NewLocation(path) 87 } else if pd.LicenseFile != "" { 88 pd.LicenseLocation = file.NewLocation(filepath.Join(filepath.Dir(path), pd.LicenseFile)) 89 } 90 91 return pd, nil 92 } 93 94 // isEggRegularFile determines if the specified path is the regular file variant 95 // of egg metadata (as opposed to a directory that contains more metadata 96 // files). 97 func isEggRegularFile(path string) bool { 98 return intFile.GlobMatch(eggInfoGlob, path) 99 } 100 101 // determineSitePackagesRootPath returns the path of the site packages root, 102 // given the egg metadata file or directory specified in the path. 103 func determineSitePackagesRootPath(path string) string { 104 if isEggRegularFile(path) { 105 return filepath.Clean(filepath.Dir(path)) 106 } 107 108 return filepath.Clean(filepath.Dir(filepath.Dir(path))) 109 } 110 111 // handleFieldBodyContinuation returns the updated value for the specified field after processing the specified line. 112 // If the continuation cannot be processed, it returns an error. 113 func handleFieldBodyContinuation(key, line string, fields map[string]string) (string, error) { 114 if len(key) == 0 { 115 return "", fmt.Errorf("no match for continuation: line: '%s'", line) 116 } 117 118 val, ok := fields[key] 119 if !ok { 120 return "", fmt.Errorf("no previous key exists, expecting: %s", key) 121 } 122 123 // concatenate onto previous value 124 return fmt.Sprintf("%s\n %s", val, strings.TrimSpace(line)), nil 125 }