github.com/noqcks/syft@v0.0.0-20230920222752-a9e2c4e288e5/syft/pkg/cataloger/python/parse_wheel_egg_metadata.go (about) 1 package python 2 3 import ( 4 "bufio" 5 "fmt" 6 "io" 7 "path/filepath" 8 "strings" 9 10 "github.com/mitchellh/mapstructure" 11 12 intFile "github.com/anchore/syft/internal/file" 13 "github.com/anchore/syft/internal/log" 14 "github.com/anchore/syft/syft/file" 15 "github.com/anchore/syft/syft/pkg" 16 ) 17 18 type parsedData struct { 19 Licenses string `mapstructure:"License"` 20 LicenseLocation file.Location 21 pkg.PythonPackageMetadata `mapstructure:",squash"` 22 } 23 24 // parseWheelOrEggMetadata takes a Python Egg or Wheel (which share the same format and values for our purposes), 25 // returning all Python packages listed. 26 func parseWheelOrEggMetadata(path string, reader io.Reader) (parsedData, error) { 27 fields := make(map[string]string) 28 var key string 29 30 scanner := bufio.NewScanner(reader) 31 for scanner.Scan() { 32 line := scanner.Text() 33 line = strings.TrimRight(line, "\n") 34 35 // An empty line means we are done parsing (either because there's no more data, 36 // or because a description follows as specified in 37 // https://packaging.python.org/specifications/core-metadata/#description; 38 // and at this time, we're not interested in the description). 39 if len(line) == 0 { 40 if len(fields) > 0 { 41 break 42 } 43 44 // however, if the field parsing has not started yet, keep scanning lines 45 continue 46 } 47 48 switch { 49 case strings.HasPrefix(line, " "): 50 // a field-body continuation 51 updatedValue, err := handleFieldBodyContinuation(key, line, fields) 52 if err != nil { 53 return parsedData{}, err 54 } 55 56 fields[key] = updatedValue 57 default: 58 // parse a new key (note, duplicate keys are overridden) 59 if i := strings.Index(line, ":"); i > 0 { 60 // mapstruct cannot map keys with dashes, and we are expected to persist the "Author-email" field 61 key = strings.ReplaceAll(strings.TrimSpace(line[0:i]), "-", "") 62 val := strings.TrimSpace(line[i+1:]) 63 64 fields[key] = val 65 } else { 66 log.Warnf("cannot parse field from path: %q from line: %q", path, line) 67 } 68 } 69 } 70 71 if err := scanner.Err(); err != nil { 72 return parsedData{}, fmt.Errorf("failed to parse python wheel/egg: %w", err) 73 } 74 75 var pd parsedData 76 if err := mapstructure.Decode(fields, &pd); err != nil { 77 return pd, fmt.Errorf("unable to parse APK metadata: %w", err) 78 } 79 80 // add additional metadata not stored in the egg/wheel metadata file 81 82 pd.SitePackagesRootPath = determineSitePackagesRootPath(path) 83 if pd.Licenses != "" { 84 pd.LicenseLocation = file.NewLocation(path) 85 } 86 87 return pd, nil 88 } 89 90 // isEggRegularFile determines if the specified path is the regular file variant 91 // of egg metadata (as opposed to a directory that contains more metadata 92 // files). 93 func isEggRegularFile(path string) bool { 94 return intFile.GlobMatch(eggInfoGlob, path) 95 } 96 97 // determineSitePackagesRootPath returns the path of the site packages root, 98 // given the egg metadata file or directory specified in the path. 99 func determineSitePackagesRootPath(path string) string { 100 if isEggRegularFile(path) { 101 return filepath.Clean(filepath.Dir(path)) 102 } 103 104 return filepath.Clean(filepath.Dir(filepath.Dir(path))) 105 } 106 107 // handleFieldBodyContinuation returns the updated value for the specified field after processing the specified line. 108 // If the continuation cannot be processed, it returns an error. 109 func handleFieldBodyContinuation(key, line string, fields map[string]string) (string, error) { 110 if len(key) == 0 { 111 return "", fmt.Errorf("no match for continuation: line: '%s'", line) 112 } 113 114 val, ok := fields[key] 115 if !ok { 116 return "", fmt.Errorf("no previous key exists, expecting: %s", key) 117 } 118 119 // concatenate onto previous value 120 return fmt.Sprintf("%s\n %s", val, strings.TrimSpace(line)), nil 121 }