github.com/nextlinux/gosbom@v0.81.1-0.20230627115839-1ff50c281391/gosbom/pkg/cataloger/python/parse_wheel_egg_metadata.go (about) 1 package python 2 3 import ( 4 "bufio" 5 "fmt" 6 "io" 7 "path/filepath" 8 "strings" 9 10 "github.com/mitchellh/mapstructure" 11 "github.com/nextlinux/gosbom/gosbom/file" 12 "github.com/nextlinux/gosbom/gosbom/pkg" 13 intFile "github.com/nextlinux/gosbom/internal/file" 14 "github.com/nextlinux/gosbom/internal/log" 15 ) 16 17 type parsedData struct { 18 Licenses string `mapstructure:"License"` 19 LicenseLocation file.Location 20 pkg.PythonPackageMetadata `mapstructure:",squash"` 21 } 22 23 // parseWheelOrEggMetadata takes a Python Egg or Wheel (which share the same format and values for our purposes), 24 // returning all Python packages listed. 25 func parseWheelOrEggMetadata(path string, reader io.Reader) (parsedData, error) { 26 fields := make(map[string]string) 27 var key string 28 29 scanner := bufio.NewScanner(reader) 30 for scanner.Scan() { 31 line := scanner.Text() 32 line = strings.TrimRight(line, "\n") 33 34 // An empty line means we are done parsing (either because there's no more data, 35 // or because a description follows as specified in 36 // https://packaging.python.org/specifications/core-metadata/#description; 37 // and at this time, we're not interested in the description). 38 if len(line) == 0 { 39 if len(fields) > 0 { 40 break 41 } 42 43 // however, if the field parsing has not started yet, keep scanning lines 44 continue 45 } 46 47 switch { 48 case strings.HasPrefix(line, " "): 49 // a field-body continuation 50 updatedValue, err := handleFieldBodyContinuation(key, line, fields) 51 if err != nil { 52 return parsedData{}, err 53 } 54 55 fields[key] = updatedValue 56 default: 57 // parse a new key (note, duplicate keys are overridden) 58 if i := strings.Index(line, ":"); i > 0 { 59 // mapstruct cannot map keys with dashes, and we are expected to persist the "Author-email" field 60 key = strings.ReplaceAll(strings.TrimSpace(line[0:i]), "-", "") 61 val := strings.TrimSpace(line[i+1:]) 62 63 fields[key] = val 64 } else { 65 log.Warnf("cannot parse field from path: %q from line: %q", path, line) 66 } 67 } 68 } 69 70 if err := scanner.Err(); err != nil { 71 return parsedData{}, fmt.Errorf("failed to parse python wheel/egg: %w", err) 72 } 73 74 var pd parsedData 75 if err := mapstructure.Decode(fields, &pd); err != nil { 76 return pd, fmt.Errorf("unable to parse APK metadata: %w", err) 77 } 78 79 // add additional metadata not stored in the egg/wheel metadata file 80 81 pd.SitePackagesRootPath = determineSitePackagesRootPath(path) 82 if pd.Licenses != "" { 83 pd.LicenseLocation = file.NewLocation(path) 84 } 85 86 return pd, nil 87 } 88 89 // isEggRegularFile determines if the specified path is the regular file variant 90 // of egg metadata (as opposed to a directory that contains more metadata 91 // files). 92 func isEggRegularFile(path string) bool { 93 return intFile.GlobMatch(eggInfoGlob, path) 94 } 95 96 // determineSitePackagesRootPath returns the path of the site packages root, 97 // given the egg metadata file or directory specified in the path. 98 func determineSitePackagesRootPath(path string) string { 99 if isEggRegularFile(path) { 100 return filepath.Clean(filepath.Dir(path)) 101 } 102 103 return filepath.Clean(filepath.Dir(filepath.Dir(path))) 104 } 105 106 // handleFieldBodyContinuation returns the updated value for the specified field after processing the specified line. 107 // If the continuation cannot be processed, it returns an error. 108 func handleFieldBodyContinuation(key, line string, fields map[string]string) (string, error) { 109 if len(key) == 0 { 110 return "", fmt.Errorf("no match for continuation: line: '%s'", line) 111 } 112 113 val, ok := fields[key] 114 if !ok { 115 return "", fmt.Errorf("no previous key exists, expecting: %s", key) 116 } 117 118 // concatenate onto previous value 119 return fmt.Sprintf("%s\n %s", val, strings.TrimSpace(line)), nil 120 }