github.com/anchore/syft@v1.38.2/syft/pkg/cataloger/python/parse_wheel_egg_metadata.go (about) 1 package python 2 3 import ( 4 "bufio" 5 "fmt" 6 "path/filepath" 7 "strings" 8 9 "github.com/bmatcuk/doublestar/v4" 10 "github.com/go-viper/mapstructure/v2" 11 12 "github.com/anchore/syft/internal/log" 13 "github.com/anchore/syft/syft/file" 14 "github.com/anchore/syft/syft/pkg" 15 ) 16 17 type parsedData struct { 18 // core info 19 20 // DistInfoLocation is the location of the METADATA file within the .dist-info directory where we obtained the python package information 21 DistInfoLocation file.Location 22 pkg.PythonPackage `mapstructure:",squash"` 23 24 // license info 25 26 Licenses string `mapstructure:"License"` 27 LicenseFile string `mapstructure:"LicenseFile"` 28 LicenseExpression string `mapstructure:"LicenseExpression"` 29 LicenseFilePath string 30 } 31 32 var pluralFields = map[string]bool{ 33 "ProvidesExtra": true, 34 "RequiresDist": true, 35 } 36 37 // parseWheelOrEggMetadata takes a Python Egg or Wheel (which share the same format and values for our purposes), 38 // returning all Python packages listed. 39 func parseWheelOrEggMetadata(locationReader file.LocationReadCloser) (parsedData, error) { 40 fields, err := extractRFC5322Fields(locationReader) 41 if err != nil { 42 return parsedData{}, fmt.Errorf("unable to extract python wheel/egg metadata: %w", err) 43 } 44 45 var pd parsedData 46 if err := mapstructure.Decode(fields, &pd); err != nil { 47 return pd, fmt.Errorf("unable to translate python wheel/egg metadata: %w", err) 48 } 49 50 // add additional metadata not stored in the egg/wheel metadata file 51 path := locationReader.Path() 52 53 pd.SitePackagesRootPath = determineSitePackagesRootPath(path) 54 if pd.Licenses != "" || pd.LicenseExpression != "" { 55 pd.LicenseFilePath = path 56 } else if pd.LicenseFile != "" { 57 pd.LicenseFilePath = filepath.Join(filepath.Dir(path), pd.LicenseFile) 58 } 59 60 pd.DistInfoLocation = locationReader.Location 61 62 return pd, nil 63 } 64 65 func extractRFC5322Fields(locationReader file.LocationReadCloser) (map[string]any, error) { 66 fields := make(map[string]any) 67 var key string 68 69 // though this spec is governed by RFC 5322 (mail message), the metadata files are not guaranteed to be compliant. 70 // We must survive parsing as much info as possible without failing and dropping the data. 71 scanner := bufio.NewScanner(locationReader) 72 for scanner.Scan() { 73 line := scanner.Text() 74 line = strings.TrimRight(line, "\n") 75 76 // An empty line means we are done parsing (either because there's no more data, 77 // or because a description follows as specified in 78 // https://packaging.python.org/specifications/core-metadata/#description; 79 // and at this time, we're not interested in the description). 80 if len(line) == 0 { 81 if len(fields) > 0 { 82 break 83 } 84 85 // however, if the field parsing has not started yet, keep scanning lines 86 continue 87 } 88 89 switch { 90 case strings.HasPrefix(line, " "): 91 // a field-body continuation 92 updatedValue, err := handleFieldBodyContinuation(key, line, fields) 93 if err != nil { 94 return nil, err 95 } 96 97 fields[key] = updatedValue 98 default: 99 // parse a new key (note, duplicate keys that are for singular fields are overridden, where as plural fields are appended) 100 if i := strings.Index(line, ":"); i > 0 { 101 // mapstruct cannot map keys with dashes, and we are expected to persist the "Author-email" field 102 key = strings.ReplaceAll(strings.TrimSpace(line[0:i]), "-", "") 103 val := getFieldType(key, strings.TrimSpace(line[i+1:])) 104 105 fields[key] = handleSingleOrMultiField(fields[key], val) 106 } else { 107 log.Debugf("cannot parse field from path: %q from line: %q", locationReader.Path(), line) 108 } 109 } 110 } 111 return fields, nil 112 } 113 114 func handleSingleOrMultiField(existingValue, val any) any { 115 strSlice, ok := val.([]string) 116 if !ok { 117 return val 118 } 119 if existingValue == nil { 120 return strSlice 121 } 122 123 switch existingValueTy := existingValue.(type) { 124 case []string: 125 return append(existingValueTy, strSlice...) 126 case string: 127 return append([]string{existingValueTy}, strSlice...) 128 } 129 130 return append([]string{fmt.Sprintf("%s", existingValue)}, strSlice...) 131 } 132 133 func getFieldType(key, in string) any { 134 if plural, ok := pluralFields[key]; ok && plural { 135 return []string{in} 136 } 137 return in 138 } 139 140 // isEggRegularFile determines if the specified path is the regular file variant 141 // of egg metadata (as opposed to a directory that contains more metadata 142 // files). 143 func isEggRegularFile(path string) bool { 144 return doublestar.MatchUnvalidated(eggInfoGlob, path) 145 } 146 147 // determineSitePackagesRootPath returns the path of the site packages root, 148 // given the egg metadata file or directory specified in the path. 149 func determineSitePackagesRootPath(path string) string { 150 if isEggRegularFile(path) { 151 return filepath.Clean(filepath.Dir(path)) 152 } 153 154 return filepath.Clean(filepath.Dir(filepath.Dir(path))) 155 } 156 157 // handleFieldBodyContinuation returns the updated value for the specified field after processing the specified line. 158 // If the continuation cannot be processed, it returns an error. 159 func handleFieldBodyContinuation(key, line string, fields map[string]any) (any, error) { 160 if len(key) == 0 { 161 return "", fmt.Errorf("no match for continuation: line: '%s'", line) 162 } 163 164 val, ok := fields[key] 165 if !ok { 166 return "", fmt.Errorf("no previous key exists, expecting: %s", key) 167 } 168 169 // concatenate onto previous value 170 switch s := val.(type) { 171 case string: 172 return fmt.Sprintf("%s\n %s", s, strings.TrimSpace(line)), nil 173 case []string: 174 if len(s) == 0 { 175 s = append(s, "") 176 } 177 s[len(s)-1] = fmt.Sprintf("%s\n %s", s[len(s)-1], strings.TrimSpace(line)) 178 return s, nil 179 default: 180 return "", fmt.Errorf("unexpected type for continuation: %T", val) 181 } 182 }