github.com/kastenhq/syft@v0.0.0-20230821225854-0710af25cdbe/syft/pkg/cataloger/python/parse_wheel_egg.go (about) 1 package python 2 3 import ( 4 "bufio" 5 "encoding/json" 6 "fmt" 7 "io" 8 "path/filepath" 9 10 "github.com/kastenhq/syft/internal" 11 "github.com/kastenhq/syft/internal/log" 12 "github.com/kastenhq/syft/syft/artifact" 13 "github.com/kastenhq/syft/syft/file" 14 "github.com/kastenhq/syft/syft/pkg" 15 "github.com/kastenhq/syft/syft/pkg/cataloger/generic" 16 ) 17 18 // parseWheelOrEgg takes the primary metadata file reference and returns the python package it represents. 19 func parseWheelOrEgg(resolver file.Resolver, _ *generic.Environment, reader file.LocationReadCloser) ([]pkg.Package, []artifact.Relationship, error) { 20 pd, sources, err := assembleEggOrWheelMetadata(resolver, reader.Location) 21 if err != nil { 22 return nil, nil, err 23 } 24 if pd == nil { 25 return nil, nil, nil 26 } 27 28 // This can happen for Python 2.7 where it is reported from an egg-info, but Python is 29 // the actual runtime, it isn't a "package". The special-casing here allows to skip it 30 if pd.Name == "Python" { 31 return nil, nil, nil 32 } 33 34 pkgs := []pkg.Package{newPackageForPackage(*pd, sources...)} 35 36 return pkgs, nil, nil 37 } 38 39 // fetchRecordFiles finds a corresponding installed-files.txt file for the given python package metadata file and returns the set of file records contained. 40 func fetchInstalledFiles(resolver file.Resolver, metadataLocation file.Location, sitePackagesRootPath string) (files []pkg.PythonFileRecord, sources []file.Location, err error) { 41 // we've been given a file reference to a specific wheel METADATA file. note: this may be for a directory 42 // or for an image... for an image the METADATA file may be present within multiple layers, so it is important 43 // to reconcile the installed-files.txt path to the same layer (or the next adjacent lower layer). 44 45 // find the installed-files.txt file relative to the directory where the METADATA file resides (in path AND layer structure) 46 installedFilesPath := filepath.Join(filepath.Dir(metadataLocation.RealPath), "installed-files.txt") 47 installedFilesRef := resolver.RelativeFileByPath(metadataLocation, installedFilesPath) 48 49 if installedFilesRef != nil { 50 sources = append(sources, installedFilesRef.WithAnnotation(pkg.EvidenceAnnotationKey, pkg.SupportingEvidenceAnnotation)) 51 52 installedFilesContents, err := resolver.FileContentsByLocation(*installedFilesRef) 53 if err != nil { 54 return nil, nil, err 55 } 56 defer internal.CloseAndLogError(installedFilesContents, installedFilesPath) 57 58 // parse the installed-files contents 59 installedFiles, err := parseInstalledFiles(installedFilesContents, metadataLocation.RealPath, sitePackagesRootPath) 60 if err != nil { 61 log.Warnf("unable to parse installed-files.txt for python package=%+v: %w", metadataLocation.RealPath, err) 62 return files, sources, nil 63 } 64 65 files = append(files, installedFiles...) 66 } 67 return files, sources, nil 68 } 69 70 // fetchRecordFiles finds a corresponding RECORD file for the given python package metadata file and returns the set of file records contained. 71 func fetchRecordFiles(resolver file.Resolver, metadataLocation file.Location) (files []pkg.PythonFileRecord, sources []file.Location, err error) { 72 // we've been given a file reference to a specific wheel METADATA file. note: this may be for a directory 73 // or for an image... for an image the METADATA file may be present within multiple layers, so it is important 74 // to reconcile the RECORD path to the same layer (or the next adjacent lower layer). 75 76 // find the RECORD file relative to the directory where the METADATA file resides (in path AND layer structure) 77 recordPath := filepath.Join(filepath.Dir(metadataLocation.RealPath), "RECORD") 78 recordRef := resolver.RelativeFileByPath(metadataLocation, recordPath) 79 80 if recordRef != nil { 81 sources = append(sources, recordRef.WithAnnotation(pkg.EvidenceAnnotationKey, pkg.SupportingEvidenceAnnotation)) 82 83 recordContents, err := resolver.FileContentsByLocation(*recordRef) 84 if err != nil { 85 return nil, nil, err 86 } 87 defer internal.CloseAndLogError(recordContents, recordPath) 88 89 // parse the record contents 90 records := parseWheelOrEggRecord(recordContents) 91 92 files = append(files, records...) 93 } 94 return files, sources, nil 95 } 96 97 // fetchTopLevelPackages finds a corresponding top_level.txt file for the given python package metadata file and returns the set of package names contained. 98 func fetchTopLevelPackages(resolver file.Resolver, metadataLocation file.Location) (pkgs []string, sources []file.Location, err error) { 99 // a top_level.txt file specifies the python top-level packages (provided by this python package) installed into site-packages 100 parentDir := filepath.Dir(metadataLocation.RealPath) 101 topLevelPath := filepath.Join(parentDir, "top_level.txt") 102 topLevelLocation := resolver.RelativeFileByPath(metadataLocation, topLevelPath) 103 104 if topLevelLocation == nil { 105 return nil, nil, nil 106 } 107 108 sources = append(sources, topLevelLocation.WithAnnotation(pkg.EvidenceAnnotationKey, pkg.SupportingEvidenceAnnotation)) 109 110 topLevelContents, err := resolver.FileContentsByLocation(*topLevelLocation) 111 if err != nil { 112 return nil, nil, err 113 } 114 defer internal.CloseAndLogError(topLevelContents, topLevelLocation.VirtualPath) 115 116 scanner := bufio.NewScanner(topLevelContents) 117 for scanner.Scan() { 118 pkgs = append(pkgs, scanner.Text()) 119 } 120 121 if err := scanner.Err(); err != nil { 122 return nil, nil, fmt.Errorf("could not read python package top_level.txt: %w", err) 123 } 124 125 return pkgs, sources, nil 126 } 127 128 func fetchDirectURLData(resolver file.Resolver, metadataLocation file.Location) (d *pkg.PythonDirectURLOriginInfo, sources []file.Location, err error) { 129 parentDir := filepath.Dir(metadataLocation.RealPath) 130 directURLPath := filepath.Join(parentDir, "direct_url.json") 131 directURLLocation := resolver.RelativeFileByPath(metadataLocation, directURLPath) 132 133 if directURLLocation == nil { 134 return nil, nil, nil 135 } 136 137 sources = append(sources, directURLLocation.WithAnnotation(pkg.EvidenceAnnotationKey, pkg.SupportingEvidenceAnnotation)) 138 139 directURLContents, err := resolver.FileContentsByLocation(*directURLLocation) 140 if err != nil { 141 return nil, nil, err 142 } 143 defer internal.CloseAndLogError(directURLContents, directURLLocation.VirtualPath) 144 145 buffer, err := io.ReadAll(directURLContents) 146 if err != nil { 147 return nil, nil, err 148 } 149 150 var directURLJson pkg.DirectURLOrigin 151 if err := json.Unmarshal(buffer, &directURLJson); err != nil { 152 return nil, nil, err 153 } 154 155 return &pkg.PythonDirectURLOriginInfo{ 156 URL: directURLJson.URL, 157 CommitID: directURLJson.VCSInfo.CommitID, 158 VCS: directURLJson.VCSInfo.VCS, 159 }, sources, nil 160 } 161 162 // assembleEggOrWheelMetadata discovers and accumulates python package metadata from multiple file sources and returns a single metadata object as well as a list of files where the metadata was derived from. 163 func assembleEggOrWheelMetadata(resolver file.Resolver, metadataLocation file.Location) (*parsedData, []file.Location, error) { 164 var sources = []file.Location{ 165 metadataLocation.WithAnnotation(pkg.EvidenceAnnotationKey, pkg.PrimaryEvidenceAnnotation), 166 } 167 168 metadataContents, err := resolver.FileContentsByLocation(metadataLocation) 169 if err != nil { 170 return nil, nil, err 171 } 172 defer internal.CloseAndLogError(metadataContents, metadataLocation.VirtualPath) 173 174 pd, err := parseWheelOrEggMetadata(metadataLocation.RealPath, metadataContents) 175 if err != nil { 176 return nil, nil, err 177 } 178 179 if pd.Name == "" { 180 return nil, nil, nil 181 } 182 183 // attach any python files found for the given wheel/egg installation 184 r, s, err := fetchRecordFiles(resolver, metadataLocation) 185 if err != nil { 186 return nil, nil, err 187 } 188 if len(r) == 0 { 189 r, s, err = fetchInstalledFiles(resolver, metadataLocation, pd.SitePackagesRootPath) 190 if err != nil { 191 return nil, nil, err 192 } 193 } 194 195 sources = append(sources, s...) 196 pd.Files = r 197 198 // attach any top-level package names found for the given wheel/egg installation 199 p, s, err := fetchTopLevelPackages(resolver, metadataLocation) 200 if err != nil { 201 return nil, nil, err 202 } 203 sources = append(sources, s...) 204 pd.TopLevelPackages = p 205 206 // attach any direct-url package data found for the given wheel/egg installation 207 d, s, err := fetchDirectURLData(resolver, metadataLocation) 208 if err != nil { 209 return nil, nil, err 210 } 211 212 sources = append(sources, s...) 213 pd.DirectURLOrigin = d 214 return &pd, sources, nil 215 }