package generic

import (
	"path/filepath"

	"github.com/bmatcuk/doublestar/v4"

	"github.com/anchore/syft/internal"
	"github.com/anchore/syft/internal/log"
	"github.com/anchore/syft/syft/artifact"
	"github.com/anchore/syft/syft/file"
	"github.com/anchore/syft/syft/linux"
	"github.com/anchore/syft/syft/pkg"
)

// processor selects files of interest via the given resolver and pairs each with the
// Parser that should be invoked on it.
type processor func(resolver file.Resolver, env Environment) []request

// groupedProcessor is the grouped analog of processor: it yields sets of colocated
// files that should be handed to a single GroupedParser invocation together.
type groupedProcessor func(resolver file.Resolver, env Environment) []groupedRequest

// request pairs a single file location with the Parser responsible for processing it.
type request struct {
	file.Location
	Parser
}

// groupedRequest pairs a set of file locations with the GroupedParser responsible for
// processing them together; PrimaryFileLocation is the member of Locations that
// matched the primary glob.
type groupedRequest struct {
	Locations           []file.Location
	PrimaryFileLocation file.Location
	GroupedParser
}

// Cataloger implements the Catalog interface and is responsible for dispatching the proper parser function for
// a given path or glob pattern. This is intended to be reusable across many package cataloger types.
32 type Cataloger struct { 33 processor []processor 34 upstreamCataloger string 35 } 36 37 // GroupedCataloger is a special case of Cataloger that will process files together 38 // this is needed for the case of package.json and package-lock.json files for example 39 type GroupedCataloger struct { 40 groupedProcessor []groupedProcessor 41 upstreamCataloger string 42 } 43 44 func (c *GroupedCataloger) Name() string { 45 return c.upstreamCataloger 46 } 47 48 func isPrimaryFileGlobPresent(primaryFileGlob string, globs []string) bool { 49 for _, g := range globs { 50 if g == primaryFileGlob { 51 return true 52 } 53 } 54 return false 55 } 56 57 func generateGroupedProcessor(parser GroupedParser, primaryFileGlob string, globs []string) func(resolver file.Resolver, env Environment) []groupedRequest { 58 return func(resolver file.Resolver, env Environment) []groupedRequest { 59 var requests []groupedRequest 60 colocatedFiles := collectColocatedFiles(resolver, globs) 61 62 // Filter to only directories that contain all specified files 63 for _, files := range colocatedFiles { 64 allMatched, primaryFileLocation := isAllGlobsMatched(files, globs, primaryFileGlob) 65 if allMatched { 66 requests = append(requests, makeGroupedRequests(parser, files, primaryFileLocation)) 67 } 68 } 69 70 return requests 71 } 72 } 73 74 func collectColocatedFiles(resolver file.Resolver, globs []string) map[string][]file.Location { 75 colocatedFiles := make(map[string][]file.Location) 76 for _, g := range globs { 77 log.WithFields("glob", g).Trace("searching for paths matching glob") 78 matches, err := resolver.FilesByGlob(g) 79 if err != nil { 80 log.Warnf("unable to process glob=%q: %+v", g, err) 81 continue 82 } 83 for _, match := range matches { 84 dir := filepath.Dir(match.RealPath) 85 colocatedFiles[dir] = append(colocatedFiles[dir], match) 86 } 87 } 88 return colocatedFiles 89 } 90 91 func isAllGlobsMatched(files []file.Location, globs []string, primaryFileGlob string) (bool, file.Location) 
{ 92 globMatches := make(map[string]bool) 93 var primaryFileLocation file.Location 94 95 for _, g := range globs { 96 for _, file := range files { 97 if matched, _ := doublestar.PathMatch(g, file.RealPath); matched { 98 if g == primaryFileGlob { 99 primaryFileLocation = file 100 } 101 globMatches[g] = true 102 break 103 } 104 } 105 } 106 107 return len(globMatches) == len(globs), primaryFileLocation 108 } 109 110 // WithParserByGlobColocation is a special case of WithParserByGlob that will only match files that are colocated 111 // with all of the provided globs. This is useful for cases where a package is defined by multiple files (e.g. package.json + package-lock.json). 112 // This function will only match files that are colocated with all of the provided globs. 113 func (c *GroupedCataloger) WithParserByGlobColocation(parser GroupedParser, primaryFileGlob string, globs []string) *GroupedCataloger { 114 if !isPrimaryFileGlobPresent(primaryFileGlob, globs) { 115 log.Warnf("primary file glob=%q not present in globs=%+v", primaryFileGlob, globs) 116 return c 117 } 118 119 c.groupedProcessor = append(c.groupedProcessor, generateGroupedProcessor(parser, primaryFileGlob, globs)) 120 return c 121 } 122 123 func (c *Cataloger) WithParserByGlobs(parser Parser, globs ...string) *Cataloger { 124 c.processor = append(c.processor, 125 func(resolver file.Resolver, env Environment) []request { 126 var requests []request 127 for _, g := range globs { 128 log.WithFields("glob", g).Trace("searching for paths matching glob") 129 130 matches, err := resolver.FilesByGlob(g) 131 if err != nil { 132 log.Warnf("unable to process glob=%q: %+v", g, err) 133 continue 134 } 135 requests = append(requests, makeRequests(parser, matches)...) 
136 } 137 return requests 138 }, 139 ) 140 return c 141 } 142 143 // selectFiles takes a set of file trees and resolves and file references of interest for future cataloging 144 func (c *GroupedCataloger) selectFiles(resolver file.Resolver) []groupedRequest { 145 var requests []groupedRequest 146 for _, proc := range c.groupedProcessor { 147 requests = append(requests, proc(resolver, Environment{})...) 148 } 149 return requests 150 } 151 152 // Catalog is given an object to resolve file references and content, this function returns any discovered Packages after analyzing the catalog source. 153 func (c *GroupedCataloger) Catalog(resolver file.Resolver) ([]pkg.Package, []artifact.Relationship, error) { 154 var packages []pkg.Package 155 var relationships []artifact.Relationship 156 157 logger := log.Nested("cataloger", c.upstreamCataloger) 158 159 env := Environment{ 160 // TODO: consider passing into the cataloger, this would affect the cataloger interface (and all implementations). This can be deferred until later. 
161 LinuxRelease: linux.IdentifyRelease(resolver), 162 } 163 164 for _, req := range c.selectFiles(resolver) { 165 parser := req.GroupedParser 166 var readClosers []file.LocationReadCloser 167 168 for _, location := range req.Locations { 169 log.WithFields("path", location.RealPath).Trace("parsing file contents") 170 contentReader, err := resolver.FileContentsByLocation(location) 171 if err != nil { 172 logger.WithFields("location", location.RealPath, "error", err).Warn("unable to fetch contents") 173 continue 174 } 175 readClosers = append(readClosers, file.NewLocationReadCloser(location, contentReader)) 176 } 177 178 // If your parser is expecting multiple file contents, ensure its signature reflects this change 179 discoveredPackages, discoveredRelationships, err := parser(resolver, &env, readClosers) 180 for _, rc := range readClosers { 181 internal.CloseAndLogError(rc, rc.VirtualPath) 182 } 183 if err != nil { 184 logger.WithFields("error", err).Warnf("cataloger failed") 185 continue 186 } 187 188 for _, p := range discoveredPackages { 189 p.FoundBy = c.upstreamCataloger 190 packages = append(packages, p) 191 } 192 193 relationships = append(relationships, discoveredRelationships...) 194 } 195 return packages, relationships, nil 196 } 197 198 func makeGroupedRequests(parser GroupedParser, locations []file.Location, primaryFileLocation file.Location) groupedRequest { 199 return groupedRequest{ 200 Locations: locations, 201 PrimaryFileLocation: primaryFileLocation, 202 GroupedParser: parser, 203 } 204 } 205 206 func (c *Cataloger) WithParserByMimeTypes(parser Parser, types ...string) *Cataloger { 207 c.processor = append(c.processor, 208 func(resolver file.Resolver, env Environment) []request { 209 var requests []request 210 log.WithFields("mimetypes", types).Trace("searching for paths matching mimetype") 211 matches, err := resolver.FilesByMIMEType(types...) 
212 if err != nil { 213 log.Warnf("unable to process mimetypes=%+v: %+v", types, err) 214 return nil 215 } 216 requests = append(requests, makeRequests(parser, matches)...) 217 return requests 218 }, 219 ) 220 return c 221 } 222 223 func (c *Cataloger) WithParserByPath(parser Parser, paths ...string) *Cataloger { 224 c.processor = append(c.processor, 225 func(resolver file.Resolver, env Environment) []request { 226 var requests []request 227 for _, p := range paths { 228 log.WithFields("path", p).Trace("searching for path") 229 230 matches, err := resolver.FilesByPath(p) 231 if err != nil { 232 log.Warnf("unable to process path=%q: %+v", p, err) 233 continue 234 } 235 requests = append(requests, makeRequests(parser, matches)...) 236 } 237 return requests 238 }, 239 ) 240 return c 241 } 242 243 func makeRequests(parser Parser, locations []file.Location) []request { 244 var requests []request 245 for _, l := range locations { 246 requests = append(requests, request{ 247 Location: l, 248 Parser: parser, 249 }) 250 } 251 return requests 252 } 253 254 // NewCataloger if provided path-to-parser-function and glob-to-parser-function lookups creates a Cataloger 255 func NewCataloger(upstreamCataloger string) *Cataloger { 256 return &Cataloger{ 257 upstreamCataloger: upstreamCataloger, 258 } 259 } 260 261 func NewGroupedCataloger(upstreamCataloger string) *GroupedCataloger { 262 return &GroupedCataloger{ 263 upstreamCataloger: upstreamCataloger, 264 } 265 } 266 267 // Name returns a string that uniquely describes the upstream cataloger that this Generic Cataloger represents. 268 func (c *Cataloger) Name() string { 269 return c.upstreamCataloger 270 } 271 272 // Catalog is given an object to resolve file references and content, this function returns any discovered Packages after analyzing the catalog source. 
func (c *Cataloger) Catalog(resolver file.Resolver) ([]pkg.Package, []artifact.Relationship, error) {
	var packages []pkg.Package
	var relationships []artifact.Relationship

	logger := log.Nested("cataloger", c.upstreamCataloger)

	env := Environment{
		// TODO: consider passing into the cataloger, this would affect the cataloger interface (and all implementations). This can be deferred until later.
		LinuxRelease: linux.IdentifyRelease(resolver),
	}

	// each request is processed independently; per-file failures are logged and skipped
	// so one bad file does not abort the whole catalog pass
	for _, req := range c.selectFiles(resolver) {
		location, parser := req.Location, req.Parser

		log.WithFields("path", location.RealPath).Trace("parsing file contents")

		contentReader, err := resolver.FileContentsByLocation(location)
		if err != nil {
			logger.WithFields("location", location.RealPath, "error", err).Warn("unable to fetch contents")
			continue
		}
		discoveredPackages, discoveredRelationships, err := parser(resolver, &env, file.NewLocationReadCloser(location, contentReader))
		// close the reader regardless of parser success to avoid leaking file handles
		internal.CloseAndLogError(contentReader, location.VirtualPath)
		if err != nil {
			logger.WithFields("location", location.RealPath, "error", err).Warnf("cataloger failed")
			continue
		}

		for _, p := range discoveredPackages {
			// attribute each discovered package to this cataloger
			p.FoundBy = c.upstreamCataloger
			packages = append(packages, p)
		}

		relationships = append(relationships, discoveredRelationships...)
	}
	return packages, relationships, nil
}

// selectFiles takes a set of file trees and resolves and file references of interest for future cataloging
func (c *Cataloger) selectFiles(resolver file.Resolver) []request {
	var requests []request
	// run every registered processor (glob/path/mimetype based) and concatenate their requests
	for _, proc := range c.processor {
		requests = append(requests, proc(resolver, Environment{})...)
	}
	return requests
}