github.com/anchore/syft@v1.38.2/syft/pkg/cataloger/generic/cataloger.go (about) 1 package generic 2 3 import ( 4 "context" 5 6 "github.com/anchore/go-logger" 7 "github.com/anchore/go-sync" 8 "github.com/anchore/syft/internal" 9 "github.com/anchore/syft/internal/log" 10 "github.com/anchore/syft/internal/unknown" 11 "github.com/anchore/syft/syft/artifact" 12 "github.com/anchore/syft/syft/cataloging" 13 "github.com/anchore/syft/syft/file" 14 "github.com/anchore/syft/syft/linux" 15 "github.com/anchore/syft/syft/pkg" 16 ) 17 18 // Processor is a function that can filter or augment existing packages and relationships based on existing material. 19 type Processor func([]pkg.Package, []artifact.Relationship, error) ([]pkg.Package, []artifact.Relationship, error) 20 21 // ResolvingProcessor is a Processor with the additional behavior of being able to reference additional material from a file resolver. 22 type ResolvingProcessor func(context.Context, file.Resolver, []pkg.Package, []artifact.Relationship, error) ([]pkg.Package, []artifact.Relationship, error) 23 24 type requester func(resolver file.Resolver, env Environment) []request 25 26 type request struct { 27 file.Location 28 Parser 29 } 30 31 type processExecutor interface { 32 process(ctx context.Context, resolver file.Resolver, pkgs []pkg.Package, rels []artifact.Relationship, err error) ([]pkg.Package, []artifact.Relationship, error) 33 } 34 35 type processorWrapper struct { 36 Processor 37 } 38 39 func (p processorWrapper) process(_ context.Context, _ file.Resolver, pkgs []pkg.Package, rels []artifact.Relationship, err error) ([]pkg.Package, []artifact.Relationship, error) { 40 return p.Processor(pkgs, rels, err) 41 } 42 43 type resolvingProcessorWrapper struct { 44 ResolvingProcessor 45 } 46 47 func (p resolvingProcessorWrapper) process(ctx context.Context, resolver file.Resolver, pkgs []pkg.Package, rels []artifact.Relationship, err error) ([]pkg.Package, []artifact.Relationship, error) { 48 return p.ResolvingProcessor(ctx, resolver, pkgs, rels, err) 49 } 50 51 // Cataloger implements the Catalog interface and is responsible for dispatching the proper parser function for 52 // a given path or glob pattern. This is intended to be reusable across many package cataloger types. 53 type Cataloger struct { 54 processors []processExecutor 55 requesters []requester 56 checks []func() error 57 upstreamCataloger string 58 } 59 60 func (c *Cataloger) WithParserByGlobs(parser Parser, globs ...string) *Cataloger { 61 c.requesters = append(c.requesters, 62 func(resolver file.Resolver, _ Environment) []request { 63 var requests []request 64 for _, g := range globs { 65 log.WithFields("glob", g).Trace("searching for paths matching glob") 66 67 matches, err := resolver.FilesByGlob(g) 68 if err != nil { 69 log.Debugf("unable to process glob=%q: %+v", g, err) 70 continue 71 } 72 requests = append(requests, makeRequests(parser, matches)...) 73 } 74 return requests 75 }, 76 ) 77 return c 78 } 79 80 func (c *Cataloger) WithParserByMimeTypes(parser Parser, types ...string) *Cataloger { 81 c.requesters = append(c.requesters, 82 func(resolver file.Resolver, _ Environment) []request { 83 var requests []request 84 log.WithFields("mimetypes", types).Trace("searching for paths matching mimetype") 85 matches, err := resolver.FilesByMIMEType(types...) 86 if err != nil { 87 log.Debugf("unable to process mimetypes=%+v: %+v", types, err) 88 return nil 89 } 90 requests = append(requests, makeRequests(parser, matches)...) 91 return requests 92 }, 93 ) 94 return c 95 } 96 97 func (c *Cataloger) WithParserByPath(parser Parser, paths ...string) *Cataloger { 98 c.requesters = append(c.requesters, 99 func(resolver file.Resolver, _ Environment) []request { 100 var requests []request 101 for _, p := range paths { 102 log.WithFields("path", p).Trace("searching for path") 103 104 matches, err := resolver.FilesByPath(p) 105 if err != nil { 106 log.Debugf("unable to process path=%q: %+v", p, err) 107 continue 108 } 109 requests = append(requests, makeRequests(parser, matches)...) 110 } 111 return requests 112 }, 113 ) 114 return c 115 } 116 117 func (c *Cataloger) WithProcessors(processors ...Processor) *Cataloger { 118 for _, p := range processors { 119 c.processors = append(c.processors, processorWrapper{Processor: p}) 120 } 121 return c 122 } 123 124 func (c *Cataloger) WithResolvingProcessors(processors ...ResolvingProcessor) *Cataloger { 125 for _, p := range processors { 126 c.processors = append(c.processors, resolvingProcessorWrapper{ResolvingProcessor: p}) 127 } 128 return c 129 } 130 131 func (c *Cataloger) WithChecks(checks ...func() error) *Cataloger { 132 c.checks = append(c.checks, checks...) 133 return c 134 } 135 136 func makeRequests(parser Parser, locations []file.Location) []request { 137 var requests []request 138 for _, l := range locations { 139 requests = append(requests, request{ 140 Location: l, 141 Parser: parser, 142 }) 143 } 144 return requests 145 } 146 147 // NewCataloger if provided path-to-parser-function and glob-to-parser-function lookups creates a Cataloger 148 func NewCataloger(upstreamCataloger string) *Cataloger { 149 return &Cataloger{ 150 upstreamCataloger: upstreamCataloger, 151 } 152 } 153 154 // Name returns a string that uniquely describes the upstream cataloger that this Generic Cataloger represents. 155 func (c *Cataloger) Name() string { 156 return c.upstreamCataloger 157 } 158 159 // Catalog is given an object to resolve file references and content, this function returns any discovered Packages after analyzing the catalog source. 160 func (c *Cataloger) Catalog(ctx context.Context, resolver file.Resolver) ([]pkg.Package, []artifact.Relationship, error) { 161 for _, check := range c.checks { 162 if err := check(); err != nil { 163 return nil, nil, err 164 } 165 } 166 167 var packages []pkg.Package 168 var relationships []artifact.Relationship 169 170 lgr := log.Nested("cataloger", c.upstreamCataloger) 171 172 env := Environment{ 173 // TODO: consider passing into the cataloger, this would affect the cataloger interface (and all implementations). This can be deferred until later. 174 LinuxRelease: linux.IdentifyRelease(resolver), 175 } 176 177 type result struct { 178 pkgs []pkg.Package 179 rels []artifact.Relationship 180 } 181 errs := sync.Collect(&ctx, cataloging.ExecutorFile, sync.ToSeq(c.selectFiles(resolver)), func(req request) (result, error) { 182 location, parser := req.Location, req.Parser 183 184 log.WithFields("path", location.RealPath).Trace("parsing file contents") 185 186 discoveredPackages, discoveredRelationships, err := invokeParser(ctx, resolver, location, lgr, parser, &env) 187 if err != nil { 188 // parsers may return errors and valid packages / relationships 189 err = unknown.New(location, err) 190 } 191 return result{discoveredPackages, discoveredRelationships}, err 192 }, func(_ request, res result) { 193 for _, p := range res.pkgs { 194 p.FoundBy = c.upstreamCataloger 195 packages = append(packages, p) 196 } 197 relationships = append(relationships, res.rels...) 198 }) 199 return c.process(ctx, resolver, packages, relationships, errs) 200 } 201 202 func (c *Cataloger) process(ctx context.Context, resolver file.Resolver, pkgs []pkg.Package, rels []artifact.Relationship, err error) ([]pkg.Package, []artifact.Relationship, error) { 203 for _, p := range c.processors { 204 pkgs, rels, err = p.process(ctx, resolver, pkgs, rels, err) 205 } 206 return pkgs, rels, err 207 } 208 209 func invokeParser(ctx context.Context, resolver file.Resolver, location file.Location, logger logger.Logger, parser Parser, env *Environment) ([]pkg.Package, []artifact.Relationship, error) { 210 contentReader, err := resolver.FileContentsByLocation(location) 211 if err != nil { 212 logger.WithFields("location", location.RealPath, "error", err).Debug("unable to fetch contents") 213 return nil, nil, err 214 } 215 defer internal.CloseAndLogError(contentReader, location.AccessPath) 216 217 discoveredPackages, discoveredRelationships, err := parser(ctx, resolver, env, file.NewLocationReadCloser(location, contentReader)) 218 if err != nil { 219 // these errors are propagated up, and are likely to be coordinate errors 220 logger.WithFields("location", location.RealPath, "error", err).Trace("cataloger returned errors") 221 } 222 223 return discoveredPackages, discoveredRelationships, err 224 } 225 226 // selectFiles takes a set of file trees and resolves and file references of interest for future cataloging 227 func (c *Cataloger) selectFiles(resolver file.Resolver) []request { 228 var requests []request 229 for _, proc := range c.requesters { 230 requests = append(requests, proc(resolver, Environment{})...) 231 } 232 return requests 233 }