github.com/anchore/syft@v1.38.2/syft/pkg/cataloger/generic/cataloger.go (about)

     1  package generic
     2  
     3  import (
     4  	"context"
     5  
     6  	"github.com/anchore/go-logger"
     7  	"github.com/anchore/go-sync"
     8  	"github.com/anchore/syft/internal"
     9  	"github.com/anchore/syft/internal/log"
    10  	"github.com/anchore/syft/internal/unknown"
    11  	"github.com/anchore/syft/syft/artifact"
    12  	"github.com/anchore/syft/syft/cataloging"
    13  	"github.com/anchore/syft/syft/file"
    14  	"github.com/anchore/syft/syft/linux"
    15  	"github.com/anchore/syft/syft/pkg"
    16  )
    17  
// Processor is a function that can filter or augment existing packages and relationships based on existing material.
// The error argument carries any upstream cataloging error; a processor may inspect, replace, or pass it through.
type Processor func([]pkg.Package, []artifact.Relationship, error) ([]pkg.Package, []artifact.Relationship, error)

// ResolvingProcessor is a Processor with the additional behavior of being able to reference additional material from a file resolver.
type ResolvingProcessor func(context.Context, file.Resolver, []pkg.Package, []artifact.Relationship, error) ([]pkg.Package, []artifact.Relationship, error)
    23  
// requester produces the set of parse requests (location + parser pairings) to execute against the given resolver.
type requester func(resolver file.Resolver, env Environment) []request

// request pairs a single file location with the Parser responsible for interpreting its contents.
type request struct {
	file.Location
	Parser
}
    30  
// processExecutor abstracts over Processor and ResolvingProcessor so both kinds can be applied
// uniformly after cataloging (see Cataloger.process).
type processExecutor interface {
	process(ctx context.Context, resolver file.Resolver, pkgs []pkg.Package, rels []artifact.Relationship, err error) ([]pkg.Package, []artifact.Relationship, error)
}
    34  
// processorWrapper adapts a plain Processor to the processExecutor interface.
type processorWrapper struct {
	Processor
}

// process delegates to the wrapped Processor, discarding the context and resolver since a plain Processor does not accept them.
func (p processorWrapper) process(_ context.Context, _ file.Resolver, pkgs []pkg.Package, rels []artifact.Relationship, err error) ([]pkg.Package, []artifact.Relationship, error) {
	return p.Processor(pkgs, rels, err)
}
    42  
// resolvingProcessorWrapper adapts a ResolvingProcessor to the processExecutor interface.
type resolvingProcessorWrapper struct {
	ResolvingProcessor
}

// process delegates to the wrapped ResolvingProcessor, forwarding the context and resolver unchanged.
func (p resolvingProcessorWrapper) process(ctx context.Context, resolver file.Resolver, pkgs []pkg.Package, rels []artifact.Relationship, err error) ([]pkg.Package, []artifact.Relationship, error) {
	return p.ResolvingProcessor(ctx, resolver, pkgs, rels, err)
}
    50  
// Cataloger implements the Catalog interface and is responsible for dispatching the proper parser function for
// a given path or glob pattern. This is intended to be reusable across many package cataloger types.
type Cataloger struct {
	processors        []processExecutor // post-cataloging hooks, applied in registration order by process()
	requesters        []requester       // produce the (location, parser) requests executed by Catalog()
	checks            []func() error    // preflight checks; the first failure aborts Catalog() before any parsing
	upstreamCataloger string            // name reported by Name() and stamped into each package's FoundBy
}
    59  
    60  func (c *Cataloger) WithParserByGlobs(parser Parser, globs ...string) *Cataloger {
    61  	c.requesters = append(c.requesters,
    62  		func(resolver file.Resolver, _ Environment) []request {
    63  			var requests []request
    64  			for _, g := range globs {
    65  				log.WithFields("glob", g).Trace("searching for paths matching glob")
    66  
    67  				matches, err := resolver.FilesByGlob(g)
    68  				if err != nil {
    69  					log.Debugf("unable to process glob=%q: %+v", g, err)
    70  					continue
    71  				}
    72  				requests = append(requests, makeRequests(parser, matches)...)
    73  			}
    74  			return requests
    75  		},
    76  	)
    77  	return c
    78  }
    79  
    80  func (c *Cataloger) WithParserByMimeTypes(parser Parser, types ...string) *Cataloger {
    81  	c.requesters = append(c.requesters,
    82  		func(resolver file.Resolver, _ Environment) []request {
    83  			var requests []request
    84  			log.WithFields("mimetypes", types).Trace("searching for paths matching mimetype")
    85  			matches, err := resolver.FilesByMIMEType(types...)
    86  			if err != nil {
    87  				log.Debugf("unable to process mimetypes=%+v: %+v", types, err)
    88  				return nil
    89  			}
    90  			requests = append(requests, makeRequests(parser, matches)...)
    91  			return requests
    92  		},
    93  	)
    94  	return c
    95  }
    96  
    97  func (c *Cataloger) WithParserByPath(parser Parser, paths ...string) *Cataloger {
    98  	c.requesters = append(c.requesters,
    99  		func(resolver file.Resolver, _ Environment) []request {
   100  			var requests []request
   101  			for _, p := range paths {
   102  				log.WithFields("path", p).Trace("searching for path")
   103  
   104  				matches, err := resolver.FilesByPath(p)
   105  				if err != nil {
   106  					log.Debugf("unable to process path=%q: %+v", p, err)
   107  					continue
   108  				}
   109  				requests = append(requests, makeRequests(parser, matches)...)
   110  			}
   111  			return requests
   112  		},
   113  	)
   114  	return c
   115  }
   116  
   117  func (c *Cataloger) WithProcessors(processors ...Processor) *Cataloger {
   118  	for _, p := range processors {
   119  		c.processors = append(c.processors, processorWrapper{Processor: p})
   120  	}
   121  	return c
   122  }
   123  
   124  func (c *Cataloger) WithResolvingProcessors(processors ...ResolvingProcessor) *Cataloger {
   125  	for _, p := range processors {
   126  		c.processors = append(c.processors, resolvingProcessorWrapper{ResolvingProcessor: p})
   127  	}
   128  	return c
   129  }
   130  
   131  func (c *Cataloger) WithChecks(checks ...func() error) *Cataloger {
   132  	c.checks = append(c.checks, checks...)
   133  	return c
   134  }
   135  
   136  func makeRequests(parser Parser, locations []file.Location) []request {
   137  	var requests []request
   138  	for _, l := range locations {
   139  		requests = append(requests, request{
   140  			Location: l,
   141  			Parser:   parser,
   142  		})
   143  	}
   144  	return requests
   145  }
   146  
   147  // NewCataloger if provided path-to-parser-function and glob-to-parser-function lookups creates a Cataloger
   148  func NewCataloger(upstreamCataloger string) *Cataloger {
   149  	return &Cataloger{
   150  		upstreamCataloger: upstreamCataloger,
   151  	}
   152  }
   153  
// Name returns a string that uniquely describes the upstream cataloger that this Generic Cataloger represents.
// This is the same value stamped into each discovered package's FoundBy field during Catalog.
func (c *Cataloger) Name() string {
	return c.upstreamCataloger
}
   158  
// Catalog is given an object to resolve file references and content, this function returns any discovered Packages after analyzing the catalog source.
func (c *Cataloger) Catalog(ctx context.Context, resolver file.Resolver) ([]pkg.Package, []artifact.Relationship, error) {
	// run all registered preflight checks; the first failure aborts cataloging entirely
	for _, check := range c.checks {
		if err := check(); err != nil {
			return nil, nil, err
		}
	}

	var packages []pkg.Package
	var relationships []artifact.Relationship

	lgr := log.Nested("cataloger", c.upstreamCataloger)

	env := Environment{
		// TODO: consider passing into the cataloger, this would affect the cataloger interface (and all implementations). This can be deferred until later.
		LinuxRelease: linux.IdentifyRelease(resolver),
	}

	// result carries one file's parse output from the worker function to the collector callback below
	type result struct {
		pkgs []pkg.Package
		rels []artifact.Relationship
	}
	// fan out parsing of every selected file. NOTE(review): the second (collector) callback appends to
	// packages/relationships without a lock, which assumes sync.Collect invokes it serially — confirm
	// against the go-sync Collect contract.
	errs := sync.Collect(&ctx, cataloging.ExecutorFile, sync.ToSeq(c.selectFiles(resolver)), func(req request) (result, error) {
		location, parser := req.Location, req.Parser

		log.WithFields("path", location.RealPath).Trace("parsing file contents")

		discoveredPackages, discoveredRelationships, err := invokeParser(ctx, resolver, location, lgr, parser, &env)
		if err != nil {
			// parsers may return errors and valid packages / relationships
			err = unknown.New(location, err)
		}
		return result{discoveredPackages, discoveredRelationships}, err
	}, func(_ request, res result) {
		// stamp each package with the owning cataloger name before accumulating
		for _, p := range res.pkgs {
			p.FoundBy = c.upstreamCataloger
			packages = append(packages, p)
		}
		relationships = append(relationships, res.rels...)
	})
	// run registered processors over the accumulated results; errs (possibly nil) is threaded through
	return c.process(ctx, resolver, packages, relationships, errs)
}
   201  
   202  func (c *Cataloger) process(ctx context.Context, resolver file.Resolver, pkgs []pkg.Package, rels []artifact.Relationship, err error) ([]pkg.Package, []artifact.Relationship, error) {
   203  	for _, p := range c.processors {
   204  		pkgs, rels, err = p.process(ctx, resolver, pkgs, rels, err)
   205  	}
   206  	return pkgs, rels, err
   207  }
   208  
   209  func invokeParser(ctx context.Context, resolver file.Resolver, location file.Location, logger logger.Logger, parser Parser, env *Environment) ([]pkg.Package, []artifact.Relationship, error) {
   210  	contentReader, err := resolver.FileContentsByLocation(location)
   211  	if err != nil {
   212  		logger.WithFields("location", location.RealPath, "error", err).Debug("unable to fetch contents")
   213  		return nil, nil, err
   214  	}
   215  	defer internal.CloseAndLogError(contentReader, location.AccessPath)
   216  
   217  	discoveredPackages, discoveredRelationships, err := parser(ctx, resolver, env, file.NewLocationReadCloser(location, contentReader))
   218  	if err != nil {
   219  		// these errors are propagated up, and are likely to be coordinate errors
   220  		logger.WithFields("location", location.RealPath, "error", err).Trace("cataloger returned errors")
   221  	}
   222  
   223  	return discoveredPackages, discoveredRelationships, err
   224  }
   225  
   226  // selectFiles takes a set of file trees and resolves and file references of interest for future cataloging
   227  func (c *Cataloger) selectFiles(resolver file.Resolver) []request {
   228  	var requests []request
   229  	for _, proc := range c.requesters {
   230  		requests = append(requests, proc(resolver, Environment{})...)
   231  	}
   232  	return requests
   233  }