github.com/anchore/syft@v1.4.2-0.20240516191711-1bec1fc5d397/syft/pkg/cataloger/generic/cataloger.go (about)

     1  package generic
     2  
     3  import (
     4  	"context"
     5  
     6  	"github.com/anchore/go-logger"
     7  	"github.com/anchore/syft/internal"
     8  	"github.com/anchore/syft/internal/log"
     9  	"github.com/anchore/syft/syft/artifact"
    10  	"github.com/anchore/syft/syft/file"
    11  	"github.com/anchore/syft/syft/linux"
    12  	"github.com/anchore/syft/syft/pkg"
    13  )
    14  
    15  type Processor func([]pkg.Package, []artifact.Relationship, error) ([]pkg.Package, []artifact.Relationship, error)
    16  
// requester is a file-selection function: given a resolver (and an environment) it returns one request
// for every file that should be handed to a parser.
type requester func(resolver file.Resolver, env Environment) []request
    18  
// request pairs a file location with the parser that should be run against that file.
type request struct {
	file.Location // the file to be parsed
	Parser        // the parser to invoke on the file
}
    23  
// Cataloger implements the Catalog interface and is responsible for dispatching the proper parser function for
// a given path, glob pattern, or MIME type. This is intended to be reusable across many package cataloger types.
type Cataloger struct {
	processors        []Processor // hooks applied in order to the results gathered by Catalog
	requesters        []requester // file-selection functions registered through the WithParserBy* methods
	upstreamCataloger string      // name returned by Name and recorded as FoundBy on every discovered package
}
    31  
    32  func (c *Cataloger) WithParserByGlobs(parser Parser, globs ...string) *Cataloger {
    33  	c.requesters = append(c.requesters,
    34  		func(resolver file.Resolver, _ Environment) []request {
    35  			var requests []request
    36  			for _, g := range globs {
    37  				log.WithFields("glob", g).Trace("searching for paths matching glob")
    38  
    39  				matches, err := resolver.FilesByGlob(g)
    40  				if err != nil {
    41  					log.Warnf("unable to process glob=%q: %+v", g, err)
    42  					continue
    43  				}
    44  				requests = append(requests, makeRequests(parser, matches)...)
    45  			}
    46  			return requests
    47  		},
    48  	)
    49  	return c
    50  }
    51  
    52  func (c *Cataloger) WithParserByMimeTypes(parser Parser, types ...string) *Cataloger {
    53  	c.requesters = append(c.requesters,
    54  		func(resolver file.Resolver, _ Environment) []request {
    55  			var requests []request
    56  			log.WithFields("mimetypes", types).Trace("searching for paths matching mimetype")
    57  			matches, err := resolver.FilesByMIMEType(types...)
    58  			if err != nil {
    59  				log.Warnf("unable to process mimetypes=%+v: %+v", types, err)
    60  				return nil
    61  			}
    62  			requests = append(requests, makeRequests(parser, matches)...)
    63  			return requests
    64  		},
    65  	)
    66  	return c
    67  }
    68  
    69  func (c *Cataloger) WithParserByPath(parser Parser, paths ...string) *Cataloger {
    70  	c.requesters = append(c.requesters,
    71  		func(resolver file.Resolver, _ Environment) []request {
    72  			var requests []request
    73  			for _, p := range paths {
    74  				log.WithFields("path", p).Trace("searching for path")
    75  
    76  				matches, err := resolver.FilesByPath(p)
    77  				if err != nil {
    78  					log.Warnf("unable to process path=%q: %+v", p, err)
    79  					continue
    80  				}
    81  				requests = append(requests, makeRequests(parser, matches)...)
    82  			}
    83  			return requests
    84  		},
    85  	)
    86  	return c
    87  }
    88  
    89  func (c *Cataloger) WithProcessors(processors ...Processor) *Cataloger {
    90  	c.processors = append(c.processors, processors...)
    91  	return c
    92  }
    93  
    94  func makeRequests(parser Parser, locations []file.Location) []request {
    95  	var requests []request
    96  	for _, l := range locations {
    97  		requests = append(requests, request{
    98  			Location: l,
    99  			Parser:   parser,
   100  		})
   101  	}
   102  	return requests
   103  }
   104  
   105  // NewCataloger if provided path-to-parser-function and glob-to-parser-function lookups creates a Cataloger
   106  func NewCataloger(upstreamCataloger string) *Cataloger {
   107  	return &Cataloger{
   108  		upstreamCataloger: upstreamCataloger,
   109  	}
   110  }
   111  
// Name returns the name of the upstream cataloger that this generic Cataloger represents, i.e. the value
// that was passed to NewCataloger.
func (c *Cataloger) Name() string {
	return c.upstreamCataloger
}
   116  
   117  // Catalog is given an object to resolve file references and content, this function returns any discovered Packages after analyzing the catalog source.
   118  func (c *Cataloger) Catalog(ctx context.Context, resolver file.Resolver) ([]pkg.Package, []artifact.Relationship, error) {
   119  	var packages []pkg.Package
   120  	var relationships []artifact.Relationship
   121  
   122  	logger := log.Nested("cataloger", c.upstreamCataloger)
   123  
   124  	env := Environment{
   125  		// TODO: consider passing into the cataloger, this would affect the cataloger interface (and all implementations). This can be deferred until later.
   126  		LinuxRelease: linux.IdentifyRelease(resolver),
   127  	}
   128  
   129  	for _, req := range c.selectFiles(resolver) {
   130  		location, parser := req.Location, req.Parser
   131  
   132  		log.WithFields("path", location.RealPath).Trace("parsing file contents")
   133  
   134  		discoveredPackages, discoveredRelationships, err := invokeParser(ctx, resolver, location, logger, parser, &env)
   135  		if err != nil {
   136  			continue // logging is handled within invokeParser
   137  		}
   138  
   139  		for _, p := range discoveredPackages {
   140  			p.FoundBy = c.upstreamCataloger
   141  			packages = append(packages, p)
   142  		}
   143  
   144  		relationships = append(relationships, discoveredRelationships...)
   145  	}
   146  	return c.process(packages, relationships, nil)
   147  }
   148  
   149  func (c *Cataloger) process(pkgs []pkg.Package, rels []artifact.Relationship, err error) ([]pkg.Package, []artifact.Relationship, error) {
   150  	for _, proc := range c.processors {
   151  		pkgs, rels, err = proc(pkgs, rels, err)
   152  	}
   153  	return pkgs, rels, err
   154  }
   155  
   156  func invokeParser(ctx context.Context, resolver file.Resolver, location file.Location, logger logger.Logger, parser Parser, env *Environment) ([]pkg.Package, []artifact.Relationship, error) {
   157  	contentReader, err := resolver.FileContentsByLocation(location)
   158  	if err != nil {
   159  		logger.WithFields("location", location.RealPath, "error", err).Warn("unable to fetch contents")
   160  		return nil, nil, err
   161  	}
   162  	defer internal.CloseAndLogError(contentReader, location.AccessPath)
   163  
   164  	discoveredPackages, discoveredRelationships, err := parser(ctx, resolver, env, file.NewLocationReadCloser(location, contentReader))
   165  	if err != nil {
   166  		logger.WithFields("location", location.RealPath, "error", err).Warnf("cataloger failed")
   167  		return nil, nil, err
   168  	}
   169  
   170  	return discoveredPackages, discoveredRelationships, nil
   171  }
   172  
   173  // selectFiles takes a set of file trees and resolves and file references of interest for future cataloging
   174  func (c *Cataloger) selectFiles(resolver file.Resolver) []request {
   175  	var requests []request
   176  	for _, proc := range c.requesters {
   177  		requests = append(requests, proc(resolver, Environment{})...)
   178  	}
   179  	return requests
   180  }