github.com/noqcks/syft@v0.0.0-20230920222752-a9e2c4e288e5/syft/pkg/cataloger/generic/cataloger.go (about)

     1  package generic
     2  
     3  import (
     4  	"path/filepath"
     5  
     6  	"github.com/bmatcuk/doublestar/v4"
     7  
     8  	"github.com/anchore/syft/internal"
     9  	"github.com/anchore/syft/internal/log"
    10  	"github.com/anchore/syft/syft/artifact"
    11  	"github.com/anchore/syft/syft/file"
    12  	"github.com/anchore/syft/syft/linux"
    13  	"github.com/anchore/syft/syft/pkg"
    14  )
    15  
// processor is a function that selects (location, parser) pairs of interest from the given resolver.
type processor func(resolver file.Resolver, env Environment) []request

// groupedProcessor is like processor but yields requests that group multiple
// co-located files to be parsed together.
type groupedProcessor func(resolver file.Resolver, env Environment) []groupedRequest
    18  
// request pairs a single file location with the parser that should process it.
type request struct {
	file.Location
	Parser
}

// groupedRequest pairs a set of co-located file locations with a parser that
// consumes them together (e.g. package.json + package-lock.json).
type groupedRequest struct {
	Locations           []file.Location // all files in the group
	PrimaryFileLocation file.Location   // the location that matched the primary glob
	GroupedParser
}
    29  
// Cataloger implements the Catalog interface and is responsible for dispatching the proper parser function for
// a given path or glob pattern. This is intended to be reusable across many package cataloger types.
type Cataloger struct {
	processor         []processor // registered processors, run in registration order
	upstreamCataloger string      // name of the upstream cataloger this generic cataloger represents
}
    36  
// GroupedCataloger is a special case of Cataloger that will process files together
// this is needed for the case of package.json and package-lock.json files for example
type GroupedCataloger struct {
	groupedProcessor  []groupedProcessor // registered grouped processors, run in registration order
	upstreamCataloger string             // name of the upstream cataloger this grouped cataloger represents
}
    43  
// Name returns a string that uniquely describes the upstream cataloger that this GroupedCataloger represents.
func (c *GroupedCataloger) Name() string {
	return c.upstreamCataloger
}
    47  
    48  func isPrimaryFileGlobPresent(primaryFileGlob string, globs []string) bool {
    49  	for _, g := range globs {
    50  		if g == primaryFileGlob {
    51  			return true
    52  		}
    53  	}
    54  	return false
    55  }
    56  
    57  func generateGroupedProcessor(parser GroupedParser, primaryFileGlob string, globs []string) func(resolver file.Resolver, env Environment) []groupedRequest {
    58  	return func(resolver file.Resolver, env Environment) []groupedRequest {
    59  		var requests []groupedRequest
    60  		colocatedFiles := collectColocatedFiles(resolver, globs)
    61  
    62  		// Filter to only directories that contain all specified files
    63  		for _, files := range colocatedFiles {
    64  			allMatched, primaryFileLocation := isAllGlobsMatched(files, globs, primaryFileGlob)
    65  			if allMatched {
    66  				requests = append(requests, makeGroupedRequests(parser, files, primaryFileLocation))
    67  			}
    68  		}
    69  
    70  		return requests
    71  	}
    72  }
    73  
    74  func collectColocatedFiles(resolver file.Resolver, globs []string) map[string][]file.Location {
    75  	colocatedFiles := make(map[string][]file.Location)
    76  	for _, g := range globs {
    77  		log.WithFields("glob", g).Trace("searching for paths matching glob")
    78  		matches, err := resolver.FilesByGlob(g)
    79  		if err != nil {
    80  			log.Warnf("unable to process glob=%q: %+v", g, err)
    81  			continue
    82  		}
    83  		for _, match := range matches {
    84  			dir := filepath.Dir(match.RealPath)
    85  			colocatedFiles[dir] = append(colocatedFiles[dir], match)
    86  		}
    87  	}
    88  	return colocatedFiles
    89  }
    90  
    91  func isAllGlobsMatched(files []file.Location, globs []string, primaryFileGlob string) (bool, file.Location) {
    92  	globMatches := make(map[string]bool)
    93  	var primaryFileLocation file.Location
    94  
    95  	for _, g := range globs {
    96  		for _, file := range files {
    97  			if matched, _ := doublestar.PathMatch(g, file.RealPath); matched {
    98  				if g == primaryFileGlob {
    99  					primaryFileLocation = file
   100  				}
   101  				globMatches[g] = true
   102  				break
   103  			}
   104  		}
   105  	}
   106  
   107  	return len(globMatches) == len(globs), primaryFileLocation
   108  }
   109  
   110  // WithParserByGlobColocation is a special case of WithParserByGlob that will only match files that are colocated
   111  // with all of the provided globs. This is useful for cases where a package is defined by multiple files (e.g. package.json + package-lock.json).
   112  // This function will only match files that are colocated with all of the provided globs.
   113  func (c *GroupedCataloger) WithParserByGlobColocation(parser GroupedParser, primaryFileGlob string, globs []string) *GroupedCataloger {
   114  	if !isPrimaryFileGlobPresent(primaryFileGlob, globs) {
   115  		log.Warnf("primary file glob=%q not present in globs=%+v", primaryFileGlob, globs)
   116  		return c
   117  	}
   118  
   119  	c.groupedProcessor = append(c.groupedProcessor, generateGroupedProcessor(parser, primaryFileGlob, globs))
   120  	return c
   121  }
   122  
   123  func (c *Cataloger) WithParserByGlobs(parser Parser, globs ...string) *Cataloger {
   124  	c.processor = append(c.processor,
   125  		func(resolver file.Resolver, env Environment) []request {
   126  			var requests []request
   127  			for _, g := range globs {
   128  				log.WithFields("glob", g).Trace("searching for paths matching glob")
   129  
   130  				matches, err := resolver.FilesByGlob(g)
   131  				if err != nil {
   132  					log.Warnf("unable to process glob=%q: %+v", g, err)
   133  					continue
   134  				}
   135  				requests = append(requests, makeRequests(parser, matches)...)
   136  			}
   137  			return requests
   138  		},
   139  	)
   140  	return c
   141  }
   142  
   143  // selectFiles takes a set of file trees and resolves and file references of interest for future cataloging
   144  func (c *GroupedCataloger) selectFiles(resolver file.Resolver) []groupedRequest {
   145  	var requests []groupedRequest
   146  	for _, proc := range c.groupedProcessor {
   147  		requests = append(requests, proc(resolver, Environment{})...)
   148  	}
   149  	return requests
   150  }
   151  
// Catalog is given an object to resolve file references and content, this function returns any discovered Packages after analyzing the catalog source.
func (c *GroupedCataloger) Catalog(resolver file.Resolver) ([]pkg.Package, []artifact.Relationship, error) {
	var packages []pkg.Package
	var relationships []artifact.Relationship

	logger := log.Nested("cataloger", c.upstreamCataloger)

	env := Environment{
		// TODO: consider passing into the cataloger, this would affect the cataloger interface (and all implementations). This can be deferred until later.
		LinuxRelease: linux.IdentifyRelease(resolver),
	}

	for _, req := range c.selectFiles(resolver) {
		parser := req.GroupedParser
		var readClosers []file.LocationReadCloser

		// open every file in the group; failures are skipped (best-effort), so the
		// parser may receive fewer readers than the group has locations
		for _, location := range req.Locations {
			log.WithFields("path", location.RealPath).Trace("parsing file contents")
			contentReader, err := resolver.FileContentsByLocation(location)
			if err != nil {
				logger.WithFields("location", location.RealPath, "error", err).Warn("unable to fetch contents")
				continue
			}
			readClosers = append(readClosers, file.NewLocationReadCloser(location, contentReader))
		}

		// If your parser is expecting multiple file contents, ensure its signature reflects this change
		discoveredPackages, discoveredRelationships, err := parser(resolver, &env, readClosers)
		// close every reader before inspecting the parse result
		for _, rc := range readClosers {
			internal.CloseAndLogError(rc, rc.VirtualPath)
		}
		if err != nil {
			// a failed parse skips this group but does not abort the catalog run
			logger.WithFields("error", err).Warnf("cataloger failed")
			continue
		}

		// attribute each discovered package to this cataloger
		for _, p := range discoveredPackages {
			p.FoundBy = c.upstreamCataloger
			packages = append(packages, p)
		}

		relationships = append(relationships, discoveredRelationships...)
	}
	return packages, relationships, nil
}
   197  
   198  func makeGroupedRequests(parser GroupedParser, locations []file.Location, primaryFileLocation file.Location) groupedRequest {
   199  	return groupedRequest{
   200  		Locations:           locations,
   201  		PrimaryFileLocation: primaryFileLocation,
   202  		GroupedParser:       parser,
   203  	}
   204  }
   205  
   206  func (c *Cataloger) WithParserByMimeTypes(parser Parser, types ...string) *Cataloger {
   207  	c.processor = append(c.processor,
   208  		func(resolver file.Resolver, env Environment) []request {
   209  			var requests []request
   210  			log.WithFields("mimetypes", types).Trace("searching for paths matching mimetype")
   211  			matches, err := resolver.FilesByMIMEType(types...)
   212  			if err != nil {
   213  				log.Warnf("unable to process mimetypes=%+v: %+v", types, err)
   214  				return nil
   215  			}
   216  			requests = append(requests, makeRequests(parser, matches)...)
   217  			return requests
   218  		},
   219  	)
   220  	return c
   221  }
   222  
   223  func (c *Cataloger) WithParserByPath(parser Parser, paths ...string) *Cataloger {
   224  	c.processor = append(c.processor,
   225  		func(resolver file.Resolver, env Environment) []request {
   226  			var requests []request
   227  			for _, p := range paths {
   228  				log.WithFields("path", p).Trace("searching for path")
   229  
   230  				matches, err := resolver.FilesByPath(p)
   231  				if err != nil {
   232  					log.Warnf("unable to process path=%q: %+v", p, err)
   233  					continue
   234  				}
   235  				requests = append(requests, makeRequests(parser, matches)...)
   236  			}
   237  			return requests
   238  		},
   239  	)
   240  	return c
   241  }
   242  
   243  func makeRequests(parser Parser, locations []file.Location) []request {
   244  	var requests []request
   245  	for _, l := range locations {
   246  		requests = append(requests, request{
   247  			Location: l,
   248  			Parser:   parser,
   249  		})
   250  	}
   251  	return requests
   252  }
   253  
// NewCataloger returns a new Cataloger for the named upstream cataloger;
// parser functions are registered afterward via the WithParserBy* methods.
func NewCataloger(upstreamCataloger string) *Cataloger {
	return &Cataloger{
		upstreamCataloger: upstreamCataloger,
	}
}
   260  
// NewGroupedCataloger returns a new GroupedCataloger for the named upstream cataloger;
// grouped parser functions are registered afterward via WithParserByGlobColocation.
func NewGroupedCataloger(upstreamCataloger string) *GroupedCataloger {
	return &GroupedCataloger{
		upstreamCataloger: upstreamCataloger,
	}
}
   266  
// Name returns a string that uniquely describes the upstream cataloger that this Generic Cataloger represents.
func (c *Cataloger) Name() string {
	return c.upstreamCataloger
}
   271  
// Catalog is given an object to resolve file references and content, this function returns any discovered Packages after analyzing the catalog source.
func (c *Cataloger) Catalog(resolver file.Resolver) ([]pkg.Package, []artifact.Relationship, error) {
	var packages []pkg.Package
	var relationships []artifact.Relationship

	logger := log.Nested("cataloger", c.upstreamCataloger)

	env := Environment{
		// TODO: consider passing into the cataloger, this would affect the cataloger interface (and all implementations). This can be deferred until later.
		LinuxRelease: linux.IdentifyRelease(resolver),
	}

	for _, req := range c.selectFiles(resolver) {
		location, parser := req.Location, req.Parser

		log.WithFields("path", location.RealPath).Trace("parsing file contents")

		contentReader, err := resolver.FileContentsByLocation(location)
		if err != nil {
			// best-effort: skip files whose contents cannot be fetched
			logger.WithFields("location", location.RealPath, "error", err).Warn("unable to fetch contents")
			continue
		}
		discoveredPackages, discoveredRelationships, err := parser(resolver, &env, file.NewLocationReadCloser(location, contentReader))
		// close the reader before inspecting the parse result
		internal.CloseAndLogError(contentReader, location.VirtualPath)
		if err != nil {
			// a failed parse skips this file but does not abort the catalog run
			logger.WithFields("location", location.RealPath, "error", err).Warnf("cataloger failed")
			continue
		}

		// attribute each discovered package to this cataloger
		for _, p := range discoveredPackages {
			p.FoundBy = c.upstreamCataloger
			packages = append(packages, p)
		}

		relationships = append(relationships, discoveredRelationships...)
	}
	return packages, relationships, nil
}
   310  
   311  // selectFiles takes a set of file trees and resolves and file references of interest for future cataloging
   312  func (c *Cataloger) selectFiles(resolver file.Resolver) []request {
   313  	var requests []request
   314  	for _, proc := range c.processor {
   315  		requests = append(requests, proc(resolver, Environment{})...)
   316  	}
   317  	return requests
   318  }