github.com/anchore/syft@v1.38.2/syft/pkg/cataloger/binary/classifier_cataloger.go (about)

     1  /*
Package binary provides concrete cataloger implementations for surfacing possible packages based on signatures found within binary files.
     3  */
     4  package binary
     5  
     6  import (
     7  	"context"
     8  	"encoding/json"
     9  	"fmt"
    10  
    11  	"github.com/anchore/syft/internal/log"
    12  	"github.com/anchore/syft/internal/unknown"
    13  	"github.com/anchore/syft/syft/artifact"
    14  	"github.com/anchore/syft/syft/file"
    15  	"github.com/anchore/syft/syft/pkg"
    16  	"github.com/anchore/syft/syft/pkg/cataloger/internal/binutils"
    17  )
    18  
// catalogerName is the identifier returned by cataloger.Name.
const catalogerName = "binary-classifier-cataloger"
    20  
// ClassifierCatalogerConfig is the configuration accepted by NewClassifierCataloger.
type ClassifierCatalogerConfig struct {
	// Classifiers is the list of classifiers handed to the cataloger. Note that MarshalJSON on this
	// type emits only the class name of each entry.
	Classifiers []binutils.Classifier `yaml:"classifiers" json:"classifiers" mapstructure:"classifiers"`
}
    24  
    25  func DefaultClassifierCatalogerConfig() ClassifierCatalogerConfig {
    26  	return ClassifierCatalogerConfig{
    27  		Classifiers: DefaultClassifiers(),
    28  	}
    29  }
    30  
    31  func NewClassifierCataloger(cfg ClassifierCatalogerConfig) pkg.Cataloger {
    32  	return &cataloger{
    33  		classifiers: cfg.Classifiers,
    34  	}
    35  }
    36  
    37  func (cfg ClassifierCatalogerConfig) MarshalJSON() ([]byte, error) {
    38  	// only keep the class names
    39  	var names []string
    40  	for _, cls := range cfg.Classifiers {
    41  		names = append(names, cls.Class)
    42  	}
    43  	return json.Marshal(names)
    44  }
    45  
// cataloger is the cataloger responsible for surfacing evidence of a very limited set of binary files,
// which have been identified by the classifiers. The cataloger is _NOT_ a place to catalog any and every
// binary, but rather the specific set that has been curated to be important, predominantly related to toolchain-
// related runtimes like Python, Go, Java, or Node. Some exceptions can be made for widely-used binaries such
// as busybox.
type cataloger struct {
	// classifiers is the list of classifiers that Catalog iterates over, in order.
	classifiers []binutils.Classifier
}
    54  
    55  // Name returns a string that uniquely describes the cataloger
    56  func (c cataloger) Name() string {
    57  	return catalogerName
    58  }
    59  
    60  // Catalog is given an object to resolve file references and content, this function returns any discovered Packages
    61  // after analyzing the catalog source.
    62  func (c cataloger) Catalog(_ context.Context, resolver file.Resolver) ([]pkg.Package, []artifact.Relationship, error) {
    63  	var packages []pkg.Package
    64  	var relationships []artifact.Relationship
    65  	var errs error
    66  
    67  	for _, cls := range c.classifiers {
    68  		log.WithFields("classifier", cls.Class).Trace("cataloging binaries")
    69  		newPkgs, err := catalog(resolver, cls)
    70  		if err != nil {
    71  			log.WithFields("error", err, "classifier", cls.Class).Debugf("unable to catalog binary package: %v", err)
    72  			errs = unknown.Join(errs, fmt.Errorf("%s: %w", cls.Class, err))
    73  			continue
    74  		}
    75  	newPackages:
    76  		for i := range newPkgs {
    77  			newPkg := &newPkgs[i]
    78  			purlType := pkg.TypeFromPURL(newPkg.PURL)
    79  			// for certain results, such as hashicorp vault we are returning a golang PURL, so we can use Golang package type,
    80  			// despite not having the known metadata, this should result in downstream grype matching to use the golang matcher
    81  			if purlType != pkg.UnknownPkg {
    82  				newPkg.Type = purlType
    83  			}
    84  			for j := range packages {
    85  				p := &packages[j]
    86  				// consolidate identical packages found in different locations or by different classifiers
    87  				if packagesMatch(p, newPkg) {
    88  					mergePackages(p, newPkg)
    89  					continue newPackages
    90  				}
    91  			}
    92  			packages = append(packages, *newPkg)
    93  		}
    94  	}
    95  
    96  	return packages, relationships, errs
    97  }
    98  
    99  // mergePackages merges information from the extra package into the target package
   100  func mergePackages(target *pkg.Package, extra *pkg.Package) {
   101  	if extra.Type != pkg.BinaryPkg && target.Type == pkg.BinaryPkg {
   102  		target.Type = extra.Type
   103  	}
   104  	// add the locations
   105  	target.Locations.Add(extra.Locations.ToSlice()...)
   106  	// update the metadata to indicate which classifiers were used
   107  	meta, _ := target.Metadata.(pkg.BinarySignature)
   108  	if m, ok := extra.Metadata.(pkg.BinarySignature); ok {
   109  		meta.Matches = append(meta.Matches, m.Matches...)
   110  	}
   111  	target.Metadata = meta
   112  }
   113  
   114  func catalog(resolver file.Resolver, cls binutils.Classifier) (packages []pkg.Package, err error) {
   115  	var errs error
   116  	locations, err := resolver.FilesByGlob(cls.FileGlob)
   117  	if err != nil {
   118  		err = unknown.ProcessPathErrors(err) // convert any file.Resolver path errors to unknowns with locations
   119  		return nil, err
   120  	}
   121  	for _, location := range locations {
   122  		pkgs, err := cls.EvidenceMatcher(cls, binutils.MatcherContext{Resolver: resolver, Location: location})
   123  		if err != nil {
   124  			errs = unknown.Append(errs, location, err)
   125  			continue
   126  		}
   127  		packages = append(packages, pkgs...)
   128  	}
   129  	return packages, errs
   130  }
   131  
   132  // packagesMatch returns true if the binary packages "match" based on basic criteria
   133  func packagesMatch(p1 *pkg.Package, p2 *pkg.Package) bool {
   134  	if p1.Name != p2.Name ||
   135  		p1.Version != p2.Version ||
   136  		p1.Language != p2.Language ||
   137  		p1.Type != p2.Type {
   138  		return false
   139  	}
   140  
   141  	return true
   142  }