github.com/anchore/syft@v1.4.2-0.20240516191711-1bec1fc5d397/syft/pkg/cataloger/binary/classifier_cataloger.go (about)

     1  /*
     2  Package binary provides concrete cataloger implementations for surfacing possible packages based on signatures found within binary files.
     3  */
     4  package binary
     5  
     6  import (
     7  	"context"
     8  	"encoding/json"
     9  
    10  	"github.com/anchore/syft/internal/log"
    11  	"github.com/anchore/syft/syft/artifact"
    12  	"github.com/anchore/syft/syft/file"
    13  	"github.com/anchore/syft/syft/pkg"
    14  )
    15  
// catalogerName is the identifier returned by the cataloger's Name method.
const catalogerName = "binary-classifier-cataloger"
    17  
// ClassifierCatalogerConfig is the configuration accepted by NewClassifierCataloger.
type ClassifierCatalogerConfig struct {
	// Classifiers is the set of classifiers handed to the cataloger; each one is evaluated during Catalog.
	Classifiers []Classifier `yaml:"classifiers" json:"classifiers" mapstructure:"classifiers"`
}
    21  
    22  func DefaultClassifierCatalogerConfig() ClassifierCatalogerConfig {
    23  	return ClassifierCatalogerConfig{
    24  		Classifiers: DefaultClassifiers(),
    25  	}
    26  }
    27  
    28  func NewClassifierCataloger(cfg ClassifierCatalogerConfig) pkg.Cataloger {
    29  	return &cataloger{
    30  		classifiers: cfg.Classifiers,
    31  	}
    32  }
    33  
    34  func (cfg ClassifierCatalogerConfig) MarshalJSON() ([]byte, error) {
    35  	// only keep the class names
    36  	var names []string
    37  	for _, cls := range cfg.Classifiers {
    38  		names = append(names, cls.Class)
    39  	}
    40  	return json.Marshal(names)
    41  }
    42  
// cataloger is responsible for surfacing evidence of a very limited set of binary files: those
// identified by its classifiers. The cataloger is _NOT_ a place to catalog any and every binary,
// but rather the specific set that has been curated to be important, predominantly
// toolchain-related runtimes like Python, Go, Java, or Node. Some exceptions can be made for
// widely-used binaries such as busybox.
type cataloger struct {
	// classifiers are evaluated one after another, in slice order, by Catalog
	classifiers []Classifier
}
    51  
// Name returns the string that uniquely describes the cataloger (the catalogerName constant).
func (c cataloger) Name() string {
	return catalogerName
}
    56  
    57  // Catalog is given an object to resolve file references and content, this function returns any discovered Packages
    58  // after analyzing the catalog source.
    59  func (c cataloger) Catalog(_ context.Context, resolver file.Resolver) ([]pkg.Package, []artifact.Relationship, error) {
    60  	var packages []pkg.Package
    61  	var relationships []artifact.Relationship
    62  
    63  	for _, cls := range c.classifiers {
    64  		log.WithFields("classifier", cls.Class).Trace("cataloging binaries")
    65  		newPkgs, err := catalog(resolver, cls)
    66  		if err != nil {
    67  			log.WithFields("error", err, "classifier", cls.Class).Warn("unable to catalog binary package: %w", err)
    68  			continue
    69  		}
    70  	newPackages:
    71  		for i := range newPkgs {
    72  			newPkg := &newPkgs[i]
    73  			for j := range packages {
    74  				p := &packages[j]
    75  				// consolidate identical packages found in different locations or by different classifiers
    76  				if packagesMatch(p, newPkg) {
    77  					mergePackages(p, newPkg)
    78  					continue newPackages
    79  				}
    80  			}
    81  			packages = append(packages, *newPkg)
    82  		}
    83  	}
    84  
    85  	return packages, relationships, nil
    86  }
    87  
    88  // mergePackages merges information from the extra package into the target package
    89  func mergePackages(target *pkg.Package, extra *pkg.Package) {
    90  	// add the locations
    91  	target.Locations.Add(extra.Locations.ToSlice()...)
    92  	// update the metadata to indicate which classifiers were used
    93  	meta, _ := target.Metadata.(pkg.BinarySignature)
    94  	if m, ok := extra.Metadata.(pkg.BinarySignature); ok {
    95  		meta.Matches = append(meta.Matches, m.Matches...)
    96  	}
    97  	target.Metadata = meta
    98  }
    99  
   100  func catalog(resolver file.Resolver, cls Classifier) (packages []pkg.Package, err error) {
   101  	locations, err := resolver.FilesByGlob(cls.FileGlob)
   102  	if err != nil {
   103  		return nil, err
   104  	}
   105  	for _, location := range locations {
   106  		pkgs, err := cls.EvidenceMatcher(resolver, cls, location)
   107  		if err != nil {
   108  			return nil, err
   109  		}
   110  		packages = append(packages, pkgs...)
   111  	}
   112  	return packages, nil
   113  }
   114  
   115  // packagesMatch returns true if the binary packages "match" based on basic criteria
   116  func packagesMatch(p1 *pkg.Package, p2 *pkg.Package) bool {
   117  	if p1.Name != p2.Name ||
   118  		p1.Version != p2.Version ||
   119  		p1.Language != p2.Language ||
   120  		p1.Type != p2.Type {
   121  		return false
   122  	}
   123  
   124  	return true
   125  }