github.com/anchore/syft@v1.38.2/syft/pkg/cataloger/binary/classifier_cataloger.go (about) 1 /* 2 Package binary provides a concrete cataloger implementations for surfacing possible packages based on signatures found within binary files. 3 */ 4 package binary 5 6 import ( 7 "context" 8 "encoding/json" 9 "fmt" 10 11 "github.com/anchore/syft/internal/log" 12 "github.com/anchore/syft/internal/unknown" 13 "github.com/anchore/syft/syft/artifact" 14 "github.com/anchore/syft/syft/file" 15 "github.com/anchore/syft/syft/pkg" 16 "github.com/anchore/syft/syft/pkg/cataloger/internal/binutils" 17 ) 18 19 const catalogerName = "binary-classifier-cataloger" 20 21 type ClassifierCatalogerConfig struct { 22 Classifiers []binutils.Classifier `yaml:"classifiers" json:"classifiers" mapstructure:"classifiers"` 23 } 24 25 func DefaultClassifierCatalogerConfig() ClassifierCatalogerConfig { 26 return ClassifierCatalogerConfig{ 27 Classifiers: DefaultClassifiers(), 28 } 29 } 30 31 func NewClassifierCataloger(cfg ClassifierCatalogerConfig) pkg.Cataloger { 32 return &cataloger{ 33 classifiers: cfg.Classifiers, 34 } 35 } 36 37 func (cfg ClassifierCatalogerConfig) MarshalJSON() ([]byte, error) { 38 // only keep the class names 39 var names []string 40 for _, cls := range cfg.Classifiers { 41 names = append(names, cls.Class) 42 } 43 return json.Marshal(names) 44 } 45 46 // cataloger is the cataloger responsible for surfacing evidence of a very limited set of binary files, 47 // which have been identified by the classifiers. The cataloger is _NOT_ a place to catalog any and every 48 // binary, but rather the specific set that has been curated to be important, predominantly related to toolchain- 49 // related runtimes like Python, Go, Java, or Node. Some exceptions can be made for widely-used binaries such 50 // as busybox. 51 type cataloger struct { 52 classifiers []binutils.Classifier 53 } 54 55 // Name returns a string that uniquely describes the cataloger 56 func (c cataloger) Name() string { 57 return catalogerName 58 } 59 60 // Catalog is given an object to resolve file references and content, this function returns any discovered Packages 61 // after analyzing the catalog source. 62 func (c cataloger) Catalog(_ context.Context, resolver file.Resolver) ([]pkg.Package, []artifact.Relationship, error) { 63 var packages []pkg.Package 64 var relationships []artifact.Relationship 65 var errs error 66 67 for _, cls := range c.classifiers { 68 log.WithFields("classifier", cls.Class).Trace("cataloging binaries") 69 newPkgs, err := catalog(resolver, cls) 70 if err != nil { 71 log.WithFields("error", err, "classifier", cls.Class).Debugf("unable to catalog binary package: %v", err) 72 errs = unknown.Join(errs, fmt.Errorf("%s: %w", cls.Class, err)) 73 continue 74 } 75 newPackages: 76 for i := range newPkgs { 77 newPkg := &newPkgs[i] 78 purlType := pkg.TypeFromPURL(newPkg.PURL) 79 // for certain results, such as hashicorp vault we are returning a golang PURL, so we can use Golang package type, 80 // despite not having the known metadata, this should result in downstream grype matching to use the golang matcher 81 if purlType != pkg.UnknownPkg { 82 newPkg.Type = purlType 83 } 84 for j := range packages { 85 p := &packages[j] 86 // consolidate identical packages found in different locations or by different classifiers 87 if packagesMatch(p, newPkg) { 88 mergePackages(p, newPkg) 89 continue newPackages 90 } 91 } 92 packages = append(packages, *newPkg) 93 } 94 } 95 96 return packages, relationships, errs 97 } 98 99 // mergePackages merges information from the extra package into the target package 100 func mergePackages(target *pkg.Package, extra *pkg.Package) { 101 if extra.Type != pkg.BinaryPkg && target.Type == pkg.BinaryPkg { 102 target.Type = extra.Type 103 } 104 // add the locations 105 target.Locations.Add(extra.Locations.ToSlice()...) 106 // update the metadata to indicate which classifiers were used 107 meta, _ := target.Metadata.(pkg.BinarySignature) 108 if m, ok := extra.Metadata.(pkg.BinarySignature); ok { 109 meta.Matches = append(meta.Matches, m.Matches...) 110 } 111 target.Metadata = meta 112 } 113 114 func catalog(resolver file.Resolver, cls binutils.Classifier) (packages []pkg.Package, err error) { 115 var errs error 116 locations, err := resolver.FilesByGlob(cls.FileGlob) 117 if err != nil { 118 err = unknown.ProcessPathErrors(err) // convert any file.Resolver path errors to unknowns with locations 119 return nil, err 120 } 121 for _, location := range locations { 122 pkgs, err := cls.EvidenceMatcher(cls, binutils.MatcherContext{Resolver: resolver, Location: location}) 123 if err != nil { 124 errs = unknown.Append(errs, location, err) 125 continue 126 } 127 packages = append(packages, pkgs...) 128 } 129 return packages, errs 130 } 131 132 // packagesMatch returns true if the binary packages "match" based on basic criteria 133 func packagesMatch(p1 *pkg.Package, p2 *pkg.Package) bool { 134 if p1.Name != p2.Name || 135 p1.Version != p2.Version || 136 p1.Language != p2.Language || 137 p1.Type != p2.Type { 138 return false 139 } 140 141 return true 142 }