github.com/anchore/syft@v1.4.2-0.20240516191711-1bec1fc5d397/syft/pkg/cataloger/binary/classifier_cataloger.go (about) 1 /* 2 Package binary provides a concrete cataloger implementations for surfacing possible packages based on signatures found within binary files. 3 */ 4 package binary 5 6 import ( 7 "context" 8 "encoding/json" 9 10 "github.com/anchore/syft/internal/log" 11 "github.com/anchore/syft/syft/artifact" 12 "github.com/anchore/syft/syft/file" 13 "github.com/anchore/syft/syft/pkg" 14 ) 15 16 const catalogerName = "binary-classifier-cataloger" 17 18 type ClassifierCatalogerConfig struct { 19 Classifiers []Classifier `yaml:"classifiers" json:"classifiers" mapstructure:"classifiers"` 20 } 21 22 func DefaultClassifierCatalogerConfig() ClassifierCatalogerConfig { 23 return ClassifierCatalogerConfig{ 24 Classifiers: DefaultClassifiers(), 25 } 26 } 27 28 func NewClassifierCataloger(cfg ClassifierCatalogerConfig) pkg.Cataloger { 29 return &cataloger{ 30 classifiers: cfg.Classifiers, 31 } 32 } 33 34 func (cfg ClassifierCatalogerConfig) MarshalJSON() ([]byte, error) { 35 // only keep the class names 36 var names []string 37 for _, cls := range cfg.Classifiers { 38 names = append(names, cls.Class) 39 } 40 return json.Marshal(names) 41 } 42 43 // cataloger is the cataloger responsible for surfacing evidence of a very limited set of binary files, 44 // which have been identified by the classifiers. The cataloger is _NOT_ a place to catalog any and every 45 // binary, but rather the specific set that has been curated to be important, predominantly related to toolchain- 46 // related runtimes like Python, Go, Java, or Node. Some exceptions can be made for widely-used binaries such 47 // as busybox. 48 type cataloger struct { 49 classifiers []Classifier 50 } 51 52 // Name returns a string that uniquely describes the cataloger 53 func (c cataloger) Name() string { 54 return catalogerName 55 } 56 57 // Catalog is given an object to resolve file references and content, this function returns any discovered Packages 58 // after analyzing the catalog source. 59 func (c cataloger) Catalog(_ context.Context, resolver file.Resolver) ([]pkg.Package, []artifact.Relationship, error) { 60 var packages []pkg.Package 61 var relationships []artifact.Relationship 62 63 for _, cls := range c.classifiers { 64 log.WithFields("classifier", cls.Class).Trace("cataloging binaries") 65 newPkgs, err := catalog(resolver, cls) 66 if err != nil { 67 log.WithFields("error", err, "classifier", cls.Class).Warn("unable to catalog binary package: %w", err) 68 continue 69 } 70 newPackages: 71 for i := range newPkgs { 72 newPkg := &newPkgs[i] 73 for j := range packages { 74 p := &packages[j] 75 // consolidate identical packages found in different locations or by different classifiers 76 if packagesMatch(p, newPkg) { 77 mergePackages(p, newPkg) 78 continue newPackages 79 } 80 } 81 packages = append(packages, *newPkg) 82 } 83 } 84 85 return packages, relationships, nil 86 } 87 88 // mergePackages merges information from the extra package into the target package 89 func mergePackages(target *pkg.Package, extra *pkg.Package) { 90 // add the locations 91 target.Locations.Add(extra.Locations.ToSlice()...) 92 // update the metadata to indicate which classifiers were used 93 meta, _ := target.Metadata.(pkg.BinarySignature) 94 if m, ok := extra.Metadata.(pkg.BinarySignature); ok { 95 meta.Matches = append(meta.Matches, m.Matches...) 96 } 97 target.Metadata = meta 98 } 99 100 func catalog(resolver file.Resolver, cls Classifier) (packages []pkg.Package, err error) { 101 locations, err := resolver.FilesByGlob(cls.FileGlob) 102 if err != nil { 103 return nil, err 104 } 105 for _, location := range locations { 106 pkgs, err := cls.EvidenceMatcher(resolver, cls, location) 107 if err != nil { 108 return nil, err 109 } 110 packages = append(packages, pkgs...) 111 } 112 return packages, nil 113 } 114 115 // packagesMatch returns true if the binary packages "match" based on basic criteria 116 func packagesMatch(p1 *pkg.Package, p2 *pkg.Package) bool { 117 if p1.Name != p2.Name || 118 p1.Version != p2.Version || 119 p1.Language != p2.Language || 120 p1.Type != p2.Type { 121 return false 122 } 123 124 return true 125 }