github.com/anchore/syft@v1.4.2-0.20240516191711-1bec1fc5d397/syft/pkg/cataloger/binary/classifier.go (about)

     1  package binary
     2  
     3  import (
     4  	"bytes"
     5  	"debug/elf"
     6  	"debug/macho"
     7  	"debug/pe"
     8  	"encoding/json"
     9  	"fmt"
    10  	"io"
    11  	"regexp"
    12  	"strings"
    13  	"text/template"
    14  
    15  	"github.com/anchore/packageurl-go"
    16  	"github.com/anchore/syft/internal"
    17  	"github.com/anchore/syft/internal/log"
    18  	"github.com/anchore/syft/syft/cpe"
    19  	"github.com/anchore/syft/syft/file"
    20  	"github.com/anchore/syft/syft/pkg"
    21  )
    22  
// Classifier is a generic package classifier that can be used to match a package definition
// to a file that meets the given content criteria of the EvidenceMatcher.
type Classifier struct {
	// Class identifies this classifier; it is the value recorded as the Classifier of a
	// pkg.ClassifierMatch when a match is attributed to it.
	Class string `json:"class"`

	// FileGlob is a selector to narrow down file inspection using the **/glob* syntax
	FileGlob string `json:"fileGlob"`

	// EvidenceMatcher is what will be used to match against the file in the source
	// location. If the matcher returns a package, the file will be considered a candidate.
	// It is a function value and is excluded from the JSON form.
	EvidenceMatcher EvidenceMatcher `json:"-"`

	// Information below is used to specify the Package information when returned

	// Package is the name to use for the package
	Package string `json:"package"`

	// PURL is the Package URL to use when generating a package
	PURL packageurl.PackageURL `json:"purl"`

	// CPEs are the specific CPEs we want to include for this binary with updated version information
	CPEs []cpe.CPE `json:"cpes"`
}
    46  
    47  func (cfg Classifier) MarshalJSON() ([]byte, error) {
    48  	type marshalled struct {
    49  		Class    string   `json:"class"`
    50  		FileGlob string   `json:"fileGlob"`
    51  		Package  string   `json:"package"`
    52  		PURL     string   `json:"purl"`
    53  		CPEs     []string `json:"cpes"`
    54  	}
    55  
    56  	var marshalledCPEs []string
    57  	for _, c := range cfg.CPEs {
    58  		marshalledCPEs = append(marshalledCPEs, c.Attributes.BindToFmtString())
    59  	}
    60  
    61  	m := marshalled{
    62  		Class:    cfg.Class,
    63  		FileGlob: cfg.FileGlob,
    64  		Package:  cfg.Package,
    65  		PURL:     cfg.PURL.String(),
    66  		CPEs:     marshalledCPEs,
    67  	}
    68  
    69  	return json.Marshal(m)
    70  }
    71  
// EvidenceMatcher is a function called to catalog Packages that match some sort of evidence.
// It receives the resolver used to read file contents, the classifier being evaluated and the
// location of the candidate file. A nil package slice together with a nil error means the
// evidence was not found; evidenceMatchers relies on this to fall through to the next matcher.
type EvidenceMatcher func(resolver file.Resolver, classifier Classifier, location file.Location) ([]pkg.Package, error)
    74  
    75  func evidenceMatchers(matchers ...EvidenceMatcher) EvidenceMatcher {
    76  	return func(resolver file.Resolver, classifier Classifier, location file.Location) ([]pkg.Package, error) {
    77  		for _, matcher := range matchers {
    78  			match, err := matcher(resolver, classifier, location)
    79  			if err != nil {
    80  				return nil, err
    81  			}
    82  			if match != nil {
    83  				return match, nil
    84  			}
    85  		}
    86  		return nil, nil
    87  	}
    88  }
    89  
    90  func fileNameTemplateVersionMatcher(fileNamePattern string, contentTemplate string) EvidenceMatcher {
    91  	pat := regexp.MustCompile(fileNamePattern)
    92  	return func(resolver file.Resolver, classifier Classifier, location file.Location) ([]pkg.Package, error) {
    93  		if !pat.MatchString(location.RealPath) {
    94  			return nil, nil
    95  		}
    96  
    97  		filepathNamedGroupValues := internal.MatchNamedCaptureGroups(pat, location.RealPath)
    98  
    99  		// versions like 3.5 should not match any character, but explicit dot
   100  		for k, v := range filepathNamedGroupValues {
   101  			filepathNamedGroupValues[k] = strings.ReplaceAll(v, ".", "\\.")
   102  		}
   103  
   104  		tmpl, err := template.New("").Parse(contentTemplate)
   105  		if err != nil {
   106  			return nil, fmt.Errorf("unable to parse classifier template=%q : %w", contentTemplate, err)
   107  		}
   108  
   109  		patternBuf := &bytes.Buffer{}
   110  		err = tmpl.Execute(patternBuf, filepathNamedGroupValues)
   111  		if err != nil {
   112  			return nil, fmt.Errorf("unable to render template: %w", err)
   113  		}
   114  
   115  		tmplPattern, err := regexp.Compile(patternBuf.String())
   116  		if err != nil {
   117  			return nil, fmt.Errorf("unable to compile rendered regex=%q: %w", patternBuf.String(), err)
   118  		}
   119  
   120  		contents, err := getContents(resolver, location)
   121  		if err != nil {
   122  			return nil, fmt.Errorf("unable to get read contents for file: %w", err)
   123  		}
   124  
   125  		matchMetadata := internal.MatchNamedCaptureGroups(tmplPattern, string(contents))
   126  
   127  		p := newClassifierPackage(classifier, location, matchMetadata)
   128  		if p == nil {
   129  			return nil, nil
   130  		}
   131  
   132  		return []pkg.Package{*p}, nil
   133  	}
   134  }
   135  
   136  func FileContentsVersionMatcher(pattern string) EvidenceMatcher {
   137  	pat := regexp.MustCompile(pattern)
   138  	return func(resolver file.Resolver, classifier Classifier, location file.Location) ([]pkg.Package, error) {
   139  		contents, err := getContents(resolver, location)
   140  		if err != nil {
   141  			return nil, fmt.Errorf("unable to get read contents for file: %w", err)
   142  		}
   143  
   144  		matchMetadata := internal.MatchNamedCaptureGroups(pat, string(contents))
   145  
   146  		p := newClassifierPackage(classifier, location, matchMetadata)
   147  		if p == nil {
   148  			return nil, nil
   149  		}
   150  
   151  		return []pkg.Package{*p}, nil
   152  	}
   153  }
   154  
   155  // matchExcluding tests the provided regular expressions against the file, and if matched, DOES NOT return
   156  // anything that the matcher would otherwise return
   157  func matchExcluding(matcher EvidenceMatcher, contentPatternsToExclude ...string) EvidenceMatcher {
   158  	var nonMatchPatterns []*regexp.Regexp
   159  	for _, p := range contentPatternsToExclude {
   160  		nonMatchPatterns = append(nonMatchPatterns, regexp.MustCompile(p))
   161  	}
   162  	return func(resolver file.Resolver, classifier Classifier, location file.Location) ([]pkg.Package, error) {
   163  		contents, err := getContents(resolver, location)
   164  		if err != nil {
   165  			return nil, fmt.Errorf("unable to get read contents for file: %w", err)
   166  		}
   167  		for _, nonMatch := range nonMatchPatterns {
   168  			if nonMatch.Match(contents) {
   169  				return nil, nil
   170  			}
   171  		}
   172  		return matcher(resolver, classifier, location)
   173  	}
   174  }
   175  
   176  //nolint:gocognit
   177  func sharedLibraryLookup(sharedLibraryPattern string, sharedLibraryMatcher EvidenceMatcher) EvidenceMatcher {
   178  	pat := regexp.MustCompile(sharedLibraryPattern)
   179  	return func(resolver file.Resolver, classifier Classifier, location file.Location) (packages []pkg.Package, _ error) {
   180  		libs, err := sharedLibraries(resolver, location)
   181  		if err != nil {
   182  			return nil, err
   183  		}
   184  		for _, lib := range libs {
   185  			if !pat.MatchString(lib) {
   186  				continue
   187  			}
   188  
   189  			locations, err := resolver.FilesByGlob("**/" + lib)
   190  			if err != nil {
   191  				return nil, err
   192  			}
   193  			for _, libraryLocation := range locations {
   194  				pkgs, err := sharedLibraryMatcher(resolver, classifier, libraryLocation)
   195  				if err != nil {
   196  					return nil, err
   197  				}
   198  				for _, p := range pkgs {
   199  					// set the source binary as the first location
   200  					locationSet := file.NewLocationSet(location)
   201  					locationSet.Add(p.Locations.ToSlice()...)
   202  					p.Locations = locationSet
   203  					meta, _ := p.Metadata.(pkg.BinarySignature)
   204  					p.Metadata = pkg.BinarySignature{
   205  						Matches: append([]pkg.ClassifierMatch{
   206  							{
   207  								Classifier: classifier.Class,
   208  								Location:   location,
   209  							},
   210  						}, meta.Matches...),
   211  					}
   212  					packages = append(packages, p)
   213  				}
   214  			}
   215  		}
   216  		return packages, nil
   217  	}
   218  }
   219  
   220  func mustPURL(purl string) packageurl.PackageURL {
   221  	p, err := packageurl.FromString(purl)
   222  	if err != nil {
   223  		panic(fmt.Sprintf("invalid PURL: %s", p))
   224  	}
   225  	return p
   226  }
   227  
   228  func getContents(resolver file.Resolver, location file.Location) ([]byte, error) {
   229  	reader, err := resolver.FileContentsByLocation(location)
   230  	if err != nil {
   231  		return nil, err
   232  	}
   233  	defer internal.CloseAndLogError(reader, location.AccessPath)
   234  
   235  	// TODO: there may be room for improvement here, as this may use an excessive amount of memory. Alternate approach is to leverage a RuneReader.
   236  	contents, err := io.ReadAll(reader)
   237  	if err != nil {
   238  		return nil, fmt.Errorf("unable to get contents for file: %w", err)
   239  	}
   240  
   241  	return contents, nil
   242  }
   243  
   244  // singleCPE returns a []cpe.CPE with Source: Generated based on the cpe string or panics if the
   245  // cpe string cannot be parsed into valid CPE Attributes
   246  func singleCPE(cpeString string) []cpe.CPE {
   247  	return []cpe.CPE{
   248  		cpe.Must(cpeString, cpe.GeneratedSource),
   249  	}
   250  }
   251  
   252  // sharedLibraries returns a list of all shared libraries found within a binary, currently
   253  // supporting: elf, macho, and windows pe
   254  func sharedLibraries(resolver file.Resolver, location file.Location) ([]string, error) {
   255  	contents, err := getContents(resolver, location)
   256  	if err != nil {
   257  		return nil, err
   258  	}
   259  
   260  	r := bytes.NewReader(contents)
   261  
   262  	e, _ := elf.NewFile(r)
   263  	if e != nil {
   264  		symbols, err := e.ImportedLibraries()
   265  		if err != nil {
   266  			log.Debugf("unable to read elf binary at: %s -- %s", location.RealPath, err)
   267  		}
   268  		return symbols, nil
   269  	}
   270  
   271  	m, _ := macho.NewFile(r)
   272  	if m != nil {
   273  		symbols, err := m.ImportedLibraries()
   274  		if err != nil {
   275  			log.Debugf("unable to read macho binary at: %s -- %s", location.RealPath, err)
   276  		}
   277  		return symbols, nil
   278  	}
   279  
   280  	p, _ := pe.NewFile(r)
   281  	if p != nil {
   282  		symbols, err := p.ImportedLibraries()
   283  		if err != nil {
   284  			log.Debugf("unable to read pe binary at: %s -- %s", location.RealPath, err)
   285  		}
   286  		return symbols, nil
   287  	}
   288  
   289  	return nil, nil
   290  }