github.com/noqcks/syft@v0.0.0-20230920222752-a9e2c4e288e5/syft/pkg/cataloger/binary/classifier.go (about)

     1  package binary
     2  
     3  import (
     4  	"bytes"
     5  	"debug/elf"
     6  	"debug/macho"
     7  	"debug/pe"
     8  	"fmt"
     9  	"io"
    10  	"regexp"
    11  	"strings"
    12  	"text/template"
    13  
    14  	"github.com/anchore/packageurl-go"
    15  	"github.com/anchore/syft/internal"
    16  	"github.com/anchore/syft/internal/log"
    17  	"github.com/anchore/syft/syft/cpe"
    18  	"github.com/anchore/syft/syft/file"
    19  	"github.com/anchore/syft/syft/pkg"
    20  	"github.com/anchore/syft/syft/pkg/cataloger/internal/unionreader"
    21  )
    22  
    23  var emptyPURL = packageurl.PackageURL{}
    24  
    25  // classifier is a generic package classifier that can be used to match a package definition
    26  // to a file that meets the given content criteria of the evidenceMatcher.
    27  type classifier struct {
    28  	Class string
    29  
    30  	// FileGlob is a selector to narrow down file inspection using the **/glob* syntax
    31  	FileGlob string
    32  
    33  	// EvidenceMatcher is what will be used to match against the file in the source
    34  	// location. If the matcher returns a package, the file will be considered a candidate.
    35  	EvidenceMatcher evidenceMatcher
    36  
    37  	// Information below is used to specify the Package information when returned
    38  
    39  	// Package is the name to use for the package
    40  	Package string
    41  
    42  	// Language is the language to classify this package as
    43  	Language pkg.Language
    44  
    45  	// Type is the package type to use for the package
    46  	Type pkg.Type
    47  
    48  	// PURL is the Package URL to use when generating a package
    49  	PURL packageurl.PackageURL
    50  
    51  	// CPEs are the specific CPEs we want to include for this binary with updated version information
    52  	CPEs []cpe.CPE
    53  }
    54  
    55  // evidenceMatcher is a function called to catalog Packages that match some sort of evidence
    56  type evidenceMatcher func(resolver file.Resolver, classifier classifier, location file.Location) ([]pkg.Package, error)
    57  
    58  func evidenceMatchers(matchers ...evidenceMatcher) evidenceMatcher {
    59  	return func(resolver file.Resolver, classifier classifier, location file.Location) ([]pkg.Package, error) {
    60  		for _, matcher := range matchers {
    61  			match, err := matcher(resolver, classifier, location)
    62  			if err != nil {
    63  				return nil, err
    64  			}
    65  			if match != nil {
    66  				return match, nil
    67  			}
    68  		}
    69  		return nil, nil
    70  	}
    71  }
    72  
    73  func fileNameTemplateVersionMatcher(fileNamePattern string, contentTemplate string) evidenceMatcher {
    74  	pat := regexp.MustCompile(fileNamePattern)
    75  	return func(resolver file.Resolver, classifier classifier, location file.Location) ([]pkg.Package, error) {
    76  		if !pat.MatchString(location.RealPath) {
    77  			return nil, nil
    78  		}
    79  
    80  		filepathNamedGroupValues := internal.MatchNamedCaptureGroups(pat, location.RealPath)
    81  
    82  		// versions like 3.5 should not match any character, but explicit dot
    83  		for k, v := range filepathNamedGroupValues {
    84  			filepathNamedGroupValues[k] = strings.ReplaceAll(v, ".", "\\.")
    85  		}
    86  
    87  		tmpl, err := template.New("").Parse(contentTemplate)
    88  		if err != nil {
    89  			return nil, fmt.Errorf("unable to parse classifier template=%q : %w", contentTemplate, err)
    90  		}
    91  
    92  		patternBuf := &bytes.Buffer{}
    93  		err = tmpl.Execute(patternBuf, filepathNamedGroupValues)
    94  		if err != nil {
    95  			return nil, fmt.Errorf("unable to render template: %w", err)
    96  		}
    97  
    98  		tmplPattern, err := regexp.Compile(patternBuf.String())
    99  		if err != nil {
   100  			return nil, fmt.Errorf("unable to compile rendered regex=%q: %w", patternBuf.String(), err)
   101  		}
   102  
   103  		contents, err := getContents(resolver, location)
   104  		if err != nil {
   105  			return nil, fmt.Errorf("unable to get read contents for file: %w", err)
   106  		}
   107  
   108  		matchMetadata := internal.MatchNamedCaptureGroups(tmplPattern, string(contents))
   109  
   110  		p := newPackage(classifier, location, matchMetadata)
   111  		if p == nil {
   112  			return nil, nil
   113  		}
   114  
   115  		return []pkg.Package{*p}, nil
   116  	}
   117  }
   118  
   119  func fileContentsVersionMatcher(pattern string) evidenceMatcher {
   120  	pat := regexp.MustCompile(pattern)
   121  	return func(resolver file.Resolver, classifier classifier, location file.Location) ([]pkg.Package, error) {
   122  		contents, err := getContents(resolver, location)
   123  		if err != nil {
   124  			return nil, fmt.Errorf("unable to get read contents for file: %w", err)
   125  		}
   126  
   127  		matchMetadata := internal.MatchNamedCaptureGroups(pat, string(contents))
   128  
   129  		p := newPackage(classifier, location, matchMetadata)
   130  		if p == nil {
   131  			return nil, nil
   132  		}
   133  
   134  		return []pkg.Package{*p}, nil
   135  	}
   136  }
   137  
   138  //nolint:gocognit
   139  func sharedLibraryLookup(sharedLibraryPattern string, sharedLibraryMatcher evidenceMatcher) evidenceMatcher {
   140  	pat := regexp.MustCompile(sharedLibraryPattern)
   141  	return func(resolver file.Resolver, classifier classifier, location file.Location) (packages []pkg.Package, _ error) {
   142  		libs, err := sharedLibraries(resolver, location)
   143  		if err != nil {
   144  			return nil, err
   145  		}
   146  		for _, lib := range libs {
   147  			if !pat.MatchString(lib) {
   148  				continue
   149  			}
   150  
   151  			locations, err := resolver.FilesByGlob("**/" + lib)
   152  			if err != nil {
   153  				return nil, err
   154  			}
   155  			for _, libraryLocation := range locations {
   156  				pkgs, err := sharedLibraryMatcher(resolver, classifier, libraryLocation)
   157  				if err != nil {
   158  					return nil, err
   159  				}
   160  				for _, p := range pkgs {
   161  					// set the source binary as the first location
   162  					locationSet := file.NewLocationSet(location)
   163  					locationSet.Add(p.Locations.ToSlice()...)
   164  					p.Locations = locationSet
   165  					meta, _ := p.Metadata.(pkg.BinaryMetadata)
   166  					p.Metadata = pkg.BinaryMetadata{
   167  						Matches: append([]pkg.ClassifierMatch{
   168  							{
   169  								Classifier: classifier.Class,
   170  								Location:   location,
   171  							},
   172  						}, meta.Matches...),
   173  					}
   174  					packages = append(packages, p)
   175  				}
   176  			}
   177  		}
   178  		return packages, nil
   179  	}
   180  }
   181  
   182  func mustPURL(purl string) packageurl.PackageURL {
   183  	p, err := packageurl.FromString(purl)
   184  	if err != nil {
   185  		panic(fmt.Sprintf("invalid PURL: %s", p))
   186  	}
   187  	return p
   188  }
   189  
   190  func getContents(resolver file.Resolver, location file.Location) ([]byte, error) {
   191  	reader, err := resolver.FileContentsByLocation(location)
   192  	if err != nil {
   193  		return nil, err
   194  	}
   195  
   196  	unionReader, err := unionreader.GetUnionReader(reader)
   197  	if err != nil {
   198  		return nil, fmt.Errorf("unable to get union reader for file: %w", err)
   199  	}
   200  
   201  	// TODO: there may be room for improvement here, as this may use an excessive amount of memory. Alternate approach is to leverage a RuneReader.
   202  	contents, err := io.ReadAll(unionReader)
   203  	if err != nil {
   204  		return nil, fmt.Errorf("unable to get contents for file: %w", err)
   205  	}
   206  
   207  	return contents, nil
   208  }
   209  
   210  // singleCPE returns a []pkg.CPE based on the cpe string or panics if the CPE is invalid
   211  func singleCPE(cpeString string) []cpe.CPE {
   212  	return []cpe.CPE{
   213  		cpe.Must(cpeString),
   214  	}
   215  }
   216  
   217  // sharedLibraries returns a list of all shared libraries found within a binary, currently
   218  // supporting: elf, macho, and windows pe
   219  func sharedLibraries(resolver file.Resolver, location file.Location) ([]string, error) {
   220  	contents, err := getContents(resolver, location)
   221  	if err != nil {
   222  		return nil, err
   223  	}
   224  
   225  	r := bytes.NewReader(contents)
   226  
   227  	e, _ := elf.NewFile(r)
   228  	if e != nil {
   229  		symbols, err := e.ImportedLibraries()
   230  		if err != nil {
   231  			log.Debugf("unable to read elf binary at: %s -- %s", location.RealPath, err)
   232  		}
   233  		return symbols, nil
   234  	}
   235  
   236  	m, _ := macho.NewFile(r)
   237  	if m != nil {
   238  		symbols, err := m.ImportedLibraries()
   239  		if err != nil {
   240  			log.Debugf("unable to read macho binary at: %s -- %s", location.RealPath, err)
   241  		}
   242  		return symbols, nil
   243  	}
   244  
   245  	p, _ := pe.NewFile(r)
   246  	if p != nil {
   247  		symbols, err := p.ImportedLibraries()
   248  		if err != nil {
   249  			log.Debugf("unable to read pe binary at: %s -- %s", location.RealPath, err)
   250  		}
   251  		return symbols, nil
   252  	}
   253  
   254  	return nil, nil
   255  }