github.com/lineaje-labs/syft@v0.98.1-0.20231227153149-9e393f60ff1b/syft/pkg/cataloger/binary/classifier.go (about)

     1  package binary
     2  
     3  import (
     4  	"bytes"
     5  	"debug/elf"
     6  	"debug/macho"
     7  	"debug/pe"
     8  	"fmt"
     9  	"io"
    10  	"regexp"
    11  	"strings"
    12  	"text/template"
    13  
    14  	"github.com/anchore/packageurl-go"
    15  	"github.com/anchore/syft/syft/cpe"
    16  	"github.com/anchore/syft/syft/file"
    17  	"github.com/anchore/syft/syft/pkg"
    18  	"github.com/lineaje-labs/syft/internal"
    19  	"github.com/lineaje-labs/syft/internal/log"
    20  	"github.com/lineaje-labs/syft/syft/pkg/cataloger/internal/unionreader"
    21  )
    22  
    23  var emptyPURL = packageurl.PackageURL{}
    24  
// classifier is a generic package classifier that can be used to match a package definition
// to a file that meets the given content criteria of the evidenceMatcher.
//
// The first group of fields (Class, FileGlob, EvidenceMatcher) controls which files are
// inspected and how evidence is extracted from them; the remaining fields describe the
// package that is reported when a match is found.
type classifier struct {
	// Class is the identifier of this classifier; it is recorded as the Classifier
	// value of the pkg.ClassifierMatch entries attached to matched packages.
	Class string

	// FileGlob is a selector to narrow down file inspection using the **/glob* syntax
	FileGlob string

	// EvidenceMatcher is what will be used to match against the file in the source
	// location. If the matcher returns a package, the file will be considered a candidate.
	EvidenceMatcher evidenceMatcher

	// Information below is used to specify the Package information when returned

	// Package is the name to use for the package
	Package string

	// Language is the language to classify this package as
	Language pkg.Language

	// Type is the package type to use for the package
	Type pkg.Type

	// PURL is the Package URL to use when generating a package
	PURL packageurl.PackageURL

	// CPEs are the specific CPEs we want to include for this binary with updated version information
	CPEs []cpe.CPE
}
    54  
// evidenceMatcher is a function called to catalog Packages that match some sort of evidence.
// It is given the resolver used to access files, the classifier being evaluated, and the
// location of the candidate file. The matchers in this file return a nil slice together
// with a nil error to signal "no match", and a non-nil error when inspection itself failed.
type evidenceMatcher func(resolver file.Resolver, classifier classifier, location file.Location) ([]pkg.Package, error)
    57  
    58  func evidenceMatchers(matchers ...evidenceMatcher) evidenceMatcher {
    59  	return func(resolver file.Resolver, classifier classifier, location file.Location) ([]pkg.Package, error) {
    60  		for _, matcher := range matchers {
    61  			match, err := matcher(resolver, classifier, location)
    62  			if err != nil {
    63  				return nil, err
    64  			}
    65  			if match != nil {
    66  				return match, nil
    67  			}
    68  		}
    69  		return nil, nil
    70  	}
    71  }
    72  
    73  func fileNameTemplateVersionMatcher(fileNamePattern string, contentTemplate string) evidenceMatcher {
    74  	pat := regexp.MustCompile(fileNamePattern)
    75  	return func(resolver file.Resolver, classifier classifier, location file.Location) ([]pkg.Package, error) {
    76  		if !pat.MatchString(location.RealPath) {
    77  			return nil, nil
    78  		}
    79  
    80  		filepathNamedGroupValues := internal.MatchNamedCaptureGroups(pat, location.RealPath)
    81  
    82  		// versions like 3.5 should not match any character, but explicit dot
    83  		for k, v := range filepathNamedGroupValues {
    84  			filepathNamedGroupValues[k] = strings.ReplaceAll(v, ".", "\\.")
    85  		}
    86  
    87  		tmpl, err := template.New("").Parse(contentTemplate)
    88  		if err != nil {
    89  			return nil, fmt.Errorf("unable to parse classifier template=%q : %w", contentTemplate, err)
    90  		}
    91  
    92  		patternBuf := &bytes.Buffer{}
    93  		err = tmpl.Execute(patternBuf, filepathNamedGroupValues)
    94  		if err != nil {
    95  			return nil, fmt.Errorf("unable to render template: %w", err)
    96  		}
    97  
    98  		tmplPattern, err := regexp.Compile(patternBuf.String())
    99  		if err != nil {
   100  			return nil, fmt.Errorf("unable to compile rendered regex=%q: %w", patternBuf.String(), err)
   101  		}
   102  
   103  		contents, err := getContents(resolver, location)
   104  		if err != nil {
   105  			return nil, fmt.Errorf("unable to get read contents for file: %w", err)
   106  		}
   107  
   108  		matchMetadata := internal.MatchNamedCaptureGroups(tmplPattern, string(contents))
   109  
   110  		p := newPackage(classifier, location, matchMetadata)
   111  		if p == nil {
   112  			return nil, nil
   113  		}
   114  
   115  		return []pkg.Package{*p}, nil
   116  	}
   117  }
   118  
   119  func fileContentsVersionMatcher(pattern string) evidenceMatcher {
   120  	pat := regexp.MustCompile(pattern)
   121  	return func(resolver file.Resolver, classifier classifier, location file.Location) ([]pkg.Package, error) {
   122  		contents, err := getContents(resolver, location)
   123  		if err != nil {
   124  			return nil, fmt.Errorf("unable to get read contents for file: %w", err)
   125  		}
   126  
   127  		matchMetadata := internal.MatchNamedCaptureGroups(pat, string(contents))
   128  
   129  		p := newPackage(classifier, location, matchMetadata)
   130  		if p == nil {
   131  			return nil, nil
   132  		}
   133  
   134  		return []pkg.Package{*p}, nil
   135  	}
   136  }
   137  
   138  //nolint:gocognit
   139  func sharedLibraryLookup(sharedLibraryPattern string, sharedLibraryMatcher evidenceMatcher) evidenceMatcher {
   140  	pat := regexp.MustCompile(sharedLibraryPattern)
   141  	return func(
   142  		resolver file.Resolver, classifier classifier, location file.Location,
   143  	) (packages []pkg.Package, _ error) {
   144  		libs, err := sharedLibraries(resolver, location)
   145  		if err != nil {
   146  			return nil, err
   147  		}
   148  		for _, lib := range libs {
   149  			if !pat.MatchString(lib) {
   150  				continue
   151  			}
   152  
   153  			locations, err := resolver.FilesByGlob("**/" + lib)
   154  			if err != nil {
   155  				return nil, err
   156  			}
   157  			for _, libraryLocation := range locations {
   158  				pkgs, err := sharedLibraryMatcher(resolver, classifier, libraryLocation)
   159  				if err != nil {
   160  					return nil, err
   161  				}
   162  				for _, p := range pkgs {
   163  					// set the source binary as the first location
   164  					locationSet := file.NewLocationSet(location)
   165  					locationSet.Add(p.Locations.ToSlice()...)
   166  					p.Locations = locationSet
   167  					meta, _ := p.Metadata.(pkg.BinarySignature)
   168  					p.Metadata = pkg.BinarySignature{
   169  						Matches: append([]pkg.ClassifierMatch{
   170  							{
   171  								Classifier: classifier.Class,
   172  								Location:   location,
   173  							},
   174  						}, meta.Matches...),
   175  					}
   176  					packages = append(packages, p)
   177  				}
   178  			}
   179  		}
   180  		return packages, nil
   181  	}
   182  }
   183  
   184  func mustPURL(purl string) packageurl.PackageURL {
   185  	p, err := packageurl.FromString(purl)
   186  	if err != nil {
   187  		panic(fmt.Sprintf("invalid PURL: %s", p))
   188  	}
   189  	return p
   190  }
   191  
   192  func getContents(resolver file.Resolver, location file.Location) ([]byte, error) {
   193  	reader, err := resolver.FileContentsByLocation(location)
   194  	if err != nil {
   195  		return nil, err
   196  	}
   197  
   198  	unionReader, err := unionreader.GetUnionReader(reader)
   199  	if err != nil {
   200  		return nil, fmt.Errorf("unable to get union reader for file: %w", err)
   201  	}
   202  
   203  	// TODO: there may be room for improvement here, as this may use an excessive amount of memory. Alternate approach is to leverage a RuneReader.
   204  	contents, err := io.ReadAll(unionReader)
   205  	if err != nil {
   206  		return nil, fmt.Errorf("unable to get contents for file: %w", err)
   207  	}
   208  
   209  	return contents, nil
   210  }
   211  
   212  // singleCPE returns a []pkg.CPE based on the cpe string or panics if the CPE is invalid
   213  func singleCPE(cpeString string) []cpe.CPE {
   214  	return []cpe.CPE{
   215  		cpe.Must(cpeString),
   216  	}
   217  }
   218  
   219  // sharedLibraries returns a list of all shared libraries found within a binary, currently
   220  // supporting: elf, macho, and windows pe
   221  func sharedLibraries(resolver file.Resolver, location file.Location) ([]string, error) {
   222  	contents, err := getContents(resolver, location)
   223  	if err != nil {
   224  		return nil, err
   225  	}
   226  
   227  	r := bytes.NewReader(contents)
   228  
   229  	e, _ := elf.NewFile(r)
   230  	if e != nil {
   231  		symbols, err := e.ImportedLibraries()
   232  		if err != nil {
   233  			log.Debugf("unable to read elf binary at: %s -- %s", location.RealPath, err)
   234  		}
   235  		return symbols, nil
   236  	}
   237  
   238  	m, _ := macho.NewFile(r)
   239  	if m != nil {
   240  		symbols, err := m.ImportedLibraries()
   241  		if err != nil {
   242  			log.Debugf("unable to read macho binary at: %s -- %s", location.RealPath, err)
   243  		}
   244  		return symbols, nil
   245  	}
   246  
   247  	p, _ := pe.NewFile(r)
   248  	if p != nil {
   249  		symbols, err := p.ImportedLibraries()
   250  		if err != nil {
   251  			log.Debugf("unable to read pe binary at: %s -- %s", location.RealPath, err)
   252  		}
   253  		return symbols, nil
   254  	}
   255  
   256  	return nil, nil
   257  }