github.com/nextlinux/gosbom@v0.81.1-0.20230627115839-1ff50c281391/gosbom/pkg/cataloger/binary/classifier.go (about)

     1  package binary
     2  
     3  import (
     4  	"bytes"
     5  	"debug/elf"
     6  	"debug/macho"
     7  	"debug/pe"
     8  	"fmt"
     9  	"io"
    10  	"regexp"
    11  	"strings"
    12  	"text/template"
    13  
    14  	"github.com/nextlinux/gosbom/gosbom/cpe"
    15  	"github.com/nextlinux/gosbom/gosbom/file"
    16  	"github.com/nextlinux/gosbom/gosbom/pkg"
    17  	"github.com/nextlinux/gosbom/gosbom/pkg/cataloger/internal/unionreader"
    18  	"github.com/nextlinux/gosbom/internal"
    19  	"github.com/nextlinux/gosbom/internal/log"
    20  
    21  	"github.com/anchore/packageurl-go"
    22  )
    23  
    24  var emptyPURL = packageurl.PackageURL{}
    25  
    26  // classifier is a generic package classifier that can be used to match a package definition
    27  // to a file that meets the given content criteria of the evidenceMatcher.
    28  type classifier struct {
    29  	Class string
    30  
    31  	// FileGlob is a selector to narrow down file inspection using the **/glob* syntax
    32  	FileGlob string
    33  
    34  	// EvidenceMatcher is what will be used to match against the file in the source
    35  	// location. If the matcher returns a package, the file will be considered a candidate.
    36  	EvidenceMatcher evidenceMatcher
    37  
    38  	// Information below is used to specify the Package information when returned
    39  
    40  	// Package is the name to use for the package
    41  	Package string
    42  
    43  	// Language is the language to classify this package as
    44  	Language pkg.Language
    45  
    46  	// Type is the package type to use for the package
    47  	Type pkg.Type
    48  
    49  	// PURL is the Package URL to use when generating a package
    50  	PURL packageurl.PackageURL
    51  
    52  	// CPEs are the specific CPEs we want to include for this binary with updated version information
    53  	CPEs []cpe.CPE
    54  }
    55  
    56  // evidenceMatcher is a function called to catalog Packages that match some sort of evidence
    57  type evidenceMatcher func(resolver file.Resolver, classifier classifier, location file.Location) ([]pkg.Package, error)
    58  
    59  func evidenceMatchers(matchers ...evidenceMatcher) evidenceMatcher {
    60  	return func(resolver file.Resolver, classifier classifier, location file.Location) ([]pkg.Package, error) {
    61  		for _, matcher := range matchers {
    62  			match, err := matcher(resolver, classifier, location)
    63  			if err != nil {
    64  				return nil, err
    65  			}
    66  			if match != nil {
    67  				return match, nil
    68  			}
    69  		}
    70  		return nil, nil
    71  	}
    72  }
    73  
    74  func fileNameTemplateVersionMatcher(fileNamePattern string, contentTemplate string) evidenceMatcher {
    75  	pat := regexp.MustCompile(fileNamePattern)
    76  	return func(resolver file.Resolver, classifier classifier, location file.Location) ([]pkg.Package, error) {
    77  		if !pat.MatchString(location.RealPath) {
    78  			return nil, nil
    79  		}
    80  
    81  		filepathNamedGroupValues := internal.MatchNamedCaptureGroups(pat, location.RealPath)
    82  
    83  		// versions like 3.5 should not match any character, but explicit dot
    84  		for k, v := range filepathNamedGroupValues {
    85  			filepathNamedGroupValues[k] = strings.ReplaceAll(v, ".", "\\.")
    86  		}
    87  
    88  		tmpl, err := template.New("").Parse(contentTemplate)
    89  		if err != nil {
    90  			return nil, fmt.Errorf("unable to parse classifier template=%q : %w", contentTemplate, err)
    91  		}
    92  
    93  		patternBuf := &bytes.Buffer{}
    94  		err = tmpl.Execute(patternBuf, filepathNamedGroupValues)
    95  		if err != nil {
    96  			return nil, fmt.Errorf("unable to render template: %w", err)
    97  		}
    98  
    99  		tmplPattern, err := regexp.Compile(patternBuf.String())
   100  		if err != nil {
   101  			return nil, fmt.Errorf("unable to compile rendered regex=%q: %w", patternBuf.String(), err)
   102  		}
   103  
   104  		contents, err := getContents(resolver, location)
   105  		if err != nil {
   106  			return nil, fmt.Errorf("unable to get read contents for file: %w", err)
   107  		}
   108  
   109  		matchMetadata := internal.MatchNamedCaptureGroups(tmplPattern, string(contents))
   110  
   111  		p := newPackage(classifier, location, matchMetadata)
   112  		if p == nil {
   113  			return nil, nil
   114  		}
   115  
   116  		return []pkg.Package{*p}, nil
   117  	}
   118  }
   119  
   120  func fileContentsVersionMatcher(pattern string) evidenceMatcher {
   121  	pat := regexp.MustCompile(pattern)
   122  	return func(resolver file.Resolver, classifier classifier, location file.Location) ([]pkg.Package, error) {
   123  		contents, err := getContents(resolver, location)
   124  		if err != nil {
   125  			return nil, fmt.Errorf("unable to get read contents for file: %w", err)
   126  		}
   127  
   128  		matchMetadata := internal.MatchNamedCaptureGroups(pat, string(contents))
   129  
   130  		p := newPackage(classifier, location, matchMetadata)
   131  		if p == nil {
   132  			return nil, nil
   133  		}
   134  
   135  		return []pkg.Package{*p}, nil
   136  	}
   137  }
   138  
   139  //nolint:gocognit
   140  func sharedLibraryLookup(sharedLibraryPattern string, sharedLibraryMatcher evidenceMatcher) evidenceMatcher {
   141  	pat := regexp.MustCompile(sharedLibraryPattern)
   142  	return func(resolver file.Resolver, classifier classifier, location file.Location) (packages []pkg.Package, _ error) {
   143  		libs, err := sharedLibraries(resolver, location)
   144  		if err != nil {
   145  			return nil, err
   146  		}
   147  		for _, lib := range libs {
   148  			if !pat.MatchString(lib) {
   149  				continue
   150  			}
   151  
   152  			locations, err := resolver.FilesByGlob("**/" + lib)
   153  			if err != nil {
   154  				return nil, err
   155  			}
   156  			for _, libraryLocation := range locations {
   157  				pkgs, err := sharedLibraryMatcher(resolver, classifier, libraryLocation)
   158  				if err != nil {
   159  					return nil, err
   160  				}
   161  				for _, p := range pkgs {
   162  					// set the source binary as the first location
   163  					locationSet := file.NewLocationSet(location)
   164  					locationSet.Add(p.Locations.ToSlice()...)
   165  					p.Locations = locationSet
   166  					meta, _ := p.Metadata.(pkg.BinaryMetadata)
   167  					p.Metadata = pkg.BinaryMetadata{
   168  						Matches: append([]pkg.ClassifierMatch{
   169  							{
   170  								Classifier: classifier.Class,
   171  								Location:   location,
   172  							},
   173  						}, meta.Matches...),
   174  					}
   175  					packages = append(packages, p)
   176  				}
   177  			}
   178  		}
   179  		return packages, nil
   180  	}
   181  }
   182  
   183  func mustPURL(purl string) packageurl.PackageURL {
   184  	p, err := packageurl.FromString(purl)
   185  	if err != nil {
   186  		panic(fmt.Sprintf("invalid PURL: %s", p))
   187  	}
   188  	return p
   189  }
   190  
   191  func getContents(resolver file.Resolver, location file.Location) ([]byte, error) {
   192  	reader, err := resolver.FileContentsByLocation(location)
   193  	if err != nil {
   194  		return nil, err
   195  	}
   196  
   197  	unionReader, err := unionreader.GetUnionReader(reader)
   198  	if err != nil {
   199  		return nil, fmt.Errorf("unable to get union reader for file: %w", err)
   200  	}
   201  
   202  	// TODO: there may be room for improvement here, as this may use an excessive amount of memory. Alternate approach is to leverage a RuneReader.
   203  	contents, err := io.ReadAll(unionReader)
   204  	if err != nil {
   205  		return nil, fmt.Errorf("unable to get contents for file: %w", err)
   206  	}
   207  
   208  	return contents, nil
   209  }
   210  
   211  // singleCPE returns a []pkg.CPE based on the cpe string or panics if the CPE is invalid
   212  func singleCPE(cpeString string) []cpe.CPE {
   213  	return []cpe.CPE{
   214  		cpe.Must(cpeString),
   215  	}
   216  }
   217  
   218  // sharedLibraries returns a list of all shared libraries found within a binary, currently
   219  // supporting: elf, macho, and windows pe
   220  func sharedLibraries(resolver file.Resolver, location file.Location) ([]string, error) {
   221  	contents, err := getContents(resolver, location)
   222  	if err != nil {
   223  		return nil, err
   224  	}
   225  
   226  	r := bytes.NewReader(contents)
   227  
   228  	e, _ := elf.NewFile(r)
   229  	if e != nil {
   230  		symbols, err := e.ImportedLibraries()
   231  		if err != nil {
   232  			log.Debugf("unable to read elf binary at: %s -- %s", location.RealPath, err)
   233  		}
   234  		return symbols, nil
   235  	}
   236  
   237  	m, _ := macho.NewFile(r)
   238  	if m != nil {
   239  		symbols, err := m.ImportedLibraries()
   240  		if err != nil {
   241  			log.Debugf("unable to read macho binary at: %s -- %s", location.RealPath, err)
   242  		}
   243  		return symbols, nil
   244  	}
   245  
   246  	p, _ := pe.NewFile(r)
   247  	if p != nil {
   248  		symbols, err := p.ImportedLibraries()
   249  		if err != nil {
   250  			log.Debugf("unable to read pe binary at: %s -- %s", location.RealPath, err)
   251  		}
   252  		return symbols, nil
   253  	}
   254  
   255  	return nil, nil
   256  }