github.com/anchore/syft@v1.38.2/syft/pkg/cataloger/internal/binutils/classifier.go (about)

     1  package binutils
     2  
     3  import (
     4  	"bytes"
     5  	"debug/elf"
     6  	"debug/macho"
     7  	"debug/pe"
     8  	"encoding/json"
     9  	"fmt"
    10  	"io"
    11  	"maps"
    12  	"regexp"
    13  	"strconv"
    14  	"strings"
    15  	"text/template"
    16  
    17  	"github.com/bmatcuk/doublestar/v4"
    18  
    19  	"github.com/anchore/packageurl-go"
    20  	"github.com/anchore/syft/internal"
    21  	"github.com/anchore/syft/internal/log"
    22  	"github.com/anchore/syft/syft/cpe"
    23  	"github.com/anchore/syft/syft/file"
    24  	"github.com/anchore/syft/syft/internal/unionreader"
    25  	"github.com/anchore/syft/syft/pkg"
    26  )
    27  
    28  // Classifier is a generic package classifier that can be used to match a package definition
    29  // to a file that meets the given content criteria of the EvidenceMatcher.
    30  type Classifier struct {
        	// Class is the name identifying this classifier; it is recorded as the classifier
        	// of the matches attached to packages (see SharedLibraryLookup)
    31  	Class string `json:"class"`
    32  
    33  	// FileGlob is a selector to narrow down file inspection using the **/glob* syntax
    34  	FileGlob string `json:"fileGlob"`
    35  
    36  	// EvidenceMatcher is what will be used to match against the file in the source
    37  	// location. If the matcher returns a package, the file will be considered a candidate.
        	// It is a function value and is excluded from the JSON representation.
    38  	EvidenceMatcher EvidenceMatcher `json:"-"`
    39  
    40  	// The information below is used to specify the Package information when returned
    41  
    42  	// Package is the name to use for the package
    43  	Package string `json:"package"`
    44  
    45  	// PURL is the Package URL to use when generating a package
        	// (MarshalJSON writes it in its string form)
    46  	PURL packageurl.PackageURL `json:"purl"`
    47  
    48  	// CPEs are the specific CPEs we want to include for this binary with updated version information
        	// (MarshalJSON writes each one as a formatted string)
    49  	CPEs []cpe.CPE `json:"cpes"`
    50  }
    51  
    52  func (cfg Classifier) MarshalJSON() ([]byte, error) {
    53  	type marshalled struct {
    54  		Class    string   `json:"class"`
    55  		FileGlob string   `json:"fileGlob"`
    56  		Package  string   `json:"package"`
    57  		PURL     string   `json:"purl"`
    58  		CPEs     []string `json:"cpes"`
    59  	}
    60  
    61  	var marshalledCPEs []string
    62  	for _, c := range cfg.CPEs {
    63  		marshalledCPEs = append(marshalledCPEs, c.Attributes.BindToFmtString())
    64  	}
    65  
    66  	m := marshalled{
    67  		Class:    cfg.Class,
    68  		FileGlob: cfg.FileGlob,
    69  		Package:  cfg.Package,
    70  		PURL:     cfg.PURL.String(),
    71  		CPEs:     marshalledCPEs,
    72  	}
    73  
    74  	return json.Marshal(m)
    75  }
    76  
    77  // EvidenceMatcher is a function called to identify packages based on some sort of evidence in the filesystem contents.
    78  // A non-nil package slice indicates a successful match, even when it is empty; a nil slice indicates no match.
    79  type EvidenceMatcher func(classifier Classifier, context MatcherContext) ([]pkg.Package, error)
    80  
    81  type MatcherContext struct {
    82  	Resolver  file.Resolver
    83  	Location  file.Location
    84  	GetReader func(resolver MatcherContext) (unionreader.UnionReader, error)
    85  }
    86  
    87  // MatchAny returns a combined evidence matcher that returns results from the first
    88  // matcher that returns results
    89  func MatchAny(matchers ...EvidenceMatcher) EvidenceMatcher {
    90  	return func(classifier Classifier, context MatcherContext) ([]pkg.Package, error) {
    91  		for _, matcher := range matchers {
    92  			match, err := matcher(classifier, context)
    93  			if err != nil {
    94  				return nil, err
    95  			}
    96  			// only return when results
    97  			if match != nil {
    98  				return match, nil
    99  			}
   100  		}
   101  		return nil, nil
   102  	}
   103  }
   104  
   105  // MatchAll executes all matchers until one returns nil results, only returning the final results
   106  func MatchAll(matchers ...EvidenceMatcher) EvidenceMatcher {
   107  	return func(classifier Classifier, context MatcherContext) ([]pkg.Package, error) {
   108  		var out []pkg.Package
   109  		for _, matcher := range matchers {
   110  			match, err := matcher(classifier, context)
   111  			if match == nil || err != nil {
   112  				return nil, err
   113  			}
   114  			if len(match) > 0 {
   115  				out = match
   116  			}
   117  		}
   118  		return out, nil
   119  	}
   120  }
   121  
        // ContextualEvidenceMatchers creates evidence matchers that share a single cataloger name.
   122  type ContextualEvidenceMatchers struct {
        	// CatalogerName is the cataloger name handed to every matcher created through this value
   123  	CatalogerName string
   124  }
   125  
        // FileNameTemplateVersionMatcher delegates to the package-level FileNameTemplateVersionMatcher,
        // supplying c.CatalogerName as the cataloger name.
   126  func (c ContextualEvidenceMatchers) FileNameTemplateVersionMatcher(fileNamePattern string, contentTemplate string) EvidenceMatcher {
   127  	return FileNameTemplateVersionMatcher(fileNamePattern, contentTemplate, c.CatalogerName)
   128  }
   129  
        // FileContentsVersionMatcher delegates to the package-level FileContentsVersionMatcher,
        // supplying c.CatalogerName as the cataloger name.
   130  func (c ContextualEvidenceMatchers) FileContentsVersionMatcher(patterns ...string) EvidenceMatcher {
   131  	return FileContentsVersionMatcher(c.CatalogerName, patterns...)
   132  }
   133  
   134  func FileNameTemplateVersionMatcher(fileNamePattern, contentTemplate, catalogerName string) EvidenceMatcher {
   135  	pat := regexp.MustCompile(fileNamePattern)
   136  	return func(classifier Classifier, context MatcherContext) ([]pkg.Package, error) {
   137  		if !pat.MatchString(context.Location.RealPath) {
   138  			return nil, nil
   139  		}
   140  
   141  		filepathNamedGroupValues := internal.MatchNamedCaptureGroups(pat, context.Location.RealPath)
   142  
   143  		// versions like 3.5 should not match any character, but explicit dot
   144  		for k, v := range filepathNamedGroupValues {
   145  			filepathNamedGroupValues[k] = strings.ReplaceAll(v, ".", "\\.")
   146  		}
   147  
   148  		tmpl, err := template.New("").Parse(contentTemplate)
   149  		if err != nil {
   150  			return nil, fmt.Errorf("unable to parse classifier template=%q : %w", contentTemplate, err)
   151  		}
   152  
   153  		patternBuf := &bytes.Buffer{}
   154  		err = tmpl.Execute(patternBuf, filepathNamedGroupValues)
   155  		if err != nil {
   156  			return nil, fmt.Errorf("unable to render template: %w", err)
   157  		}
   158  
   159  		tmplPattern, err := regexp.Compile(patternBuf.String())
   160  		if err != nil {
   161  			return nil, fmt.Errorf("unable to compile rendered regex=%q: %w", patternBuf.String(), err)
   162  		}
   163  
   164  		contents, err := getReader(context)
   165  		if err != nil {
   166  			return nil, fmt.Errorf("unable to get read contents for file: %w", err)
   167  		}
   168  
   169  		matchMetadata, err := internal.MatchNamedCaptureGroupsFromReader(tmplPattern, contents)
   170  		if err != nil {
   171  			return nil, fmt.Errorf("unable to match version: %w", err)
   172  		}
   173  
   174  		p := NewClassifierPackage(classifier, context.Location, matchMetadata, catalogerName)
   175  		if p == nil {
   176  			return nil, nil
   177  		}
   178  
   179  		return []pkg.Package{*p}, nil
   180  	}
   181  }
   182  
   183  // FileContentsVersionMatcher will match all provided patterns, extracting named capture groups from each pattern, overwriting earlier results
   184  func FileContentsVersionMatcher(catalogerName string, patterns ...string) EvidenceMatcher {
   185  	if len(patterns) == 0 {
   186  		panic("must specify at least one pattern")
   187  	}
   188  	var pats []*regexp.Regexp
   189  	for _, pattern := range patterns {
   190  		pats = append(pats, regexp.MustCompile(pattern))
   191  	}
   192  	return func(classifier Classifier, context MatcherContext) ([]pkg.Package, error) {
   193  		var matchMetadata map[string]string
   194  
   195  		for _, pat := range pats {
   196  			contents, err := getReader(context)
   197  			if err != nil {
   198  				return nil, fmt.Errorf("unable to get read contents for file: %w", err)
   199  			}
   200  
   201  			match, err := internal.MatchNamedCaptureGroupsFromReader(pat, contents)
   202  			if err != nil {
   203  				return nil, fmt.Errorf("unable to match version: %w", err)
   204  			}
   205  			if match == nil {
   206  				return nil, nil
   207  			}
   208  			if matchMetadata == nil {
   209  				matchMetadata = match
   210  			} else {
   211  				maps.Copy(matchMetadata, match)
   212  			}
   213  		}
   214  
   215  		// Convert {major: 1, minor: 2, patch: 3} to "1.2.3"
   216  		_, versionOk := matchMetadata["version"]
   217  		majorStr, majorOk := matchMetadata["major"]
   218  		minorStr, minorOk := matchMetadata["minor"]
   219  		patchStr, patchOk := matchMetadata["patch"]
   220  
   221  		if !versionOk && majorOk && minorOk && patchOk {
   222  			major, majorErr := strconv.Atoi(majorStr)
   223  			minor, minorErr := strconv.Atoi(minorStr)
   224  			patch, patchErr := strconv.Atoi(patchStr)
   225  
   226  			if majorErr == nil && minorErr == nil && patchErr == nil {
   227  				matchMetadata["version"] = fmt.Sprintf("%d.%d.%d", major, minor, patch)
   228  			}
   229  		}
   230  
   231  		p := NewClassifierPackage(classifier, context.Location, matchMetadata, catalogerName)
   232  		if p == nil {
   233  			if matchMetadata != nil {
   234  				// if we had a successful metadata match, but no packages, return a successful match result
   235  				return []pkg.Package{}, nil
   236  			}
   237  			return nil, nil
   238  		}
   239  
   240  		return []pkg.Package{*p}, nil
   241  	}
   242  }
   243  
   244  func SharedLibraryLookup(sharedLibraryPattern string, sharedLibraryMatcher EvidenceMatcher) EvidenceMatcher {
   245  	pat := regexp.MustCompile(sharedLibraryPattern)
   246  	return func(classifier Classifier, context MatcherContext) (packages []pkg.Package, _ error) {
   247  		libs, err := sharedLibraries(context)
   248  		if err != nil {
   249  			return nil, err
   250  		}
   251  		for _, lib := range libs {
   252  			if !pat.MatchString(lib) {
   253  				continue
   254  			}
   255  
   256  			locations, err := context.Resolver.FilesByGlob("**/" + lib)
   257  			if err != nil {
   258  				return nil, err
   259  			}
   260  			for _, libraryLocation := range locations {
   261  				// create a new resolver without the cached context lookup -- this is decidedly a different file
   262  				newResolver := MatcherContext{
   263  					Resolver: context.Resolver,
   264  					Location: libraryLocation,
   265  				}
   266  				pkgs, err := sharedLibraryMatcher(classifier, newResolver)
   267  				if err != nil {
   268  					return nil, err
   269  				}
   270  				// not a successful match
   271  				if pkgs == nil {
   272  					continue
   273  				}
   274  				for _, p := range pkgs {
   275  					// set the source binary as the first location
   276  					locationSet := file.NewLocationSet(context.Location)
   277  					locationSet.Add(p.Locations.ToSlice()...)
   278  					p.Locations = locationSet
   279  					meta, _ := p.Metadata.(pkg.BinarySignature)
   280  					p.Metadata = pkg.BinarySignature{
   281  						Matches: append([]pkg.ClassifierMatch{
   282  							{
   283  								Classifier: classifier.Class,
   284  								Location:   context.Location,
   285  							},
   286  						}, meta.Matches...),
   287  					}
   288  					packages = append(packages, p)
   289  				}
   290  				// return non-nil package results as a successful match indication if the evidence matcher returned a successful match indication
   291  				if packages == nil {
   292  					packages = pkgs
   293  				}
   294  			}
   295  		}
   296  		return packages, nil
   297  	}
   298  }
   299  
   300  func MatchPath(path string) EvidenceMatcher {
   301  	if !doublestar.ValidatePattern(path) {
   302  		panic("invalid pattern")
   303  	}
   304  	return func(_ Classifier, context MatcherContext) ([]pkg.Package, error) {
   305  		if doublestar.MatchUnvalidated(path, context.Location.RealPath) {
   306  			return []pkg.Package{}, nil // return non-nil
   307  		}
   308  		return nil, nil
   309  	}
   310  }
   311  
   312  func getReader(context MatcherContext) (unionreader.UnionReader, error) {
   313  	if context.GetReader != nil {
   314  		return context.GetReader(context)
   315  	}
   316  	reader, err := context.Resolver.FileContentsByLocation(context.Location) //nolint:gocritic
   317  	if err != nil {
   318  		return nil, err
   319  	}
   320  
   321  	return unionreader.GetUnionReader(reader)
   322  }
   323  
   324  // sharedLibraries returns a list of all shared libraries found within a binary, currently
   325  // supporting: elf, macho, and windows pe
   326  func sharedLibraries(context MatcherContext) ([]string, error) {
   327  	contents, err := getReader(context)
   328  	if err != nil {
   329  		return nil, err
   330  	}
   331  	defer internal.CloseAndLogError(contents, context.Location.RealPath)
   332  
   333  	e, _ := elf.NewFile(contents)
   334  	if e != nil {
   335  		symbols, err := e.ImportedLibraries()
   336  		if err != nil {
   337  			log.Debugf("unable to read elf binary at: %s -- %s", context.Location.RealPath, err)
   338  		}
   339  		return symbols, nil
   340  	}
   341  	if _, err := contents.Seek(0, io.SeekStart); err != nil {
   342  		return nil, fmt.Errorf("unable to seek to beginning of file: %w", err)
   343  	}
   344  
   345  	m, _ := macho.NewFile(contents)
   346  	if m != nil {
   347  		symbols, err := m.ImportedLibraries()
   348  		if err != nil {
   349  			log.Debugf("unable to read macho binary at: %s -- %s", context.Location.RealPath, err)
   350  		}
   351  		return symbols, nil
   352  	}
   353  	if _, err := contents.Seek(0, io.SeekStart); err != nil {
   354  		return nil, fmt.Errorf("unable to seek to beginning of file: %w", err)
   355  	}
   356  
   357  	p, _ := pe.NewFile(contents)
   358  	if p != nil {
   359  		symbols, err := p.ImportedLibraries()
   360  		if err != nil {
   361  			log.Debugf("unable to read pe binary at: %s -- %s", context.Location.RealPath, err)
   362  		}
   363  		return symbols, nil
   364  	}
   365  	if _, err := contents.Seek(0, io.SeekStart); err != nil {
   366  		return nil, fmt.Errorf("unable to seek to beginning of file: %w", err)
   367  	}
   368  
   369  	return nil, nil
   370  }