github.com/anchore/syft@v1.38.2/syft/file/cataloger/executable/cataloger.go (about)

     1  package executable
     2  
     3  import (
     4  	"bytes"
     5  	"context"
     6  	"debug/elf"
     7  	"debug/macho"
     8  	"encoding/binary"
     9  	"fmt"
    10  	"sort"
    11  
    12  	"github.com/bmatcuk/doublestar/v4"
    13  	"github.com/dustin/go-humanize"
    14  
    15  	"github.com/anchore/go-sync"
    16  	"github.com/anchore/syft/internal"
    17  	"github.com/anchore/syft/internal/bus"
    18  	"github.com/anchore/syft/internal/log"
    19  	"github.com/anchore/syft/internal/mimetype"
    20  	"github.com/anchore/syft/internal/unknown"
    21  	"github.com/anchore/syft/syft/cataloging"
    22  	"github.com/anchore/syft/syft/event/monitor"
    23  	"github.com/anchore/syft/syft/file"
    24  	"github.com/anchore/syft/syft/internal/unionreader"
    25  )
    26  
// Config controls which files the executable cataloger considers.
type Config struct {
	// MIMETypes lists the MIME types used to select candidate files from the resolver.
	MIMETypes []string `json:"mime-types" yaml:"mime-types" mapstructure:"mime-types"`
	// Globs optionally narrows the candidates to locations whose real or access path
	// matches at least one pattern; when empty, no glob filtering is applied.
	Globs     []string `json:"globs" yaml:"globs" mapstructure:"globs"`
}
    31  
// Cataloger collects file.Executable metadata for the files selected by its Config.
type Cataloger struct {
	// config holds the MIME type and glob selection criteria supplied to NewCataloger.
	config Config
}
    35  
    36  func DefaultConfig() Config {
    37  	m := mimetype.ExecutableMIMETypeSet.List()
    38  	sort.Strings(m)
    39  	return Config{
    40  		MIMETypes: m,
    41  		Globs:     nil,
    42  	}
    43  }
    44  
    45  func NewCataloger(cfg Config) *Cataloger {
    46  	return &Cataloger{
    47  		config: cfg,
    48  	}
    49  }
    50  
    51  func (i *Cataloger) Catalog(resolver file.Resolver) (map[file.Coordinates]file.Executable, error) {
    52  	return i.CatalogCtx(context.Background(), resolver)
    53  }
    54  
    55  func (i *Cataloger) CatalogCtx(ctx context.Context, resolver file.Resolver) (map[file.Coordinates]file.Executable, error) {
    56  	locs, err := resolver.FilesByMIMEType(i.config.MIMETypes...)
    57  	if err != nil {
    58  		return nil, fmt.Errorf("unable to get file locations for binaries: %w", err)
    59  	}
    60  
    61  	locs, err = filterByGlobs(locs, i.config.Globs)
    62  	if err != nil {
    63  		return nil, err
    64  	}
    65  
    66  	prog := catalogingProgress(int64(len(locs)))
    67  
    68  	results := make(map[file.Coordinates]file.Executable)
    69  	errs := sync.Collect(&ctx, cataloging.ExecutorFile, sync.ToSeq(locs), func(loc file.Location) (*file.Executable, error) {
    70  		prog.AtomicStage.Set(loc.Path())
    71  
    72  		exec, err := processExecutableLocation(loc, resolver)
    73  		if err != nil {
    74  			err = unknown.New(loc, err)
    75  		}
    76  		return exec, err
    77  	}, func(loc file.Location, exec *file.Executable) {
    78  		if exec != nil {
    79  			prog.Increment()
    80  			results[loc.Coordinates] = *exec
    81  		}
    82  	})
    83  
    84  	log.Debugf("executable cataloger processed %d files", len(results))
    85  
    86  	prog.AtomicStage.Set(fmt.Sprintf("%s executables", humanize.Comma(prog.Current())))
    87  	prog.SetCompleted()
    88  
    89  	return results, errs
    90  }
    91  
    92  func processExecutableLocation(loc file.Location, resolver file.Resolver) (*file.Executable, error) {
    93  	reader, err := resolver.FileContentsByLocation(loc)
    94  	if err != nil {
    95  		log.WithFields("error", err, "path", loc.RealPath).Debug("unable to get file contents")
    96  		return nil, fmt.Errorf("unable to get file contents: %w", err)
    97  	}
    98  	defer internal.CloseAndLogError(reader, loc.RealPath)
    99  
   100  	uReader, err := unionreader.GetUnionReader(reader)
   101  	if err != nil {
   102  		log.WithFields("error", err, "path", loc.RealPath).Debug("unable to get union reader")
   103  		return nil, fmt.Errorf("unable to get union reader: %w", err)
   104  	}
   105  
   106  	return processExecutable(loc, uReader)
   107  }
   108  
   109  func catalogingProgress(locations int64) *monitor.TaskProgress {
   110  	info := monitor.GenericTask{
   111  		Title: monitor.Title{
   112  			Default: "Executables",
   113  		},
   114  		ParentID: monitor.TopLevelCatalogingTaskID,
   115  	}
   116  
   117  	return bus.StartCatalogerTask(info, locations, "")
   118  }
   119  
   120  func filterByGlobs(locs []file.Location, globs []string) ([]file.Location, error) {
   121  	if len(globs) == 0 {
   122  		return locs, nil
   123  	}
   124  	var filteredLocs []file.Location
   125  	for _, loc := range locs {
   126  		matches, err := locationMatchesGlob(loc, globs)
   127  		if err != nil {
   128  			return nil, err
   129  		}
   130  		if matches {
   131  			filteredLocs = append(filteredLocs, loc)
   132  		}
   133  	}
   134  	return filteredLocs, nil
   135  }
   136  
   137  func locationMatchesGlob(loc file.Location, globs []string) (bool, error) {
   138  	for _, glob := range globs {
   139  		for _, path := range []string{loc.RealPath, loc.AccessPath} {
   140  			if path == "" {
   141  				continue
   142  			}
   143  			matches, err := doublestar.Match(glob, path)
   144  			if err != nil {
   145  				return false, fmt.Errorf("unable to match glob %q to path %q: %w", glob, path, err)
   146  			}
   147  			if matches {
   148  				return true, nil
   149  			}
   150  		}
   151  	}
   152  	return false, nil
   153  }
   154  
   155  func processExecutable(loc file.Location, reader unionreader.UnionReader) (*file.Executable, error) {
   156  	data := file.Executable{}
   157  
   158  	// determine the executable format
   159  
   160  	format, err := findExecutableFormat(reader)
   161  	if err != nil {
   162  		log.Debugf("unable to determine executable kind for %v: %v", loc.RealPath, err)
   163  		return nil, fmt.Errorf("unable to determine executable kind: %w", err)
   164  	}
   165  
   166  	if format == "" {
   167  		// this is not an "unknown", so just log -- this binary does not have parseable data in it
   168  		log.Debugf("unable to determine executable format for %q", loc.RealPath)
   169  		return nil, nil
   170  	}
   171  
   172  	data.Format = format
   173  
   174  	switch format {
   175  	case file.ELF:
   176  		if err = findELFFeatures(&data, reader); err != nil {
   177  			log.WithFields("error", err, "path", loc.RealPath).Trace("unable to determine ELF features")
   178  			err = fmt.Errorf("unable to determine ELF features: %w", err)
   179  		}
   180  	case file.PE:
   181  		if err = findPEFeatures(&data, reader); err != nil {
   182  			log.WithFields("error", err, "path", loc.RealPath).Trace("unable to determine PE features")
   183  			err = fmt.Errorf("unable to determine PE features: %w", err)
   184  		}
   185  	case file.MachO:
   186  		if err = findMachoFeatures(&data, reader); err != nil {
   187  			log.WithFields("error", err, "path", loc.RealPath).Trace("unable to determine Macho features")
   188  			err = fmt.Errorf("unable to determine Macho features: %w", err)
   189  		}
   190  	}
   191  
   192  	// always allocate collections for presentation
   193  	if data.ImportedLibraries == nil {
   194  		data.ImportedLibraries = []string{}
   195  	}
   196  
   197  	return &data, err
   198  }
   199  
   200  func findExecutableFormat(reader unionreader.UnionReader) (file.ExecutableFormat, error) {
   201  	// read the first sector of the file
   202  	buf := make([]byte, 512)
   203  	n, err := reader.ReadAt(buf, 0)
   204  	if err != nil {
   205  		return "", fmt.Errorf("unable to read first sector of file: %w", err)
   206  	}
   207  	if n < 512 {
   208  		return "", fmt.Errorf("unable to read enough bytes to determine executable format")
   209  	}
   210  
   211  	switch {
   212  	case isMacho(buf):
   213  		return file.MachO, nil
   214  	case isPE(buf):
   215  		return file.PE, nil
   216  	case isELF(buf):
   217  		return file.ELF, nil
   218  	}
   219  
   220  	return "", nil
   221  }
   222  
   223  func isMacho(by []byte) bool {
   224  	// sourced from https://github.com/gabriel-vasile/mimetype/blob/02af149c0dfd1444d9256fc33c2012bb3153e1d2/internal/magic/binary.go#L44
   225  
   226  	if classOrMachOFat(by) && by[7] < 20 {
   227  		return true
   228  	}
   229  
   230  	if len(by) < 4 {
   231  		return false
   232  	}
   233  
   234  	be := binary.BigEndian.Uint32(by)
   235  	le := binary.LittleEndian.Uint32(by)
   236  
   237  	return be == macho.Magic32 ||
   238  		le == macho.Magic32 ||
   239  		be == macho.Magic64 ||
   240  		le == macho.Magic64
   241  }
   242  
   243  // Java bytecode and Mach-O binaries share the same magic number.
   244  // More info here https://github.com/threatstack/libmagic/blob/master/magic/Magdir/cafebabe
   245  func classOrMachOFat(in []byte) bool {
   246  	// sourced from https://github.com/gabriel-vasile/mimetype/blob/02af149c0dfd1444d9256fc33c2012bb3153e1d2/internal/magic/binary.go#L44
   247  
   248  	// There should be at least 8 bytes for both of them because the only way to
   249  	// quickly distinguish them is by comparing byte at position 7
   250  	if len(in) < 8 {
   251  		return false
   252  	}
   253  
   254  	return bytes.HasPrefix(in, []byte{0xCA, 0xFE, 0xBA, 0xBE})
   255  }
   256  
   257  func isPE(by []byte) bool {
   258  	return bytes.HasPrefix(by, []byte("MZ"))
   259  }
   260  
   261  func isELF(by []byte) bool {
   262  	return bytes.HasPrefix(by, []byte(elf.ELFMAG))
   263  }