github.com/anchore/syft@v1.4.2-0.20240516191711-1bec1fc5d397/syft/file/cataloger/executable/cataloger.go (about)

     1  package executable
     2  
import (
	"bytes"
	"debug/elf"
	"debug/macho"
	"encoding/binary"
	"errors"
	"fmt"
	"io"
	"sort"

	"github.com/bmatcuk/doublestar/v4"
	"github.com/dustin/go-humanize"

	"github.com/anchore/syft/internal"
	"github.com/anchore/syft/internal/bus"
	"github.com/anchore/syft/internal/log"
	"github.com/anchore/syft/internal/mimetype"
	"github.com/anchore/syft/syft/event/monitor"
	"github.com/anchore/syft/syft/file"
	"github.com/anchore/syft/syft/internal/unionreader"
)
    22  
// Config holds the file-selection criteria used by the executable Cataloger.
type Config struct {
	// MIMETypes is the set of MIME types handed to the resolver to find
	// candidate files.
	MIMETypes []string `json:"mime-types" yaml:"mime-types" mapstructure:"mime-types"`
	// Globs, when non-empty, narrows the candidates to locations whose real
	// or access path matches at least one of the patterns. An empty list
	// disables glob filtering.
	Globs     []string `json:"globs" yaml:"globs" mapstructure:"globs"`
}
    27  
// Cataloger finds executable files through a file.Resolver and records the
// detected format and features of each one.
type Cataloger struct {
	// config is the selection criteria supplied at construction time.
	config Config
}
    31  
    32  func DefaultConfig() Config {
    33  	m := mimetype.ExecutableMIMETypeSet.List()
    34  	sort.Strings(m)
    35  	return Config{
    36  		MIMETypes: m,
    37  		Globs:     nil,
    38  	}
    39  }
    40  
    41  func NewCataloger(cfg Config) *Cataloger {
    42  	return &Cataloger{
    43  		config: cfg,
    44  	}
    45  }
    46  
    47  func (i *Cataloger) Catalog(resolver file.Resolver) (map[file.Coordinates]file.Executable, error) {
    48  	locs, err := resolver.FilesByMIMEType(i.config.MIMETypes...)
    49  	if err != nil {
    50  		return nil, fmt.Errorf("unable to get file locations for binaries: %w", err)
    51  	}
    52  
    53  	locs, err = filterByGlobs(locs, i.config.Globs)
    54  	if err != nil {
    55  		return nil, err
    56  	}
    57  
    58  	prog := catalogingProgress(int64(len(locs)))
    59  
    60  	results := make(map[file.Coordinates]file.Executable)
    61  	for _, loc := range locs {
    62  		prog.AtomicStage.Set(loc.Path())
    63  
    64  		exec := processExecutableLocation(loc, resolver)
    65  
    66  		if exec != nil {
    67  			prog.Increment()
    68  			results[loc.Coordinates] = *exec
    69  		}
    70  	}
    71  
    72  	log.Debugf("executable cataloger processed %d files", len(results))
    73  
    74  	prog.AtomicStage.Set(fmt.Sprintf("%s executables", humanize.Comma(prog.Current())))
    75  	prog.SetCompleted()
    76  
    77  	return results, nil
    78  }
    79  
    80  func processExecutableLocation(loc file.Location, resolver file.Resolver) *file.Executable {
    81  	reader, err := resolver.FileContentsByLocation(loc)
    82  	if err != nil {
    83  		// TODO: known-unknowns
    84  		log.WithFields("error", err).Warnf("unable to get file contents for %q", loc.RealPath)
    85  		return nil
    86  	}
    87  	defer internal.CloseAndLogError(reader, loc.RealPath)
    88  
    89  	uReader, err := unionreader.GetUnionReader(reader)
    90  	if err != nil {
    91  		// TODO: known-unknowns
    92  		log.WithFields("error", err).Warnf("unable to get union reader for %q", loc.RealPath)
    93  		return nil
    94  	}
    95  
    96  	exec, err := processExecutable(loc, uReader)
    97  	if err != nil {
    98  		log.WithFields("error", err).Warnf("unable to process executable %q", loc.RealPath)
    99  	}
   100  	return exec
   101  }
   102  
   103  func catalogingProgress(locations int64) *monitor.CatalogerTaskProgress {
   104  	info := monitor.GenericTask{
   105  		Title: monitor.Title{
   106  			Default: "Executables",
   107  		},
   108  		ParentID: monitor.TopLevelCatalogingTaskID,
   109  	}
   110  
   111  	return bus.StartCatalogerTask(info, locations, "")
   112  }
   113  
   114  func filterByGlobs(locs []file.Location, globs []string) ([]file.Location, error) {
   115  	if len(globs) == 0 {
   116  		return locs, nil
   117  	}
   118  	var filteredLocs []file.Location
   119  	for _, loc := range locs {
   120  		matches, err := locationMatchesGlob(loc, globs)
   121  		if err != nil {
   122  			return nil, err
   123  		}
   124  		if matches {
   125  			filteredLocs = append(filteredLocs, loc)
   126  		}
   127  	}
   128  	return filteredLocs, nil
   129  }
   130  
   131  func locationMatchesGlob(loc file.Location, globs []string) (bool, error) {
   132  	for _, glob := range globs {
   133  		for _, path := range []string{loc.RealPath, loc.AccessPath} {
   134  			if path == "" {
   135  				continue
   136  			}
   137  			matches, err := doublestar.Match(glob, path)
   138  			if err != nil {
   139  				return false, fmt.Errorf("unable to match glob %q to path %q: %w", glob, path, err)
   140  			}
   141  			if matches {
   142  				return true, nil
   143  			}
   144  		}
   145  	}
   146  	return false, nil
   147  }
   148  
   149  func processExecutable(loc file.Location, reader unionreader.UnionReader) (*file.Executable, error) {
   150  	data := file.Executable{}
   151  
   152  	// determine the executable format
   153  
   154  	format, err := findExecutableFormat(reader)
   155  	if err != nil {
   156  		return nil, fmt.Errorf("unable to determine executable kind: %w", err)
   157  	}
   158  
   159  	if format == "" {
   160  		log.Debugf("unable to determine executable format for %q", loc.RealPath)
   161  		return nil, nil
   162  	}
   163  
   164  	data.Format = format
   165  
   166  	switch format {
   167  	case file.ELF:
   168  		if err := findELFFeatures(&data, reader); err != nil {
   169  			log.WithFields("error", err).Tracef("unable to determine ELF features for %q", loc.RealPath)
   170  		}
   171  	case file.PE:
   172  		if err := findPEFeatures(&data, reader); err != nil {
   173  			log.WithFields("error", err).Tracef("unable to determine PE features for %q", loc.RealPath)
   174  		}
   175  	case file.MachO:
   176  		if err := findMachoFeatures(&data, reader); err != nil {
   177  			log.WithFields("error", err).Tracef("unable to determine Macho features for %q", loc.RealPath)
   178  		}
   179  	}
   180  
   181  	// always allocate collections for presentation
   182  	if data.ImportedLibraries == nil {
   183  		data.ImportedLibraries = []string{}
   184  	}
   185  
   186  	return &data, nil
   187  }
   188  
   189  func findExecutableFormat(reader unionreader.UnionReader) (file.ExecutableFormat, error) {
   190  	// read the first sector of the file
   191  	buf := make([]byte, 512)
   192  	n, err := reader.ReadAt(buf, 0)
   193  	if err != nil {
   194  		return "", fmt.Errorf("unable to read first sector of file: %w", err)
   195  	}
   196  	if n < 512 {
   197  		return "", fmt.Errorf("unable to read enough bytes to determine executable format")
   198  	}
   199  
   200  	switch {
   201  	case isMacho(buf):
   202  		return file.MachO, nil
   203  	case isPE(buf):
   204  		return file.PE, nil
   205  	case isELF(buf):
   206  		return file.ELF, nil
   207  	}
   208  
   209  	return "", nil
   210  }
   211  
   212  func isMacho(by []byte) bool {
   213  	// sourced from https://github.com/gabriel-vasile/mimetype/blob/02af149c0dfd1444d9256fc33c2012bb3153e1d2/internal/magic/binary.go#L44
   214  
   215  	if classOrMachOFat(by) && by[7] < 20 {
   216  		return true
   217  	}
   218  
   219  	if len(by) < 4 {
   220  		return false
   221  	}
   222  
   223  	be := binary.BigEndian.Uint32(by)
   224  	le := binary.LittleEndian.Uint32(by)
   225  
   226  	return be == macho.Magic32 ||
   227  		le == macho.Magic32 ||
   228  		be == macho.Magic64 ||
   229  		le == macho.Magic64
   230  }
   231  
   232  // Java bytecode and Mach-O binaries share the same magic number.
   233  // More info here https://github.com/threatstack/libmagic/blob/master/magic/Magdir/cafebabe
   234  func classOrMachOFat(in []byte) bool {
   235  	// sourced from https://github.com/gabriel-vasile/mimetype/blob/02af149c0dfd1444d9256fc33c2012bb3153e1d2/internal/magic/binary.go#L44
   236  
   237  	// There should be at least 8 bytes for both of them because the only way to
   238  	// quickly distinguish them is by comparing byte at position 7
   239  	if len(in) < 8 {
   240  		return false
   241  	}
   242  
   243  	return bytes.HasPrefix(in, []byte{0xCA, 0xFE, 0xBA, 0xBE})
   244  }
   245  
   246  func isPE(by []byte) bool {
   247  	return bytes.HasPrefix(by, []byte("MZ"))
   248  }
   249  
   250  func isELF(by []byte) bool {
   251  	return bytes.HasPrefix(by, []byte(elf.ELFMAG))
   252  }