github.com/anchore/syft@v1.4.2-0.20240516191711-1bec1fc5d397/syft/file/cataloger/executable/cataloger.go (about) 1 package executable 2 3 import ( 4 "bytes" 5 "debug/elf" 6 "debug/macho" 7 "encoding/binary" 8 "fmt" 9 "sort" 10 11 "github.com/bmatcuk/doublestar/v4" 12 "github.com/dustin/go-humanize" 13 14 "github.com/anchore/syft/internal" 15 "github.com/anchore/syft/internal/bus" 16 "github.com/anchore/syft/internal/log" 17 "github.com/anchore/syft/internal/mimetype" 18 "github.com/anchore/syft/syft/event/monitor" 19 "github.com/anchore/syft/syft/file" 20 "github.com/anchore/syft/syft/internal/unionreader" 21 ) 22 23 type Config struct { 24 MIMETypes []string `json:"mime-types" yaml:"mime-types" mapstructure:"mime-types"` 25 Globs []string `json:"globs" yaml:"globs" mapstructure:"globs"` 26 } 27 28 type Cataloger struct { 29 config Config 30 } 31 32 func DefaultConfig() Config { 33 m := mimetype.ExecutableMIMETypeSet.List() 34 sort.Strings(m) 35 return Config{ 36 MIMETypes: m, 37 Globs: nil, 38 } 39 } 40 41 func NewCataloger(cfg Config) *Cataloger { 42 return &Cataloger{ 43 config: cfg, 44 } 45 } 46 47 func (i *Cataloger) Catalog(resolver file.Resolver) (map[file.Coordinates]file.Executable, error) { 48 locs, err := resolver.FilesByMIMEType(i.config.MIMETypes...) 49 if err != nil { 50 return nil, fmt.Errorf("unable to get file locations for binaries: %w", err) 51 } 52 53 locs, err = filterByGlobs(locs, i.config.Globs) 54 if err != nil { 55 return nil, err 56 } 57 58 prog := catalogingProgress(int64(len(locs))) 59 60 results := make(map[file.Coordinates]file.Executable) 61 for _, loc := range locs { 62 prog.AtomicStage.Set(loc.Path()) 63 64 exec := processExecutableLocation(loc, resolver) 65 66 if exec != nil { 67 prog.Increment() 68 results[loc.Coordinates] = *exec 69 } 70 } 71 72 log.Debugf("executable cataloger processed %d files", len(results)) 73 74 prog.AtomicStage.Set(fmt.Sprintf("%s executables", humanize.Comma(prog.Current()))) 75 prog.SetCompleted() 76 77 return results, nil 78 } 79 80 func processExecutableLocation(loc file.Location, resolver file.Resolver) *file.Executable { 81 reader, err := resolver.FileContentsByLocation(loc) 82 if err != nil { 83 // TODO: known-unknowns 84 log.WithFields("error", err).Warnf("unable to get file contents for %q", loc.RealPath) 85 return nil 86 } 87 defer internal.CloseAndLogError(reader, loc.RealPath) 88 89 uReader, err := unionreader.GetUnionReader(reader) 90 if err != nil { 91 // TODO: known-unknowns 92 log.WithFields("error", err).Warnf("unable to get union reader for %q", loc.RealPath) 93 return nil 94 } 95 96 exec, err := processExecutable(loc, uReader) 97 if err != nil { 98 log.WithFields("error", err).Warnf("unable to process executable %q", loc.RealPath) 99 } 100 return exec 101 } 102 103 func catalogingProgress(locations int64) *monitor.CatalogerTaskProgress { 104 info := monitor.GenericTask{ 105 Title: monitor.Title{ 106 Default: "Executables", 107 }, 108 ParentID: monitor.TopLevelCatalogingTaskID, 109 } 110 111 return bus.StartCatalogerTask(info, locations, "") 112 } 113 114 func filterByGlobs(locs []file.Location, globs []string) ([]file.Location, error) { 115 if len(globs) == 0 { 116 return locs, nil 117 } 118 var filteredLocs []file.Location 119 for _, loc := range locs { 120 matches, err := locationMatchesGlob(loc, globs) 121 if err != nil { 122 return nil, err 123 } 124 if matches { 125 filteredLocs = append(filteredLocs, loc) 126 } 127 } 128 return filteredLocs, nil 129 } 130 131 func locationMatchesGlob(loc file.Location, globs []string) (bool, error) { 132 for _, glob := range globs { 133 for _, path := range []string{loc.RealPath, loc.AccessPath} { 134 if path == "" { 135 continue 136 } 137 matches, err := doublestar.Match(glob, path) 138 if err != nil { 139 return false, fmt.Errorf("unable to match glob %q to path %q: %w", glob, path, err) 140 } 141 if matches { 142 return true, nil 143 } 144 } 145 } 146 return false, nil 147 } 148 149 func processExecutable(loc file.Location, reader unionreader.UnionReader) (*file.Executable, error) { 150 data := file.Executable{} 151 152 // determine the executable format 153 154 format, err := findExecutableFormat(reader) 155 if err != nil { 156 return nil, fmt.Errorf("unable to determine executable kind: %w", err) 157 } 158 159 if format == "" { 160 log.Debugf("unable to determine executable format for %q", loc.RealPath) 161 return nil, nil 162 } 163 164 data.Format = format 165 166 switch format { 167 case file.ELF: 168 if err := findELFFeatures(&data, reader); err != nil { 169 log.WithFields("error", err).Tracef("unable to determine ELF features for %q", loc.RealPath) 170 } 171 case file.PE: 172 if err := findPEFeatures(&data, reader); err != nil { 173 log.WithFields("error", err).Tracef("unable to determine PE features for %q", loc.RealPath) 174 } 175 case file.MachO: 176 if err := findMachoFeatures(&data, reader); err != nil { 177 log.WithFields("error", err).Tracef("unable to determine Macho features for %q", loc.RealPath) 178 } 179 } 180 181 // always allocate collections for presentation 182 if data.ImportedLibraries == nil { 183 data.ImportedLibraries = []string{} 184 } 185 186 return &data, nil 187 } 188 189 func findExecutableFormat(reader unionreader.UnionReader) (file.ExecutableFormat, error) { 190 // read the first sector of the file 191 buf := make([]byte, 512) 192 n, err := reader.ReadAt(buf, 0) 193 if err != nil { 194 return "", fmt.Errorf("unable to read first sector of file: %w", err) 195 } 196 if n < 512 { 197 return "", fmt.Errorf("unable to read enough bytes to determine executable format") 198 } 199 200 switch { 201 case isMacho(buf): 202 return file.MachO, nil 203 case isPE(buf): 204 return file.PE, nil 205 case isELF(buf): 206 return file.ELF, nil 207 } 208 209 return "", nil 210 } 211 212 func isMacho(by []byte) bool { 213 // sourced from https://github.com/gabriel-vasile/mimetype/blob/02af149c0dfd1444d9256fc33c2012bb3153e1d2/internal/magic/binary.go#L44 214 215 if classOrMachOFat(by) && by[7] < 20 { 216 return true 217 } 218 219 if len(by) < 4 { 220 return false 221 } 222 223 be := binary.BigEndian.Uint32(by) 224 le := binary.LittleEndian.Uint32(by) 225 226 return be == macho.Magic32 || 227 le == macho.Magic32 || 228 be == macho.Magic64 || 229 le == macho.Magic64 230 } 231 232 // Java bytecode and Mach-O binaries share the same magic number. 233 // More info here https://github.com/threatstack/libmagic/blob/master/magic/Magdir/cafebabe 234 func classOrMachOFat(in []byte) bool { 235 // sourced from https://github.com/gabriel-vasile/mimetype/blob/02af149c0dfd1444d9256fc33c2012bb3153e1d2/internal/magic/binary.go#L44 236 237 // There should be at least 8 bytes for both of them because the only way to 238 // quickly distinguish them is by comparing byte at position 7 239 if len(in) < 8 { 240 return false 241 } 242 243 return bytes.HasPrefix(in, []byte{0xCA, 0xFE, 0xBA, 0xBE}) 244 } 245 246 func isPE(by []byte) bool { 247 return bytes.HasPrefix(by, []byte("MZ")) 248 } 249 250 func isELF(by []byte) bool { 251 return bytes.HasPrefix(by, []byte(elf.ELFMAG)) 252 }