github.com/anchore/syft@v1.38.2/syft/file/cataloger/executable/cataloger.go (about) 1 package executable 2 3 import ( 4 "bytes" 5 "context" 6 "debug/elf" 7 "debug/macho" 8 "encoding/binary" 9 "fmt" 10 "sort" 11 12 "github.com/bmatcuk/doublestar/v4" 13 "github.com/dustin/go-humanize" 14 15 "github.com/anchore/go-sync" 16 "github.com/anchore/syft/internal" 17 "github.com/anchore/syft/internal/bus" 18 "github.com/anchore/syft/internal/log" 19 "github.com/anchore/syft/internal/mimetype" 20 "github.com/anchore/syft/internal/unknown" 21 "github.com/anchore/syft/syft/cataloging" 22 "github.com/anchore/syft/syft/event/monitor" 23 "github.com/anchore/syft/syft/file" 24 "github.com/anchore/syft/syft/internal/unionreader" 25 ) 26 27 type Config struct { 28 MIMETypes []string `json:"mime-types" yaml:"mime-types" mapstructure:"mime-types"` 29 Globs []string `json:"globs" yaml:"globs" mapstructure:"globs"` 30 } 31 32 type Cataloger struct { 33 config Config 34 } 35 36 func DefaultConfig() Config { 37 m := mimetype.ExecutableMIMETypeSet.List() 38 sort.Strings(m) 39 return Config{ 40 MIMETypes: m, 41 Globs: nil, 42 } 43 } 44 45 func NewCataloger(cfg Config) *Cataloger { 46 return &Cataloger{ 47 config: cfg, 48 } 49 } 50 51 func (i *Cataloger) Catalog(resolver file.Resolver) (map[file.Coordinates]file.Executable, error) { 52 return i.CatalogCtx(context.Background(), resolver) 53 } 54 55 func (i *Cataloger) CatalogCtx(ctx context.Context, resolver file.Resolver) (map[file.Coordinates]file.Executable, error) { 56 locs, err := resolver.FilesByMIMEType(i.config.MIMETypes...) 
57 if err != nil { 58 return nil, fmt.Errorf("unable to get file locations for binaries: %w", err) 59 } 60 61 locs, err = filterByGlobs(locs, i.config.Globs) 62 if err != nil { 63 return nil, err 64 } 65 66 prog := catalogingProgress(int64(len(locs))) 67 68 results := make(map[file.Coordinates]file.Executable) 69 errs := sync.Collect(&ctx, cataloging.ExecutorFile, sync.ToSeq(locs), func(loc file.Location) (*file.Executable, error) { 70 prog.AtomicStage.Set(loc.Path()) 71 72 exec, err := processExecutableLocation(loc, resolver) 73 if err != nil { 74 err = unknown.New(loc, err) 75 } 76 return exec, err 77 }, func(loc file.Location, exec *file.Executable) { 78 if exec != nil { 79 prog.Increment() 80 results[loc.Coordinates] = *exec 81 } 82 }) 83 84 log.Debugf("executable cataloger processed %d files", len(results)) 85 86 prog.AtomicStage.Set(fmt.Sprintf("%s executables", humanize.Comma(prog.Current()))) 87 prog.SetCompleted() 88 89 return results, errs 90 } 91 92 func processExecutableLocation(loc file.Location, resolver file.Resolver) (*file.Executable, error) { 93 reader, err := resolver.FileContentsByLocation(loc) 94 if err != nil { 95 log.WithFields("error", err, "path", loc.RealPath).Debug("unable to get file contents") 96 return nil, fmt.Errorf("unable to get file contents: %w", err) 97 } 98 defer internal.CloseAndLogError(reader, loc.RealPath) 99 100 uReader, err := unionreader.GetUnionReader(reader) 101 if err != nil { 102 log.WithFields("error", err, "path", loc.RealPath).Debug("unable to get union reader") 103 return nil, fmt.Errorf("unable to get union reader: %w", err) 104 } 105 106 return processExecutable(loc, uReader) 107 } 108 109 func catalogingProgress(locations int64) *monitor.TaskProgress { 110 info := monitor.GenericTask{ 111 Title: monitor.Title{ 112 Default: "Executables", 113 }, 114 ParentID: monitor.TopLevelCatalogingTaskID, 115 } 116 117 return bus.StartCatalogerTask(info, locations, "") 118 } 119 120 func filterByGlobs(locs []file.Location, 
globs []string) ([]file.Location, error) { 121 if len(globs) == 0 { 122 return locs, nil 123 } 124 var filteredLocs []file.Location 125 for _, loc := range locs { 126 matches, err := locationMatchesGlob(loc, globs) 127 if err != nil { 128 return nil, err 129 } 130 if matches { 131 filteredLocs = append(filteredLocs, loc) 132 } 133 } 134 return filteredLocs, nil 135 } 136 137 func locationMatchesGlob(loc file.Location, globs []string) (bool, error) { 138 for _, glob := range globs { 139 for _, path := range []string{loc.RealPath, loc.AccessPath} { 140 if path == "" { 141 continue 142 } 143 matches, err := doublestar.Match(glob, path) 144 if err != nil { 145 return false, fmt.Errorf("unable to match glob %q to path %q: %w", glob, path, err) 146 } 147 if matches { 148 return true, nil 149 } 150 } 151 } 152 return false, nil 153 } 154 155 func processExecutable(loc file.Location, reader unionreader.UnionReader) (*file.Executable, error) { 156 data := file.Executable{} 157 158 // determine the executable format 159 160 format, err := findExecutableFormat(reader) 161 if err != nil { 162 log.Debugf("unable to determine executable kind for %v: %v", loc.RealPath, err) 163 return nil, fmt.Errorf("unable to determine executable kind: %w", err) 164 } 165 166 if format == "" { 167 // this is not an "unknown", so just log -- this binary does not have parseable data in it 168 log.Debugf("unable to determine executable format for %q", loc.RealPath) 169 return nil, nil 170 } 171 172 data.Format = format 173 174 switch format { 175 case file.ELF: 176 if err = findELFFeatures(&data, reader); err != nil { 177 log.WithFields("error", err, "path", loc.RealPath).Trace("unable to determine ELF features") 178 err = fmt.Errorf("unable to determine ELF features: %w", err) 179 } 180 case file.PE: 181 if err = findPEFeatures(&data, reader); err != nil { 182 log.WithFields("error", err, "path", loc.RealPath).Trace("unable to determine PE features") 183 err = fmt.Errorf("unable to determine PE 
features: %w", err) 184 } 185 case file.MachO: 186 if err = findMachoFeatures(&data, reader); err != nil { 187 log.WithFields("error", err, "path", loc.RealPath).Trace("unable to determine Macho features") 188 err = fmt.Errorf("unable to determine Macho features: %w", err) 189 } 190 } 191 192 // always allocate collections for presentation 193 if data.ImportedLibraries == nil { 194 data.ImportedLibraries = []string{} 195 } 196 197 return &data, err 198 } 199 200 func findExecutableFormat(reader unionreader.UnionReader) (file.ExecutableFormat, error) { 201 // read the first sector of the file 202 buf := make([]byte, 512) 203 n, err := reader.ReadAt(buf, 0) 204 if err != nil { 205 return "", fmt.Errorf("unable to read first sector of file: %w", err) 206 } 207 if n < 512 { 208 return "", fmt.Errorf("unable to read enough bytes to determine executable format") 209 } 210 211 switch { 212 case isMacho(buf): 213 return file.MachO, nil 214 case isPE(buf): 215 return file.PE, nil 216 case isELF(buf): 217 return file.ELF, nil 218 } 219 220 return "", nil 221 } 222 223 func isMacho(by []byte) bool { 224 // sourced from https://github.com/gabriel-vasile/mimetype/blob/02af149c0dfd1444d9256fc33c2012bb3153e1d2/internal/magic/binary.go#L44 225 226 if classOrMachOFat(by) && by[7] < 20 { 227 return true 228 } 229 230 if len(by) < 4 { 231 return false 232 } 233 234 be := binary.BigEndian.Uint32(by) 235 le := binary.LittleEndian.Uint32(by) 236 237 return be == macho.Magic32 || 238 le == macho.Magic32 || 239 be == macho.Magic64 || 240 le == macho.Magic64 241 } 242 243 // Java bytecode and Mach-O binaries share the same magic number. 
// More info here https://github.com/threatstack/libmagic/blob/master/magic/Magdir/cafebabe
//
// classOrMachOFat reports whether in is at least 8 bytes long and begins with the
// 0xCAFEBABE magic number.
func classOrMachOFat(in []byte) bool {
	// sourced from https://github.com/gabriel-vasile/mimetype/blob/02af149c0dfd1444d9256fc33c2012bb3153e1d2/internal/magic/binary.go#L44

	// Both formats need at least 8 bytes here: the only quick way to tell them
	// apart is the byte at position 7, which callers inspect after this check.
	magic := []byte{0xCA, 0xFE, 0xBA, 0xBE}
	return len(in) >= 8 && bytes.Equal(in[:len(magic)], magic)
}

// isPE reports whether by begins with the "MZ" signature.
func isPE(by []byte) bool {
	return len(by) >= 2 && by[0] == 'M' && by[1] == 'Z'
}

// isELF reports whether by begins with the ELF magic number.
func isELF(by []byte) bool {
	magic := []byte(elf.ELFMAG)
	if len(by) < len(magic) {
		return false
	}
	return bytes.Equal(by[:len(magic)], magic)
}