github.com/anchore/syft@v1.38.2/syft/file/cataloger/filecontent/cataloger.go (about) 1 package filecontent 2 3 import ( 4 "bytes" 5 "context" 6 "encoding/base64" 7 "fmt" 8 "io" 9 10 "github.com/dustin/go-humanize" 11 12 "github.com/anchore/syft/internal" 13 "github.com/anchore/syft/internal/bus" 14 intFile "github.com/anchore/syft/internal/file" 15 "github.com/anchore/syft/internal/log" 16 "github.com/anchore/syft/internal/unknown" 17 "github.com/anchore/syft/syft/event/monitor" 18 "github.com/anchore/syft/syft/file" 19 ) 20 21 type Config struct { 22 // Globs are the file patterns that must be matched for a file to be considered for cataloging. 23 Globs []string `yaml:"globs" json:"globs" mapstructure:"globs"` 24 25 // SkipFilesAboveSize is the maximum file size (in bytes) to allow to be considered while cataloging. If the file is larger than this size it will be skipped. 26 SkipFilesAboveSize int64 `yaml:"skip-files-above-size" json:"skip-files-above-size" mapstructure:"skip-files-above-size"` 27 } 28 29 type Cataloger struct { 30 globs []string 31 skipFilesAboveSizeInBytes int64 32 } 33 34 func DefaultConfig() Config { 35 return Config{ 36 SkipFilesAboveSize: 250 * intFile.KB, 37 } 38 } 39 40 func NewCataloger(cfg Config) *Cataloger { 41 return &Cataloger{ 42 globs: cfg.Globs, 43 skipFilesAboveSizeInBytes: cfg.SkipFilesAboveSize, 44 } 45 } 46 47 func (i *Cataloger) Catalog(_ context.Context, resolver file.Resolver) (map[file.Coordinates]string, error) { 48 results := make(map[file.Coordinates]string) 49 var locations []file.Location 50 var errs error 51 52 locations, err := resolver.FilesByGlob(i.globs...) 53 if err != nil { 54 return nil, err 55 } 56 57 prog := catalogingProgress(int64(len(locations))) 58 59 for _, location := range locations { 60 prog.AtomicStage.Set(location.Path()) 61 62 metadata, err := resolver.FileMetadataByLocation(location) 63 if err != nil { 64 errs = unknown.Append(errs, location, err) 65 prog.SetError(err) 66 continue 67 } 68 69 if i.skipFilesAboveSizeInBytes > 0 && metadata.Size() > i.skipFilesAboveSizeInBytes { 70 continue 71 } 72 73 result, err := i.catalogLocation(resolver, location) 74 if internal.IsErrPathPermission(err) { 75 errs = unknown.Append(errs, location, fmt.Errorf("permission error reading file contents: %w", err)) 76 continue 77 } 78 if err != nil { 79 errs = unknown.Append(errs, location, err) 80 continue 81 } 82 83 prog.Increment() 84 85 results[location.Coordinates] = result 86 } 87 88 log.Debugf("file contents cataloger processed %d files", len(results)) 89 90 prog.AtomicStage.Set(fmt.Sprintf("%s files", humanize.Comma(prog.Current()))) 91 prog.SetCompleted() 92 93 return results, errs 94 } 95 96 func (i *Cataloger) catalogLocation(resolver file.Resolver, location file.Location) (string, error) { 97 contentReader, err := resolver.FileContentsByLocation(location) 98 if err != nil { 99 return "", err 100 } 101 defer internal.CloseAndLogError(contentReader, location.AccessPath) 102 103 buf := &bytes.Buffer{} 104 encoder := base64.NewEncoder(base64.StdEncoding, buf) 105 if _, err = io.Copy(encoder, contentReader); err != nil { 106 return "", internal.ErrPath{Context: "content-cataloger", Path: location.RealPath, Err: err} 107 } 108 // note: it's important to close the reader before reading from the buffer since closing will flush the remaining bytes 109 if err := encoder.Close(); err != nil { 110 return "", fmt.Errorf("unable to close base64 encoder: %w", err) 111 } 112 113 return buf.String(), nil 114 } 115 116 func catalogingProgress(locations int64) *monitor.TaskProgress { 117 info := monitor.GenericTask{ 118 Title: monitor.Title{ 119 Default: "File contents", 120 }, 121 ParentID: monitor.TopLevelCatalogingTaskID, 122 } 123 124 return bus.StartCatalogerTask(info, locations, "") 125 }