github.com/anchore/syft@v1.4.2-0.20240516191711-1bec1fc5d397/syft/file/cataloger/filecontent/cataloger.go (about) 1 package filecontent 2 3 import ( 4 "bytes" 5 "context" 6 "encoding/base64" 7 "fmt" 8 "io" 9 10 "github.com/dustin/go-humanize" 11 12 "github.com/anchore/syft/internal" 13 "github.com/anchore/syft/internal/bus" 14 intFile "github.com/anchore/syft/internal/file" 15 "github.com/anchore/syft/internal/log" 16 "github.com/anchore/syft/syft/event/monitor" 17 "github.com/anchore/syft/syft/file" 18 ) 19 20 type Config struct { 21 // Globs are the file patterns that must be matched for a file to be considered for cataloging. 22 Globs []string `yaml:"globs" json:"globs" mapstructure:"globs"` 23 24 // SkipFilesAboveSize is the maximum file size (in bytes) to allow to be considered while cataloging. If the file is larger than this size it will be skipped. 25 SkipFilesAboveSize int64 `yaml:"skip-files-above-size" json:"skip-files-above-size" mapstructure:"skip-files-above-size"` 26 } 27 28 type Cataloger struct { 29 globs []string 30 skipFilesAboveSizeInBytes int64 31 } 32 33 func DefaultConfig() Config { 34 return Config{ 35 SkipFilesAboveSize: 250 * intFile.KB, 36 } 37 } 38 39 func NewCataloger(cfg Config) *Cataloger { 40 return &Cataloger{ 41 globs: cfg.Globs, 42 skipFilesAboveSizeInBytes: cfg.SkipFilesAboveSize, 43 } 44 } 45 46 func (i *Cataloger) Catalog(_ context.Context, resolver file.Resolver) (map[file.Coordinates]string, error) { 47 results := make(map[file.Coordinates]string) 48 var locations []file.Location 49 50 locations, err := resolver.FilesByGlob(i.globs...) 51 if err != nil { 52 return nil, err 53 } 54 55 prog := catalogingProgress(int64(len(locations))) 56 57 for _, location := range locations { 58 prog.AtomicStage.Set(location.Path()) 59 60 metadata, err := resolver.FileMetadataByLocation(location) 61 if err != nil { 62 prog.SetError(err) 63 return nil, err 64 } 65 66 if i.skipFilesAboveSizeInBytes > 0 && metadata.Size() > i.skipFilesAboveSizeInBytes { 67 continue 68 } 69 70 result, err := i.catalogLocation(resolver, location) 71 if internal.IsErrPathPermission(err) { 72 log.Debugf("file contents cataloger skipping - %+v", err) 73 continue 74 } 75 if err != nil { 76 prog.SetError(err) 77 return nil, err 78 } 79 80 prog.Increment() 81 82 results[location.Coordinates] = result 83 } 84 85 log.Debugf("file contents cataloger processed %d files", len(results)) 86 87 prog.AtomicStage.Set(fmt.Sprintf("%s files", humanize.Comma(prog.Current()))) 88 prog.SetCompleted() 89 90 return results, nil 91 } 92 93 func (i *Cataloger) catalogLocation(resolver file.Resolver, location file.Location) (string, error) { 94 contentReader, err := resolver.FileContentsByLocation(location) 95 if err != nil { 96 return "", err 97 } 98 defer internal.CloseAndLogError(contentReader, location.AccessPath) 99 100 buf := &bytes.Buffer{} 101 encoder := base64.NewEncoder(base64.StdEncoding, buf) 102 if _, err = io.Copy(encoder, contentReader); err != nil { 103 return "", internal.ErrPath{Context: "content-cataloger", Path: location.RealPath, Err: err} 104 } 105 // note: it's important to close the reader before reading from the buffer since closing will flush the remaining bytes 106 if err := encoder.Close(); err != nil { 107 return "", fmt.Errorf("unable to close base64 encoder: %w", err) 108 } 109 110 return buf.String(), nil 111 } 112 113 func catalogingProgress(locations int64) *monitor.CatalogerTaskProgress { 114 info := monitor.GenericTask{ 115 Title: monitor.Title{ 116 Default: "File contents", 117 }, 118 ParentID: monitor.TopLevelCatalogingTaskID, 119 } 120 121 return bus.StartCatalogerTask(info, locations, "") 122 }