github.com/anchore/syft@v1.38.2/syft/file/cataloger/filedigest/cataloger.go (about) 1 package filedigest 2 3 import ( 4 "context" 5 "crypto" 6 "errors" 7 "fmt" 8 9 "github.com/dustin/go-humanize" 10 11 "github.com/anchore/go-sync" 12 stereoscopeFile "github.com/anchore/stereoscope/pkg/file" 13 "github.com/anchore/syft/internal" 14 "github.com/anchore/syft/internal/bus" 15 intFile "github.com/anchore/syft/internal/file" 16 "github.com/anchore/syft/internal/log" 17 "github.com/anchore/syft/internal/unknown" 18 "github.com/anchore/syft/syft/cataloging" 19 "github.com/anchore/syft/syft/event/monitor" 20 "github.com/anchore/syft/syft/file" 21 intCataloger "github.com/anchore/syft/syft/file/cataloger/internal" 22 ) 23 24 var ErrUndigestableFile = errors.New("undigestable file") 25 26 type Cataloger struct { 27 hashes []crypto.Hash 28 } 29 30 func NewCataloger(hashes []crypto.Hash) *Cataloger { 31 return &Cataloger{ 32 hashes: intFile.NormalizeHashes(hashes), 33 } 34 } 35 36 func (i *Cataloger) Catalog(ctx context.Context, resolver file.Resolver, coordinates ...file.Coordinates) (map[file.Coordinates][]file.Digest, error) { 37 results := make(map[file.Coordinates][]file.Digest) 38 var locations []file.Location 39 40 if len(coordinates) == 0 { 41 locations = intCataloger.AllRegularFiles(ctx, resolver) 42 } else { 43 for _, c := range coordinates { 44 locs, err := resolver.FilesByPath(c.RealPath) 45 if err != nil { 46 return nil, fmt.Errorf("unable to get file locations for path %q: %w", c.RealPath, err) 47 } 48 locations = append(locations, locs...) 49 } 50 } 51 52 prog := catalogingProgress(int64(len(locations))) 53 54 err := sync.Collect(&ctx, cataloging.ExecutorFile, sync.ToSeq(locations), func(location file.Location) ([]file.Digest, error) { 55 result, err := i.catalogLocation(ctx, resolver, location) 56 57 if errors.Is(err, ErrUndigestableFile) { 58 return nil, nil 59 } 60 61 prog.AtomicStage.Set(location.Path()) 62 63 if internal.IsErrPathPermission(err) { 64 log.Debugf("file digests cataloger skipping %q: %+v", location.RealPath, err) 65 return nil, unknown.New(location, err) 66 } 67 68 if err != nil { 69 prog.SetError(err) 70 return nil, unknown.New(location, err) 71 } 72 73 prog.Increment() 74 75 return result, nil 76 }, func(location file.Location, digests []file.Digest) { 77 if len(digests) > 0 { 78 results[location.Coordinates] = digests 79 } 80 }) 81 82 log.Debugf("file digests cataloger processed %d files", prog.Current()) 83 84 prog.AtomicStage.Set(fmt.Sprintf("%s files", humanize.Comma(prog.Current()))) 85 prog.SetCompleted() 86 87 return results, err 88 } 89 90 func (i *Cataloger) catalogLocation(ctx context.Context, resolver file.Resolver, location file.Location) ([]file.Digest, error) { 91 meta, err := resolver.FileMetadataByLocation(location) 92 if err != nil { 93 return nil, err 94 } 95 96 // we should only attempt to report digests for files that are regular files (don't attempt to resolve links) 97 if meta.Type != stereoscopeFile.TypeRegular { 98 return nil, ErrUndigestableFile 99 } 100 101 contentReader, err := resolver.FileContentsByLocation(location) 102 if err != nil { 103 return nil, err 104 } 105 defer internal.CloseAndLogError(contentReader, location.AccessPath) 106 107 digests, err := intFile.NewDigestsFromFile(ctx, contentReader, i.hashes) 108 if err != nil { 109 return nil, internal.ErrPath{Context: "digests-cataloger", Path: location.RealPath, Err: err} 110 } 111 112 return digests, nil 113 } 114 115 func catalogingProgress(locations int64) *monitor.TaskProgress { 116 info := monitor.GenericTask{ 117 Title: monitor.Title{ 118 Default: "File digests", 119 }, 120 ParentID: monitor.TopLevelCatalogingTaskID, 121 } 122 123 return bus.StartCatalogerTask(info, locations, "") 124 }